run_InfiAgent-DABench_all.py
import fire
import pandas as pd
from DABench import DABench

from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history


async def main():
    """Evaluate every DABench task with a fresh DataInterpreter.

    For each task id in ``bench.answers`` the formatted prompt is run through
    a new ``DataInterpreter``, the role history is saved, and the result is
    scored with ``bench.eval``. A task that raises is logged and scored with
    an empty prediction so one failure does not abort the whole run.

    Side effects:
        * writes ``DABench_output.xlsx`` (columns ``Label``, ``Prediction``,
          ``T/F``) relative to the current working directory;
        * logs each raw result and the aggregate ``bench.eval_all`` outcome.
    """
    bench = DABench()
    id_list, predictions, labels, is_true = [], [], [], []
    # Only the task ids are needed; labels are fetched through get_answer.
    for key in bench.answers:
        id_list.append(key)
        labels.append(str(bench.get_answer(key)))
        try:
            requirement = bench.generate_formatted_prompt(key)
            di = DataInterpreter()
            result = await di.run(requirement)
            logger.info(result)
            save_history(role=di)
            prediction, correct = bench.eval(key, str(result))
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit can still stop the batch.
            logger.exception(f"Task {key} failed; scoring it with an empty prediction")
            # bench.eval returns a (prediction, is_correct) pair; keep only the
            # flag so the "T/F" column has the same shape as on success.
            _, correct = bench.eval(key, "")
            prediction = ""
        # Append both columns together, after the try block, so the lists can
        # never end up with different lengths.
        predictions.append(str(prediction))
        is_true.append(str(correct))
    df = pd.DataFrame({"Label": labels, "Prediction": predictions, "T/F": is_true})
    df.to_excel("DABench_output.xlsx", index=False)
    logger.info(bench.eval_all(id_list, predictions))


if __name__ == "__main__":
    fire.Fire(main)