# custom_inspect.py
from data import get_bigcodebench

import argparse
import json
import os
import shutil


def inspection(args):
    """Write per-task inspection files for every non-passing task of an eval run.

    Output goes under ``inspect/<eval-results-filename-without-.json>/``.
    For each task that has at least one failing attempt, a directory named
    after the task id is created containing:

    - ``ground_truth.py``: subset prompt + canonical solution
    - ``test_case.py``: the task's test code
    - ``completion_<i>.py``: the i-th generated solution
    - ``complete_<i>_execution_trace.txt``: per-test execution traces,
      separated by a line of ``=``

    Args:
        args: parsed CLI namespace with ``eval_results`` (path to the eval
            JSON file), ``subset`` (prompt key prefix, e.g. ``"complete"``),
            and ``in_place`` (remove any previous output for this run first).
    """
    # os.path.basename is portable; the original split("/") breaks on Windows paths.
    path = os.path.join(
        "inspect", os.path.basename(args.eval_results).replace(".json", "")
    )
    if args.in_place:
        # Wipe a previous inspection of the same results file before rewriting.
        shutil.rmtree(path, ignore_errors=True)
    # exist_ok avoids the check-then-create race of the original LBYL pattern.
    os.makedirs(path, exist_ok=True)

    problems = get_bigcodebench()

    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open(args.eval_results, "r") as f:
        eval_results = json.load(f)

    for task_id, results in eval_results["eval"].items():
        # Skip tasks where every attempt passed — nothing to inspect.
        if all(result["status"] == "pass" for result in results):
            continue

        task_path = os.path.join(path, task_id)
        os.makedirs(task_path, exist_ok=True)
        task_data = problems[task_id]

        # Reference implementation: prompt + canonical solution.
        with open(os.path.join(task_path, "ground_truth.py"), "w") as f:
            f.write(
                task_data[f"{args.subset}_prompt"]
                + "\n\n"
                + task_data["canonical_solution"]
            )

        # The task's test code.
        with open(os.path.join(task_path, "test_case.py"), "w") as f:
            f.write(task_data["test"])

        # One pass per attempt writes both the solution and its trace
        # (the original iterated `results` twice for no benefit).
        for i, result in enumerate(results):
            with open(os.path.join(task_path, f"completion_{i}.py"), "w") as f:
                f.write(result["solution"])
            with open(
                os.path.join(task_path, f"complete_{i}_execution_trace.txt"), "w"
            ) as f:
                for test_case, execution_trace in result["details"].items():
                    f.write(f"Test Case: {test_case}\n\n")
                    f.write(execution_trace)
                    f.write("=" * 50 + "\n")


def main():
    """Parse CLI flags and run the inspection."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval-results", required=True, type=str)
    parser.add_argument("--subset", required=True, type=str)
    parser.add_argument("--in-place", action="store_true")
    args = parser.parse_args()

    inspection(args)


if __name__ == "__main__":
    main()