# qwencoder-eval/instruct/BigCodeBench/custom_inspect.py
 1  from data import get_bigcodebench
 2  import os
 3  import shutil
 4  import json
 5  import argparse
 6  
 7  
 8  def inspection(args):
 9      """
10      Write a series of files for each task into a directory.
11      
12      Each Directory Structure:
13      -- task_id
14          -- ground_truth.py: prompt + canonical_solution
15          -- completion.py: prompt + completion
16          -- execution_trace.txt: execution trace
17      """
18      path = os.path.join("inspect", args.eval_results.split("/")[-1].replace(".json", ""))
19      if args.in_place:
20          shutil.rmtree(path, ignore_errors=True)
21      if not os.path.exists(path):
22          os.makedirs(path)
23      problems = get_bigcodebench()
24  
25      eval_results = json.load(open(args.eval_results, "r"))
26      for task_id, results in eval_results["eval"].items():
27          if all(result["status"] == "pass" for result in results):
28              continue
29          task_path = os.path.join(path, task_id)
30          if not os.path.exists(task_path):
31              os.makedirs(task_path)
32          task_id_data = problems[task_id]
33          with open(os.path.join(task_path, "ground_truth.py"), "w") as f:
34              f.write(task_id_data[f"{args.subset}_prompt"] + "\n\n" + task_id_data["canonical_solution"])
35  
36          # write test
37          with open(os.path.join(task_path, "test_case.py"), "w") as f:
38              f.write(task_id_data["test"])
39  
40          for i, result in enumerate(results):
41              with open(os.path.join(task_path, f"completion_{i}.py"), "w") as f:
42                  f.write(result["solution"])
43  
44          for i, result in enumerate(results):
45              with open(os.path.join(task_path, f"complete_{i}_execution_trace.txt"), "w") as f:
46                  for test_case, execution_trace in result["details"].items():
47                      f.write(f"Test Case: {test_case}\n\n")
48                      f.write(execution_trace)
49                      f.write("=" * 50 + "\n")
50  
51  
def main():
    """
    Command-line entry point.

    Parses ``--eval-results`` and ``--subset`` (both required strings) and the
    optional ``--in-place`` switch, then hands the parsed namespace to
    ``inspection``.
    """
    cli = argparse.ArgumentParser()
    # The two mandatory options share the same configuration.
    for required_flag in ("--eval-results", "--subset"):
        cli.add_argument(required_flag, required=True, type=str)
    cli.add_argument("--in-place", action="store_true")

    inspection(cli.parse_args())


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()