# temp.py
 1  from data import get_bigcodebench, write_jsonl
 2  
 3  subset = 'hard'
 4  split = 'complete'
 5  dataset = get_bigcodebench(subset=subset)
 6  
 7  task = f'bigcodebench/bcb_{split}_{subset}'
 8  outputs = []
 9  for key, value in dataset.items():
10      outputs.append({
11          'task_id': value['task_id'],
12          'prompt': value['complete_prompt'] if split == 'complete' else value['instruct_prompt'],
13          'complete_prompt': value['complete_prompt'],
14          'instruct_prompt': value['instruct_prompt'],
15          'canonical_solution': value['canonical_solution'],
16          'code_prompt': value['code_prompt'],
17          'test': value['test'],
18          'entry_point': value['entry_point'],
19          'doc_struct': value['doc_struct'],
20          'libs': value['libs'],
21          'task': task,
22      })
23  
24  save_path = f'{task}.jsonl'
25  write_jsonl(save_path, outputs)