temp.py
"""Export one BigCodeBench subset to a JSONL file, one record per task.

The script loads the subset via ``get_bigcodebench``, copies the fields of
each task into a flat record (adding a generic ``prompt`` field chosen by
``split`` and a ``task`` label), and writes the records with ``write_jsonl``
to ``bigcodebench/bcb_<split>_<subset>.jsonl``.
"""
from pathlib import Path

from data import get_bigcodebench, write_jsonl

subset = 'hard'
split = 'complete'
dataset = get_bigcodebench(subset=subset)

# Label stored on every record; it also doubles as the output path stem.
task = f'bigcodebench/bcb_{split}_{subset}'

# Which prompt variant is exposed as the generic 'prompt' field depends only
# on the split, so resolve it once instead of once per record. Any split
# other than 'complete' falls back to the instruct prompt.
prompt_field = 'complete_prompt' if split == 'complete' else 'instruct_prompt'

# Only the task payloads are needed; the mapping keys are not used.
outputs = [
    {
        'task_id': value['task_id'],
        'prompt': value[prompt_field],
        'complete_prompt': value['complete_prompt'],
        'instruct_prompt': value['instruct_prompt'],
        'canonical_solution': value['canonical_solution'],
        'code_prompt': value['code_prompt'],
        'test': value['test'],
        'entry_point': value['entry_point'],
        'doc_struct': value['doc_struct'],
        'libs': value['libs'],
        'task': task,
    }
    for value in dataset.values()
]

save_path = f'{task}.jsonl'
# The path lives under 'bigcodebench/'. It is not visible here whether
# write_jsonl creates missing parent directories, so make sure it exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
write_jsonl(save_path, outputs)