/ qwencoder-eval / instruct / livecode_bench / convert_data.py
convert_data.py
  1  import json
  2  import datasets
  3  import os
  4  from enum import Enum
  5  from datetime import datetime
  6  from dataclasses import dataclass
  7  import jsonlines
  8  import tqdm
  9  import numpy as np
 10  class PromptConstants:
 11      SYSTEM_MESSAGE_GENERIC = f"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program."
 12  
 13      SYSTEM_MESSAGE_DEEPSEEK = f"You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you answer questions related to computer science."
 14  
 15      SYSTEM_MESSAGE_CODEQWEN = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user"
 16  
 17      SYSTEM_MESSAGE_MAGIC = f"You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n@@ Instruction\n"
 18  
 19      SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
 20  
 21      SYSTEM_MESSAGE_PHIND = f"""You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example: 
 22  ```python 
 23  # YOUR CODE HERE
 24  ```"""
 25  
 26      FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
 27  
 28      FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
 29  
 30  class Platform(Enum):
 31      LEETCODE = "leetcode"
 32      CODEFORCES = "codeforces"
 33      ATCODER = "atcoder"
 34  
 35  
 36  class Difficulty(Enum):
 37      EASY = "easy"
 38      MEDIUM = "medium"
 39      HARD = "hard"
 40  
 41  
 42  class TestType(Enum):
 43      STDIN = "stdin"
 44      FUNCTIONAL = "functional"
 45  
 46  @dataclass
 47  class Test:
 48      input: str
 49      output: str
 50      testtype: TestType
 51  
 52      def __post_init__(self):
 53          self.testtype = TestType(self.testtype)
 54          
 55  @dataclass
 56  class CodeGenerationProblem:
 57      question_title: str
 58      question_content: str
 59      platform: Platform
 60      question_id: str
 61      contest_id: str
 62      contest_date: datetime
 63      starter_code: str
 64      difficulty: Difficulty
 65      public_test_cases: list[Test]
 66      private_test_cases: list[Test]
 67      metadata: dict
 68  
 69      def __post_init__(self):
 70          self.platform = Platform(self.platform)
 71          self.difficulty = Difficulty(self.difficulty)
 72  
 73          self.public_test_cases = json.loads(self.public_test_cases)
 74          self.public_test_cases = [Test(**t) for t in self.public_test_cases]
 75  
 76          self.private_test_cases = json.loads(self.private_test_cases)
 77          self.private_test_cases = [Test(**t) for t in self.private_test_cases]
 78  
 79          self.metadata = json.loads(self.metadata)
 80  
 81      def insert_output(self, output_list: list[str], code_list: list[str]) -> dict:
 82          return {
 83              "question_title": self.question_title,
 84              "question_content": self.question_content,
 85              "platform": self.platform.value,
 86              "question_id": self.question_id,
 87              "contest_id": self.contest_id,
 88              "contest_date": self.contest_date.isoformat(),
 89              "starter_code": self.starter_code,
 90              "difficulty": self.difficulty.value,
 91              "output_list": output_list,
 92              "code_list": code_list,
 93          }
 94  
 95      def insert_output_evaluation(
 96          self, output_list: list[str], code_list: list[str], graded_list: list[bool]
 97      ) -> dict:
 98          output = self.insert_output(output_list, code_list)
 99          output["graded_list"] = graded_list
100          output["pass@1"] = graded_list.count(True) / len(graded_list)
101          return output
102  
103      def get_evaluation_sample(self):
104          return {
105              "input_output":
106                  json.dumps(
107                      {
108                          "inputs": [
109                              t.input
110                              for t in self.public_test_cases + self.private_test_cases
111                          ],
112                          "outputs": [
113                              t.output
114                              for t in self.public_test_cases + self.private_test_cases
115                          ],
116                          "fn_name": self.metadata.get("func_name", None),
117                      }
118                  )
119          }
120  
121  
def convert_file(source_path, target_path, *, n_sampling=1, sample_size=5):
    """Convert the ``"test"`` split of a dataset into JSON-lines prompt files.

    Two files are written: ``target_path`` with every problem, and
    ``target_path + ".sampled"`` with only the first ``sample_size`` problems.

    Args:
        source_path: Passed to ``datasets.load_dataset``.
        target_path: Output ``.jsonl`` path. Its parent directory is created
            when the path has a directory component.
        n_sampling: How many times each record is written (default 1).
        sample_size: Number of leading problems in the ``.sampled`` file
            (default 5).
    """

    def build_prompt(question: CodeGenerationProblem):
        """Assemble the instruction text for one problem."""
        if question.starter_code:
            guidance = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE
            code_stub = question.starter_code
        else:
            guidance = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE
            code_stub = "# YOUR CODE HERE"
        return (
            "You will be given a question (problem specification) and will "
            "generate a correct Python program that matches the specification "
            "and passes all tests. You will NOT return anything except for "
            "the program.\n\n"
            f"Question: {question.question_content}\n\n"
            f"{guidance}\n"
            f"```python\n{code_stub}\n```\n\n"
        )

    def convert(sample):
        """Build the output record (prompt, tests, eval settings) for a problem."""
        return {
            "prompt": build_prompt(sample),
            "_test": sample.get_evaluation_sample(),
            "entry_point": sample.starter_code,
            "tags": "coding,en,python,core",
            "task": "livecodebench",
            "source": "livecodebench",
            # Disabled knobs from the original config: beam_size,
            # presence_penalty, system_str.
            "eval_args": {
                "greedy": False,
                "seed": 1234,
                "out_seq_length": 1200,
                "repetition_penalty": 1.0,
                "temperature": 0.2,
                "top_k": -1,
                "top_p": 0.95,
            },
        }

    def dump(path, problems):
        """Write one record per problem to ``path``, repeated ``n_sampling`` times."""
        with jsonlines.open(path, 'w') as writer:
            # Wrapping the list (not the enumerate) lets tqdm show a total.
            for index, sample in enumerate(tqdm.tqdm(problems)):
                record = convert(sample)
                record["sampling_cluster"] = index
                for _ in range(n_sampling):
                    writer.write(record)

    # dirname is "" for a bare filename; makedirs("") would raise.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Load and parse once, before opening any output, so a failed load does
    # not leave an empty file behind and both outputs share the same parse.
    problems = [
        CodeGenerationProblem(**row)
        for row in datasets.load_dataset(source_path)["test"]
    ]

    dump(target_path, problems)
    dump(target_path + ".sampled", problems[:sample_size])
186  
187  
if __name__ == "__main__":
    # Script entry point: convert the local dataset copy into the jsonl file.
    source_dataset = "./data/livecodebench___code_generation"
    output_jsonl = "./data/livecodebench.jsonl"
    convert_file(source_dataset, output_jsonl)
190