code_generation.py
  1  import json
  2  
  3  try:
  4      from anthropic import HUMAN_PROMPT, AI_PROMPT
  5  except ImportError:
  6      HUMAN_PROMPT = None
  7      AI_PROMPT = None
  8  
  9  from lcb_runner.lm_styles import LMStyle
 10  from lcb_runner.benchmarks.code_generation import CodeGenerationProblem
 11  import os
 12  
 13  
 14  class PromptConstants:
 15      SYSTEM_MESSAGE_GENERIC = f"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program."
 16  
 17      SYSTEM_MESSAGE_DEEPSEEK = f"You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you answer questions related to computer science."
 18  
 19      SYSTEM_MESSAGE_CODEQWEN = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user"
 20  
 21      SYSTEM_MESSAGE_MAGIC = f"You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n@@ Instruction\n"
 22  
 23      SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
 24  
 25      SYSTEM_MESSAGE_PHIND = f"""You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example: 
 26  ```python 
 27  # YOUR CODE HERE
 28  ```"""
 29  
 30      FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
 31  
 32      FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
 33  
 34  
 35  def get_generic_question_template_answer(question: CodeGenerationProblem):
 36      prompt = f"### Question:\n{question.question_content}\n\n"
 37      if question.starter_code:
 38          prompt += (
 39              f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
 40          )
 41          prompt += f"```python\n{question.starter_code}\n```\n\n"
 42      else:
 43          prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
 44          prompt += "```python\n# YOUR CODE HERE\n```\n\n"
 45      prompt += f"### Answer: (use the provided format with backticks)\n\n"
 46      return prompt
 47  
 48  
 49  def get_cllama_question_template_answer(question: CodeGenerationProblem):
 50      prompt = f"### Question\n{question.question_content}\n\n"
 51      if question.starter_code:
 52          prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
 53          prompt += f"[PYTHON]\n{question.starter_code}\n[/PYTHON]\n\n"
 54      else:
 55          prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
 56          prompt += f"[PYTHON]\n# WRITE YOUR CODE HERE\n[/PYTHON]\n\n"
 57      prompt += f"### ANSWER (use the provided delimiters, read the inputs from stdin and write response to stdout)\n\n"
 58      return prompt
 59  
 60  
 61  def get_deepseekcode_question_template_answer(question: CodeGenerationProblem):
 62      prompt = f"### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
 63      prompt += f"Question:\n{question.question_content}\n\n"
 64      if question.starter_code:
 65          prompt += (
 66              f"### Instruction: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
 67          )
 68          prompt += f"```python\n{question.starter_code}\n```\n\n"
 69      else:
 70          prompt += (
 71              f"### Instruction: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
 72          )
 73          prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
 74      prompt += f"### Response:\n\n"
 75      return prompt
 76  
 77  
 78  def get_codeqwen_question_template_answer(question: CodeGenerationProblem):
 79      prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
 80      prompt += f"Question: {question.question_content}\n\n"
 81      if question.starter_code:
 82          prompt += (
 83              f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
 84          )
 85          prompt += f"```python\n{question.starter_code}\n```\n\n<|im_end|>\n"
 86      else:
 87          prompt += (
 88              f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
 89          )
 90          prompt += f"```python\n# YOUR CODE HERE\n```\n\n<|im_end|>\n"
 91      prompt += f"<|im_start|>assistant\n"
 92      return prompt
 93  
 94  def get_magicoder_question_template_answer(question: CodeGenerationProblem):
 95      prompt = f"You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
 96      prompt += f"Question:\n{question.question_content}\n\n"
 97      if question.starter_code:
 98          prompt += f"Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
 99          prompt += f"```python\n{question.starter_code}\n```\n\n"
100      else:
101          prompt += f"Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
102          prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
103      prompt += f"@@ Response\n"
104      return prompt
105  
106  
def get_wizard_question_template_answer(question: CodeGenerationProblem):
    """Build the instruction body used for the WizardCoder style.

    Layout: a "### Instruction:" header with a fenced-code example, the
    problem text, the format instruction with a fenced Python block (starter
    code or placeholder), and a closing "### Response:" header.

    Args:
        question: Problem whose ``question_content`` and ``starter_code`` are
            rendered into the prompt.

    Returns:
        The assembled prompt string.
    """
    # The space after "```python" in the example fence is part of the prompt.
    header = "### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example:\n```python \n# YOUR CODE HERE\n```\n"
    statement = f"{question.question_content}\n\n"

    if question.starter_code:
        format_block = (
            f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
            f"```python\n{question.starter_code}\n```\n\n"
        )
    else:
        # Without starter code the instruction is followed by a blank line.
        format_block = (
            f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
            "```python\n# YOUR CODE HERE\n```\n\n"
        )

    return header + statement + format_block + "### Response:\n\n"
122  
123  
def get_phind_question_template_answer(question: CodeGenerationProblem):
    """Build the user-message body used for the Phind style.

    Layout: the problem text, the format instruction with a fenced Python
    block (starter code or placeholder), then the "### Assistant" header.

    Args:
        question: Problem whose ``question_content`` and ``starter_code`` are
            rendered into the prompt.

    Returns:
        The assembled prompt string. It ends with "### Assistant" and has no
        trailing newline.
    """
    if question.starter_code:
        format_block = (
            f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
            f"```python\n{question.starter_code}\n```\n\n"
        )
    else:
        # Without starter code the instruction is followed by a blank line.
        format_block = (
            f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
            "```python\n# YOUR CODE HERE\n```\n\n"
        )

    return f"{question.question_content}\n\n{format_block}\n\n### Assistant"
134  
135  
# Few-shot examples consumed by get_base_model_question_template_answer.
# They are loaded once at import time from a directory next to this file.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
_FEW_SHOT_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "few_shot_examples", "generation"
)

# Examples used for problems that come with starter code.
with open(os.path.join(_FEW_SHOT_DIR, "func.json"), encoding="utf-8") as f:
    func = json.load(f)

# Examples used for problems without starter code.
with open(os.path.join(_FEW_SHOT_DIR, "stdin.json"), encoding="utf-8") as f:
    stdin = json.load(f)
141  
142  
def get_base_model_question_template_answer(question: CodeGenerationProblem):
    """Build a one-shot completion prompt for base (non-instruct) models.

    The prompt is the first few-shot example followed by the target question
    in the same layout, with the target's answer left empty. Examples come
    from ``func`` when the question has starter code and from ``stdin``
    otherwise.

    Args:
        question: Problem whose ``question_content`` and ``starter_code`` are
            rendered into the prompt.

    Returns:
        The assembled prompt string, ending with the "### Answer" header of
        the target question.
    """
    has_starter = bool(question.starter_code)
    shots = func if has_starter else stdin

    def render(entry):
        # "".join, like string concatenation, raises TypeError on a non-str field.
        pieces = ["### Question\n", entry["question"], "\n\n"]
        if has_starter:
            # "sample_code" is only read when the question has starter code.
            pieces.extend(["### Starter Code\n", entry["sample_code"], "\n\n"])
        pieces.extend(["### Answer\n\n", entry["answer"]])
        if entry["answer"]:
            pieces.append("\n\n")
        return "".join(pieces)

    target = {
        "question": question.question_content,
        "sample_code": question.starter_code,
        "answer": "",
    }
    return render(shots[0]) + render(target)
174  
175  
def format_prompt_generation(
    question: CodeGenerationProblem, LanguageModelStyle: LMStyle
) -> "str | list[dict[str, str]] | tuple[str, list[dict[str, str]]]":
    """Format a code-generation problem as a prompt for the given model style.

    Args:
        question: Problem to render.
        LanguageModelStyle: Model style that selects the prompt layout.

    Returns:
        The shape depends on the style:

        * ``OpenAIChat`` / ``MistralWeb``: a list of two chat messages
          (``system`` then ``user``), each a dict with ``role`` and
          ``content`` keys.
        * ``AnthropicMessage``: a ``(system, messages)`` tuple, where
          ``messages`` is a one-element list holding the user message.
        * Every other supported style: a single prompt string.

    Raises:
        NotImplementedError: If the style has no branch here.
    """
    generic_system = PromptConstants.SYSTEM_MESSAGE_GENERIC

    # Both chat-message styles share exactly the same system/user message pair.
    if LanguageModelStyle in (LMStyle.OpenAIChat, LMStyle.MistralWeb):
        return [
            {"role": "system", "content": generic_system},
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]

    if LanguageModelStyle == LMStyle.Anthropic:
        # NOTE: HUMAN_PROMPT / AI_PROMPT are None when the anthropic package
        # could not be imported, so the text would then contain "None".
        user_text = get_generic_question_template_answer(question).rstrip()
        return f"{HUMAN_PROMPT}\n{generic_system}\n\n{user_text}\n{AI_PROMPT}"

    if LanguageModelStyle == LMStyle.AnthropicMessage:
        messages = [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question).rstrip(),
            }
        ]
        return generic_system, messages

    if LanguageModelStyle == LMStyle.Gemini:
        # A single newline separates the system text from the question here.
        return f"{generic_system}\n{get_generic_question_template_answer(question)}"

    if LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
        return (
            f"{PromptConstants.SYSTEM_MESSAGE_DEEPSEEK}\n\n"
            f"{get_deepseekcode_question_template_answer(question)}"
        )

    if LanguageModelStyle == LMStyle.CodeQwenChat:
        return (
            f"{PromptConstants.SYSTEM_MESSAGE_CODEQWEN}\n\n"
            f"{get_codeqwen_question_template_answer(question)}"
        )

    if LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
        return (
            "[INST] <<SYS>>\n"
            f"{generic_system}\n"
            "<</SYS>>\n\n"
            f"{get_cllama_question_template_answer(question)}\n"
            "[/INST]"
        )

    if LanguageModelStyle == LMStyle.MagiCoder:
        return (
            f"{PromptConstants.SYSTEM_MESSAGE_MAGIC}\n"
            f"{get_magicoder_question_template_answer(question)}"
        )

    if LanguageModelStyle == LMStyle.WizardCoder:
        return (
            f"{PromptConstants.SYSTEM_MESSAGE_WIZARD}\n\n"
            f"{get_wizard_question_template_answer(question)}"
        )

    if LanguageModelStyle == LMStyle.Phind:
        return (
            "### System Prompt\n\n"
            f"{PromptConstants.SYSTEM_MESSAGE_PHIND}\n\n"
            "### User Message\n\n"
            f"{get_phind_question_template_answer(question)}"
        )

    if LanguageModelStyle == LMStyle.OC:
        return f"{generic_system}\n\n{get_generic_question_template_answer(question)}"

    # Base models get a one-shot completion prompt with no system text.
    if LanguageModelStyle in [
        LMStyle.DeepSeekBase,
        LMStyle.CodeLLaMaBase,
        LMStyle.StarCoder2Base,
        LMStyle.StableCodeBase,
        LMStyle.CodeQwenBase,
    ]:
        return get_base_model_question_template_answer(question)

    raise NotImplementedError(
        f"LanguageModelStyle {LanguageModelStyle} not implemented"
    )
282  
283  
def test():
    """Write sample prompts for every ``LMStyle`` member to disk.

    For each style two files are written under
    ``logs/example_prompts/generation``: ``<style>_1.txt`` for a dummy problem
    with empty starter code and ``<style>_2.txt`` after its ``starter_code``
    is set to ``"starter code"``. Prompts that are not strings are written as
    JSON.
    """
    import pathlib

    base_dir = "logs/example_prompts/generation"
    pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)

    def dump(prompt, path):
        # Chat-style prompts are lists/tuples rather than str; serialize those.
        with open(path, "w") as handle:
            if isinstance(prompt, str):
                handle.write(prompt)
            else:
                handle.write(json.dumps(prompt))

    # (file suffix, starter-code override); None keeps the constructed value.
    variants = ((1, None), (2, "starter code"))

    for lmstyle in LMStyle:
        generation_problem = CodeGenerationProblem(
            "title",
            "question-content",
            "leetcode",
            "question_id",
            "contest_id",
            "contest_date",
            "",
            "easy",
            "[]",
            "[]",
            "{}",
        )
        for suffix, starter_override in variants:
            if starter_override is not None:
                generation_problem.starter_code = starter_override
            rendered = format_prompt_generation(generation_problem, lmstyle)
            dump(rendered, f"{base_dir}/{lmstyle}_{suffix}.txt")
318  
319  
# When run as a script, write the sample prompts for every model style.
if __name__ == "__main__":
    test()