# code_generation.py
# Prompt construction for the code-generation task: shared system and
# formatting messages consumed by the per-model prompt builders below.

import json
import os

# ``anthropic`` is an optional dependency (only needed for the legacy
# Anthropic completions API).  Fall back to ``None`` sentinels so this module
# still imports when the package is not installed.
try:
    from anthropic import HUMAN_PROMPT, AI_PROMPT
except ImportError:
    HUMAN_PROMPT = None
    AI_PROMPT = None

from lcb_runner.lm_styles import LMStyle
from lcb_runner.benchmarks.code_generation import CodeGenerationProblem


class PromptConstants:
    """System and formatting messages shared by the prompt builders.

    These strings are emitted verbatim into benchmark prompts, so their exact
    wording — including the missing comma in ``FORMATTING_WITHOUT_STARTER_CODE``
    ("stdin solve") — is intentionally preserved byte-for-byte.

    The original ``f`` prefixes were redundant (no placeholders, ruff F541)
    and have been dropped; the string values are identical.
    """

    SYSTEM_MESSAGE_GENERIC = "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program."

    SYSTEM_MESSAGE_DEEPSEEK = "You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you answer questions related to computer science."

    SYSTEM_MESSAGE_CODEQWEN = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user"

    SYSTEM_MESSAGE_MAGIC = "You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n@@ Instruction\n"

    SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

    SYSTEM_MESSAGE_PHIND = """You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example:
```python
# YOUR CODE HERE
```"""

    FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."

    FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
def get_generic_question_template_answer(question: CodeGenerationProblem):
    """Build the generic (chat-style) user prompt for *question*.

    Uses the markdown-fenced ``### Question/Format/Answer`` layout shared by
    OpenAI, Anthropic, Gemini, Mistral and OC styles.
    """
    prompt = f"### Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += "### Answer: (use the provided format with backticks)\n\n"
    return prompt


def get_cllama_question_template_answer(question: CodeGenerationProblem):
    """Build the CodeLLaMa-instruct user prompt ([PYTHON] delimiters)."""
    prompt = f"### Question\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"[PYTHON]\n{question.starter_code}\n[/PYTHON]\n\n"
    else:
        prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "[PYTHON]\n# WRITE YOUR CODE HERE\n[/PYTHON]\n\n"
    prompt += "### ANSWER (use the provided delimiters, read the inputs from stdin and write response to stdout)\n\n"
    return prompt


def get_deepseekcode_question_template_answer(question: CodeGenerationProblem):
    """Build the DeepSeek-Coder instruct prompt (### Instruction/Response)."""
    prompt = "### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
    prompt += f"Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"### Instruction: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"### Instruction: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += "### Response:\n\n"
    return prompt


def get_codeqwen_question_template_answer(question: CodeGenerationProblem):
    """Build the CodeQwen chat prompt (ChatML ``<|im_start|>`` markers).

    The matching system header lives in
    ``PromptConstants.SYSTEM_MESSAGE_CODEQWEN``; this function closes the user
    turn with ``<|im_end|>`` and opens the assistant turn.
    """
    prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
    prompt += f"Question: {question.question_content}\n\n"
    if question.starter_code:
        prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n<|im_end|>\n"
    else:
        prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n<|im_end|>\n"
    prompt += "<|im_start|>assistant\n"
    return prompt


def get_magicoder_question_template_answer(question: CodeGenerationProblem):
    """Build the MagiCoder prompt (``@@ Instruction`` / ``@@ Response``)."""
    prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
    prompt += f"Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += "@@ Response\n"
    return prompt


def get_wizard_question_template_answer(question: CodeGenerationProblem):
    """Build the WizardCoder prompt (### Instruction/Response)."""
    prompt = """### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example:
```python
# YOUR CODE HERE
```
"""
    prompt += f"{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += "### Response:\n\n"
    return prompt


def get_phind_question_template_answer(question: CodeGenerationProblem):
    """Build the Phind prompt (question first, ``### Assistant`` trailer)."""
    prompt = f"{question.question_content}\n\n"
    if question.starter_code:
        prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += "\n\n### Assistant"
    return prompt


# Few-shot examples for base (non-chat) models, loaded once at import time.
# ``func``: function-completion examples (problems with starter code);
# ``stdin``: stdin/stdout examples.  NOTE(review): this performs file I/O on
# import — the module cannot be imported without these data files present.
with open(f"{os.path.dirname(os.path.abspath(__file__))}/few_shot_examples/generation/func.json") as f:
    func = json.load(f)

with open(f"{os.path.dirname(os.path.abspath(__file__))}/few_shot_examples/generation/stdin.json") as f:
    stdin = json.load(f)


def get_base_model_question_template_answer(question: CodeGenerationProblem):
    """Build a one-shot prompt for base models.

    Emits one few-shot example (picked from ``func`` or ``stdin`` depending on
    whether *question* has starter code) followed by the target question with
    an empty answer slot for the model to complete.
    """
    if question.starter_code:
        examples_json = func
    else:
        examples_json = stdin

    def get_example_prompt(example):
        # Render a single example; an empty "answer" leaves the completion
        # open-ended (no trailing blank lines) for the target question.
        prompt = ""
        prompt += "### Question\n"
        prompt += example["question"]
        prompt += "\n\n"
        if question.starter_code:
            prompt += "### Starter Code\n"
            prompt += example["sample_code"]
            prompt += "\n\n"
        prompt += "### Answer\n\n"
        prompt += example["answer"]
        if example["answer"]:
            prompt += "\n\n"
        return prompt

    prompt = ""
    prompt += get_example_prompt(examples_json[0])
    prompt += get_example_prompt(
        {
            "question": question.question_content,
            "sample_code": question.starter_code,
            "answer": "",
        }
    )
    return prompt


def format_prompt_generation(
    question: CodeGenerationProblem, LanguageModelStyle: LMStyle
) -> str:
    """Format *question* into the prompt shape expected by the model style.

    NOTE(review): the ``-> str`` annotation is inaccurate for some branches —
    ``OpenAIChat``/``MistralWeb`` return a list of chat messages and
    ``AnthropicMessage`` returns a ``(system, messages)`` tuple; callers must
    dispatch on style.  Kept as-is to avoid changing the visible signature.

    Raises:
        NotImplementedError: for styles with no template defined here.
    """
    if LanguageModelStyle == LMStyle.OpenAIChat:
        return [
            {
                "role": "system",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
            },
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]

    if LanguageModelStyle == LMStyle.Anthropic:
        # Legacy completions API.  NOTE(review): if the ``anthropic`` package
        # is missing, HUMAN_PROMPT/AI_PROMPT are None and this silently
        # renders the string "None" — confirm the package is installed when
        # using this style.
        prompt = f"{HUMAN_PROMPT}\n"
        prompt += f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n"
        prompt += f"{get_generic_question_template_answer(question).rstrip()}\n"
        prompt += f"{AI_PROMPT}"
        return prompt

    if LanguageModelStyle == LMStyle.AnthropicMessage:
        # Messages API keeps the system prompt separate from the turn list.
        system = PromptConstants.SYSTEM_MESSAGE_GENERIC
        prompt = [
            {
                "role": "user",
                "content": get_generic_question_template_answer(question).rstrip(),
            }
        ]
        return system, prompt

    if LanguageModelStyle == LMStyle.Gemini:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n"
        prompt += get_generic_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.MistralWeb:
        return [
            {
                "role": "system",
                "content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
            },
            {
                "role": "user",
                "content": get_generic_question_template_answer(question),
            },
        ]

    if LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_DEEPSEEK}\n\n"
        prompt += get_deepseekcode_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.CodeQwenChat:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_CODEQWEN}\n\n"
        prompt += get_codeqwen_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
        prompt = "[INST] <<SYS>>\n"
        prompt += f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n"
        prompt += "<</SYS>>\n\n"
        prompt += f"{get_cllama_question_template_answer(question)}\n"
        prompt += "[/INST]"
        return prompt

    if LanguageModelStyle == LMStyle.MagiCoder:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_MAGIC}\n"
        prompt += get_magicoder_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.WizardCoder:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_WIZARD}\n\n"
        prompt += get_wizard_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.Phind:
        prompt = "### System Prompt\n\n"
        prompt += f"{PromptConstants.SYSTEM_MESSAGE_PHIND}\n\n"
        prompt += "### User Message\n\n"
        prompt += get_phind_question_template_answer(question)
        return prompt

    if LanguageModelStyle == LMStyle.OC:
        prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n"
        prompt += get_generic_question_template_answer(question)
        return prompt

    if LanguageModelStyle in [
        LMStyle.DeepSeekBase,
        LMStyle.CodeLLaMaBase,
        LMStyle.StarCoder2Base,
        LMStyle.StableCodeBase,
        LMStyle.CodeQwenBase,
    ]:
        # All base models share the few-shot completion template.
        return get_base_model_question_template_answer(question)

    raise NotImplementedError(
        f"LanguageModelStyle {LanguageModelStyle} not implemented"
    )


def test():
    """Dump an example prompt pair (without/with starter code) per style.

    Output goes to ``logs/example_prompts/generation``; non-string prompts
    (chat-message lists, tuples) are serialized as JSON.
    """
    import pathlib

    base_dir = "logs/example_prompts/generation"
    pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)

    for lmstyle in LMStyle:
        generation_problem = CodeGenerationProblem(
            "title",
            "question-content",
            "leetcode",
            "question_id",
            "contest_id",
            "contest_date",
            "",
            "easy",
            "[]",
            "[]",
            "{}",
        )
        prompt1 = format_prompt_generation(generation_problem, lmstyle)
        with open(f"{base_dir}/{lmstyle}_1.txt", "w") as f:
            try:
                f.write(prompt1)
            except TypeError:
                # Chat-style prompts are lists/tuples, not plain strings.
                f.write(json.dumps(prompt1))

        generation_problem.starter_code = "starter code"
        prompt2 = format_prompt_generation(generation_problem, lmstyle)
        with open(f"{base_dir}/{lmstyle}_2.txt", "w") as f:
            try:
                f.write(prompt2)
            except TypeError:
                f.write(json.dumps(prompt2))


if __name__ == "__main__":
    test()