lm_styles.py
1 from dataclasses import dataclass 2 from datetime import datetime 3 from enum import Enum 4 5 6 class LMStyle(Enum): 7 CodeQwenBase = "CodeQwenBase" 8 CodeQwenChat = "CodeQwenChat" 9 10 OpenAIChat = "OpenAIChat" 11 Anthropic = "Anthropic" 12 AnthropicMessage = "AnthropicMessage" 13 Gemini = "Gemini" 14 MistralWeb = "MistralWeb" 15 16 DeepSeekBase = "DeepSeekBase" 17 CodeLLaMaBase = "CodeLLaMaBase" 18 StarCoder2Base = "StarCoder2Base" 19 StableCodeBase = "StableCodeBase" 20 21 DeepSeekCodeInstruct = "DeepSeekCodeInstruct" 22 CodeLLaMaInstruct = "CodeLLaMaInstruct" 23 24 Phind = "Phind" 25 WizardCoder = "WizardCoder" 26 MagiCoder = "MagiCoder" 27 OC = "OC" 28 29 30 @dataclass 31 class LanguageModel: 32 model_name: str 33 model_repr: str 34 model_style: LMStyle 35 release_date: datetime | None # XXX Should we use timezone.utc? 36 link: str | None = None 37 38 def __hash__(self) -> int: 39 return hash(self.model_name) 40 41 42 LanguageModelList: list[LanguageModel] = [ 43 LanguageModel( 44 "SelfDefined", 45 "SelfDefined", 46 LMStyle.CodeQwenChat, 47 datetime(2024, 4, 16), 48 link=None, 49 ), 50 LanguageModel( 51 "Qwen/CodeQwen1.5-7B", 52 "CodeQwen1.5-7B", 53 LMStyle.CodeQwenBase, 54 datetime(2024, 4, 16), 55 link="https://huggingface.co/Qwen/CodeQwen1.5-7B", 56 ), 57 LanguageModel( 58 "Qwen/CodeQwen1.5-7B-Chat", 59 "CodeQwen1.5-7B-Chat", 60 LMStyle.CodeQwenChat, 61 datetime(2024, 4, 16), 62 link="https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat", 63 ), 64 LanguageModel( 65 "deepseek-ai/deepseek-coder-33b-instruct", 66 "DSCoder-33b-Ins", 67 LMStyle.DeepSeekCodeInstruct, 68 datetime(2023, 9, 1), 69 link="https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct", 70 ), 71 LanguageModel( 72 "deepseek-ai/deepseek-coder-6.7b-instruct", 73 "DSCoder-6.7b-Ins", 74 LMStyle.DeepSeekCodeInstruct, 75 datetime(2023, 9, 1), 76 link="https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct", 77 ), 78 LanguageModel( 79 "deepseek-ai/deepseek-coder-1.3b-instruct", 80 "DSCoder-1.3b-Ins", 81 LMStyle.DeepSeekCodeInstruct, 82 datetime(2023, 8, 1), 83 link="https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-instruct", 84 ), 85 LanguageModel( 86 "codellama/CodeLlama-34b-Instruct-hf", 87 "Cllama-34b-Ins", 88 LMStyle.CodeLLaMaInstruct, 89 datetime(2023, 1, 1), 90 link="https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf", 91 ), 92 LanguageModel( 93 "codellama/CodeLlama-13b-Instruct-hf", 94 "Cllama-13b-Ins", 95 LMStyle.CodeLLaMaInstruct, 96 datetime(2023, 1, 1), 97 link="https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf", 98 ), 99 LanguageModel( 100 "codellama/CodeLlama-7b-Instruct-hf", 101 "Cllama-7b-Ins", 102 LMStyle.CodeLLaMaInstruct, 103 datetime(2023, 1, 1), 104 link="https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf", 105 ), 106 LanguageModel( 107 "WizardCoderLM/WizardCoderCoder-Python-34B-V1.0", 108 "WCoder-34B-V1", 109 LMStyle.WizardCoder, 110 datetime(2023, 1, 1), 111 link="https://huggingface.co/WizardCoderLM/WizardCoderCoder-Python-34B-V1.0", 112 ), 113 LanguageModel( 114 "WizardCoderLM/WizardCoderCoder-33B-V1.1", 115 "WCoder-33B-V1.1", 116 LMStyle.WizardCoder, 117 datetime(2023, 9, 1), 118 link="https://huggingface.co/WizardCoderLM/WizardCoderCoder-33B-V1.1", 119 ), 120 LanguageModel( 121 "Phind/Phind-CodeLlama-34B-v2", 122 "Phind-34B-V2", 123 LMStyle.Phind, 124 datetime(2023, 1, 1), 125 link="https://huggingface.co/Phind/Phind-CodeLlama-34B-v2", 126 ), 127 LanguageModel( 128 "gpt-3.5-turbo-0301", 129 "GPT-3.5-Turbo-0301", 130 LMStyle.OpenAIChat, 131 datetime(2021, 10, 1), 132 link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday", 133 ), 134 LanguageModel( 135 "gpt-3.5-turbo-0125", 136 "GPT-3.5-Turbo-0125", 137 LMStyle.OpenAIChat, 138 datetime(2021, 10, 1), 139 link="https://openai.com/blog/new-embedding-models-and-api-updates#:~:text=Other%20new%20models%20and%20lower%20pricing", 140 ), 141 LanguageModel( 142 "gpt-4-0613", 143 "GPT-4-0613", 144 LMStyle.OpenAIChat, 145 datetime(2021, 10, 1), 146 link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday", 147 ), 148 LanguageModel( 149 "gpt-4-1106-preview", 150 "GPT-4-Turbo-1106", 151 LMStyle.OpenAIChat, 152 datetime(2023, 4, 30), 153 link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday", 154 ), 155 LanguageModel( 156 "claude-2", 157 "Claude-2", 158 LMStyle.Anthropic, 159 datetime(2022, 12, 31), 160 link="https://www.anthropic.com/index/claude-2", 161 ), 162 LanguageModel( 163 "claude-instant-1", 164 "Claude-Instant-1", 165 LMStyle.Anthropic, 166 datetime(2022, 12, 31), 167 link="https://www.anthropic.com/index/introducing-claude", 168 ), 169 LanguageModel( 170 "claude-3-opus-20240229", 171 "Claude-3-Opus", 172 LMStyle.AnthropicMessage, 173 datetime(2023, 9, 1), 174 link="https://www.anthropic.com/index/claude-3", 175 ), 176 LanguageModel( 177 "claude-3-sonnet-20240229", 178 "Claude-3-Sonnet", 179 LMStyle.AnthropicMessage, 180 datetime(2023, 9, 1), 181 link="https://www.anthropic.com/index/claude-3", 182 ), 183 LanguageModel( 184 "gemini-pro", 185 "Gemini-Gemini-Pro", 186 LMStyle.Gemini, 187 datetime(2023, 5, 1), 188 link="https://blog.Gemini/technology/ai/gemini-api-developers-cloud", 189 ), 190 LanguageModel( 191 "ise-uiuc/Magicoder-S-DS-6.7B", 192 "MagiCoderS-DS-6.7B", 193 LMStyle.MagiCoder, 194 datetime(2023, 7, 30), 195 link="https://huggingface.co/ise-uiuc/Magicoder-S-DS-6.7B", 196 ), 197 LanguageModel( 198 "ise-uiuc/Magicoder-S-CL-7B", 199 "MagiCoderS-CL-7B", 200 LMStyle.MagiCoder, 201 datetime(2023, 1, 1), 202 link="https://huggingface.co/ise-uiuc/Magicoder-S-CL-7B", 203 ), 204 LanguageModel( 205 "bigcode/starcoder2-3b", 206 "StarCoder2-3b", 207 LMStyle.StarCoder2Base, 208 datetime(2023, 1, 1), 209 link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main", 210 ), 211 LanguageModel( 212 "bigcode/starcoder2-7b", 213 "StarCoder2-7b", 214 LMStyle.StarCoder2Base, 215 datetime(2023, 1, 1), 216 link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main", 217 ), 218 LanguageModel( 219 "bigcode/starcoder2-15b", 220 "StarCoder2-15b", 221 LMStyle.StarCoder2Base, 222 datetime(2023, 1, 1), 223 link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main", 224 ), 225 LanguageModel( 226 "codellama/CodeLlama-34b-hf", 227 "CodeLlama-34b-Base", 228 LMStyle.CodeLLaMaBase, 229 datetime(2023, 1, 1), 230 link="https://huggingface.co/codellama/CodeLlama-34b-hf", 231 ), 232 LanguageModel( 233 "codellama/CodeLlama-13b-hf", 234 "CodeLlama-13b-Base", 235 LMStyle.CodeLLaMaBase, 236 datetime(2023, 1, 1), 237 link="https://huggingface.co/codellama/CodeLlama-13b-hf", 238 ), 239 LanguageModel( 240 "codellama/CodeLlama-7b-hf", 241 "CodeLlama-7b-Base", 242 LMStyle.CodeLLaMaBase, 243 datetime(2023, 1, 1), 244 link="https://huggingface.co/codellama/CodeLlama-7b-hf", 245 ), 246 LanguageModel( 247 "deepseek-ai/deepseek-coder-33b-base", 248 "DSCoder-33b-Base", 249 LMStyle.DeepSeekBase, 250 datetime(2023, 1, 1), 251 link="https://huggingface.co/deepseek-ai/deepseek-coder-33b-base", 252 ), 253 LanguageModel( 254 "deepseek-ai/deepseek-coder-6.7b-base", 255 "DSCoder-6.7b-Base", 256 LMStyle.DeepSeekBase, 257 datetime(2023, 1, 1), 258 link="https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", 259 ), 260 LanguageModel( 261 "deepseek-ai/deepseek-coder-1.3b-base", 262 "DSCoder-1.3b-Base", 263 LMStyle.DeepSeekBase, 264 datetime(2023, 1, 1), 265 link="https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base", 266 ), 267 LanguageModel( 268 "mistral-large-latest", 269 "Mistral-Large", 270 LMStyle.MistralWeb, 271 datetime(2023, 1, 1), 272 link="https://mistral.ai/news/mistral-large/", 273 ), 274 LanguageModel( 275 "m-a-p/OpenCodeInterpreter-DS-33B", 276 "OC-DS-33B", 277 LMStyle.OC, 278 datetime(2023, 1, 1), 279 link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B/", 280 ), 281 LanguageModel( 282 "m-a-p/OpenCodeInterpreter-DS-6.7B", 283 "OC-DS-6.7B", 284 LMStyle.OC, 285 datetime(2023, 9, 1), 286 link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-6.7B/", 287 ), 288 LanguageModel( 289 "m-a-p/OpenCodeInterpreter-DS-1.3B", 290 "OC-DS-1.3B", 291 LMStyle.OC, 292 datetime(2023, 9, 1), 293 link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-1.3B/", 294 ), 295 LanguageModel( 296 "stabilityai/stable-code-3b", 297 "StableCode-3B", 298 LMStyle.StableCodeBase, 299 datetime(2023, 9, 1), 300 link="https://huggingface.co/stabilityai/stable-code-3b/", 301 ), 302 ] 303 304 LanguageModelStore: dict[str, LanguageModel] = { 305 lm.model_name: lm for lm in LanguageModelList 306 } 307 308 if __name__ == "__main__": 309 print(list(LanguageModelStore.keys()))