lm_styles.py
  1  from dataclasses import dataclass
  2  from datetime import datetime
  3  from enum import Enum
  4  
  5  
  6  class LMStyle(Enum):
  7      CodeQwenBase = "CodeQwenBase"
  8      CodeQwenChat = "CodeQwenChat"
  9      
 10      OpenAIChat = "OpenAIChat"
 11      Anthropic = "Anthropic"
 12      AnthropicMessage = "AnthropicMessage"
 13      Gemini = "Gemini"
 14      MistralWeb = "MistralWeb"
 15  
 16      DeepSeekBase = "DeepSeekBase"
 17      CodeLLaMaBase = "CodeLLaMaBase"
 18      StarCoder2Base = "StarCoder2Base"
 19      StableCodeBase = "StableCodeBase"
 20  
 21      DeepSeekCodeInstruct = "DeepSeekCodeInstruct"
 22      CodeLLaMaInstruct = "CodeLLaMaInstruct"
 23  
 24      Phind = "Phind"
 25      WizardCoder = "WizardCoder"
 26      MagiCoder = "MagiCoder"
 27      OC = "OC"
 28  
 29  
 30  @dataclass
 31  class LanguageModel:
 32      model_name: str
 33      model_repr: str
 34      model_style: LMStyle
 35      release_date: datetime | None  # XXX Should we use timezone.utc?
 36      link: str | None = None
 37  
 38      def __hash__(self) -> int:
 39          return hash(self.model_name)
 40  
 41  
 42  LanguageModelList: list[LanguageModel] = [
 43      LanguageModel(
 44          "SelfDefined",
 45          "SelfDefined",
 46          LMStyle.CodeQwenChat,
 47          datetime(2024, 4, 16),
 48          link=None,
 49      ),
 50      LanguageModel(
 51          "Qwen/CodeQwen1.5-7B",
 52          "CodeQwen1.5-7B",
 53          LMStyle.CodeQwenBase,
 54          datetime(2024, 4, 16),
 55          link="https://huggingface.co/Qwen/CodeQwen1.5-7B",
 56      ),
 57      LanguageModel(
 58          "Qwen/CodeQwen1.5-7B-Chat",
 59          "CodeQwen1.5-7B-Chat",
 60          LMStyle.CodeQwenChat,
 61          datetime(2024, 4, 16),
 62          link="https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat",
 63      ),
 64      LanguageModel(
 65          "deepseek-ai/deepseek-coder-33b-instruct",
 66          "DSCoder-33b-Ins",
 67          LMStyle.DeepSeekCodeInstruct,
 68          datetime(2023, 9, 1),
 69          link="https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
 70      ),
 71      LanguageModel(
 72          "deepseek-ai/deepseek-coder-6.7b-instruct",
 73          "DSCoder-6.7b-Ins",
 74          LMStyle.DeepSeekCodeInstruct,
 75          datetime(2023, 9, 1),
 76          link="https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
 77      ),
 78      LanguageModel(
 79          "deepseek-ai/deepseek-coder-1.3b-instruct",
 80          "DSCoder-1.3b-Ins",
 81          LMStyle.DeepSeekCodeInstruct,
 82          datetime(2023, 8, 1),
 83          link="https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-instruct",
 84      ),
 85      LanguageModel(
 86          "codellama/CodeLlama-34b-Instruct-hf",
 87          "Cllama-34b-Ins",
 88          LMStyle.CodeLLaMaInstruct,
 89          datetime(2023, 1, 1),
 90          link="https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf",
 91      ),
 92      LanguageModel(
 93          "codellama/CodeLlama-13b-Instruct-hf",
 94          "Cllama-13b-Ins",
 95          LMStyle.CodeLLaMaInstruct,
 96          datetime(2023, 1, 1),
 97          link="https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf",
 98      ),
 99      LanguageModel(
100          "codellama/CodeLlama-7b-Instruct-hf",
101          "Cllama-7b-Ins",
102          LMStyle.CodeLLaMaInstruct,
103          datetime(2023, 1, 1),
104          link="https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf",
105      ),
106      LanguageModel(
107          "WizardCoderLM/WizardCoderCoder-Python-34B-V1.0",
108          "WCoder-34B-V1",
109          LMStyle.WizardCoder,
110          datetime(2023, 1, 1),
111          link="https://huggingface.co/WizardCoderLM/WizardCoderCoder-Python-34B-V1.0",
112      ),
113      LanguageModel(
114          "WizardCoderLM/WizardCoderCoder-33B-V1.1",
115          "WCoder-33B-V1.1",
116          LMStyle.WizardCoder,
117          datetime(2023, 9, 1),
118          link="https://huggingface.co/WizardCoderLM/WizardCoderCoder-33B-V1.1",
119      ),
120      LanguageModel(
121          "Phind/Phind-CodeLlama-34B-v2",
122          "Phind-34B-V2",
123          LMStyle.Phind,
124          datetime(2023, 1, 1),
125          link="https://huggingface.co/Phind/Phind-CodeLlama-34B-v2",
126      ),
127      LanguageModel(
128          "gpt-3.5-turbo-0301",
129          "GPT-3.5-Turbo-0301",
130          LMStyle.OpenAIChat,
131          datetime(2021, 10, 1),
132          link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday",
133      ),
134      LanguageModel(
135          "gpt-3.5-turbo-0125",
136          "GPT-3.5-Turbo-0125",
137          LMStyle.OpenAIChat,
138          datetime(2021, 10, 1),
139          link="https://openai.com/blog/new-embedding-models-and-api-updates#:~:text=Other%20new%20models%20and%20lower%20pricing",
140      ),
141      LanguageModel(
142          "gpt-4-0613",
143          "GPT-4-0613",
144          LMStyle.OpenAIChat,
145          datetime(2021, 10, 1),
146          link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday",
147      ),
148      LanguageModel(
149          "gpt-4-1106-preview",
150          "GPT-4-Turbo-1106",
151          LMStyle.OpenAIChat,
152          datetime(2023, 4, 30),
153          link="https://openai.com/blog/new-models-and-developer-products-announced-at-devday",
154      ),
155      LanguageModel(
156          "claude-2",
157          "Claude-2",
158          LMStyle.Anthropic,
159          datetime(2022, 12, 31),
160          link="https://www.anthropic.com/index/claude-2",
161      ),
162      LanguageModel(
163          "claude-instant-1",
164          "Claude-Instant-1",
165          LMStyle.Anthropic,
166          datetime(2022, 12, 31),
167          link="https://www.anthropic.com/index/introducing-claude",
168      ),
169      LanguageModel(
170          "claude-3-opus-20240229",
171          "Claude-3-Opus",
172          LMStyle.AnthropicMessage,
173          datetime(2023, 9, 1),
174          link="https://www.anthropic.com/index/claude-3",
175      ),
176      LanguageModel(
177          "claude-3-sonnet-20240229",
178          "Claude-3-Sonnet",
179          LMStyle.AnthropicMessage,
180          datetime(2023, 9, 1),
181          link="https://www.anthropic.com/index/claude-3",
182      ),
183      LanguageModel(
184          "gemini-pro",
185          "Gemini-Gemini-Pro",
186          LMStyle.Gemini,
187          datetime(2023, 5, 1),
188          link="https://blog.Gemini/technology/ai/gemini-api-developers-cloud",
189      ),
190      LanguageModel(
191          "ise-uiuc/Magicoder-S-DS-6.7B",
192          "MagiCoderS-DS-6.7B",
193          LMStyle.MagiCoder,
194          datetime(2023, 7, 30),
195          link="https://huggingface.co/ise-uiuc/Magicoder-S-DS-6.7B",
196      ),
197      LanguageModel(
198          "ise-uiuc/Magicoder-S-CL-7B",
199          "MagiCoderS-CL-7B",
200          LMStyle.MagiCoder,
201          datetime(2023, 1, 1),
202          link="https://huggingface.co/ise-uiuc/Magicoder-S-CL-7B",
203      ),
204      LanguageModel(
205          "bigcode/starcoder2-3b",
206          "StarCoder2-3b",
207          LMStyle.StarCoder2Base,
208          datetime(2023, 1, 1),
209          link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main",
210      ),
211      LanguageModel(
212          "bigcode/starcoder2-7b",
213          "StarCoder2-7b",
214          LMStyle.StarCoder2Base,
215          datetime(2023, 1, 1),
216          link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main",
217      ),
218      LanguageModel(
219          "bigcode/starcoder2-15b",
220          "StarCoder2-15b",
221          LMStyle.StarCoder2Base,
222          datetime(2023, 1, 1),
223          link="https://huggingface.co/bigcode/starcoder2-7b-magicoder-instruct/tree/main",
224      ),
225      LanguageModel(
226          "codellama/CodeLlama-34b-hf",
227          "CodeLlama-34b-Base",
228          LMStyle.CodeLLaMaBase,
229          datetime(2023, 1, 1),
230          link="https://huggingface.co/codellama/CodeLlama-34b-hf",
231      ),
232      LanguageModel(
233          "codellama/CodeLlama-13b-hf",
234          "CodeLlama-13b-Base",
235          LMStyle.CodeLLaMaBase,
236          datetime(2023, 1, 1),
237          link="https://huggingface.co/codellama/CodeLlama-13b-hf",
238      ),
239      LanguageModel(
240          "codellama/CodeLlama-7b-hf",
241          "CodeLlama-7b-Base",
242          LMStyle.CodeLLaMaBase,
243          datetime(2023, 1, 1),
244          link="https://huggingface.co/codellama/CodeLlama-7b-hf",
245      ),
246      LanguageModel(
247          "deepseek-ai/deepseek-coder-33b-base",
248          "DSCoder-33b-Base",
249          LMStyle.DeepSeekBase,
250          datetime(2023, 1, 1),
251          link="https://huggingface.co/deepseek-ai/deepseek-coder-33b-base",
252      ),
253      LanguageModel(
254          "deepseek-ai/deepseek-coder-6.7b-base",
255          "DSCoder-6.7b-Base",
256          LMStyle.DeepSeekBase,
257          datetime(2023, 1, 1),
258          link="https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base",
259      ),
260      LanguageModel(
261          "deepseek-ai/deepseek-coder-1.3b-base",
262          "DSCoder-1.3b-Base",
263          LMStyle.DeepSeekBase,
264          datetime(2023, 1, 1),
265          link="https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base",
266      ),
267      LanguageModel(
268          "mistral-large-latest",
269          "Mistral-Large",
270          LMStyle.MistralWeb,
271          datetime(2023, 1, 1),
272          link="https://mistral.ai/news/mistral-large/",
273      ),
274      LanguageModel(
275          "m-a-p/OpenCodeInterpreter-DS-33B",
276          "OC-DS-33B",
277          LMStyle.OC,
278          datetime(2023, 1, 1),
279          link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B/",
280      ),
281      LanguageModel(
282          "m-a-p/OpenCodeInterpreter-DS-6.7B",
283          "OC-DS-6.7B",
284          LMStyle.OC,
285          datetime(2023, 9, 1),
286          link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-6.7B/",
287      ),
288      LanguageModel(
289          "m-a-p/OpenCodeInterpreter-DS-1.3B",
290          "OC-DS-1.3B",
291          LMStyle.OC,
292          datetime(2023, 9, 1),
293          link="https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-1.3B/",
294      ),
295      LanguageModel(
296          "stabilityai/stable-code-3b",
297          "StableCode-3B",
298          LMStyle.StableCodeBase,
299          datetime(2023, 9, 1),
300          link="https://huggingface.co/stabilityai/stable-code-3b/",
301      ),
302  ]
303  
# Lookup table from ``model_name`` to its registry entry.  Should two entries
# ever share a model_name, the one appearing later in LanguageModelList wins.
LanguageModelStore: dict[str, LanguageModel] = {
    model.model_name: model for model in LanguageModelList
}
307  
if __name__ == "__main__":
    # When run as a script, print every registered model_name in list order.
    print(list(LanguageModelStore))