/ mlflow / utils / model_catalog / llamagate.json
llamagate.json
  1  {
  2    "schema_version": "1.0",
  3    "models": {
  4      "codellama-7b": {
  5        "mode": "chat",
  6        "context_window": {
  7          "max_input": 16384,
  8          "max_output": 4096,
  9          "max_tokens": 4096
 10        },
 11        "pricing": {
 12          "input_per_million_tokens": 0.06,
 13          "output_per_million_tokens": 0.12
 14        },
 15        "capabilities": {
 16          "function_calling": true,
 17          "vision": false,
 18          "reasoning": false,
 19          "prompt_caching": false,
 20          "response_schema": true
 21        },
 22        "last_updated_at": "2026-04-24"
 23      },
 24      "deepseek-coder-6.7b": {
 25        "mode": "chat",
 26        "context_window": {
 27          "max_input": 16384,
 28          "max_output": 4096,
 29          "max_tokens": 4096
 30        },
 31        "pricing": {
 32          "input_per_million_tokens": 0.06,
 33          "output_per_million_tokens": 0.12
 34        },
 35        "capabilities": {
 36          "function_calling": true,
 37          "vision": false,
 38          "reasoning": false,
 39          "prompt_caching": false,
 40          "response_schema": true
 41        },
 42        "last_updated_at": "2026-04-24"
 43      },
 44      "deepseek-r1-7b-qwen": {
 45        "mode": "chat",
 46        "context_window": {
 47          "max_input": 131072,
 48          "max_output": 16384,
 49          "max_tokens": 16384
 50        },
 51        "pricing": {
 52          "input_per_million_tokens": 0.08,
 53          "output_per_million_tokens": 0.15
 54        },
 55        "capabilities": {
 56          "function_calling": true,
 57          "vision": false,
 58          "reasoning": true,
 59          "prompt_caching": false,
 60          "response_schema": true
 61        },
 62        "last_updated_at": "2026-04-24"
 63      },
 64      "deepseek-r1-8b": {
 65        "mode": "chat",
 66        "context_window": {
 67          "max_input": 65536,
 68          "max_output": 16384,
 69          "max_tokens": 16384
 70        },
 71        "pricing": {
 72          "input_per_million_tokens": 0.1,
 73          "output_per_million_tokens": 0.2
 74        },
 75        "capabilities": {
 76          "function_calling": true,
 77          "vision": false,
 78          "reasoning": true,
 79          "prompt_caching": false,
 80          "response_schema": true
 81        },
 82        "last_updated_at": "2026-04-24"
 83      },
 84      "dolphin3-8b": {
 85        "mode": "chat",
 86        "context_window": {
 87          "max_input": 128000,
 88          "max_output": 8192,
 89          "max_tokens": 8192
 90        },
 91        "pricing": {
 92          "input_per_million_tokens": 0.08,
 93          "output_per_million_tokens": 0.15
 94        },
 95        "capabilities": {
 96          "function_calling": true,
 97          "vision": false,
 98          "reasoning": false,
 99          "prompt_caching": false,
100          "response_schema": true
101        },
102        "last_updated_at": "2026-04-24"
103      },
104      "gemma3-4b": {
105        "mode": "chat",
106        "context_window": {
107          "max_input": 128000,
108          "max_output": 8192,
109          "max_tokens": 8192
110        },
111        "pricing": {
112          "input_per_million_tokens": 0.03,
113          "output_per_million_tokens": 0.08
114        },
115        "capabilities": {
116          "function_calling": true,
117          "vision": true,
118          "reasoning": false,
119          "prompt_caching": false,
120          "response_schema": true
121        },
122        "last_updated_at": "2026-04-24"
123      },
124      "llama-3.1-8b": {
125        "mode": "chat",
126        "context_window": {
127          "max_input": 131072,
128          "max_output": 8192,
129          "max_tokens": 8192
130        },
131        "pricing": {
132          "input_per_million_tokens": 0.03,
133          "output_per_million_tokens": 0.05
134        },
135        "capabilities": {
136          "function_calling": true,
137          "vision": false,
138          "reasoning": false,
139          "prompt_caching": false,
140          "response_schema": true
141        },
142        "last_updated_at": "2026-04-24"
143      },
144      "llama-3.2-3b": {
145        "mode": "chat",
146        "context_window": {
147          "max_input": 131072,
148          "max_output": 8192,
149          "max_tokens": 8192
150        },
151        "pricing": {
152          "input_per_million_tokens": 0.04,
153          "output_per_million_tokens": 0.08
154        },
155        "capabilities": {
156          "function_calling": true,
157          "vision": false,
158          "reasoning": false,
159          "prompt_caching": false,
160          "response_schema": true
161        },
162        "last_updated_at": "2026-04-24"
163      },
164      "llava-7b": {
165        "mode": "chat",
166        "context_window": {
167          "max_input": 4096,
168          "max_output": 2048,
169          "max_tokens": 2048
170        },
171        "pricing": {
172          "input_per_million_tokens": 0.1,
173          "output_per_million_tokens": 0.2
174        },
175        "capabilities": {
176          "function_calling": false,
177          "vision": true,
178          "reasoning": false,
179          "prompt_caching": false,
180          "response_schema": true
181        },
182        "last_updated_at": "2026-04-24"
183      },
184      "mistral-7b-v0.3": {
185        "mode": "chat",
186        "context_window": {
187          "max_input": 32768,
188          "max_output": 8192,
189          "max_tokens": 8192
190        },
191        "pricing": {
192          "input_per_million_tokens": 0.1,
193          "output_per_million_tokens": 0.15
194        },
195        "capabilities": {
196          "function_calling": true,
197          "vision": false,
198          "reasoning": false,
199          "prompt_caching": false,
200          "response_schema": true
201        },
202        "last_updated_at": "2026-04-24"
203      },
204      "nomic-embed-text": {
205        "mode": "embedding",
206        "context_window": {
207          "max_input": 8192,
208          "max_tokens": 8192
209        },
210        "pricing": {
211          "input_per_million_tokens": 0.02,
212          "output_per_million_tokens": 0
213        },
214        "capabilities": {
215          "function_calling": false,
216          "vision": false,
217          "reasoning": false,
218          "prompt_caching": false,
219          "response_schema": false
220        },
221        "last_updated_at": "2026-04-24"
222      },
223      "openthinker-7b": {
224        "mode": "chat",
225        "context_window": {
226          "max_input": 32768,
227          "max_output": 8192,
228          "max_tokens": 8192
229        },
230        "pricing": {
231          "input_per_million_tokens": 0.08,
232          "output_per_million_tokens": 0.15
233        },
234        "capabilities": {
235          "function_calling": true,
236          "vision": false,
237          "reasoning": true,
238          "prompt_caching": false,
239          "response_schema": true
240        },
241        "last_updated_at": "2026-04-24"
242      },
243      "qwen2.5-coder-7b": {
244        "mode": "chat",
245        "context_window": {
246          "max_input": 32768,
247          "max_output": 8192,
248          "max_tokens": 8192
249        },
250        "pricing": {
251          "input_per_million_tokens": 0.06,
252          "output_per_million_tokens": 0.12
253        },
254        "capabilities": {
255          "function_calling": true,
256          "vision": false,
257          "reasoning": false,
258          "prompt_caching": false,
259          "response_schema": true
260        },
261        "last_updated_at": "2026-04-24"
262      },
263      "qwen3-8b": {
264        "mode": "chat",
265        "context_window": {
266          "max_input": 32768,
267          "max_output": 8192,
268          "max_tokens": 8192
269        },
270        "pricing": {
271          "input_per_million_tokens": 0.04,
272          "output_per_million_tokens": 0.14
273        },
274        "capabilities": {
275          "function_calling": true,
276          "vision": false,
277          "reasoning": false,
278          "prompt_caching": false,
279          "response_schema": true
280        },
281        "last_updated_at": "2026-04-24"
282      },
283      "qwen3-embedding-8b": {
284        "mode": "embedding",
285        "context_window": {
286          "max_input": 40960,
287          "max_tokens": 40960
288        },
289        "pricing": {
290          "input_per_million_tokens": 0.02,
291          "output_per_million_tokens": 0
292        },
293        "capabilities": {
294          "function_calling": false,
295          "vision": false,
296          "reasoning": false,
297          "prompt_caching": false,
298          "response_schema": false
299        },
300        "last_updated_at": "2026-04-24"
301      },
302      "qwen3-vl-8b": {
303        "mode": "chat",
304        "context_window": {
305          "max_input": 32768,
306          "max_output": 8192,
307          "max_tokens": 8192
308        },
309        "pricing": {
310          "input_per_million_tokens": 0.15,
311          "output_per_million_tokens": 0.55
312        },
313        "capabilities": {
314          "function_calling": true,
315          "vision": true,
316          "reasoning": false,
317          "prompt_caching": false,
318          "response_schema": true
319        },
320        "last_updated_at": "2026-04-24"
321      }
322    }
323  }