Cradicle Explorer

/ tests / agent / test_models_dev.py
test_models_dev.py
  1  """Tests for agent.models_dev — models.dev registry integration."""
  2  import json
  3  from unittest.mock import patch, MagicMock
  4  
  5  import pytest
  6  from agent.models_dev import (
  7      PROVIDER_TO_MODELS_DEV,
  8      _extract_context,
  9      fetch_models_dev,
 10      get_model_capabilities,
 11      lookup_models_dev_context,
 12  )
 13  
 14  
 15  SAMPLE_REGISTRY = {
 16      "anthropic": {
 17          "id": "anthropic",
 18          "name": "Anthropic",
 19          "models": {
 20              "claude-opus-4-6": {
 21                  "id": "claude-opus-4-6",
 22                  "limit": {"context": 1000000, "output": 128000},
 23              },
 24              "claude-sonnet-4-6": {
 25                  "id": "claude-sonnet-4-6",
 26                  "limit": {"context": 1000000, "output": 64000},
 27              },
 28              "claude-sonnet-4-0": {
 29                  "id": "claude-sonnet-4-0",
 30                  "limit": {"context": 200000, "output": 64000},
 31              },
 32          },
 33      },
 34      "github-copilot": {
 35          "id": "github-copilot",
 36          "name": "GitHub Copilot",
 37          "models": {
 38              "claude-opus-4.6": {
 39                  "id": "claude-opus-4.6",
 40                  "limit": {"context": 128000, "output": 32000},
 41              },
 42          },
 43      },
 44      "kilo": {
 45          "id": "kilo",
 46          "name": "Kilo Gateway",
 47          "models": {
 48              "anthropic/claude-sonnet-4.6": {
 49                  "id": "anthropic/claude-sonnet-4.6",
 50                  "limit": {"context": 1000000, "output": 128000},
 51              },
 52          },
 53      },
 54      "deepseek": {
 55          "id": "deepseek",
 56          "name": "DeepSeek",
 57          "models": {
 58              "deepseek-chat": {
 59                  "id": "deepseek-chat",
 60                  "limit": {"context": 128000, "output": 8192},
 61              },
 62          },
 63      },
 64      "audio-only": {
 65          "id": "audio-only",
 66          "models": {
 67              "tts-model": {
 68                  "id": "tts-model",
 69                  "limit": {"context": 0, "output": 0},
 70              },
 71          },
 72      },
 73  }
 74  
 75  
 76  class TestProviderMapping:
 77      def test_all_mapped_providers_are_strings(self):
 78          for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
 79              assert isinstance(hermes_id, str)
 80              assert isinstance(mdev_id, str)
 81  
 82      def test_known_providers_mapped(self):
 83          assert PROVIDER_TO_MODELS_DEV["anthropic"] == "anthropic"
 84          assert PROVIDER_TO_MODELS_DEV["copilot"] == "github-copilot"
 85          assert PROVIDER_TO_MODELS_DEV["stepfun"] == "stepfun"
 86          assert PROVIDER_TO_MODELS_DEV["kilocode"] == "kilo"
 87          assert PROVIDER_TO_MODELS_DEV["ai-gateway"] == "vercel"
 88  
 89      def test_unmapped_provider_not_in_dict(self):
 90          assert "nous" not in PROVIDER_TO_MODELS_DEV
 91  
 92      def test_openai_codex_mapped_to_openai(self):
 93          assert PROVIDER_TO_MODELS_DEV["openai"] == "openai"
 94          assert PROVIDER_TO_MODELS_DEV["openai-codex"] == "openai"
 95  
 96  
 97  class TestExtractContext:
 98      def test_valid_entry(self):
 99          assert _extract_context({"limit": {"context": 128000}}) == 128000
100  
101      def test_zero_context_returns_none(self):
102          assert _extract_context({"limit": {"context": 0}}) is None
103  
104      def test_missing_limit_returns_none(self):
105          assert _extract_context({"id": "test"}) is None
106  
107      def test_missing_context_returns_none(self):
108          assert _extract_context({"limit": {"output": 8192}}) is None
109  
110      def test_non_dict_returns_none(self):
111          assert _extract_context("not a dict") is None
112  
113      def test_float_context_coerced_to_int(self):
114          assert _extract_context({"limit": {"context": 131072.0}}) == 131072
115  
116  
@patch("agent.models_dev.fetch_models_dev")
class TestLookupModelsDevContext:
    """Context-window lookups against a mocked registry.

    The class-level ``patch`` replaces ``agent.models_dev.fetch_models_dev``
    for every ``test_*`` method and passes the mock in as ``mock_fetch``.
    """

    def test_exact_match(self, mock_fetch):
        mock_fetch.return_value = SAMPLE_REGISTRY
        context = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert context == 1000000

    def test_case_insensitive_match(self, mock_fetch):
        mock_fetch.return_value = SAMPLE_REGISTRY
        context = lookup_models_dev_context("anthropic", "Claude-Opus-4-6")
        assert context == 1000000

    def test_provider_not_mapped(self, mock_fetch):
        mock_fetch.return_value = SAMPLE_REGISTRY
        context = lookup_models_dev_context("nous", "some-model")
        assert context is None

    def test_model_not_found(self, mock_fetch):
        mock_fetch.return_value = SAMPLE_REGISTRY
        context = lookup_models_dev_context("anthropic", "nonexistent-model")
        assert context is None

    def test_provider_aware_context(self, mock_fetch):
        """Same model, different context per provider."""
        mock_fetch.return_value = SAMPLE_REGISTRY
        # Anthropic direct: 1M
        direct = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert direct == 1000000
        # GitHub Copilot: only 128K for same model
        via_copilot = lookup_models_dev_context("copilot", "claude-opus-4.6")
        assert via_copilot == 128000

    def test_zero_context_filtered(self, mock_fetch):
        mock_fetch.return_value = SAMPLE_REGISTRY
        # audio-only is not a mapped provider, but test the filtering directly
        tts_entry = SAMPLE_REGISTRY["audio-only"]["models"]["tts-model"]
        assert _extract_context(tts_entry) is None

    def test_empty_registry(self, mock_fetch):
        mock_fetch.return_value = {}
        context = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert context is None
158  
159  
class TestFetchModelsDev:
    """Tests for fetch_models_dev: network fetch, stale fallback, memory cache.

    Each test overrides the module-level ``_models_dev_cache`` and
    ``_models_dev_cache_time`` globals through ``patch.multiple`` rather than
    by plain assignment, so the previous values are restored when the
    ``with`` block exits and no fixture data leaks into other tests that
    call ``fetch_models_dev``.
    """

    @patch("agent.models_dev.requests.get")
    def test_fetch_success(self, mock_get):
        """A successful HTTP response is returned as the registry."""
        import agent.models_dev as md

        mock_resp = MagicMock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = SAMPLE_REGISTRY
        mock_resp.raise_for_status = MagicMock()
        mock_get.return_value = mock_resp

        # Start from an empty, expired in-memory cache and keep the disk
        # cache writer from touching the filesystem.
        with patch.multiple(md, _models_dev_cache={}, _models_dev_cache_time=0), \
                patch.object(md, "_save_disk_cache"):
            result = fetch_models_dev(force_refresh=True)

        assert "anthropic" in result
        assert len(result) == len(SAMPLE_REGISTRY)

    @patch("agent.models_dev.requests.get")
    def test_fetch_failure_returns_stale_cache(self, mock_get):
        """When the request raises, previously cached data is still returned."""
        import agent.models_dev as md

        mock_get.side_effect = Exception("network error")

        # Cache holds data but its timestamp of 0 marks it as expired.
        with patch.multiple(
            md, _models_dev_cache=SAMPLE_REGISTRY, _models_dev_cache_time=0
        ), patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY):
            result = fetch_models_dev(force_refresh=True)

        assert "anthropic" in result

    @patch("agent.models_dev.requests.get")
    def test_in_memory_cache_used(self, mock_get):
        """A fresh in-memory cache is served without any HTTP request."""
        import agent.models_dev as md
        import time

        # A timestamp of "now" makes the cache fresh.
        with patch.multiple(
            md,
            _models_dev_cache=SAMPLE_REGISTRY,
            _models_dev_cache_time=time.time(),
        ):
            result = fetch_models_dev()

        mock_get.assert_not_called()
        assert result == SAMPLE_REGISTRY
203  
204  
205  # ---------------------------------------------------------------------------
206  # get_model_capabilities — vision via modalities.input
207  # ---------------------------------------------------------------------------
208  
209  
# Model entries used by the capability tests, named for the trait they carry.

# "image" listed in modalities.input while the attachment flag is False.
_CAPS_GEMMA_IMAGE_MODALITY = {
    "id": "gemma-4-31b-it",
    "attachment": False,
    "tool_call": True,
    "modalities": {"input": ["text", "image"]},
    "limit": {"context": 128000, "output": 8192},
}

# Neither an attachment flag nor a modalities field.
_CAPS_GEMMA_TEXT_ONLY = {
    "id": "gemma-3-1b",
    "tool_call": True,
    "limit": {"context": 32000, "output": 8192},
}

# Attachment flag set, no modalities field.
_CAPS_CLAUDE_ATTACHMENT = {
    "id": "claude-sonnet-4",
    "attachment": True,
    "tool_call": True,
    "limit": {"context": 200000, "output": 64000},
}

CAPS_REGISTRY = {
    "google": {
        "id": "google",
        "models": {
            _CAPS_GEMMA_IMAGE_MODALITY["id"]: _CAPS_GEMMA_IMAGE_MODALITY,
            _CAPS_GEMMA_TEXT_ONLY["id"]: _CAPS_GEMMA_TEXT_ONLY,
        },
    },
    "anthropic": {
        "id": "anthropic",
        "models": {
            _CAPS_CLAUDE_ATTACHMENT["id"]: _CAPS_CLAUDE_ATTACHMENT,
        },
    },
}
240  
241  
class TestGetModelCapabilities:
    """Vision detection and not-found handling in get_model_capabilities."""

    @staticmethod
    def _capabilities_from(registry, provider, model):
        """Call get_model_capabilities with fetch_models_dev returning *registry*."""
        with patch("agent.models_dev.fetch_models_dev", return_value=registry):
            return get_model_capabilities(provider, model)

    def test_vision_from_attachment_flag(self):
        """attachment=True is enough to report supports_vision=True."""
        result = self._capabilities_from(
            CAPS_REGISTRY, "anthropic", "claude-sonnet-4"
        )
        assert result is not None
        assert result.supports_vision is True

    def test_vision_from_modalities_input_image(self):
        """'image' in modalities.input reports supports_vision=True even
        though the entry has attachment=False."""
        result = self._capabilities_from(
            CAPS_REGISTRY, "google", "gemma-4-31b-it"
        )
        assert result is not None
        assert result.supports_vision is True

    def test_no_vision_without_attachment_or_modalities(self):
        """With neither attachment nor an image modality, vision is False."""
        result = self._capabilities_from(CAPS_REGISTRY, "google", "gemma-3-1b")
        assert result is not None
        assert result.supports_vision is False

    def test_modalities_non_dict_handled(self):
        """A modalities field that is not a dict must not crash the lookup."""
        weird_entry = {
            "id": "weird-model",
            "modalities": "text",  # not a dict
            "limit": {"context": 200000, "output": 8192},
        }
        registry = {
            "google": {"id": "google", "models": {"weird-model": weird_entry}},
        }
        # NOTE(review): looked up as "gemini" while the registry key is
        # "google" -- presumably resolved via the provider mapping; confirm.
        result = self._capabilities_from(registry, "gemini", "weird-model")
        assert result is not None
        assert result.supports_vision is False

    def test_model_not_found_returns_none(self):
        """An unknown model id yields None."""
        result = self._capabilities_from(
            CAPS_REGISTRY, "anthropic", "nonexistent-model"
        )
        assert result is None