test_model_metadata.py
1 """Tests for agent/model_metadata.py — token estimation, context lengths, 2 probing, caching, and error parsing. 3 4 Coverage levels: 5 Token estimation — concrete value assertions, edge cases 6 Context length lookup — resolution order, fuzzy match, cache priority 7 API metadata fetch — caching, TTL, canonical slugs, stale fallback 8 Probe tiers — descending, boundaries, extreme inputs 9 Error parsing — OpenAI, Ollama, Anthropic, edge cases 10 Persistent cache — save/load, corruption, update, provider isolation 11 """ 12 13 import os 14 import time 15 import tempfile 16 17 import pytest 18 import yaml 19 from pathlib import Path 20 from unittest.mock import patch, MagicMock 21 22 from agent.model_metadata import ( 23 CONTEXT_PROBE_TIERS, 24 DEFAULT_CONTEXT_LENGTHS, 25 _strip_provider_prefix, 26 estimate_tokens_rough, 27 estimate_messages_tokens_rough, 28 get_model_context_length, 29 get_next_probe_tier, 30 get_cached_context_length, 31 parse_context_limit_from_error, 32 save_context_length, 33 fetch_model_metadata, 34 _MODEL_CACHE_TTL, 35 ) 36 37 38 # ========================================================================= 39 # Token estimation 40 # ========================================================================= 41 42 class TestEstimateTokensRough: 43 def test_empty_string(self): 44 assert estimate_tokens_rough("") == 0 45 46 def test_none_returns_zero(self): 47 assert estimate_tokens_rough(None) == 0 48 49 def test_known_length(self): 50 assert estimate_tokens_rough("a" * 400) == 100 51 52 def test_short_text(self): 53 # "hello" = 5 chars → ceil(5/4) = 2 54 assert estimate_tokens_rough("hello") == 2 55 56 def test_proportional(self): 57 short = estimate_tokens_rough("hello world") 58 long = estimate_tokens_rough("hello world " * 100) 59 assert long > short 60 61 def test_unicode_multibyte(self): 62 """Unicode chars are still 1 Python char each — 4 chars/token holds.""" 63 text = "你好世界" # 4 CJK characters 64 assert estimate_tokens_rough(text) == 1 65 66 67 class TestEstimateMessagesTokensRough: 68 def test_empty_list(self): 69 assert estimate_messages_tokens_rough([]) == 0 70 71 def test_single_message_concrete_value(self): 72 """Verify against known str(msg) length (ceiling division).""" 73 msg = {"role": "user", "content": "a" * 400} 74 result = estimate_messages_tokens_rough([msg]) 75 n = len(str(msg)) 76 expected = (n + 3) // 4 77 assert result == expected 78 79 def test_multiple_messages_additive(self): 80 msgs = [ 81 {"role": "user", "content": "Hello"}, 82 {"role": "assistant", "content": "Hi there, how can I help?"}, 83 ] 84 result = estimate_messages_tokens_rough(msgs) 85 n = sum(len(str(m)) for m in msgs) 86 expected = (n + 3) // 4 87 assert result == expected 88 89 def test_tool_call_message(self): 90 """Tool call messages with no 'content' key still contribute tokens.""" 91 msg = {"role": "assistant", "content": None, 92 "tool_calls": [{"id": "1", "function": {"name": "terminal", "arguments": "{}"}}]} 93 result = estimate_messages_tokens_rough([msg]) 94 assert result > 0 95 assert result == (len(str(msg)) + 3) // 4 96 97 def test_message_with_list_content(self): 98 """Vision messages with multimodal content arrays.""" 99 msg = {"role": "user", "content": [ 100 {"type": "text", "text": "describe"}, 101 {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}} 102 ]} 103 result = estimate_messages_tokens_rough([msg]) 104 assert result == (len(str(msg)) + 3) // 4 105 106 107 # ========================================================================= 108 # Default context lengths 109 # ========================================================================= 110 111 class TestDefaultContextLengths: 112 def test_claude_models_context_lengths(self): 113 for key, value in DEFAULT_CONTEXT_LENGTHS.items(): 114 if "claude" not in key: 115 continue 116 # Claude 4.6+ models (4.6 and 4.7) have 1M context at standard 117 # API pricing (no long-context premium). Older Claude 4.x and 118 # 3.x models cap at 200k. 119 if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7")): 120 assert value == 1000000, f"{key} should be 1000000" 121 else: 122 assert value == 200000, f"{key} should be 200000" 123 124 def test_gpt4_models_128k_or_1m(self): 125 # gpt-4.1 and gpt-4.1-mini have 1M context; other gpt-4* have 128k 126 for key, value in DEFAULT_CONTEXT_LENGTHS.items(): 127 if "gpt-4" in key and "gpt-4.1" not in key: 128 assert value == 128000, f"{key} should be 128000" 129 130 def test_gpt41_models_1m(self): 131 for key, value in DEFAULT_CONTEXT_LENGTHS.items(): 132 if "gpt-4.1" in key: 133 assert value == 1047576, f"{key} should be 1047576" 134 135 def test_gemini_models_1m(self): 136 for key, value in DEFAULT_CONTEXT_LENGTHS.items(): 137 if "gemini" in key: 138 assert value == 1048576, f"{key} should be 1048576" 139 140 def test_grok_models_context_lengths(self): 141 # xAI /v1/models does not return context_length metadata, so 142 # DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly. 143 # Values sourced from models.dev (2026-04). 144 expected = { 145 "grok-4.20": 2000000, 146 "grok-4-1-fast": 2000000, 147 "grok-4-fast": 2000000, 148 "grok-4": 256000, 149 "grok-code-fast": 256000, 150 "grok-3": 131072, 151 "grok-2": 131072, 152 "grok-2-vision": 8192, 153 "grok": 131072, 154 } 155 for key, value in expected.items(): 156 assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS" 157 assert DEFAULT_CONTEXT_LENGTHS[key] == value, ( 158 f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}" 159 ) 160 161 def test_grok_substring_matching(self): 162 # Longest-first substring matching must resolve the real xAI model 163 # IDs to the correct fallback entries without 128k probe-down. 164 from agent.model_metadata import get_model_context_length 165 from unittest.mock import patch as mock_patch 166 167 # Fake the provider/API/cache layers so the lookup falls through 168 # to DEFAULT_CONTEXT_LENGTHS. 169 with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), mock_patch("agent.model_metadata.get_cached_context_length", return_value=None): 170 cases = [ 171 ("grok-4.20-0309-reasoning", 2000000), 172 ("grok-4.20-0309-non-reasoning", 2000000), 173 ("grok-4.20-multi-agent-0309", 2000000), 174 ("grok-4-1-fast-reasoning", 2000000), 175 ("grok-4-1-fast-non-reasoning", 2000000), 176 ("grok-4-fast-reasoning", 2000000), 177 ("grok-4-fast-non-reasoning", 2000000), 178 ("grok-4", 256000), 179 ("grok-4-0709", 256000), 180 ("grok-code-fast-1", 256000), 181 ("grok-3", 131072), 182 ("grok-3-mini", 131072), 183 ("grok-3-mini-fast", 131072), 184 ("grok-2", 131072), 185 ("grok-2-vision", 8192), 186 ("grok-2-vision-1212", 8192), 187 ("grok-beta", 131072), 188 ] 189 for model_id, expected_ctx in cases: 190 actual = get_model_context_length(model_id) 191 assert actual == expected_ctx, ( 192 f"{model_id}: expected {expected_ctx}, got {actual}" 193 ) 194 195 def test_deepseek_v4_models_1m_context(self): 196 from agent.model_metadata import get_model_context_length 197 from unittest.mock import patch as mock_patch 198 199 expected_keys = { 200 "deepseek-v4-pro": 1_000_000, 201 "deepseek-v4-flash": 1_000_000, 202 "deepseek-chat": 1_000_000, 203 "deepseek-reasoner": 1_000_000, 204 } 205 for key, value in expected_keys.items(): 206 assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing" 207 assert DEFAULT_CONTEXT_LENGTHS[key] == value, ( 208 f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}" 209 ) 210 211 # Longest-first substring matching must resolve both the bare V4 212 # ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter 213 # / Nous Portal) to 1M without probing down to the legacy 128K 214 # ``deepseek`` substring fallback. 215 with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ 216 mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ 217 mock_patch("agent.model_metadata.get_cached_context_length", return_value=None): 218 cases = [ 219 ("deepseek-v4-pro", 1_000_000), 220 ("deepseek-v4-flash", 1_000_000), 221 ("deepseek/deepseek-v4-pro", 1_000_000), 222 ("deepseek/deepseek-v4-flash", 1_000_000), 223 ("deepseek-chat", 1_000_000), 224 ("deepseek-reasoner", 1_000_000), 225 ] 226 for model_id, expected_ctx in cases: 227 actual = get_model_context_length(model_id) 228 assert actual == expected_ctx, ( 229 f"{model_id}: expected {expected_ctx}, got {actual}" 230 ) 231 232 def test_all_values_positive(self): 233 for key, value in DEFAULT_CONTEXT_LENGTHS.items(): 234 assert value > 0, f"{key} has non-positive context length" 235 236 def test_dict_is_not_empty(self): 237 assert len(DEFAULT_CONTEXT_LENGTHS) >= 10 238 239 240 # ========================================================================= 241 # Codex OAuth context-window resolution (provider="openai-codex") 242 # ========================================================================= 243 244 class TestCodexOAuthContextLength: 245 """ChatGPT Codex OAuth imposes lower context limits than the direct 246 OpenAI API for the same slugs. Verified Apr 2026 via live probe of 247 chatgpt.com/backend-api/codex/models: every model returns 272k, while 248 models.dev reports 1.05M for gpt-5.5/gpt-5.4 and 400k for the rest. 249 """ 250 251 def setup_method(self): 252 import agent.model_metadata as mm 253 mm._codex_oauth_context_cache = {} 254 mm._codex_oauth_context_cache_time = 0.0 255 256 def test_fallback_table_used_without_token(self): 257 """With no access token, the hardcoded Codex fallback table wins 258 over models.dev (which reports 1.05M for gpt-5.5 but Codex is 272k). 259 """ 260 from agent.model_metadata import get_model_context_length 261 262 with patch("agent.model_metadata.get_cached_context_length", return_value=None), \ 263 patch("agent.model_metadata.save_context_length"): 264 for model in ( 265 "gpt-5.5", 266 "gpt-5.4", 267 "gpt-5.4-mini", 268 "gpt-5.3-codex", 269 "gpt-5.2-codex", 270 "gpt-5.1-codex-max", 271 "gpt-5.1-codex-mini", 272 ): 273 ctx = get_model_context_length( 274 model=model, 275 base_url="https://chatgpt.com/backend-api/codex", 276 api_key="", 277 provider="openai-codex", 278 ) 279 assert ctx == 272_000, ( 280 f"Codex {model}: expected 272000 fallback, got {ctx} " 281 "(models.dev leakage?)" 282 ) 283 284 def test_live_probe_overrides_fallback(self): 285 """When a token is provided, the live /models probe is preferred 286 and its context_window drives the result.""" 287 from agent.model_metadata import get_model_context_length 288 289 fake_response = MagicMock() 290 fake_response.status_code = 200 291 fake_response.json.return_value = { 292 "models": [ 293 {"slug": "gpt-5.5", "context_window": 300_000}, 294 {"slug": "gpt-5.4", "context_window": 400_000}, 295 ] 296 } 297 298 with patch("agent.model_metadata.requests.get", return_value=fake_response), \ 299 patch("agent.model_metadata.get_cached_context_length", return_value=None), \ 300 patch("agent.model_metadata.save_context_length"): 301 ctx_55 = get_model_context_length( 302 model="gpt-5.5", 303 base_url="https://chatgpt.com/backend-api/codex", 304 api_key="fake-token", 305 provider="openai-codex", 306 ) 307 ctx_54 = get_model_context_length( 308 model="gpt-5.4", 309 base_url="https://chatgpt.com/backend-api/codex", 310 api_key="fake-token", 311 provider="openai-codex", 312 ) 313 assert ctx_55 == 300_000 314 assert ctx_54 == 400_000 315 316 def test_probe_failure_falls_back_to_hardcoded(self): 317 """If the probe fails (non-200 / network error), we still return 318 the hardcoded 272k rather than leaking through to models.dev 1.05M.""" 319 from agent.model_metadata import get_model_context_length 320 321 fake_response = MagicMock() 322 fake_response.status_code = 401 323 fake_response.json.return_value = {} 324 325 with patch("agent.model_metadata.requests.get", return_value=fake_response), \ 326 patch("agent.model_metadata.get_cached_context_length", return_value=None), \ 327 patch("agent.model_metadata.save_context_length"): 328 ctx = get_model_context_length( 329 model="gpt-5.5", 330 base_url="https://chatgpt.com/backend-api/codex", 331 api_key="expired-token", 332 provider="openai-codex", 333 ) 334 assert ctx == 272_000 335 336 def test_non_codex_providers_unaffected(self): 337 """Resolving gpt-5.5 on non-Codex providers must NOT use the Codex 338 272k override — OpenRouter / direct OpenAI API have different limits. 339 """ 340 from agent.model_metadata import get_model_context_length 341 342 # OpenRouter — should hit its own catalog path first; when mocked 343 # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M, 344 # matching the real direct-API value — Codex OAuth's 272k cap is 345 # provider-specific and must not leak here). 346 with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ 347 patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ 348 patch("agent.model_metadata.get_cached_context_length", return_value=None), \ 349 patch("agent.models_dev.lookup_models_dev_context", return_value=None): 350 ctx = get_model_context_length( 351 model="openai/gpt-5.5", 352 base_url="https://openrouter.ai/api/v1", 353 api_key="", 354 provider="openrouter", 355 ) 356 assert ctx == 1_050_000, ( 357 f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override " 358 "leaked outside openai-codex provider" 359 ) 360 361 def test_stale_codex_cache_over_400k_is_invalidated(self, tmp_path, monkeypatch): 362 """Pre-PR #14935 builds cached gpt-5.5 at 1.05M (from models.dev) 363 before the Codex-aware branch existed. Upgrading users keep that 364 stale entry on disk and the cache-first lookup returns it forever. 365 Codex OAuth caps at 272k for every slug, so any cached Codex 366 entry >= 400k must be dropped and re-resolved via the live probe. 367 """ 368 from agent import model_metadata as mm 369 370 # Isolate the cache file to tmp_path 371 cache_file = tmp_path / "context_length_cache.yaml" 372 monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) 373 374 base_url = "https://chatgpt.com/backend-api/codex/" 375 stale_key = f"gpt-5.5@{base_url}" 376 other_key = "other-model@https://api.openai.com/v1/" 377 import yaml as _yaml 378 cache_file.write_text(_yaml.dump({"context_lengths": { 379 stale_key: 1_050_000, # stale pre-fix value 380 other_key: 128_000, # unrelated, must survive 381 }})) 382 383 fake_response = MagicMock() 384 fake_response.status_code = 200 385 fake_response.json.return_value = { 386 "models": [{"slug": "gpt-5.5", "context_window": 272_000}] 387 } 388 389 with patch("agent.model_metadata.requests.get", return_value=fake_response), \ 390 patch("agent.model_metadata.save_context_length") as mock_save: 391 ctx = mm.get_model_context_length( 392 model="gpt-5.5", 393 base_url=base_url, 394 api_key="fake-token", 395 provider="openai-codex", 396 ) 397 398 assert ctx == 272_000, f"Stale entry should have been re-resolved to 272k, got {ctx}" 399 # Live save was called with the fresh value 400 mock_save.assert_called_with("gpt-5.5", base_url, 272_000) 401 # The stale entry was removed from disk; unrelated entries survived 402 remaining = _yaml.safe_load(cache_file.read_text()).get("context_lengths", {}) 403 assert stale_key not in remaining, "Stale entry was not invalidated from the cache file" 404 assert remaining.get(other_key) == 128_000, "Unrelated cache entries must not be touched" 405 406 def test_fresh_codex_cache_under_400k_is_respected(self, tmp_path, monkeypatch): 407 """Codex entries at the correct 272k must NOT be invalidated — 408 only stale pre-fix values (>= 400k) get dropped.""" 409 from agent import model_metadata as mm 410 411 cache_file = tmp_path / "context_length_cache.yaml" 412 monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) 413 414 base_url = "https://chatgpt.com/backend-api/codex/" 415 import yaml as _yaml 416 cache_file.write_text(_yaml.dump({"context_lengths": { 417 f"gpt-5.5@{base_url}": 272_000, 418 }})) 419 420 # If the invalidation incorrectly fired, this would be called; assert it isn't. 421 with patch("agent.model_metadata.requests.get") as mock_get: 422 ctx = mm.get_model_context_length( 423 model="gpt-5.5", 424 base_url=base_url, 425 api_key="fake-token", 426 provider="openai-codex", 427 ) 428 assert ctx == 272_000 429 mock_get.assert_not_called() 430 431 def test_stale_invalidation_scoped_to_codex_provider(self, tmp_path, monkeypatch): 432 """A cached 1M entry for a non-Codex provider (e.g. Anthropic opus on 433 OpenRouter, legitimately 1M) must NOT be invalidated by this guard.""" 434 from agent import model_metadata as mm 435 436 cache_file = tmp_path / "context_length_cache.yaml" 437 monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) 438 439 base_url = "https://openrouter.ai/api/v1" 440 import yaml as _yaml 441 cache_file.write_text(_yaml.dump({"context_lengths": { 442 f"anthropic/claude-opus-4.6@{base_url}": 1_000_000, 443 }})) 444 445 ctx = mm.get_model_context_length( 446 model="anthropic/claude-opus-4.6", 447 base_url=base_url, 448 api_key="fake", 449 provider="openrouter", 450 ) 451 assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected" 452 453 454 # ========================================================================= 455 # get_model_context_length — resolution order 456 # ========================================================================= 457 458 class TestGetModelContextLength: 459 @patch("agent.model_metadata.fetch_model_metadata") 460 def test_known_model_from_api(self, mock_fetch): 461 mock_fetch.return_value = { 462 "test/model": {"context_length": 32000} 463 } 464 assert get_model_context_length("test/model") == 32000 465 466 @patch("agent.model_metadata.fetch_model_metadata") 467 def test_fallback_to_defaults(self, mock_fetch): 468 mock_fetch.return_value = {} 469 assert get_model_context_length("anthropic/claude-sonnet-4") == 200000 470 471 @patch("agent.model_metadata.fetch_model_metadata") 472 def test_unknown_model_returns_first_probe_tier(self, mock_fetch): 473 mock_fetch.return_value = {} 474 assert get_model_context_length("unknown/never-heard-of-this") == CONTEXT_PROBE_TIERS[0] 475 476 @patch("agent.model_metadata.fetch_model_metadata") 477 def test_partial_match_in_defaults(self, mock_fetch): 478 mock_fetch.return_value = {} 479 assert get_model_context_length("openai/gpt-4o") == 128000 480 481 @patch("agent.model_metadata.fetch_model_metadata") 482 def test_qwen3_coder_plus_context_length(self, mock_fetch): 483 """qwen3-coder-plus has a 1M context window, not the generic 128K Qwen default.""" 484 mock_fetch.return_value = {} 485 assert get_model_context_length("qwen3-coder-plus") == 1000000 486 487 @patch("agent.model_metadata.fetch_model_metadata") 488 def test_qwen3_coder_context_length(self, mock_fetch): 489 """qwen3-coder has a 256K context window, not the generic 128K Qwen default.""" 490 mock_fetch.return_value = {} 491 assert get_model_context_length("qwen3-coder") == 262144 492 493 @patch("agent.model_metadata.fetch_model_metadata") 494 def test_qwen_generic_context_length(self, mock_fetch): 495 """Generic qwen models still get the 128K default.""" 496 mock_fetch.return_value = {} 497 assert get_model_context_length("qwen3-plus") == 131072 498 499 @patch("agent.model_metadata.fetch_model_metadata") 500 def test_api_missing_context_length_key(self, mock_fetch): 501 """Model in API but without context_length → defaults to the top 502 probe tier (currently 256K).""" 503 mock_fetch.return_value = {"test/model": {"name": "Test"}} 504 assert get_model_context_length("test/model") == CONTEXT_PROBE_TIERS[0] 505 506 @patch("agent.model_metadata.fetch_model_metadata") 507 def test_cache_takes_priority_over_api(self, mock_fetch, tmp_path): 508 """Persistent cache should be checked BEFORE API metadata.""" 509 mock_fetch.return_value = {"my/model": {"context_length": 999999}} 510 cache_file = tmp_path / "cache.yaml" 511 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 512 save_context_length("my/model", "http://local", 32768) 513 result = get_model_context_length("my/model", base_url="http://local") 514 assert result == 32768 # cache wins over API's 999999 515 516 @patch("agent.model_metadata.fetch_model_metadata") 517 def test_no_base_url_skips_cache(self, mock_fetch, tmp_path): 518 """Without base_url, cache lookup is skipped.""" 519 mock_fetch.return_value = {} 520 cache_file = tmp_path / "cache.yaml" 521 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 522 save_context_length("custom/model", "http://local", 32768) 523 # No base_url → cache skipped → falls to probe tier 524 result = get_model_context_length("custom/model") 525 assert result == CONTEXT_PROBE_TIERS[0] 526 527 @patch("agent.model_metadata.fetch_model_metadata") 528 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 529 def test_custom_endpoint_metadata_beats_fuzzy_default(self, mock_endpoint_fetch, mock_fetch): 530 mock_fetch.return_value = {} 531 mock_endpoint_fetch.return_value = { 532 "zai-org/GLM-5-TEE": {"context_length": 65536} 533 } 534 535 result = get_model_context_length( 536 "zai-org/GLM-5-TEE", 537 base_url="https://llm.chutes.ai/v1", 538 api_key="test-key", 539 ) 540 541 assert result == 65536 542 543 @patch("agent.model_metadata.fetch_model_metadata") 544 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 545 def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch): 546 mock_fetch.return_value = {} 547 mock_endpoint_fetch.return_value = {} 548 549 result = get_model_context_length( 550 "zai-org/GLM-5-TEE", 551 base_url="https://llm.chutes.ai/v1", 552 api_key="test-key", 553 ) 554 555 assert result == CONTEXT_PROBE_TIERS[0] 556 557 @patch("agent.model_metadata.fetch_model_metadata") 558 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 559 def test_custom_endpoint_single_model_fallback(self, mock_endpoint_fetch, mock_fetch): 560 """Single-model servers: use the only model even if name doesn't match.""" 561 mock_fetch.return_value = {} 562 mock_endpoint_fetch.return_value = { 563 "Qwen3.5-9B-Q4_K_M.gguf": {"context_length": 131072} 564 } 565 566 result = get_model_context_length( 567 "qwen3.5:9b", 568 base_url="http://myserver.example.com:8080/v1", 569 api_key="test-key", 570 ) 571 572 assert result == 131072 573 574 @patch("agent.model_metadata.fetch_model_metadata") 575 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 576 def test_custom_endpoint_fuzzy_substring_match(self, mock_endpoint_fetch, mock_fetch): 577 """Fuzzy match: configured model name is substring of endpoint model.""" 578 mock_fetch.return_value = {} 579 mock_endpoint_fetch.return_value = { 580 "org/llama-3.3-70b-instruct-fp8": {"context_length": 131072}, 581 "org/qwen-2.5-72b": {"context_length": 32768}, 582 } 583 584 result = get_model_context_length( 585 "llama-3.3-70b-instruct", 586 base_url="http://myserver.example.com:8080/v1", 587 api_key="test-key", 588 ) 589 590 assert result == 131072 591 592 @patch("agent.model_metadata.fetch_model_metadata") 593 def test_config_context_length_overrides_all(self, mock_fetch): 594 """Explicit config_context_length takes priority over everything.""" 595 mock_fetch.return_value = { 596 "test/model": {"context_length": 200000} 597 } 598 599 result = get_model_context_length( 600 "test/model", 601 config_context_length=65536, 602 ) 603 604 assert result == 65536 605 606 @patch("agent.model_metadata.fetch_model_metadata") 607 def test_config_context_length_zero_is_ignored(self, mock_fetch): 608 """config_context_length=0 should be treated as unset.""" 609 mock_fetch.return_value = {} 610 611 result = get_model_context_length( 612 "anthropic/claude-sonnet-4", 613 config_context_length=0, 614 ) 615 616 assert result == 200000 617 618 @patch("agent.model_metadata.fetch_model_metadata") 619 def test_config_context_length_none_is_ignored(self, mock_fetch): 620 """config_context_length=None should be treated as unset.""" 621 mock_fetch.return_value = {} 622 623 result = get_model_context_length( 624 "anthropic/claude-sonnet-4", 625 config_context_length=None, 626 ) 627 628 assert result == 200000 629 630 631 # ========================================================================= 632 # Bedrock context resolution — must run BEFORE custom-endpoint probe 633 # ========================================================================= 634 635 class TestBedrockContextResolution: 636 """Regression tests for Bedrock context-length resolution order. 637 638 Bug: because ``bedrock-runtime.<region>.amazonaws.com`` is not listed in 639 ``_URL_TO_PROVIDER``, ``_is_known_provider_base_url`` returned False and 640 the custom-endpoint probe at step 2 ran first — fetching ``/models`` from 641 Bedrock (which it doesn't serve), returning the 128K default-fallback 642 before execution ever reached the Bedrock branch. 643 644 Fix: promote the Bedrock branch ahead of the custom-endpoint probe. 645 """ 646 647 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 648 def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch): 649 """provider='bedrock' resolves via static table, bypasses /models probe.""" 650 ctx = get_model_context_length( 651 "anthropic.claude-opus-4-v1:0", 652 provider="bedrock", 653 base_url="https://bedrock-runtime.us-east-1.amazonaws.com", 654 ) 655 # Must return the static Bedrock table value (200K for Claude), 656 # NOT DEFAULT_FALLBACK_CONTEXT (128K). 657 assert ctx == 200000 658 mock_fetch.assert_not_called() 659 660 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 661 def test_bedrock_url_without_provider_hint(self, mock_fetch): 662 """bedrock-runtime host infers Bedrock even when provider is omitted.""" 663 ctx = get_model_context_length( 664 "anthropic.claude-sonnet-4-v1:0", 665 base_url="https://bedrock-runtime.us-west-2.amazonaws.com", 666 ) 667 assert ctx == 200000 668 mock_fetch.assert_not_called() 669 670 @patch("agent.model_metadata.fetch_endpoint_model_metadata") 671 def test_non_bedrock_url_still_probes(self, mock_fetch): 672 """Non-Bedrock hosts still reach the custom-endpoint probe.""" 673 mock_fetch.return_value = {"some-model": {"context_length": 50000}} 674 ctx = get_model_context_length( 675 "some-model", 676 base_url="https://api.example.com/v1", 677 ) 678 assert ctx == 50000 679 assert mock_fetch.called 680 681 682 # ========================================================================= 683 # _strip_provider_prefix — Ollama model:tag vs provider:model 684 # ========================================================================= 685 686 class TestStripProviderPrefix: 687 def test_known_provider_prefix_is_stripped(self): 688 assert _strip_provider_prefix("local:my-model") == "my-model" 689 assert _strip_provider_prefix("openrouter:anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4" 690 assert _strip_provider_prefix("anthropic:claude-sonnet-4") == "claude-sonnet-4" 691 assert _strip_provider_prefix("stepfun:step-3.5-flash") == "step-3.5-flash" 692 693 def test_ollama_model_tag_preserved(self): 694 """Ollama model:tag format must NOT be stripped.""" 695 assert _strip_provider_prefix("qwen3.5:27b") == "qwen3.5:27b" 696 assert _strip_provider_prefix("llama3.3:70b") == "llama3.3:70b" 697 assert _strip_provider_prefix("gemma2:9b") == "gemma2:9b" 698 assert _strip_provider_prefix("codellama:13b-instruct-q4_0") == "codellama:13b-instruct-q4_0" 699 700 def test_http_urls_preserved(self): 701 assert _strip_provider_prefix("http://example.com") == "http://example.com" 702 assert _strip_provider_prefix("https://example.com") == "https://example.com" 703 704 def test_no_colon_returns_unchanged(self): 705 assert _strip_provider_prefix("gpt-4o") == "gpt-4o" 706 assert _strip_provider_prefix("anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4" 707 708 @patch("agent.model_metadata.fetch_model_metadata") 709 def test_ollama_model_tag_not_mangled_in_context_lookup(self, mock_fetch): 710 """Ensure 'qwen3.5:27b' is NOT reduced to '27b' during context length lookup. 711 712 We mock a custom endpoint that knows 'qwen3.5:27b' — the full name 713 must reach the endpoint metadata lookup intact. 714 """ 715 mock_fetch.return_value = {} 716 with patch("agent.model_metadata.fetch_endpoint_model_metadata") as mock_ep, \ 717 patch("agent.model_metadata._is_custom_endpoint", return_value=True): 718 mock_ep.return_value = {"qwen3.5:27b": {"context_length": 32768}} 719 result = get_model_context_length( 720 "qwen3.5:27b", 721 base_url="http://localhost:11434/v1", 722 ) 723 assert result == 32768 724 725 726 # ========================================================================= 727 # fetch_model_metadata — caching, TTL, slugs, failures 728 # ========================================================================= 729 730 class TestFetchModelMetadata: 731 def _reset_cache(self): 732 import agent.model_metadata as mm 733 mm._model_metadata_cache = {} 734 mm._model_metadata_cache_time = 0 735 736 @patch("agent.model_metadata.requests.get") 737 def test_caches_result(self, mock_get): 738 self._reset_cache() 739 mock_response = MagicMock() 740 mock_response.json.return_value = { 741 "data": [{"id": "test/model", "context_length": 99999, "name": "Test"}] 742 } 743 mock_response.raise_for_status = MagicMock() 744 mock_get.return_value = mock_response 745 746 result1 = fetch_model_metadata(force_refresh=True) 747 assert "test/model" in result1 748 assert mock_get.call_count == 1 749 750 result2 = fetch_model_metadata() 751 assert "test/model" in result2 752 assert mock_get.call_count == 1 # cached 753 754 @patch("agent.model_metadata.requests.get") 755 def test_api_failure_returns_empty_on_cold_cache(self, mock_get): 756 self._reset_cache() 757 mock_get.side_effect = Exception("Network error") 758 result = fetch_model_metadata(force_refresh=True) 759 assert result == {} 760 761 @patch("agent.model_metadata.requests.get") 762 def test_api_failure_returns_stale_cache(self, mock_get): 763 """On API failure with existing cache, stale data is returned.""" 764 import agent.model_metadata as mm 765 mm._model_metadata_cache = {"old/model": {"context_length": 50000}} 766 mm._model_metadata_cache_time = 0 # expired 767 768 mock_get.side_effect = Exception("Network error") 769 result = fetch_model_metadata(force_refresh=True) 770 assert "old/model" in result 771 assert result["old/model"]["context_length"] == 50000 772 773 @patch("agent.model_metadata.requests.get") 774 def test_canonical_slug_aliasing(self, mock_get): 775 """Models with canonical_slug get indexed under both IDs.""" 776 self._reset_cache() 777 mock_response = MagicMock() 778 mock_response.json.return_value = { 779 "data": [{ 780 "id": "anthropic/claude-3.5-sonnet:beta", 781 "canonical_slug": "anthropic/claude-3.5-sonnet", 782 "context_length": 200000, 783 "name": "Claude 3.5 Sonnet" 784 }] 785 } 786 mock_response.raise_for_status = MagicMock() 787 mock_get.return_value = mock_response 788 789 result = fetch_model_metadata(force_refresh=True) 790 # Both the original ID and canonical slug should work 791 assert "anthropic/claude-3.5-sonnet:beta" in result 792 assert "anthropic/claude-3.5-sonnet" in result 793 assert result["anthropic/claude-3.5-sonnet"]["context_length"] == 200000 794 795 @patch("agent.model_metadata.requests.get") 796 def test_provider_prefixed_models_get_bare_aliases(self, mock_get): 797 self._reset_cache() 798 mock_response = MagicMock() 799 mock_response.json.return_value = { 800 "data": [{ 801 "id": "provider/test-model", 802 "context_length": 123456, 803 "name": "Provider: Test Model", 804 }] 805 } 806 mock_response.raise_for_status = MagicMock() 807 mock_get.return_value = mock_response 808 809 result = fetch_model_metadata(force_refresh=True) 810 811 assert result["provider/test-model"]["context_length"] == 123456 812 assert result["test-model"]["context_length"] == 123456 813 814 @patch("agent.model_metadata.requests.get") 815 def test_ttl_expiry_triggers_refetch(self, mock_get): 816 """Cache expires after _MODEL_CACHE_TTL seconds.""" 817 import agent.model_metadata as mm 818 self._reset_cache() 819 820 mock_response = MagicMock() 821 mock_response.json.return_value = { 822 "data": [{"id": "m1", "context_length": 1000, "name": "M1"}] 823 } 824 mock_response.raise_for_status = MagicMock() 825 mock_get.return_value = mock_response 826 827 fetch_model_metadata(force_refresh=True) 828 assert mock_get.call_count == 1 829 830 # Simulate TTL expiry 831 mm._model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL - 1 832 fetch_model_metadata() 833 assert mock_get.call_count == 2 # refetched 834 835 @patch("agent.model_metadata.requests.get") 836 def test_malformed_json_no_data_key(self, mock_get): 837 """API returns JSON without 'data' key — empty cache, no crash.""" 838 self._reset_cache() 839 mock_response = MagicMock() 840 mock_response.json.return_value = {"error": "something"} 841 mock_response.raise_for_status = MagicMock() 842 mock_get.return_value = mock_response 843 844 result = fetch_model_metadata(force_refresh=True) 845 assert result == {} 846 847 848 # ========================================================================= 849 # Context probe tiers 850 # ========================================================================= 851 852 class TestContextProbeTiers: 853 def test_tiers_descending(self): 854 for i in range(len(CONTEXT_PROBE_TIERS) - 1): 855 assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1] 856 857 def test_first_tier_is_256k(self): 858 assert CONTEXT_PROBE_TIERS[0] == 256_000 859 860 def test_last_tier_is_8k(self): 861 assert CONTEXT_PROBE_TIERS[-1] == 8_000 862 863 864 class TestGetNextProbeTier: 865 def test_from_256k(self): 866 assert get_next_probe_tier(256_000) == 128_000 867 868 def test_from_128k(self): 869 assert get_next_probe_tier(128_000) == 64_000 870 871 def test_from_64k(self): 872 assert get_next_probe_tier(64_000) == 32_000 873 874 def test_from_32k(self): 875 assert get_next_probe_tier(32_000) == 16_000 876 877 def test_from_8k_returns_none(self): 878 assert get_next_probe_tier(8_000) is None 879 880 def test_from_below_min_returns_none(self): 881 assert get_next_probe_tier(4_000) is None 882 883 def test_from_arbitrary_value(self): 884 assert get_next_probe_tier(100_000) == 64_000 885 886 def test_above_max_tier(self): 887 """Value above 256K should return 256K.""" 888 assert get_next_probe_tier(500_000) == 256_000 889 890 def test_zero_returns_none(self): 891 assert get_next_probe_tier(0) is None 892 893 894 # ========================================================================= 895 # Error message parsing 896 # ========================================================================= 897 898 class TestParseContextLimitFromError: 899 def test_openai_format(self): 900 msg = "This model's maximum context length is 32768 tokens. However, your messages resulted in 45000 tokens." 901 assert parse_context_limit_from_error(msg) == 32768 902 903 def test_context_length_exceeded(self): 904 msg = "context_length_exceeded: maximum context length is 131072" 905 assert parse_context_limit_from_error(msg) == 131072 906 907 def test_context_size_exceeded(self): 908 msg = "Maximum context size 65536 exceeded" 909 assert parse_context_limit_from_error(msg) == 65536 910 911 def test_no_limit_in_message(self): 912 assert parse_context_limit_from_error("Something went wrong with the API") is None 913 914 def test_unreasonable_small_number_rejected(self): 915 assert parse_context_limit_from_error("context length is 42 tokens") is None 916 917 def test_ollama_format(self): 918 msg = "Context size has been exceeded. Maximum context size is 32768" 919 assert parse_context_limit_from_error(msg) == 32768 920 921 def test_anthropic_format(self): 922 msg = "prompt is too long: 250000 tokens > 200000 maximum" 923 # Should extract 200000 (the limit), not 250000 (the input size) 924 assert parse_context_limit_from_error(msg) == 200000 925 926 def test_lmstudio_format(self): 927 msg = "Error: context window of 4096 tokens exceeded" 928 assert parse_context_limit_from_error(msg) == 4096 929 930 def test_minimax_delta_only_message_returns_none(self): 931 msg = "invalid params, context window exceeds limit (2013)" 932 assert parse_context_limit_from_error(msg) is None 933 934 def test_completely_unrelated_error(self): 935 assert parse_context_limit_from_error("Invalid API key") is None 936 937 def test_empty_string(self): 938 assert parse_context_limit_from_error("") is None 939 940 def test_number_outside_reasonable_range(self): 941 """Very large number (>10M) should be rejected.""" 942 msg = "maximum context length is 99999999999" 943 assert parse_context_limit_from_error(msg) is None 944 945 946 # ========================================================================= 947 # Persistent context length cache 948 # ========================================================================= 949 950 class TestContextLengthCache: 951 def test_save_and_load(self, tmp_path): 952 cache_file = tmp_path / "cache.yaml" 953 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 954 save_context_length("test/model", "http://localhost:8080/v1", 32768) 955 assert get_cached_context_length("test/model", "http://localhost:8080/v1") == 32768 956 957 def test_missing_cache_returns_none(self, tmp_path): 958 cache_file = tmp_path / "nonexistent.yaml" 959 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 960 assert get_cached_context_length("test/model", "http://x") is None 961 962 def test_multiple_models_cached(self, tmp_path): 963 cache_file = tmp_path / "cache.yaml" 964 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 965 save_context_length("model-a", "http://a", 64000) 966 save_context_length("model-b", "http://b", 128000) 967 assert get_cached_context_length("model-a", "http://a") == 64000 968 assert get_cached_context_length("model-b", "http://b") == 128000 969 970 def test_same_model_different_providers(self, tmp_path): 971 cache_file = tmp_path / "cache.yaml" 972 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 973 save_context_length("llama-3", "http://local:8080", 32768) 974 save_context_length("llama-3", "https://openrouter.ai/api/v1", 131072) 975 assert get_cached_context_length("llama-3", "http://local:8080") == 32768 976 assert get_cached_context_length("llama-3", "https://openrouter.ai/api/v1") == 131072 977 978 def test_idempotent_save(self, tmp_path): 979 cache_file = tmp_path / "cache.yaml" 980 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 981 save_context_length("model", "http://x", 32768) 982 save_context_length("model", "http://x", 32768) 983 with open(cache_file) as f: 984 data = yaml.safe_load(f) 985 assert len(data["context_lengths"]) == 1 986 987 def test_update_existing_value(self, tmp_path): 988 """Saving a different value for the same key overwrites it.""" 989 cache_file = tmp_path / "cache.yaml" 990 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 991 save_context_length("model", "http://x", 128000) 992 save_context_length("model", "http://x", 64000) 993 assert get_cached_context_length("model", "http://x") == 64000 994 995 def test_corrupted_yaml_returns_empty(self, tmp_path): 996 """Corrupted cache file is handled gracefully.""" 997 cache_file = tmp_path / "cache.yaml" 998 cache_file.write_text("{{{{not valid yaml: [[[") 999 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 1000 assert get_cached_context_length("model", "http://x") is None 1001 1002 def test_wrong_structure_returns_none(self, tmp_path): 1003 """YAML that loads but has wrong structure.""" 1004 cache_file = tmp_path / "cache.yaml" 1005 cache_file.write_text("just_a_string\n") 1006 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 1007 assert get_cached_context_length("model", "http://x") is None 1008 1009 @patch("agent.model_metadata.fetch_model_metadata") 1010 def test_cached_value_takes_priority(self, mock_fetch, tmp_path): 1011 mock_fetch.return_value = {} 1012 cache_file = tmp_path / "cache.yaml" 1013 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 1014 save_context_length("unknown/model", "http://local", 65536) 1015 assert get_model_context_length("unknown/model", base_url="http://local") == 65536 1016 1017 def test_special_chars_in_model_name(self, tmp_path): 1018 """Model names with colons, slashes, etc. don't break the cache.""" 1019 cache_file = tmp_path / "cache.yaml" 1020 model = "anthropic/claude-3.5-sonnet:beta" 1021 url = "https://api.example.com/v1" 1022 with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): 1023 save_context_length(model, url, 200000) 1024 assert get_cached_context_length(model, url) == 200000