"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""

import json
import logging
import os
from pathlib import Path
from unittest.mock import patch, MagicMock, AsyncMock

import pytest

from agent.auxiliary_client import (
    get_text_auxiliary_client,
    get_available_vision_backends,
    resolve_vision_provider_client,
    resolve_provider_client,
    auxiliary_max_tokens_param,
    call_llm,
    async_call_llm,
    _build_call_kwargs,
    _read_codex_access_token,
    _get_provider_chain,
    _is_payment_error,
    _normalize_aux_provider,
    _try_payment_fallback,
    _resolve_auto,
)


@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Strip provider env vars so each test starts clean."""
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
    ):
        monkeypatch.delenv(key, raising=False)


@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Provide a writable ~/.codex/ directory with a valid auth.json."""
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    auth_file = codex_dir / "auth.json"
    auth_file.write_text(json.dumps({
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }))
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir


class TestNormalizeAuxProvider:
    def test_maps_github_copilot_aliases(self):
        assert _normalize_aux_provider("github") == "copilot"
        assert _normalize_aux_provider("github-copilot") == "copilot"
        assert _normalize_aux_provider("github-models") == "copilot"

    def test_maps_github_copilot_acp_aliases(self):
        assert _normalize_aux_provider("github-copilot-acp") == "copilot-acp"
        assert _normalize_aux_provider("copilot-acp-agent") == "copilot-acp"


class TestReadCodexAccessToken:
    def test_valid_auth_store(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": "tok-123", "refresh_token": "r-456"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == "tok-123"

    def test_pool_without_selected_entry_falls_back_to_auth_store(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        valid_jwt = "eyJhbGciOiJSUzI1NiJ9.eyJleHAiOjk5OTk5OTk5OTl9.sig"
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)), \
             patch("hermes_cli.auth._read_codex_tokens", return_value={
                 "tokens": {"access_token": valid_jwt, "refresh_token": "refresh"}
             }):
            result = _read_codex_access_token()

        assert result == valid_jwt

    def test_missing_returns_none(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            result = _read_codex_access_token()
        assert result is None

    def test_empty_token_returns_none(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": " ", "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result is None

    def test_malformed_json_returns_none(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
        assert result is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
        assert result is None

    def test_expired_jwt_returns_none(self, tmp_path, monkeypatch):
        """Expired JWT tokens should be skipped so auto chain continues."""
        import base64
        import time as _time

        # Build a JWT with exp in the past
        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        expired_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": expired_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            result = _read_codex_access_token()
        assert result is None, "Expired JWT should return None"

    def test_valid_jwt_returns_token(self, tmp_path, monkeypatch):
        """Non-expired JWT tokens should be returned."""
        import base64
        import time as _time

        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) + 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        valid_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": valid_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == valid_jwt

    def test_non_jwt_token_passes_through(self, tmp_path, monkeypatch):
        """Non-JWT tokens (no dots) should be returned as-is."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": "plain-token-no-jwt", "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == "plain-token-no-jwt"


class TestAnthropicOAuthFlag:
    """Test that OAuth tokens get is_oauth=True in auxiliary Anthropic client."""

    def test_oauth_token_sets_flag(self, monkeypatch):
        """OAuth tokens (sk-ant-oat01-*) should create client with is_oauth=True."""
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-token")
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
            client, model = _try_anthropic()
            assert client is not None
            assert isinstance(client, AnthropicAuxiliaryClient)
            # The adapter inside should have is_oauth=True
            adapter = client.chat.completions
            assert adapter._is_oauth is True

    def test_api_key_no_oauth_flag(self, monkeypatch):
        """Regular API keys (sk-ant-api-*) should create client with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-testkey1234"), \
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
            client, model = _try_anthropic()
            assert client is not None
            assert isinstance(client, AnthropicAuxiliaryClient)
            adapter = client.chat.completions
            assert adapter._is_oauth is False

    def test_pool_entry_takes_priority_over_legacy_resolution(self):
        class _Entry:
            access_token = "sk-ant-oat01-pooled"
            base_url = "https://api.anthropic.com"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=AssertionError("legacy path should not run")),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as mock_build,
        ):
            from agent.auxiliary_client import _try_anthropic

            client, model = _try_anthropic()

            assert client is not None
            assert model == "claude-haiku-4-5-20251001"
            assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled"


class TestBuildCodexClient:
    def test_pool_without_selected_entry_falls_back_to_auth_store(self):
        with (
            patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)),
            patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-auth-token"),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            mock_openai.return_value = MagicMock()
            from agent.auxiliary_client import _build_codex_client

            client, model = _build_codex_client("gpt-5.4")

            assert client is not None
            assert model == "gpt-5.4"
            assert mock_openai.call_args.kwargs["api_key"] == "codex-auth-token"
            assert mock_openai.call_args.kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"

    def test_rejects_missing_model(self):
        """Callers must pass an explicit model; no hardcoded default."""
        from agent.auxiliary_client import _build_codex_client

        client, model = _build_codex_client("")
        assert client is None
        assert model is None


class TestExpiredCodexFallback:
    """Test that expired Codex tokens don't block the auto chain."""

    def test_expired_codex_falls_through_to_next(self, tmp_path, monkeypatch):
        """When Codex token is expired, auto chain should skip it and try next provider."""
        import base64
        import time as _time

        # Expired Codex JWT
        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        expired_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": expired_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        # Set up Anthropic as fallback
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-fallback")
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _resolve_auto, AnthropicAuxiliaryClient
            client, model = _resolve_auto()
            # Should NOT be Codex, should be Anthropic (or another available provider)
            assert not isinstance(client, type(None)), "Should find a provider after expired Codex"

    def test_expired_codex_openrouter_wins(self, tmp_path, monkeypatch):
        """With expired Codex + OpenRouter key, OpenRouter should win (1st in chain)."""
        import base64
        import time as _time

        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        expired_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": expired_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")

        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            from agent.auxiliary_client import _resolve_auto
            client, model = _resolve_auto()
            assert client is not None
            # OpenRouter is 1st in chain, should win
            mock_openai.assert_called()

    def test_expired_codex_custom_endpoint_wins(self, tmp_path, monkeypatch):
        """With expired Codex + custom endpoint (Ollama), custom should win (3rd in chain)."""
        import base64
        import time as _time

        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        expired_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": expired_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        # Simulate Ollama or custom endpoint
        with patch("agent.auxiliary_client._resolve_custom_runtime",
                   return_value=("http://localhost:11434/v1", "sk-dummy")):
            with patch("agent.auxiliary_client.OpenAI") as mock_openai:
                mock_openai.return_value = MagicMock()
                from agent.auxiliary_client import _resolve_auto
                client, model = _resolve_auto()
                assert client is not None

    def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch):
        """OAuth-style tokens should get is_oauth=*** (token is not sk-ant-api-*)."""
        # Mock resolve_anthropic_token to return an OAuth-style token
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat-hermes-token"), \
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
            client, model = _try_anthropic()
            assert client is not None, "Should resolve token"
            adapter = client.chat.completions
            assert adapter._is_oauth is True, "Non-sk-ant-api token should set is_oauth=True"

    def test_jwt_missing_exp_passes_through(self, tmp_path, monkeypatch):
        """JWT with valid JSON but no exp claim should pass through."""
        import base64
        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"sub": "user123"}).encode()  # no exp
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
        no_exp_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": no_exp_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == no_exp_jwt, "JWT without exp should pass through"

    def test_jwt_invalid_json_payload_passes_through(self, tmp_path, monkeypatch):
        """JWT with valid base64 but invalid JSON payload should pass through."""
        import base64
        header = base64.urlsafe_b64encode(b'{"alg":"RS256"}').rstrip(b"=").decode()
        payload = base64.urlsafe_b64encode(b"not-json-content").rstrip(b"=").decode()
        bad_jwt = f"{header}.{payload}.fakesig"

        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": bad_jwt, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == bad_jwt, "JWT with invalid JSON payload should pass through"

    def test_claude_code_oauth_env_sets_flag(self, monkeypatch):
        """CLAUDE_CODE_OAUTH_TOKEN env var should get is_oauth=True."""
        monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "sk-ant-oat-cc-test-token")
        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
            client, model = _try_anthropic()
            assert client is not None
            adapter = client.chat.completions
            assert adapter._is_oauth is True


class TestExplicitProviderRouting:
    """Test explicit provider selection bypasses auto chain correctly."""

    def test_explicit_anthropic_api_key(self, monkeypatch):
        """provider='anthropic' + regular API key should work with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"), \
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            client, model = resolve_provider_client("anthropic")
            assert client is not None
            adapter = client.chat.completions
            assert adapter._is_oauth is False

    def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                client, model = resolve_provider_client("openrouter")
        assert client is None
        assert model is None
        assert any(
            "credential pool has no usable entries" in record.message
            for record in caplog.records
        )
        assert not any(
            "OPENROUTER_API_KEY not set" in record.message
            for record in caplog.records
        )

    def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                client, model = resolve_provider_client("openrouter")
        assert client is None
        assert model is None
        assert any(
            "OPENROUTER_API_KEY not set" in record.message
            for record in caplog.records
        )


class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

    def test_codex_pool_entry_takes_priority_over_auth_store(self):
        class _Entry:
            access_token = "pooled-codex-token"
            base_url = "https://chatgpt.com/backend-api/codex"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI"),
            patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")),
        ):
            from agent.auxiliary_client import _build_codex_client

            client, model = _build_codex_client("gpt-5.4")

        from agent.auxiliary_client import CodexAuxiliaryClient

        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.4"

    def test_returns_none_when_nothing_available(self, monkeypatch):
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None

    def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
        with patch("agent.auxiliary_client._resolve_custom_runtime",
                   return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
             patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()

        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"
        assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "sk-test"


class TestVisionClientFallback:
    """Vision client auto mode resolves known-good multimodal backends."""

    def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
        """Active provider appears in available backends when credentials exist."""
        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
        ):
            backends = get_available_vision_backends()

        assert "anthropic" in backends

    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
        ):
            client, model = resolve_provider_client("anthropic")

        assert client is not None
        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
        assert model == "claude-haiku-4-5-20251001"


class TestAuxiliaryPoolAwareness:
    def test_try_nous_uses_pool_entry(self):
        class _Entry:
            access_token = "pooled-access-token"
            agent_key = "pooled-agent-key"
            inference_base_url = "https://inference.pool.example/v1"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()

            assert client is not None
            assert model == "google/gemini-3-flash-preview"
            assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
            assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous(vision=False)

            assert client is not None
            assert model == "minimax/minimax-m2.7"
            assert mock_rec.call_args.kwargs["vision"] is False

    def test_try_nous_uses_portal_recommendation_for_vision(self):
        """Vision tasks should ask for the vision-specific recommendation."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous(vision=True)

            assert client is not None
            assert model == "google/gemini-3-flash-preview"
            assert mock_rec.call_args.kwargs["vision"] is True

    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
        """If the Portal lookup throws, we must still return a usable model."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()

            assert client is not None
            assert model == "google/gemini-3-flash-preview"

    def test_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")

        fresh_client = MagicMock()
        fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_client.chat.completions.create.return_value = {"ok": True}

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert stale_client.chat.completions.create.call_count == 1
        assert fresh_client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))

        fresh_async_client = MagicMock()
        fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1

    def test_auto_cache_miss_uses_resolved_native_model_and_warns(self, caplog):
        import agent.auxiliary_client as aux

        fake_client = MagicMock()
        aux.shutdown_cached_clients()
        aux._incompatible_aux_override_warned.clear()
        try:
            with patch(
                "agent.auxiliary_client.resolve_provider_client",
                return_value=(fake_client, "gpt-5.4"),
            ) as mock_resolve, caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                client, model = aux._get_cached_client(
                    "auto",
                    "google/gemini-3-flash-preview",
                    main_runtime={
                        "provider": "openai-codex",
                        "model": "gpt-5.4",
                        "base_url": "https://chatgpt.com/backend-api/codex",
                        "api_key": "codex-token",
                        "api_mode": "codex_responses",
                    },
                )
        finally:
            aux.shutdown_cached_clients()
            aux._incompatible_aux_override_warned.clear()

        assert client is fake_client
        assert model == "gpt-5.4"
        assert mock_resolve.call_count == 1
        assert "Auxiliary auto-routing dropped incompatible model override" in caplog.text
        assert "google/gemini-3-flash-preview" in caplog.text
        assert "gpt-5.4" in caplog.text

    def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
        import agent.auxiliary_client as aux

        fake_client = MagicMock()

        with patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(fake_client, "google/gemini-3.1-flash-lite-preview"),
        ) as mock_resolve:
            aux.shutdown_cached_clients()
            try:
                client, model = aux._get_cached_client(
                    "gmi",
                    "google/gemini-3.1-flash-lite-preview",
                    base_url="https://api.gmi-serving.com/v1",
                    api_key="gmi-key",
                )
                assert client is fake_client
                assert model == "google/gemini-3.1-flash-lite-preview"

                client, model = aux._get_cached_client(
                    "gmi",
                    "openai/gpt-5.4-mini",
                    base_url="https://api.gmi-serving.com/v1",
                    api_key="gmi-key",
                )
            finally:
                aux.shutdown_cached_clients()

        assert client is fake_client
        assert model == "openai/gpt-5.4-mini"
        assert mock_resolve.call_count == 1


# ── Payment / credit exhaustion fallback ─────────────────────────────────


class TestIsPaymentError:
    """_is_payment_error detects 402 and credit-related errors."""

    def test_402_status_code(self):
        exc = Exception("Payment Required")
        exc.status_code = 402
        assert _is_payment_error(exc) is True

    def test_402_with_credits_message(self):
        exc = Exception("You requested up to 65535 tokens, but can only afford 8029")
        exc.status_code = 402
        assert _is_payment_error(exc) is True

    def test_429_with_credits_message(self):
        exc = Exception("insufficient credits remaining")
        exc.status_code = 429
        assert _is_payment_error(exc) is True

    def test_429_without_credits_message_is_not_payment(self):
        """Normal rate limits should NOT be treated as payment errors."""
        exc = Exception("Rate limit exceeded, try again in 2 seconds")
        exc.status_code = 429
        assert _is_payment_error(exc) is False

    def test_generic_500_is_not_payment(self):
        exc = Exception("Internal server error")
        exc.status_code = 500
        assert _is_payment_error(exc) is False

    def test_no_status_code_with_billing_message(self):
        exc = Exception("billing: payment required for this request")
        assert _is_payment_error(exc) is True

    def test_no_status_code_no_message(self):
        exc = Exception("connection reset")
        assert _is_payment_error(exc) is False


class TestGetProviderChain:
    """_get_provider_chain() resolves functions at call time (testable)."""

    def test_returns_four_entries(self):
        chain = _get_provider_chain()
        assert len(chain) == 4
        labels = [label for label, _ in chain]
        assert labels == ["openrouter", "nous", "local/custom", "api-key"]
        # Codex is deliberately NOT in this chain — see _get_provider_chain
        # docstring. ChatGPT-account Codex has a shifting model allow-list;
        # guessing a model to fall back on breaks more often than it helps.
        assert "openai-codex" not in labels

    def test_picks_up_patched_functions(self):
        """Patches on _try_* functions must be visible in the chain."""
        sentinel = lambda: ("patched", "model")
        with patch("agent.auxiliary_client._try_openrouter", sentinel):
            chain = _get_provider_chain()
            assert chain[0] == ("openrouter", sentinel)


class TestTryPaymentFallback:
    """_try_payment_fallback skips the failed provider and tries alternatives."""

    def test_skips_failed_provider(self):
        mock_client = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(mock_client, "nous-model")), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter", task="compression")
            assert client is mock_client
            assert model == "nous-model"
            assert label == "nous"

    def test_returns_none_when_no_fallback(self):
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter")
            assert client is None
            assert label == ""

    def test_codex_alias_maps_to_chain_label(self):
        """'codex' should map to 'openai-codex' in the skip set."""
        mock_client = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter", return_value=(mock_client, "or-model")), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openai-codex"):
            client, model, label = _try_payment_fallback("openai-codex", task="vision")
            # NOTE(review): the source chunk is truncated at this point — the
            # original assertions for this test are not visible here and have
            # not been reconstructed. Restore them from the full file.
875 assert client is mock_client 876 assert label == "openrouter" 877 878 def test_codex_not_in_fallback_chain(self): 879 """Codex is deliberately NOT a fallback rung (shifting model allow-list). 880 881 When OR/Nous/custom/api-key all fail, payment-fallback returns None — 882 Codex is never tried with a guessed model. 883 """ 884 with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \ 885 patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \ 886 patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \ 887 patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ 888 patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"): 889 client, model, label = _try_payment_fallback("openrouter") 890 assert client is None 891 assert model is None 892 assert label == "" 893 894 895 class TestCallLlmPaymentFallback: 896 """call_llm() retries with a different provider on 402 / payment errors.""" 897 898 def _make_402_error(self, msg="Payment Required: insufficient credits"): 899 exc = Exception(msg) 900 exc.status_code = 402 901 return exc 902 903 def test_non_payment_error_not_caught(self, monkeypatch): 904 """Non-payment/non-connection errors (500) should NOT trigger fallback.""" 905 monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") 906 907 primary_client = MagicMock() 908 server_err = Exception("Internal Server Error") 909 server_err.status_code = 500 910 primary_client.chat.completions.create.side_effect = server_err 911 912 with patch("agent.auxiliary_client._get_cached_client", 913 return_value=(primary_client, "google/gemini-3-flash-preview")), \ 914 patch("agent.auxiliary_client._resolve_task_provider_model", 915 return_value=("auto", "google/gemini-3-flash-preview", None, None, None)): 916 with pytest.raises(Exception, match="Internal Server Error"): 917 call_llm( 918 task="compression", 919 messages=[{"role": "user", "content": "hello"}], 920 ) 
921 922 # --------------------------------------------------------------------------- 923 # Gate: _resolve_api_key_provider must skip anthropic when not configured 924 # --------------------------------------------------------------------------- 925 926 927 def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch): 928 """_resolve_api_key_provider must not try anthropic when user never configured it.""" 929 from collections import OrderedDict 930 from hermes_cli.auth import ProviderConfig 931 932 # Build a minimal registry with only "anthropic" so the loop is guaranteed 933 # to reach it without being short-circuited by earlier providers. 934 fake_registry = OrderedDict({ 935 "anthropic": ProviderConfig( 936 id="anthropic", 937 name="Anthropic", 938 auth_type="api_key", 939 inference_base_url="https://api.anthropic.com", 940 api_key_env_vars=("ANTHROPIC_API_KEY",), 941 ), 942 }) 943 944 called = [] 945 946 def mock_try_anthropic(): 947 called.append("anthropic") 948 return None, None 949 950 monkeypatch.setattr("agent.auxiliary_client._try_anthropic", mock_try_anthropic) 951 monkeypatch.setattr("hermes_cli.auth.PROVIDER_REGISTRY", fake_registry) 952 monkeypatch.setattr( 953 "hermes_cli.auth.is_provider_explicitly_configured", 954 lambda pid: False, 955 ) 956 957 from agent.auxiliary_client import _resolve_api_key_provider 958 _resolve_api_key_provider() 959 960 assert "anthropic" not in called, \ 961 "_try_anthropic() should not be called when anthropic is not explicitly configured" 962 963 964 # --------------------------------------------------------------------------- 965 # model="default" elimination (#7512) 966 # --------------------------------------------------------------------------- 967 968 969 # --------------------------------------------------------------------------- 970 # _try_payment_fallback reason parameter (#7512 bug 3) 971 # --------------------------------------------------------------------------- 972 973 974 # 
--------------------------------------------------------------------------- 975 # _is_connection_error coverage 976 # --------------------------------------------------------------------------- 977 978 979 class TestIsConnectionError: 980 """Tests for _is_connection_error detection.""" 981 982 def test_connection_refused(self): 983 from agent.auxiliary_client import _is_connection_error 984 err = Exception("Connection refused") 985 assert _is_connection_error(err) is True 986 987 def test_timeout(self): 988 from agent.auxiliary_client import _is_connection_error 989 err = Exception("Request timed out.") 990 assert _is_connection_error(err) is True 991 992 def test_dns_failure(self): 993 from agent.auxiliary_client import _is_connection_error 994 err = Exception("Name or service not known") 995 assert _is_connection_error(err) is True 996 997 def test_normal_api_error_not_connection(self): 998 from agent.auxiliary_client import _is_connection_error 999 err = Exception("Bad Request: invalid model") 1000 err.status_code = 400 1001 assert _is_connection_error(err) is False 1002 1003 def test_500_not_connection(self): 1004 from agent.auxiliary_client import _is_connection_error 1005 err = Exception("Internal Server Error") 1006 err.status_code = 500 1007 assert _is_connection_error(err) is False 1008 1009 1010 class TestKimiTemperatureOmitted: 1011 """Kimi/Moonshot models should have temperature OMITTED from API kwargs. 1012 1013 The Kimi gateway selects the correct temperature server-side based on the 1014 active mode (thinking → 1.0, non-thinking → 0.6). Sending any temperature 1015 value conflicts with gateway-managed defaults. 
1016 """ 1017 1018 @pytest.mark.parametrize( 1019 "model", 1020 [ 1021 "kimi-for-coding", 1022 "kimi-k2.5", 1023 "kimi-k2.6", 1024 "kimi-k2-turbo-preview", 1025 "kimi-k2-0905-preview", 1026 "kimi-k2-thinking", 1027 "kimi-k2-thinking-turbo", 1028 "kimi-k2-instruct", 1029 "kimi-k2-instruct-0905", 1030 "moonshotai/kimi-k2.5", 1031 "moonshotai/Kimi-K2-Thinking", 1032 "moonshotai/Kimi-K2-Instruct", 1033 ], 1034 ) 1035 def test_kimi_models_omit_temperature(self, model): 1036 """No kimi model should have a temperature key in kwargs.""" 1037 from agent.auxiliary_client import _build_call_kwargs 1038 1039 kwargs = _build_call_kwargs( 1040 provider="kimi-coding", 1041 model=model, 1042 messages=[{"role": "user", "content": "hello"}], 1043 temperature=0.3, 1044 ) 1045 1046 assert "temperature" not in kwargs 1047 1048 def test_kimi_for_coding_no_temperature_when_none(self): 1049 """When caller passes temperature=None, still no temperature key.""" 1050 from agent.auxiliary_client import _build_call_kwargs 1051 1052 kwargs = _build_call_kwargs( 1053 provider="kimi-coding", 1054 model="kimi-for-coding", 1055 messages=[{"role": "user", "content": "hello"}], 1056 temperature=None, 1057 ) 1058 1059 assert "temperature" not in kwargs 1060 1061 def test_sync_call_omits_temperature(self): 1062 client = MagicMock() 1063 client.base_url = "https://api.kimi.com/coding/v1" 1064 response = MagicMock() 1065 client.chat.completions.create.return_value = response 1066 1067 with patch( 1068 "agent.auxiliary_client._get_cached_client", 1069 return_value=(client, "kimi-for-coding"), 1070 ), patch( 1071 "agent.auxiliary_client._resolve_task_provider_model", 1072 return_value=("auto", "kimi-for-coding", None, None, None), 1073 ): 1074 result = call_llm( 1075 task="session_search", 1076 messages=[{"role": "user", "content": "hello"}], 1077 temperature=0.1, 1078 ) 1079 1080 assert result is response 1081 kwargs = client.chat.completions.create.call_args.kwargs 1082 assert kwargs["model"] == 
"kimi-for-coding" 1083 assert "temperature" not in kwargs 1084 1085 @pytest.mark.asyncio 1086 async def test_async_call_omits_temperature(self): 1087 client = MagicMock() 1088 client.base_url = "https://api.kimi.com/coding/v1" 1089 response = MagicMock() 1090 client.chat.completions.create = AsyncMock(return_value=response) 1091 1092 with patch( 1093 "agent.auxiliary_client._get_cached_client", 1094 return_value=(client, "kimi-for-coding"), 1095 ), patch( 1096 "agent.auxiliary_client._resolve_task_provider_model", 1097 return_value=("auto", "kimi-for-coding", None, None, None), 1098 ): 1099 result = await async_call_llm( 1100 task="session_search", 1101 messages=[{"role": "user", "content": "hello"}], 1102 temperature=0.1, 1103 ) 1104 1105 assert result is response 1106 kwargs = client.chat.completions.create.call_args.kwargs 1107 assert kwargs["model"] == "kimi-for-coding" 1108 assert "temperature" not in kwargs 1109 1110 @pytest.mark.parametrize( 1111 "model", 1112 [ 1113 "anthropic/claude-sonnet-4-6", 1114 "gpt-5.4", 1115 "deepseek-chat", 1116 ], 1117 ) 1118 def test_non_kimi_models_preserve_temperature(self, model): 1119 from agent.auxiliary_client import _build_call_kwargs 1120 1121 kwargs = _build_call_kwargs( 1122 provider="openrouter", 1123 model=model, 1124 messages=[{"role": "user", "content": "hello"}], 1125 temperature=0.3, 1126 ) 1127 1128 assert kwargs["temperature"] == 0.3 1129 1130 @pytest.mark.parametrize( 1131 "base_url", 1132 [ 1133 "https://api.moonshot.ai/v1", 1134 "https://api.moonshot.cn/v1", 1135 "https://api.kimi.com/coding/v1", 1136 ], 1137 ) 1138 def test_kimi_k2_5_omits_temperature_regardless_of_endpoint(self, base_url): 1139 """Temperature is omitted regardless of which Kimi endpoint is used.""" 1140 from agent.auxiliary_client import _build_call_kwargs 1141 1142 kwargs = _build_call_kwargs( 1143 provider="kimi-coding", 1144 model="kimi-k2.5", 1145 messages=[{"role": "user", "content": "hello"}], 1146 temperature=0.1, 1147 
base_url=base_url, 1148 ) 1149 1150 assert "temperature" not in kwargs 1151 1152 1153 # --------------------------------------------------------------------------- 1154 # async_call_llm payment / connection fallback (#7512 bug 2) 1155 # --------------------------------------------------------------------------- 1156 1157 1158 class TestStaleBaseUrlWarning: 1159 """_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161).""" 1160 1161 def test_warns_when_openai_base_url_set_with_named_provider(self, monkeypatch, caplog): 1162 """Warning fires when OPENAI_BASE_URL is set but provider is a named provider.""" 1163 import agent.auxiliary_client as mod 1164 # Reset the module-level flag so the warning fires 1165 monkeypatch.setattr(mod, "_stale_base_url_warned", False) 1166 monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1") 1167 monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test") 1168 1169 with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \ 1170 patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \ 1171 caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): 1172 _resolve_auto() 1173 1174 assert any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \ 1175 "Expected a warning about stale OPENAI_BASE_URL" 1176 assert mod._stale_base_url_warned is True 1177 1178 1179 class TestAuxiliaryTaskExtraBody: 1180 def test_sync_call_merges_task_extra_body_from_config(self): 1181 client = MagicMock() 1182 client.base_url = "https://api.example.com/v1" 1183 response = MagicMock() 1184 client.chat.completions.create.return_value = response 1185 1186 config = { 1187 "auxiliary": { 1188 "session_search": { 1189 "extra_body": { 1190 "enable_thinking": False, 1191 "reasoning": {"effort": "none"}, 1192 } 1193 } 1194 } 1195 } 1196 1197 with patch("hermes_cli.config.load_config", return_value=config), patch( 1198 
"agent.auxiliary_client._get_cached_client", 1199 return_value=(client, "glm-4.5-air"), 1200 ): 1201 result = call_llm( 1202 task="session_search", 1203 messages=[{"role": "user", "content": "hello"}], 1204 extra_body={"metadata": {"source": "test"}}, 1205 ) 1206 1207 assert result is response 1208 kwargs = client.chat.completions.create.call_args.kwargs 1209 assert kwargs["extra_body"]["enable_thinking"] is False 1210 assert kwargs["extra_body"]["reasoning"] == {"effort": "none"} 1211 assert kwargs["extra_body"]["metadata"] == {"source": "test"} 1212 1213 @pytest.mark.asyncio 1214 async def test_async_call_explicit_extra_body_overrides_task_config(self): 1215 client = MagicMock() 1216 client.base_url = "https://api.example.com/v1" 1217 response = MagicMock() 1218 client.chat.completions.create = AsyncMock(return_value=response) 1219 1220 config = { 1221 "auxiliary": { 1222 "session_search": { 1223 "extra_body": {"enable_thinking": False} 1224 } 1225 } 1226 } 1227 1228 with patch("hermes_cli.config.load_config", return_value=config), patch( 1229 "agent.auxiliary_client._get_cached_client", 1230 return_value=(client, "glm-4.5-air"), 1231 ): 1232 result = await async_call_llm( 1233 task="session_search", 1234 messages=[{"role": "user", "content": "hello"}], 1235 extra_body={"enable_thinking": True}, 1236 ) 1237 1238 assert result is response 1239 kwargs = client.chat.completions.create.call_args.kwargs 1240 assert kwargs["extra_body"]["enable_thinking"] is True 1241 1242 def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog): 1243 """No warning when the provider is 'custom' — OPENAI_BASE_URL is expected.""" 1244 import agent.auxiliary_client as mod 1245 monkeypatch.setattr(mod, "_stale_base_url_warned", False) 1246 monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1") 1247 monkeypatch.setenv("OPENAI_API_KEY", "test-key") 1248 1249 with patch("agent.auxiliary_client._read_main_provider", return_value="custom"), \ 1250 
patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \ 1251 patch("agent.auxiliary_client._resolve_custom_runtime", 1252 return_value=("http://localhost:11434/v1", "test-key", None)), \ 1253 patch("agent.auxiliary_client.OpenAI") as mock_openai, \ 1254 caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): 1255 mock_openai.return_value = MagicMock() 1256 _resolve_auto() 1257 1258 assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \ 1259 "Should NOT warn when provider is 'custom'" 1260 1261 def test_no_warning_when_provider_is_named_custom(self, monkeypatch, caplog): 1262 """No warning when the provider is 'custom:myname' — base_url comes from config.""" 1263 import agent.auxiliary_client as mod 1264 monkeypatch.setattr(mod, "_stale_base_url_warned", False) 1265 monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1") 1266 monkeypatch.setenv("OPENAI_API_KEY", "test-key") 1267 1268 with patch("agent.auxiliary_client._read_main_provider", return_value="custom:ollama-local"), \ 1269 patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \ 1270 patch("agent.auxiliary_client.resolve_provider_client", 1271 return_value=(MagicMock(), "llama3")), \ 1272 caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): 1273 _resolve_auto() 1274 1275 assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \ 1276 "Should NOT warn when provider is 'custom:*'" 1277 1278 def test_no_warning_when_openai_base_url_not_set(self, monkeypatch, caplog): 1279 """No warning when OPENAI_BASE_URL is absent.""" 1280 import agent.auxiliary_client as mod 1281 monkeypatch.setattr(mod, "_stale_base_url_warned", False) 1282 monkeypatch.delenv("OPENAI_BASE_URL", raising=False) 1283 monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test") 1284 1285 with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \ 1286 
patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \ 1287 caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): 1288 _resolve_auto() 1289 1290 assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \ 1291 "Should NOT warn when OPENAI_BASE_URL is not set" 1292 1293 # --------------------------------------------------------------------------- 1294 # Anthropic-compatible image block conversion 1295 # --------------------------------------------------------------------------- 1296 1297 class TestAnthropicCompatImageConversion: 1298 """Tests for _is_anthropic_compat_endpoint and _convert_openai_images_to_anthropic.""" 1299 1300 def test_known_providers_detected(self): 1301 from agent.auxiliary_client import _is_anthropic_compat_endpoint 1302 assert _is_anthropic_compat_endpoint("minimax", "") 1303 assert _is_anthropic_compat_endpoint("minimax-cn", "") 1304 1305 def test_openrouter_not_detected(self): 1306 from agent.auxiliary_client import _is_anthropic_compat_endpoint 1307 assert not _is_anthropic_compat_endpoint("openrouter", "") 1308 assert not _is_anthropic_compat_endpoint("anthropic", "") 1309 1310 def test_url_based_detection(self): 1311 from agent.auxiliary_client import _is_anthropic_compat_endpoint 1312 assert _is_anthropic_compat_endpoint("custom", "https://api.minimax.io/anthropic") 1313 assert _is_anthropic_compat_endpoint("custom", "https://example.com/anthropic/v1") 1314 assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1") 1315 1316 def test_base64_image_converted(self): 1317 from agent.auxiliary_client import _convert_openai_images_to_anthropic 1318 messages = [{ 1319 "role": "user", 1320 "content": [ 1321 {"type": "text", "text": "describe"}, 1322 {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}} 1323 ] 1324 }] 1325 result = _convert_openai_images_to_anthropic(messages) 1326 img_block = result[0]["content"][1] 1327 assert 
img_block["type"] == "image" 1328 assert img_block["source"]["type"] == "base64" 1329 assert img_block["source"]["media_type"] == "image/png" 1330 assert img_block["source"]["data"] == "iVBOR=" 1331 1332 def test_url_image_converted(self): 1333 from agent.auxiliary_client import _convert_openai_images_to_anthropic 1334 messages = [{ 1335 "role": "user", 1336 "content": [ 1337 {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}} 1338 ] 1339 }] 1340 result = _convert_openai_images_to_anthropic(messages) 1341 img_block = result[0]["content"][0] 1342 assert img_block["type"] == "image" 1343 assert img_block["source"]["type"] == "url" 1344 assert img_block["source"]["url"] == "https://example.com/img.jpg" 1345 1346 def test_text_only_messages_unchanged(self): 1347 from agent.auxiliary_client import _convert_openai_images_to_anthropic 1348 messages = [{"role": "user", "content": "Hello"}] 1349 result = _convert_openai_images_to_anthropic(messages) 1350 assert result[0] is messages[0] # same object, not copied 1351 1352 def test_jpeg_media_type_parsed(self): 1353 from agent.auxiliary_client import _convert_openai_images_to_anthropic 1354 messages = [{ 1355 "role": "user", 1356 "content": [ 1357 {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}} 1358 ] 1359 }] 1360 result = _convert_openai_images_to_anthropic(messages) 1361 assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg" 1362 1363 1364 class _AuxAuth401(Exception): 1365 status_code = 401 1366 1367 def __init__(self, message="Provided authentication token is expired"): 1368 super().__init__(message) 1369 1370 1371 class _DummyResponse: 1372 def __init__(self, text="ok"): 1373 self.choices = [MagicMock(message=MagicMock(content=text))] 1374 1375 1376 class _FailingThenSuccessCompletions: 1377 def __init__(self): 1378 self.calls = 0 1379 1380 def create(self, **kwargs): 1381 self.calls += 1 1382 if self.calls == 1: 1383 raise _AuxAuth401() 1384 return 
_DummyResponse("sync-ok") 1385 1386 1387 class _AsyncFailingThenSuccessCompletions: 1388 def __init__(self): 1389 self.calls = 0 1390 1391 async def create(self, **kwargs): 1392 self.calls += 1 1393 if self.calls == 1: 1394 raise _AuxAuth401() 1395 return _DummyResponse("async-ok") 1396 1397 1398 class TestAuxiliaryAuthRefreshRetry: 1399 def test_call_llm_refreshes_codex_on_401_for_vision(self): 1400 failing_client = MagicMock() 1401 failing_client.base_url = "https://chatgpt.com/backend-api/codex" 1402 failing_client.chat.completions = _FailingThenSuccessCompletions() 1403 1404 fresh_client = MagicMock() 1405 fresh_client.base_url = "https://chatgpt.com/backend-api/codex" 1406 fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-sync") 1407 1408 with ( 1409 patch( 1410 "agent.auxiliary_client.resolve_vision_provider_client", 1411 side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")], 1412 ), 1413 patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, 1414 ): 1415 resp = call_llm( 1416 task="vision", 1417 provider="openai-codex", 1418 model="gpt-5.4", 1419 messages=[{"role": "user", "content": "hi"}], 1420 ) 1421 1422 assert resp.choices[0].message.content == "fresh-sync" 1423 mock_refresh.assert_called_once_with("openai-codex") 1424 1425 def test_call_llm_refreshes_codex_on_401_for_non_vision(self): 1426 stale_client = MagicMock() 1427 stale_client.base_url = "https://chatgpt.com/backend-api/codex" 1428 stale_client.chat.completions.create.side_effect = _AuxAuth401("stale codex token") 1429 1430 fresh_client = MagicMock() 1431 fresh_client.base_url = "https://chatgpt.com/backend-api/codex" 1432 fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-non-vision") 1433 1434 with ( 1435 patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.4", None, None, None)), 1436 
patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.4"), (fresh_client, "gpt-5.4")]), 1437 patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, 1438 ): 1439 resp = call_llm( 1440 task="compression", 1441 provider="openai-codex", 1442 model="gpt-5.4", 1443 messages=[{"role": "user", "content": "hi"}], 1444 ) 1445 1446 assert resp.choices[0].message.content == "fresh-non-vision" 1447 mock_refresh.assert_called_once_with("openai-codex") 1448 assert stale_client.chat.completions.create.call_count == 1 1449 assert fresh_client.chat.completions.create.call_count == 1 1450 1451 def test_call_llm_refreshes_anthropic_on_401_for_non_vision(self): 1452 stale_client = MagicMock() 1453 stale_client.base_url = "https://api.anthropic.com" 1454 stale_client.chat.completions.create.side_effect = _AuxAuth401("anthropic token expired") 1455 1456 fresh_client = MagicMock() 1457 fresh_client.base_url = "https://api.anthropic.com" 1458 fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-anthropic") 1459 1460 with ( 1461 patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)), 1462 patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]), 1463 patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, 1464 ): 1465 resp = call_llm( 1466 task="compression", 1467 provider="anthropic", 1468 model="claude-haiku-4-5-20251001", 1469 messages=[{"role": "user", "content": "hi"}], 1470 ) 1471 1472 assert resp.choices[0].message.content == "fresh-anthropic" 1473 mock_refresh.assert_called_once_with("anthropic") 1474 assert stale_client.chat.completions.create.call_count == 1 1475 assert fresh_client.chat.completions.create.call_count == 1 1476 1477 @pytest.mark.asyncio 1478 async def 
test_async_call_llm_refreshes_codex_on_401_for_vision(self): 1479 failing_client = MagicMock() 1480 failing_client.base_url = "https://chatgpt.com/backend-api/codex" 1481 failing_client.chat.completions = _AsyncFailingThenSuccessCompletions() 1482 1483 fresh_client = MagicMock() 1484 fresh_client.base_url = "https://chatgpt.com/backend-api/codex" 1485 fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async")) 1486 1487 with ( 1488 patch( 1489 "agent.auxiliary_client.resolve_vision_provider_client", 1490 side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")], 1491 ), 1492 patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, 1493 ): 1494 resp = await async_call_llm( 1495 task="vision", 1496 provider="openai-codex", 1497 model="gpt-5.4", 1498 messages=[{"role": "user", "content": "hi"}], 1499 ) 1500 1501 assert resp.choices[0].message.content == "fresh-async" 1502 mock_refresh.assert_called_once_with("openai-codex") 1503 1504 def test_refresh_provider_credentials_force_refreshes_anthropic_oauth_and_evicts_cache(self, monkeypatch): 1505 stale_client = MagicMock() 1506 cache_key = ("anthropic", False, None, None, None) 1507 1508 monkeypatch.setenv("ANTHROPIC_TOKEN", "") 1509 monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "") 1510 monkeypatch.setenv("ANTHROPIC_API_KEY", "") 1511 1512 with ( 1513 patch("agent.auxiliary_client._client_cache", {cache_key: (stale_client, "claude-haiku-4-5-20251001", None)}), 1514 patch("agent.anthropic_adapter.read_claude_code_credentials", return_value={ 1515 "accessToken": "expired-token", 1516 "refreshToken": "refresh-token", 1517 "expiresAt": 0, 1518 }), 1519 patch("agent.anthropic_adapter.refresh_anthropic_oauth_pure", return_value={ 1520 "access_token": "fresh-token", 1521 "refresh_token": "refresh-token-2", 1522 "expires_at_ms": 9999999999999, 1523 }) as mock_refresh_oauth, 1524 
patch("agent.anthropic_adapter._write_claude_code_credentials") as mock_write, 1525 ): 1526 from agent.auxiliary_client import _refresh_provider_credentials 1527 1528 assert _refresh_provider_credentials("anthropic") is True 1529 1530 mock_refresh_oauth.assert_called_once_with("refresh-token", use_json=False) 1531 mock_write.assert_called_once_with("fresh-token", "refresh-token-2", 9999999999999) 1532 stale_client.close.assert_called_once() 1533 1534 @pytest.mark.asyncio 1535 async def test_async_call_llm_refreshes_anthropic_on_401_for_non_vision(self): 1536 stale_client = MagicMock() 1537 stale_client.base_url = "https://api.anthropic.com" 1538 stale_client.chat.completions.create = AsyncMock(side_effect=_AuxAuth401("anthropic token expired")) 1539 1540 fresh_client = MagicMock() 1541 fresh_client.base_url = "https://api.anthropic.com" 1542 fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async-anthropic")) 1543 1544 with ( 1545 patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)), 1546 patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]), 1547 patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, 1548 ): 1549 resp = await async_call_llm( 1550 task="compression", 1551 provider="anthropic", 1552 model="claude-haiku-4-5-20251001", 1553 messages=[{"role": "user", "content": "hi"}], 1554 ) 1555 1556 assert resp.choices[0].message.content == "fresh-async-anthropic" 1557 mock_refresh.assert_called_once_with("anthropic") 1558 assert stale_client.chat.completions.create.await_count == 1 1559 assert fresh_client.chat.completions.create.await_count == 1 1560 1561 1562 class TestCodexAdapterReasoningTranslation: 1563 """Verify _CodexCompletionsAdapter translates extra_body.reasoning 1564 into the Responses API's 
top-level reasoning + include fields, matching 1565 agent/transports/codex.py::build_kwargs() behavior. 1566 1567 Regression for user feedback (Apr 26): auxiliary callers that configure 1568 reasoning via auxiliary.<task>.extra_body.reasoning had that config 1569 silently dropped because the adapter only forwarded messages/model/tools. 1570 """ 1571 1572 @staticmethod 1573 def _build_adapter(): 1574 """Build a _CodexCompletionsAdapter with a mocked responses.stream().""" 1575 from agent.auxiliary_client import _CodexCompletionsAdapter 1576 from types import SimpleNamespace 1577 1578 # Mock the stream context manager: yields no events, get_final_response 1579 # returns a minimal empty-output response. 1580 fake_final = SimpleNamespace( 1581 output=[SimpleNamespace( 1582 type="message", 1583 content=[SimpleNamespace(type="output_text", text="hi")], 1584 )], 1585 usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2), 1586 ) 1587 1588 class _FakeStream: 1589 def __enter__(self): return self 1590 def __exit__(self, *a): return False 1591 def __iter__(self): return iter([]) 1592 def get_final_response(self): return fake_final 1593 1594 captured_kwargs = {} 1595 1596 def _stream(**kwargs): 1597 captured_kwargs.update(kwargs) 1598 return _FakeStream() 1599 1600 real_client = MagicMock() 1601 real_client.responses.stream = _stream 1602 adapter = _CodexCompletionsAdapter(real_client, "gpt-5.3-codex") 1603 return adapter, captured_kwargs 1604 1605 def test_reasoning_effort_medium_translated_to_top_level(self): 1606 adapter, captured = self._build_adapter() 1607 adapter.create( 1608 messages=[{"role": "user", "content": "hi"}], 1609 extra_body={"reasoning": {"effort": "medium"}}, 1610 ) 1611 assert captured.get("reasoning") == {"effort": "medium", "summary": "auto"} 1612 assert captured.get("include") == ["reasoning.encrypted_content"] 1613 1614 def test_reasoning_effort_minimal_clamped_to_low(self): 1615 """Codex backend rejects 'minimal'; adapter clamps to 
'low' per main transport.""" 1616 adapter, captured = self._build_adapter() 1617 adapter.create( 1618 messages=[{"role": "user", "content": "hi"}], 1619 extra_body={"reasoning": {"effort": "minimal"}}, 1620 ) 1621 assert captured.get("reasoning") == {"effort": "low", "summary": "auto"} 1622 assert captured.get("include") == ["reasoning.encrypted_content"] 1623 1624 def test_reasoning_effort_low_passed_through(self): 1625 adapter, captured = self._build_adapter() 1626 adapter.create( 1627 messages=[{"role": "user", "content": "hi"}], 1628 extra_body={"reasoning": {"effort": "low"}}, 1629 ) 1630 assert captured.get("reasoning") == {"effort": "low", "summary": "auto"} 1631 1632 def test_reasoning_effort_high_passed_through(self): 1633 adapter, captured = self._build_adapter() 1634 adapter.create( 1635 messages=[{"role": "user", "content": "hi"}], 1636 extra_body={"reasoning": {"effort": "high"}}, 1637 ) 1638 assert captured.get("reasoning") == {"effort": "high", "summary": "auto"} 1639 1640 def test_reasoning_disabled_omits_reasoning_and_include(self): 1641 adapter, captured = self._build_adapter() 1642 adapter.create( 1643 messages=[{"role": "user", "content": "hi"}], 1644 extra_body={"reasoning": {"enabled": False}}, 1645 ) 1646 assert "reasoning" not in captured 1647 assert "include" not in captured 1648 1649 def test_reasoning_default_effort_when_only_enabled_flag(self): 1650 """extra_body={"reasoning": {}} (truthy enabled by omission) → default 'medium'.""" 1651 adapter, captured = self._build_adapter() 1652 adapter.create( 1653 messages=[{"role": "user", "content": "hi"}], 1654 extra_body={"reasoning": {}}, 1655 ) 1656 assert captured.get("reasoning") == {"effort": "medium", "summary": "auto"} 1657 assert captured.get("include") == ["reasoning.encrypted_content"] 1658 1659 def test_no_extra_body_means_no_reasoning_keys(self): 1660 """Baseline: without extra_body, no reasoning/include is sent (preserves 1661 current behavior for callers that don't opt in).""" 
1662 adapter, captured = self._build_adapter() 1663 adapter.create(messages=[{"role": "user", "content": "hi"}]) 1664 assert "reasoning" not in captured 1665 assert "include" not in captured 1666 1667 def test_extra_body_without_reasoning_key_is_noop(self): 1668 adapter, captured = self._build_adapter() 1669 adapter.create( 1670 messages=[{"role": "user", "content": "hi"}], 1671 extra_body={"metadata": {"source": "test"}}, 1672 ) 1673 assert "reasoning" not in captured 1674 assert "include" not in captured 1675 1676 def test_non_dict_reasoning_value_is_ignored_gracefully(self): 1677 """Defensive: if a caller accidentally passes a string/None, we 1678 silently skip instead of crashing inside the adapter.""" 1679 adapter, captured = self._build_adapter() 1680 adapter.create( 1681 messages=[{"role": "user", "content": "hi"}], 1682 extra_body={"reasoning": "medium"}, # wrong shape — must not crash 1683 ) 1684 assert "reasoning" not in captured 1685 1686 1687 1688 class TestVisionAutoSkipsKimiCoding: 1689 """_resolve_auto vision branch skips providers that have no vision on 1690 their main endpoint (e.g. Kimi Coding Plan /coding) and falls through 1691 to the aggregator chain instead of handing back a client that will 404 1692 on every request (#17076). 1693 """ 1694 1695 def test_kimi_coding_skipped_falls_through_to_openrouter(self, monkeypatch): 1696 """kimi-coding as main + vision auto → OpenRouter (not kimi).""" 1697 fake_or_client = MagicMock(name="openrouter_client") 1698 1699 monkeypatch.setattr( 1700 "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding", 1701 ) 1702 monkeypatch.setattr( 1703 "agent.auxiliary_client._read_main_model", lambda: "kimi-code", 1704 ) 1705 # Guard: if the skip doesn't fire, _resolve_strict_vision_backend 1706 # and resolve_provider_client both would try kimi-coding — detect 1707 # either via the main-provider call and fail loud. 
1708 rpc_mock = MagicMock(side_effect=AssertionError( 1709 "resolve_provider_client should NOT be called for kimi-coding " 1710 "on the vision auto path")) 1711 monkeypatch.setattr( 1712 "agent.auxiliary_client.resolve_provider_client", rpc_mock, 1713 ) 1714 1715 def fake_strict(provider, model=None): 1716 if provider == "openrouter": 1717 return fake_or_client, "google/gemini-3-flash-preview" 1718 if provider == "nous": 1719 return None, None 1720 raise AssertionError( 1721 f"strict vision backend should not be called for {provider!r} " 1722 "when main provider is kimi-coding" 1723 ) 1724 monkeypatch.setattr( 1725 "agent.auxiliary_client._resolve_strict_vision_backend", 1726 fake_strict, 1727 ) 1728 1729 provider, client, model = resolve_vision_provider_client() 1730 assert provider == "openrouter" 1731 assert client is fake_or_client 1732 assert model == "google/gemini-3-flash-preview" 1733 1734 def test_kimi_coding_cn_skipped_too(self, monkeypatch): 1735 """Same skip applies to the CN variant.""" 1736 fake_or_client = MagicMock(name="openrouter_client") 1737 1738 monkeypatch.setattr( 1739 "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding-cn", 1740 ) 1741 monkeypatch.setattr( 1742 "agent.auxiliary_client._read_main_model", lambda: "kimi-code", 1743 ) 1744 rpc_mock = MagicMock(side_effect=AssertionError( 1745 "resolve_provider_client should NOT be called for kimi-coding-cn")) 1746 monkeypatch.setattr( 1747 "agent.auxiliary_client.resolve_provider_client", rpc_mock, 1748 ) 1749 monkeypatch.setattr( 1750 "agent.auxiliary_client._resolve_strict_vision_backend", 1751 lambda p, m=None: (fake_or_client, "gemini") 1752 if p == "openrouter" 1753 else (None, None), 1754 ) 1755 1756 provider, client, _ = resolve_vision_provider_client() 1757 assert provider == "openrouter" 1758 assert client is fake_or_client 1759 1760 def test_explicit_override_to_kimi_coding_still_honored(self, monkeypatch): 1761 """When a user *explicitly* requests kimi-coding for vision 
(e.g. 1762 they know what they're doing, or are running a future build that 1763 adds image_in capability to Kimi Code), the explicit path still 1764 routes to kimi-coding — only the auto branch applies the skip. 1765 """ 1766 monkeypatch.setattr( 1767 "agent.auxiliary_client._read_main_provider", lambda: "openrouter", 1768 ) 1769 fake_kimi_client = MagicMock(name="kimi_client") 1770 gcc_mock = MagicMock(return_value=(fake_kimi_client, "kimi-code")) 1771 monkeypatch.setattr( 1772 "agent.auxiliary_client._get_cached_client", gcc_mock, 1773 ) 1774 1775 provider, client, model = resolve_vision_provider_client( 1776 provider="kimi-coding", 1777 ) 1778 assert provider == "kimi-coding" 1779 assert client is fake_kimi_client 1780 gcc_mock.assert_called_once() 1781 1782 def test_skip_set_covers_exactly_known_entries(self): 1783 """Guard against accidental widening of the skip list.""" 1784 from agent.auxiliary_client import _PROVIDERS_WITHOUT_VISION 1785 assert _PROVIDERS_WITHOUT_VISION == frozenset({ 1786 "kimi-coding", 1787 "kimi-coding-cn", 1788 }) 1789 1790 1791 # --------------------------------------------------------------------------- 1792 # _build_call_kwargs — tool dedup at API boundary 1793 # --------------------------------------------------------------------------- 1794 1795 class TestBuildCallKwargsToolDedup: 1796 """_build_call_kwargs must deduplicate tool names before passing to API. 1797 1798 Providers like Google Vertex, Azure, and Bedrock reject requests with 1799 duplicate tool names (HTTP 400). This guard converts a hard failure into 1800 a warning log so agent turns succeed even if an upstream injection path 1801 regresses. 
See: https://github.com/NousResearch/hermes-agent/issues/18478 1802 """ 1803 1804 def _make_tool(self, name: str) -> dict: 1805 return { 1806 "type": "function", 1807 "function": { 1808 "name": name, 1809 "description": f"Tool {name}", 1810 "parameters": {"type": "object", "properties": {}}, 1811 }, 1812 } 1813 1814 def test_unique_tools_pass_through_unchanged(self): 1815 tools = [self._make_tool("alpha"), self._make_tool("beta")] 1816 kwargs = _build_call_kwargs( 1817 provider="openai", model="gpt-4o", messages=[], tools=tools, 1818 ) 1819 assert len(kwargs["tools"]) == 2 1820 names = [t["function"]["name"] for t in kwargs["tools"]] 1821 assert names == ["alpha", "beta"] 1822 1823 def test_duplicate_tool_names_are_deduplicated(self): 1824 """RED test — must fail until dedup guard is added.""" 1825 tools = [ 1826 self._make_tool("lcm_grep"), 1827 self._make_tool("lcm_describe"), 1828 self._make_tool("lcm_grep"), # duplicate 1829 self._make_tool("lcm_expand"), 1830 self._make_tool("lcm_describe"), # duplicate 1831 ] 1832 kwargs = _build_call_kwargs( 1833 provider="google", model="gemini-2.5-pro", messages=[], tools=tools, 1834 ) 1835 result_tools = kwargs["tools"] 1836 names = [t["function"]["name"] for t in result_tools] 1837 # Must be deduplicated — no repeated names 1838 assert len(names) == len(set(names)), ( 1839 f"Duplicate tool names found: {names}" 1840 ) 1841 assert len(result_tools) == 3 # lcm_grep, lcm_describe, lcm_expand 1842 1843 def test_empty_tools_unchanged(self): 1844 kwargs = _build_call_kwargs( 1845 provider="openai", model="gpt-4o", messages=[], tools=[], 1846 ) 1847 assert kwargs.get("tools") == [] or "tools" not in kwargs 1848 1849 def test_none_tools_unchanged(self): 1850 kwargs = _build_call_kwargs( 1851 provider="openai", model="gpt-4o", messages=[], tools=None, 1852 ) 1853 assert "tools" not in kwargs 1854 1855 1856 @pytest.fixture(autouse=True) 1857 def _clean_env(monkeypatch): 1858 """Strip provider env vars so each test starts clean.""" 
1859 for key in ( 1860 "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", 1861 ): 1862 monkeypatch.delenv(key, raising=False) 1863 1864 1865 class TestOpenRouterExplicitApiKey: 1866 """Test that explicit_api_key is correctly propagated to _try_openrouter().""" 1867 1868 def test_resolve_provider_client_passes_explicit_api_key_to_openrouter( 1869 self, monkeypatch 1870 ): 1871 """ 1872 When resolve_provider_client() is called with explicit_api_key for OpenRouter, 1873 the explicit key should be passed to the OpenAI client instead of falling back 1874 to OPENROUTER_API_KEY env var. 1875 """ 1876 # Set up env var as fallback (should NOT be used when explicit_api_key is provided) 1877 monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key") 1878 1879 # Mock OpenAI to capture the api_key used 1880 mock_openai = MagicMock() 1881 mock_openai.return_value = MagicMock(name="openrouter-client") 1882 1883 with patch("agent.auxiliary_client.OpenAI", mock_openai): 1884 client, model = resolve_provider_client( 1885 provider="openrouter", 1886 explicit_api_key="explicit-pool-key", 1887 ) 1888 1889 # Verify a client was created 1890 assert client is not None 1891 # Verify the explicit key was used, not the env var fallback 1892 mock_openai.assert_called_once() 1893 call_kwargs = mock_openai.call_args[1] 1894 assert call_kwargs["api_key"] == "explicit-pool-key", ( 1895 f"Expected explicit_api_key to be passed, got: {call_kwargs['api_key']}" 1896 ) 1897 assert call_kwargs["api_key"] != "env-fallback-key", ( 1898 "Should NOT fall back to OPENROUTER_API_KEY when explicit_api_key is provided" 1899 ) 1900 1901 def test_resolve_provider_client_without_explicit_api_key_falls_back_to_env( 1902 self, monkeypatch 1903 ): 1904 """ 1905 When resolve_provider_client() is called WITHOUT explicit_api_key for OpenRouter, 1906 it should fall back to OPENROUTER_API_KEY env var. 
1907 """ 1908 # Set up env var as fallback (should be used when explicit_api_key is NOT provided) 1909 monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key") 1910 1911 # Mock OpenAI to capture the api_key used 1912 mock_openai = MagicMock() 1913 mock_openai.return_value = MagicMock(name="openrouter-client") 1914 1915 with patch("agent.auxiliary_client.OpenAI", mock_openai): 1916 client, model = resolve_provider_client( 1917 provider="openrouter", 1918 explicit_api_key=None, 1919 ) 1920 1921 # Verify a client was created 1922 assert client is not None 1923 # Verify the env var fallback was used 1924 mock_openai.assert_called_once() 1925 call_kwargs = mock_openai.call_args[1] 1926 assert call_kwargs["api_key"] == "env-fallback-key", ( 1927 f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}" 1928 ) 1929 1930 1931 class TestAnthropicExplicitApiKey: 1932 """Test that explicit_api_key is correctly propagated to _try_anthropic(). 1933 1934 Parity with the OpenRouter fix in #18768: resolve_provider_client() passes 1935 explicit_api_key to _try_openrouter(), but the anthropic branch was not 1936 updated — _try_anthropic() always fell back to resolve_anthropic_token() 1937 even when an explicit key was supplied (e.g. from a fallback_model entry). 
1938 """ 1939 1940 def test_try_anthropic_uses_explicit_api_key_over_env(self): 1941 """_try_anthropic(explicit_api_key) must use the supplied key, not the env fallback.""" 1942 with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ 1943 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ 1944 patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): 1945 mock_build.return_value = MagicMock() 1946 from agent.auxiliary_client import _try_anthropic 1947 client, model = _try_anthropic("explicit-pool-key") 1948 assert client is not None 1949 assert mock_build.call_args.args[0] == "explicit-pool-key", ( 1950 f"Expected explicit_api_key to be passed, got: {mock_build.call_args.args[0]}" 1951 ) 1952 assert mock_build.call_args.args[0] != "env-fallback-key" 1953 1954 def test_try_anthropic_without_explicit_key_falls_back_to_resolve(self): 1955 """Without explicit_api_key, _try_anthropic falls back to resolve_anthropic_token.""" 1956 with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ 1957 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ 1958 patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): 1959 mock_build.return_value = MagicMock() 1960 from agent.auxiliary_client import _try_anthropic 1961 client, model = _try_anthropic() 1962 assert client is not None 1963 assert mock_build.call_args.args[0] == "env-fallback-key" 1964 1965 def test_resolve_provider_client_passes_explicit_api_key_to_anthropic(self): 1966 """resolve_provider_client(provider='anthropic', explicit_api_key=...) 
must propagate the key.""" 1967 with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-key"), \ 1968 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ 1969 patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): 1970 mock_build.return_value = MagicMock() 1971 client, model = resolve_provider_client( 1972 provider="anthropic", 1973 explicit_api_key="explicit-fallback-key", 1974 ) 1975 assert client is not None 1976 assert mock_build.call_args.args[0] == "explicit-fallback-key", ( 1977 "resolve_provider_client must forward explicit_api_key to _try_anthropic()" 1978 )