test_api_server.py
"""
Tests for the OpenAI-compatible API server gateway adapter.

Tests cover:
- Chat Completions endpoint (request parsing, response format)
- Responses API endpoint (request parsing, response format)
- previous_response_id chaining (store/retrieve)
- Auth (valid key, invalid key, no key configured)
- /v1/models endpoint
- /health endpoint
- System prompt extraction
- Error handling (invalid JSON, missing fields)
"""

import asyncio
import json
import time
import uuid
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from aiohttp import web
from aiohttp.test_utils import AioHTTPTestCase, TestClient, TestServer

from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.api_server import (
    APIServerAdapter,
    ResponseStore,
    _IdempotencyCache,
    _CORS_HEADERS,
    _derive_chat_session_id,
    check_api_server_requirements,
    cors_middleware,
    security_headers_middleware,
)


# ---------------------------------------------------------------------------
# check_api_server_requirements
# ---------------------------------------------------------------------------


class TestCheckRequirements:
    """``check_api_server_requirements`` with and without ``AIOHTTP_AVAILABLE``."""

    def test_returns_true_when_aiohttp_available(self):
        """The unpatched module reports its requirements as satisfied."""
        available = check_api_server_requirements()
        assert available is True

    @patch("gateway.platforms.api_server.AIOHTTP_AVAILABLE", False)
    def test_returns_false_without_aiohttp(self):
        """With the availability flag patched to ``False`` the check fails."""
        available = check_api_server_requirements()
        assert available is False


# ---------------------------------------------------------------------------
# ResponseStore
# ---------------------------------------------------------------------------


class TestResponseStore:
    """put / get / delete and size-bounded eviction of ``ResponseStore``."""

    def test_put_and_get(self):
        """A stored payload is returned unchanged under the same id."""
        payload = {"output": "hello"}
        lru = ResponseStore(max_size=10)
        lru.put("resp_1", payload)
        assert lru.get("resp_1") == {"output": "hello"}

    def test_get_missing_returns_none(self):
        """Looking up an id that was never stored yields ``None``."""
        lru = ResponseStore(max_size=10)
        assert lru.get("resp_missing") is None

    def test_lru_eviction(self):
        """Inserting past ``max_size`` drops the entry inserted first."""
        lru = ResponseStore(max_size=3)
        for rid, word in (("resp_1", "one"), ("resp_2", "two"), ("resp_3", "three")):
            lru.put(rid, {"output": word})
        # A fourth insert exceeds max_size and pushes resp_1 out.
        lru.put("resp_4", {"output": "four"})
        assert lru.get("resp_1") is None
        assert lru.get("resp_2") is not None
        assert len(lru) == 3

    def test_access_refreshes_lru(self):
        """Reading an entry protects it from the next eviction."""
        lru = ResponseStore(max_size=3)
        for rid, word in (("resp_1", "one"), ("resp_2", "two"), ("resp_3", "three")):
            lru.put(rid, {"output": word})
        # Touch resp_1 so that resp_2 becomes the eviction candidate.
        lru.get("resp_1")
        lru.put("resp_4", {"output": "four"})
        assert lru.get("resp_2") is None
        assert lru.get("resp_1") is not None

    def test_update_existing_key(self):
        """Re-putting an id overwrites the value without growing the store."""
        lru = ResponseStore(max_size=10)
        for version in ("v1", "v2"):
            lru.put("resp_1", {"output": version})
        assert lru.get("resp_1") == {"output": "v2"}
        assert len(lru) == 1

    def test_delete_existing(self):
        """Deleting a present id returns ``True`` and removes the entry."""
        lru = ResponseStore(max_size=10)
        lru.put("resp_1", {"output": "hello"})
        removed = lru.delete("resp_1")
        assert removed is True
        assert lru.get("resp_1") is None
        assert len(lru) == 0

    def test_delete_missing(self):
        """Deleting an unknown id returns ``False``."""
        lru = ResponseStore(max_size=10)
        removed = lru.delete("resp_missing")
        assert removed is False


# ---------------------------------------------------------------------------
# _IdempotencyCache
# ---------------------------------------------------------------------------


class TestIdempotencyCache:
    """Sharing of in-flight work by ``_IdempotencyCache.get_or_set``."""

    @pytest.mark.asyncio
    async def test_concurrent_same_key_and_fingerprint_runs_once(self):
        """Two concurrent callers with identical key and fingerprint share one run."""
        idem = _IdempotencyCache()
        release = asyncio.Event()
        entered = asyncio.Event()
        invocations = 0

        async def producer():
            nonlocal invocations
            invocations += 1
            entered.set()
            await release.wait()
            return ("response", {"total_tokens": 1})

        waiters = [
            asyncio.create_task(idem.get_or_set("idem-key", "fp-1", producer))
            for _ in range(2)
        ]

        await entered.wait()
        assert invocations == 1

        release.set()
        outcome_a, outcome_b = await asyncio.gather(*waiters)

        assert outcome_a == outcome_b == ("response", {"total_tokens": 1})

    @pytest.mark.asyncio
    async def test_different_fingerprint_does_not_reuse_inflight_task(self):
        """The same key with a different fingerprint triggers its own run."""
        idem = _IdempotencyCache()
        release = asyncio.Event()
        entered = asyncio.Event()
        invocations = 0

        async def producer():
            nonlocal invocations
            invocations += 1
            ticket = invocations
            # Only the second invocation signals, so the test waits for both.
            if ticket == 2:
                entered.set()
            await release.wait()
            return ticket

        with_fp_1 = asyncio.create_task(idem.get_or_set("idem-key", "fp-1", producer))
        with_fp_2 = asyncio.create_task(idem.get_or_set("idem-key", "fp-2", producer))

        await entered.wait()
        assert invocations == 2

        release.set()
        outcomes = await asyncio.gather(with_fp_1, with_fp_2)

        assert sorted(outcomes) == [1, 2]

    @pytest.mark.asyncio
    async def test_cancelled_waiter_does_not_drop_shared_inflight_task(self):
        """Cancelling the first waiter leaves the running work reusable."""
        idem = _IdempotencyCache()
        release = asyncio.Event()
        entered = asyncio.Event()
        invocations = 0

        async def producer():
            nonlocal invocations
            invocations += 1
            entered.set()
            await release.wait()
            return "response"

        original_waiter = asyncio.create_task(idem.get_or_set("idem-key", "fp-1", producer))

        await entered.wait()
        assert invocations == 1

        original_waiter.cancel()
        with pytest.raises(asyncio.CancelledError):
            await original_waiter

        late_waiter = asyncio.create_task(idem.get_or_set("idem-key", "fp-1", producer))
        # Yield once so the late waiter gets a chance to start.
        await asyncio.sleep(0)
        assert invocations == 1

        release.set()
        assert await late_waiter == "response"


# ---------------------------------------------------------------------------
# Adapter
initialization 200 # --------------------------------------------------------------------------- 201 202 203 class TestAdapterInit: 204 def test_default_config(self): 205 config = PlatformConfig(enabled=True) 206 adapter = APIServerAdapter(config) 207 assert adapter._host == "127.0.0.1" 208 assert adapter._port == 8642 209 assert adapter._api_key == "" 210 assert adapter.platform == Platform.API_SERVER 211 212 def test_custom_config_from_extra(self): 213 config = PlatformConfig( 214 enabled=True, 215 extra={ 216 "host": "0.0.0.0", 217 "port": 9999, 218 "key": "sk-test", 219 "cors_origins": ["http://localhost:3000"], 220 }, 221 ) 222 adapter = APIServerAdapter(config) 223 assert adapter._host == "0.0.0.0" 224 assert adapter._port == 9999 225 assert adapter._api_key == "sk-test" 226 assert adapter._cors_origins == ("http://localhost:3000",) 227 228 def test_config_from_env(self, monkeypatch): 229 monkeypatch.setenv("API_SERVER_HOST", "10.0.0.1") 230 monkeypatch.setenv("API_SERVER_PORT", "7777") 231 monkeypatch.setenv("API_SERVER_KEY", "sk-env") 232 monkeypatch.setenv("API_SERVER_CORS_ORIGINS", "http://localhost:3000, http://127.0.0.1:3000") 233 config = PlatformConfig(enabled=True) 234 adapter = APIServerAdapter(config) 235 assert adapter._host == "10.0.0.1" 236 assert adapter._port == 7777 237 assert adapter._api_key == "sk-env" 238 assert adapter._cors_origins == ( 239 "http://localhost:3000", 240 "http://127.0.0.1:3000", 241 ) 242 243 def test_invalid_port_from_env_falls_back_to_default(self, monkeypatch): 244 monkeypatch.setenv("API_SERVER_PORT", "not-a-port") 245 config = PlatformConfig(enabled=True) 246 adapter = APIServerAdapter(config) 247 assert adapter._port == 8642 248 249 def test_create_agent_forwards_config_reasoning_effort(self, monkeypatch): 250 captured = {} 251 252 class FakeAgent: 253 def __init__(self, **kwargs): 254 captured.update(kwargs) 255 256 monkeypatch.setattr("run_agent.AIAgent", FakeAgent) 257 monkeypatch.setattr( 258 
"gateway.run._resolve_runtime_agent_kwargs", 259 lambda: { 260 "provider": "openai-codex", 261 "base_url": "https://example.test/v1", 262 "api_mode": "codex_responses", 263 }, 264 ) 265 monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5.5") 266 monkeypatch.setattr( 267 "gateway.run._load_gateway_config", 268 lambda: {"agent": {"reasoning_effort": "xhigh"}}, 269 ) 270 monkeypatch.setattr( 271 "gateway.run.GatewayRunner._load_reasoning_config", 272 staticmethod(lambda: {"enabled": True, "effort": "xhigh"}), 273 ) 274 monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)) 275 monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set()) 276 277 adapter = APIServerAdapter(PlatformConfig(enabled=True)) 278 monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) 279 280 agent = adapter._create_agent(session_id="api-session") 281 282 assert isinstance(agent, FakeAgent) 283 assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"} 284 285 286 # --------------------------------------------------------------------------- 287 # Auth checking 288 # --------------------------------------------------------------------------- 289 290 291 class TestAuth: 292 def test_no_key_configured_allows_all(self): 293 config = PlatformConfig(enabled=True) 294 adapter = APIServerAdapter(config) 295 mock_request = MagicMock() 296 mock_request.headers = {} 297 assert adapter._check_auth(mock_request) is None 298 299 def test_valid_key_passes(self): 300 config = PlatformConfig(enabled=True, extra={"key": "sk-test123"}) 301 adapter = APIServerAdapter(config) 302 mock_request = MagicMock() 303 mock_request.headers = {"Authorization": "Bearer sk-test123"} 304 assert adapter._check_auth(mock_request) is None 305 306 def test_invalid_key_returns_401(self): 307 config = PlatformConfig(enabled=True, extra={"key": "sk-test123"}) 308 adapter = APIServerAdapter(config) 309 mock_request = 
MagicMock() 310 mock_request.headers = {"Authorization": "Bearer wrong-key"} 311 result = adapter._check_auth(mock_request) 312 assert result is not None 313 assert result.status == 401 314 315 def test_missing_auth_header_returns_401(self): 316 config = PlatformConfig(enabled=True, extra={"key": "sk-test123"}) 317 adapter = APIServerAdapter(config) 318 mock_request = MagicMock() 319 mock_request.headers = {} 320 result = adapter._check_auth(mock_request) 321 assert result is not None 322 assert result.status == 401 323 324 def test_malformed_auth_header_returns_401(self): 325 config = PlatformConfig(enabled=True, extra={"key": "sk-test123"}) 326 adapter = APIServerAdapter(config) 327 mock_request = MagicMock() 328 mock_request.headers = {"Authorization": "Basic dXNlcjpwYXNz"} 329 result = adapter._check_auth(mock_request) 330 assert result is not None 331 assert result.status == 401 332 333 334 # --------------------------------------------------------------------------- 335 # Helpers for HTTP tests 336 # --------------------------------------------------------------------------- 337 338 339 def _make_adapter(api_key: str = "", cors_origins=None) -> APIServerAdapter: 340 """Create an adapter with optional API key.""" 341 extra = {} 342 if api_key: 343 extra["key"] = api_key 344 if cors_origins is not None: 345 extra["cors_origins"] = cors_origins 346 config = PlatformConfig(enabled=True, extra=extra) 347 return APIServerAdapter(config) 348 349 350 def _create_app(adapter: APIServerAdapter) -> web.Application: 351 """Create the aiohttp app from the adapter (without starting the full server).""" 352 mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None] 353 app = web.Application(middlewares=mws) 354 app["api_server_adapter"] = adapter 355 app.router.add_get("/health", adapter._handle_health) 356 app.router.add_get("/health/detailed", adapter._handle_health_detailed) 357 app.router.add_get("/v1/health", adapter._handle_health) 358 
app.router.add_get("/v1/models", adapter._handle_models) 359 app.router.add_get("/v1/capabilities", adapter._handle_capabilities) 360 app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions) 361 app.router.add_post("/v1/responses", adapter._handle_responses) 362 app.router.add_get("/v1/responses/{response_id}", adapter._handle_get_response) 363 app.router.add_delete("/v1/responses/{response_id}", adapter._handle_delete_response) 364 return app 365 366 367 @pytest.fixture 368 def adapter(): 369 return _make_adapter() 370 371 372 @pytest.fixture 373 def auth_adapter(): 374 return _make_adapter(api_key="sk-secret") 375 376 377 # --------------------------------------------------------------------------- 378 # Adapter internals 379 # --------------------------------------------------------------------------- 380 381 382 class TestAgentExecution: 383 @pytest.mark.asyncio 384 async def test_run_agent_uses_session_id_as_task_id(self, adapter): 385 mock_agent = MagicMock() 386 mock_agent.run_conversation.return_value = {"final_response": "ok"} 387 mock_agent.session_prompt_tokens = 1 388 mock_agent.session_completion_tokens = 2 389 mock_agent.session_total_tokens = 3 390 391 with patch.object(adapter, "_create_agent", return_value=mock_agent): 392 result, usage = await adapter._run_agent( 393 user_message="hello", 394 conversation_history=[], 395 session_id="session-123", 396 ) 397 398 assert result == {"final_response": "ok"} 399 assert usage == {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3} 400 mock_agent.run_conversation.assert_called_once_with( 401 user_message="hello", 402 conversation_history=[], 403 task_id="session-123", 404 ) 405 406 407 # --------------------------------------------------------------------------- 408 # /health endpoint 409 # --------------------------------------------------------------------------- 410 411 412 class TestHealthEndpoint: 413 @pytest.mark.asyncio 414 async def test_security_headers_present(self, 
adapter): 415 """Responses should include basic security headers.""" 416 app = _create_app(adapter) 417 async with TestClient(TestServer(app)) as cli: 418 resp = await cli.get("/health") 419 assert resp.status == 200 420 assert resp.headers.get("X-Content-Type-Options") == "nosniff" 421 assert resp.headers.get("Referrer-Policy") == "no-referrer" 422 423 @pytest.mark.asyncio 424 async def test_health_returns_ok(self, adapter): 425 app = _create_app(adapter) 426 async with TestClient(TestServer(app)) as cli: 427 resp = await cli.get("/health") 428 assert resp.status == 200 429 data = await resp.json() 430 assert data["status"] == "ok" 431 assert data["platform"] == "hermes-agent" 432 433 @pytest.mark.asyncio 434 async def test_v1_health_alias_returns_ok(self, adapter): 435 """GET /v1/health should return the same response as /health.""" 436 app = _create_app(adapter) 437 async with TestClient(TestServer(app)) as cli: 438 resp = await cli.get("/v1/health") 439 assert resp.status == 200 440 data = await resp.json() 441 assert data["status"] == "ok" 442 assert data["platform"] == "hermes-agent" 443 444 445 # --------------------------------------------------------------------------- 446 # /health/detailed endpoint 447 # --------------------------------------------------------------------------- 448 449 450 class TestHealthDetailedEndpoint: 451 @pytest.mark.asyncio 452 async def test_health_detailed_returns_ok(self, adapter): 453 """GET /health/detailed returns status, platform, and runtime fields.""" 454 app = _create_app(adapter) 455 with patch("gateway.status.read_runtime_status", return_value={ 456 "gateway_state": "running", 457 "platforms": {"telegram": {"state": "connected"}}, 458 "active_agents": 2, 459 "exit_reason": None, 460 "updated_at": "2026-04-14T00:00:00Z", 461 }): 462 async with TestClient(TestServer(app)) as cli: 463 resp = await cli.get("/health/detailed") 464 assert resp.status == 200 465 data = await resp.json() 466 assert data["status"] == "ok" 467 
assert data["platform"] == "hermes-agent" 468 assert data["gateway_state"] == "running" 469 assert data["platforms"] == {"telegram": {"state": "connected"}} 470 assert data["active_agents"] == 2 471 assert isinstance(data["pid"], int) 472 assert "updated_at" in data 473 474 @pytest.mark.asyncio 475 async def test_health_detailed_no_runtime_status(self, adapter): 476 """When gateway_state.json is missing, fields are None.""" 477 app = _create_app(adapter) 478 with patch("gateway.status.read_runtime_status", return_value=None): 479 async with TestClient(TestServer(app)) as cli: 480 resp = await cli.get("/health/detailed") 481 assert resp.status == 200 482 data = await resp.json() 483 assert data["status"] == "ok" 484 assert data["gateway_state"] is None 485 assert data["platforms"] == {} 486 487 @pytest.mark.asyncio 488 async def test_health_detailed_does_not_require_auth(self, auth_adapter): 489 """Health detailed endpoint should be accessible without auth, like /health.""" 490 app = _create_app(auth_adapter) 491 with patch("gateway.status.read_runtime_status", return_value=None): 492 async with TestClient(TestServer(app)) as cli: 493 resp = await cli.get("/health/detailed") 494 assert resp.status == 200 495 496 497 # --------------------------------------------------------------------------- 498 # /v1/models endpoint 499 # --------------------------------------------------------------------------- 500 501 502 class TestModelsEndpoint: 503 @pytest.mark.asyncio 504 async def test_models_returns_hermes_agent(self, adapter): 505 app = _create_app(adapter) 506 async with TestClient(TestServer(app)) as cli: 507 resp = await cli.get("/v1/models") 508 assert resp.status == 200 509 data = await resp.json() 510 assert data["object"] == "list" 511 assert len(data["data"]) == 1 512 assert data["data"][0]["id"] == "hermes-agent" 513 assert data["data"][0]["owned_by"] == "hermes" 514 515 @pytest.mark.asyncio 516 async def test_models_returns_profile_name(self): 517 """When 
running under a named profile, /v1/models advertises the profile name.""" 518 with patch("gateway.platforms.api_server.APIServerAdapter._resolve_model_name", return_value="lucas"): 519 adapter = _make_adapter() 520 app = _create_app(adapter) 521 async with TestClient(TestServer(app)) as cli: 522 resp = await cli.get("/v1/models") 523 assert resp.status == 200 524 data = await resp.json() 525 assert data["data"][0]["id"] == "lucas" 526 assert data["data"][0]["root"] == "lucas" 527 528 @pytest.mark.asyncio 529 async def test_models_returns_explicit_model_name(self): 530 """Explicit model_name in config overrides profile name.""" 531 extra = {"model_name": "my-custom-agent"} 532 config = PlatformConfig(enabled=True, extra=extra) 533 adapter = APIServerAdapter(config) 534 assert adapter._model_name == "my-custom-agent" 535 536 def test_resolve_model_name_explicit(self): 537 assert APIServerAdapter._resolve_model_name("my-bot") == "my-bot" 538 539 def test_resolve_model_name_default_profile(self): 540 """Default profile falls back to 'hermes-agent'.""" 541 with patch("hermes_cli.profiles.get_active_profile_name", return_value="default"): 542 assert APIServerAdapter._resolve_model_name("") == "hermes-agent" 543 544 def test_resolve_model_name_named_profile(self): 545 """Named profile uses the profile name as model name.""" 546 with patch("hermes_cli.profiles.get_active_profile_name", return_value="lucas"): 547 assert APIServerAdapter._resolve_model_name("") == "lucas" 548 549 @pytest.mark.asyncio 550 async def test_models_requires_auth(self, auth_adapter): 551 app = _create_app(auth_adapter) 552 async with TestClient(TestServer(app)) as cli: 553 resp = await cli.get("/v1/models") 554 assert resp.status == 401 555 556 @pytest.mark.asyncio 557 async def test_models_with_valid_auth(self, auth_adapter): 558 app = _create_app(auth_adapter) 559 async with TestClient(TestServer(app)) as cli: 560 resp = await cli.get( 561 "/v1/models", 562 headers={"Authorization": "Bearer 
sk-secret"}, 563 ) 564 assert resp.status == 200 565 566 567 # --------------------------------------------------------------------------- 568 # /v1/capabilities endpoint 569 # --------------------------------------------------------------------------- 570 571 572 class TestCapabilitiesEndpoint: 573 @pytest.mark.asyncio 574 async def test_capabilities_advertises_plugin_safe_contract(self, adapter): 575 app = _create_app(adapter) 576 async with TestClient(TestServer(app)) as cli: 577 resp = await cli.get("/v1/capabilities") 578 assert resp.status == 200 579 data = await resp.json() 580 assert data["object"] == "hermes.api_server.capabilities" 581 assert data["platform"] == "hermes-agent" 582 assert data["model"] == "hermes-agent" 583 assert data["auth"]["type"] == "bearer" 584 assert data["auth"]["required"] is False 585 assert data["features"]["chat_completions"] is True 586 assert data["features"]["run_status"] is True 587 assert data["features"]["run_events_sse"] is True 588 assert data["features"]["session_continuity_header"] == "X-Hermes-Session-Id" 589 assert data["endpoints"]["run_status"]["path"] == "/v1/runs/{run_id}" 590 591 @pytest.mark.asyncio 592 async def test_capabilities_requires_auth_when_key_configured(self, auth_adapter): 593 app = _create_app(auth_adapter) 594 async with TestClient(TestServer(app)) as cli: 595 resp = await cli.get("/v1/capabilities") 596 assert resp.status == 401 597 598 authed = await cli.get( 599 "/v1/capabilities", 600 headers={"Authorization": "Bearer sk-secret"}, 601 ) 602 assert authed.status == 200 603 data = await authed.json() 604 assert data["auth"]["required"] is True 605 606 607 # --------------------------------------------------------------------------- 608 # /v1/chat/completions endpoint 609 # --------------------------------------------------------------------------- 610 611 612 class TestChatCompletionsEndpoint: 613 @pytest.mark.asyncio 614 async def test_invalid_json_returns_400(self, adapter): 615 app = 
_create_app(adapter) 616 async with TestClient(TestServer(app)) as cli: 617 resp = await cli.post( 618 "/v1/chat/completions", 619 data="not json", 620 headers={"Content-Type": "application/json"}, 621 ) 622 assert resp.status == 400 623 data = await resp.json() 624 assert "Invalid JSON" in data["error"]["message"] 625 626 @pytest.mark.asyncio 627 async def test_missing_messages_returns_400(self, adapter): 628 app = _create_app(adapter) 629 async with TestClient(TestServer(app)) as cli: 630 resp = await cli.post("/v1/chat/completions", json={"model": "test"}) 631 assert resp.status == 400 632 data = await resp.json() 633 assert "messages" in data["error"]["message"] 634 635 @pytest.mark.asyncio 636 async def test_empty_messages_returns_400(self, adapter): 637 app = _create_app(adapter) 638 async with TestClient(TestServer(app)) as cli: 639 resp = await cli.post("/v1/chat/completions", json={"model": "test", "messages": []}) 640 assert resp.status == 400 641 642 @pytest.mark.asyncio 643 async def test_stream_true_returns_sse(self, adapter): 644 """stream=true returns SSE format with the full response.""" 645 app = _create_app(adapter) 646 async with TestClient(TestServer(app)) as cli: 647 async def _mock_run_agent(**kwargs): 648 # Simulate streaming: invoke stream_delta_callback with tokens 649 cb = kwargs.get("stream_delta_callback") 650 if cb: 651 cb("Hello!") 652 cb(None) # End signal 653 return ( 654 {"final_response": "Hello!", "messages": [], "api_calls": 1}, 655 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 656 ) 657 658 with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent) as mock_run: 659 resp = await cli.post( 660 "/v1/chat/completions", 661 json={ 662 "model": "test", 663 "messages": [{"role": "user", "content": "hi"}], 664 "stream": True, 665 }, 666 ) 667 assert resp.status == 200 668 assert "text/event-stream" in resp.headers.get("Content-Type", "") 669 assert resp.headers.get("X-Accel-Buffering") == "no" 670 body = await 
resp.text() 671 assert "data: " in body 672 assert "[DONE]" in body 673 assert "Hello!" in body 674 675 @pytest.mark.asyncio 676 async def test_stream_sends_keepalive_during_quiet_tool_gap(self, adapter): 677 """Idle SSE streams should send keepalive comments while tools run silently.""" 678 import asyncio 679 import gateway.platforms.api_server as api_server_mod 680 681 app = _create_app(adapter) 682 async with TestClient(TestServer(app)) as cli: 683 async def _mock_run_agent(**kwargs): 684 cb = kwargs.get("stream_delta_callback") 685 if cb: 686 cb("Working") 687 await asyncio.sleep(0.65) 688 cb("...done") 689 return ( 690 {"final_response": "Working...done", "messages": [], "api_calls": 1}, 691 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 692 ) 693 694 with ( 695 patch.object(api_server_mod, "CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS", 0.01), 696 patch.object(adapter, "_run_agent", side_effect=_mock_run_agent), 697 ): 698 resp = await cli.post( 699 "/v1/chat/completions", 700 json={ 701 "model": "test", 702 "messages": [{"role": "user", "content": "do the thing"}], 703 "stream": True, 704 }, 705 ) 706 assert resp.status == 200 707 body = await resp.text() 708 assert ": keepalive" in body 709 assert "Working" in body 710 assert "...done" in body 711 assert "[DONE]" in body 712 713 @pytest.mark.asyncio 714 async def test_stream_survives_tool_call_none_sentinel(self, adapter): 715 """stream_delta_callback(None) mid-stream (tool calls) must NOT kill the SSE stream. 716 717 The agent fires stream_delta_callback(None) to tell the CLI display to 718 close its response box before executing tool calls. The API server's 719 _on_delta must filter this out so the SSE response stays open and the 720 final answer (streamed after tool execution) reaches the client. 
721 """ 722 import asyncio 723 724 app = _create_app(adapter) 725 async with TestClient(TestServer(app)) as cli: 726 async def _mock_run_agent(**kwargs): 727 cb = kwargs.get("stream_delta_callback") 728 if cb: 729 # Simulate: agent streams partial text, then fires None 730 # (tool call box-close signal), then streams the final answer 731 cb("Thinking") 732 cb(None) # mid-stream None from tool calls 733 await asyncio.sleep(0.05) # simulate tool execution delay 734 cb(" about it...") 735 cb(None) # another None (possible second tool round) 736 await asyncio.sleep(0.05) 737 cb(" The answer is 42.") 738 return ( 739 {"final_response": "Thinking about it... The answer is 42.", "messages": [], "api_calls": 3}, 740 {"input_tokens": 20, "output_tokens": 15, "total_tokens": 35}, 741 ) 742 743 with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent): 744 resp = await cli.post( 745 "/v1/chat/completions", 746 json={ 747 "model": "test", 748 "messages": [{"role": "user", "content": "What is the answer?"}], 749 "stream": True, 750 }, 751 ) 752 assert resp.status == 200 753 body = await resp.text() 754 assert "[DONE]" in body 755 # The final answer text must appear in the SSE stream 756 assert "The answer is 42." in body 757 # All partial text must be present too 758 assert "Thinking" in body 759 assert " about it..." in body 760 761 @pytest.mark.asyncio 762 async def test_stream_includes_tool_progress(self, adapter): 763 """tool_start_callback fires → progress appears as custom SSE event, not in delta.content.""" 764 import asyncio 765 766 app = _create_app(adapter) 767 async with TestClient(TestServer(app)) as cli: 768 async def _mock_run_agent(**kwargs): 769 cb = kwargs.get("stream_delta_callback") 770 ts_cb = kwargs.get("tool_start_callback") 771 # Simulate the structured tool start the gateway now consumes. 
772 if ts_cb: 773 ts_cb("call_terminal_1", "terminal", {"command": "ls -la"}) 774 if cb: 775 await asyncio.sleep(0.05) 776 cb("Here are the files.") 777 return ( 778 {"final_response": "Here are the files.", "messages": [], "api_calls": 1}, 779 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 780 ) 781 782 with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent): 783 resp = await cli.post( 784 "/v1/chat/completions", 785 json={ 786 "model": "test", 787 "messages": [{"role": "user", "content": "list files"}], 788 "stream": True, 789 }, 790 ) 791 assert resp.status == 200 792 body = await resp.text() 793 assert "[DONE]" in body 794 # Tool progress must appear as a custom SSE event, not in 795 # delta.content — prevents model from learning to imitate 796 # markers instead of calling tools (#6972). 797 assert "event: hermes.tool.progress" in body 798 assert '"tool": "terminal"' in body 799 # ``label`` is now derived by ``build_tool_preview`` from the 800 # tool args rather than passed by the caller, so we assert 801 # only that *some* label exists rather than a literal value. 802 assert '"label":' in body 803 # The progress marker must NOT appear inside any 804 # chat.completion.chunk delta.content field. 805 import json as _json 806 for line in body.splitlines(): 807 if line.startswith("data: ") and line.strip() != "data: [DONE]": 808 try: 809 chunk = _json.loads(line[len("data: "):]) 810 except _json.JSONDecodeError: 811 continue 812 if chunk.get("object") == "chat.completion.chunk": 813 for choice in chunk.get("choices", []): 814 content = choice.get("delta", {}).get("content", "") 815 # Tool emoji markers must never leak into content 816 assert "ls -la" not in content or content == "Here are the files." 817 # Final content must also be present 818 assert "Here are the files." 
in body 819 820 @pytest.mark.asyncio 821 async def test_stream_tool_progress_skips_internal_events(self, adapter): 822 """Internal tool calls (name starting with ``_``) are not streamed.""" 823 import asyncio 824 825 app = _create_app(adapter) 826 async with TestClient(TestServer(app)) as cli: 827 async def _mock_run_agent(**kwargs): 828 cb = kwargs.get("stream_delta_callback") 829 ts_cb = kwargs.get("tool_start_callback") 830 if ts_cb: 831 ts_cb("call_internal_1", "_thinking", {"text": "some internal state"}) 832 ts_cb("call_search_1", "web_search", {"query": "Python docs"}) 833 if cb: 834 await asyncio.sleep(0.05) 835 cb("Found it.") 836 return ( 837 {"final_response": "Found it.", "messages": [], "api_calls": 1}, 838 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 839 ) 840 841 with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent): 842 resp = await cli.post( 843 "/v1/chat/completions", 844 json={ 845 "model": "test", 846 "messages": [{"role": "user", "content": "search"}], 847 "stream": True, 848 }, 849 ) 850 assert resp.status == 200 851 body = await resp.text() 852 # Internal _thinking event should NOT appear anywhere 853 assert "some internal state" not in body 854 assert "call_internal_1" not in body 855 # Real tool progress should appear as custom SSE event 856 assert "event: hermes.tool.progress" in body 857 assert '"tool": "web_search"' in body 858 # Label is derived from the args dict by build_tool_preview; 859 # asserting on the structural fact (label exists, call id 860 # is correlated) rather than a literal preview string keeps 861 # the test robust against preview-formatter tweaks. 862 assert '"label":' in body 863 assert '"toolCallId": "call_search_1"' in body 864 865 @pytest.mark.asyncio 866 async def test_stream_emits_tool_lifecycle_with_call_id(self, adapter): 867 """Regression for #16588. 
@pytest.mark.asyncio
async def test_stream_emits_tool_lifecycle_with_call_id(self, adapter):
    """Regression for #16588.

    Streaming ``/v1/chat/completions`` must report a tool's whole
    lifecycle: one ``hermes.tool.progress`` event with ``status: running``
    and a matching one with ``status: completed``, both carrying the same
    ``toolCallId`` so that a client can mark the tool as finished.
    """
    import asyncio
    import json as _json

    async def _fake_agent(**kwargs):
        emit_text = kwargs.get("stream_delta_callback")
        on_tool_start = kwargs.get("tool_start_callback")
        on_tool_done = kwargs.get("tool_complete_callback")
        # Only the structured start/complete callbacks are driven here, so
        # each tool start is expected to yield exactly one event.
        if on_tool_start:
            on_tool_start("call_terminal_1", "terminal", {"command": "ls -la"})
        if on_tool_done:
            on_tool_done("call_terminal_1", "terminal", {"command": "ls -la"}, "ok")
        if emit_text:
            await asyncio.sleep(0.05)
            emit_text("done.")
        result = {"final_response": "done.", "messages": [], "api_calls": 1}
        usage = {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2}
        return (result, usage)

    request_json = {
        "model": "test",
        "messages": [{"role": "user", "content": "list"}],
        "stream": True,
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", side_effect=_fake_agent):
            response = await client.post("/v1/chat/completions", json=request_json)
            assert response.status == 200
            sse_lines = (await response.text()).splitlines()

            # Collect one (status, toolCallId) pair per progress event so the
            # correlation is verified event by event: an event lacking its
            # ``toolCallId`` fails even if another event carries the id.
            pairs: list[tuple[str | None, str | None]] = []
            for idx, text in enumerate(sse_lines):
                if text.strip() != "event: hermes.tool.progress":
                    continue
                # The payload is the first ``data:`` line among the next three.
                data_line = next(
                    (cand for cand in sse_lines[idx + 1: idx + 4] if cand.startswith("data: ")),
                    None,
                )
                if data_line is None:
                    continue
                try:
                    event = _json.loads(data_line[len("data: "):])
                except _json.JSONDecodeError:
                    continue
                pairs.append((event.get("status"), event.get("toolCallId")))

            # Exactly one event per lifecycle half (no duplicated emit), each
            # tagged with the same call id.
            assert len(pairs) == 2, f"expected 2 events (running+completed), got {pairs}"
            first, second = pairs
            assert first == ("running", "call_terminal_1"), pairs
            assert second == ("completed", "call_terminal_1"), pairs
@pytest.mark.asyncio
async def test_stream_tool_lifecycle_skips_internal_and_orphan_completes(self, adapter):
    """No lifecycle event is streamed for internal tools or orphan completes.

    An internal (``_thinking``-style) tool and a ``completed`` callback that
    has no earlier matching start must both be dropped, so clients never
    receive a ``status: completed`` update they cannot correlate.
    """
    import asyncio

    async def _fake_agent(**kwargs):
        emit_text = kwargs.get("stream_delta_callback")
        on_tool_start = kwargs.get("tool_start_callback")
        on_tool_done = kwargs.get("tool_complete_callback")
        # Internal tool: start and completion must both be filtered.
        if on_tool_start:
            on_tool_start("call_internal_1", "_thinking", {})
        if on_tool_done:
            on_tool_done("call_internal_1", "_thinking", {}, "")
            # Completion that was never started: an orphan, must be dropped.
            on_tool_done("call_orphan_1", "web_search", {}, "ok")
        if emit_text:
            await asyncio.sleep(0.05)
            emit_text("ok.")
        result = {"final_response": "ok.", "messages": [], "api_calls": 1}
        usage = {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2}
        return (result, usage)

    request_json = {
        "model": "test",
        "messages": [{"role": "user", "content": "ok"}],
        "stream": True,
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", side_effect=_fake_agent):
            response = await client.post("/v1/chat/completions", json=request_json)
            assert response.status == 200
            sse_text = await response.text()

            # Neither call id, nor any lifecycle status, may be on the wire.
            for forbidden in (
                "call_internal_1",
                "call_orphan_1",
                '"status": "running"',
                '"status": "completed"',
            ):
                assert forbidden not in sse_text


@pytest.mark.asyncio
async def test_no_user_message_returns_400(self, adapter):
    """A chat request containing only a system message is rejected with 400."""
    request_json = {
        "model": "test",
        "messages": [{"role": "system", "content": "You are helpful."}],
    }
    async with TestClient(TestServer(_create_app(adapter))) as client:
        response = await client.post("/v1/chat/completions", json=request_json)
        assert response.status == 400


@pytest.mark.asyncio
async def test_successful_completion(self, adapter):
    """A mocked agent run is rendered as a ``chat.completion`` object."""
    agent_result = {
        "final_response": "Hello! How can I help you today?",
        "messages": [],
        "api_calls": 1,
    }
    request_json = {
        "model": "hermes-agent",
        "messages": [{"role": "user", "content": "Hello"}],
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
            run_agent.return_value = (
                agent_result,
                dict(input_tokens=0, output_tokens=0, total_tokens=0),
            )
            response = await client.post("/v1/chat/completions", json=request_json)

        assert response.status == 200
        data = await response.json()
        assert data["object"] == "chat.completion"
        assert data["id"].startswith("chatcmpl-")
        assert data["model"] == "hermes-agent"
        assert len(data["choices"]) == 1
        choice = data["choices"][0]
        assert choice["message"]["role"] == "assistant"
        assert choice["message"]["content"] == "Hello! How can I help you today?"
        assert choice["finish_reason"] == "stop"
        assert "usage" in data
@pytest.mark.asyncio
async def test_system_prompt_extracted(self, adapter):
    """A client system message is forwarded as ``ephemeral_system_prompt``."""
    agent_result = {
        "final_response": "I am a pirate! Arrr!",
        "messages": [],
        "api_calls": 1,
    }
    request_json = {
        "model": "hermes-agent",
        "messages": [
            {"role": "system", "content": "You are a pirate."},
            {"role": "user", "content": "Hello"},
        ],
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
            run_agent.return_value = (
                agent_result,
                dict(input_tokens=0, output_tokens=0, total_tokens=0),
            )
            response = await client.post("/v1/chat/completions", json=request_json)

        assert response.status == 200
        # The agent must have been invoked with the extracted system prompt.
        passed = run_agent.call_args.kwargs
        assert passed.get("ephemeral_system_prompt") == "You are a pirate."
        assert passed.get("user_message") == "Hello"


@pytest.mark.asyncio
async def test_conversation_history_passed(self, adapter):
    """Earlier user/assistant turns are handed over as ``conversation_history``."""
    agent_result = {"final_response": "3", "messages": [], "api_calls": 1}
    earlier_turns = [
        {"role": "user", "content": "1+1=?"},
        {"role": "assistant", "content": "2"},
    ]
    request_json = {
        "model": "hermes-agent",
        "messages": earlier_turns + [{"role": "user", "content": "Now add 1 more"}],
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
            run_agent.return_value = (
                agent_result,
                dict(input_tokens=0, output_tokens=0, total_tokens=0),
            )
            response = await client.post("/v1/chat/completions", json=request_json)

        assert response.status == 200
        passed = run_agent.call_args.kwargs
        assert passed["user_message"] == "Now add 1 more"
        history = passed["conversation_history"]
        assert len(history) == 2
        assert history[0] == {"role": "user", "content": "1+1=?"}
        assert history[1] == {"role": "assistant", "content": "2"}


@pytest.mark.asyncio
async def test_agent_error_returns_500(self, adapter):
    """An exception raised by the agent yields a 500 carrying its message."""
    request_json = {
        "model": "hermes-agent",
        "messages": [{"role": "user", "content": "Hello"}],
    }

    async with TestClient(TestServer(_create_app(adapter))) as client:
        with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
            run_agent.side_effect = RuntimeError("Provider failed")
            response = await client.post("/v1/chat/completions", json=request_json)

        assert response.status == 500
        error_body = await response.json()
        assert "Provider failed" in error_body["error"]["message"]
@pytest.mark.asyncio
async def test_stable_session_id_across_turns(self, adapter):
    """Two turns sharing the same first user message share one session_id."""
    agent_result = {"final_response": "ok", "messages": [], "api_calls": 1}

    # Turn 1 is a single user message; turn 2 is the same conversation after
    # it has grown by one assistant reply and one further user message.
    turns = [
        [{"role": "user", "content": "Hello"}],
        [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
            {"role": "user", "content": "How are you?"},
        ],
    ]

    session_ids = []
    async with TestClient(TestServer(_create_app(adapter))) as client:
        for messages in turns:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                await client.post(
                    "/v1/chat/completions",
                    json={"model": "hermes-agent", "messages": messages},
                )
                session_ids.append(run_agent.call_args.kwargs["session_id"])

    assert session_ids[0] == session_ids[1], "Session ID should be stable across turns"
    assert session_ids[0].startswith("api-"), "Derived session IDs should have api- prefix"


@pytest.mark.asyncio
async def test_different_conversations_get_different_session_ids(self, adapter):
    """Conversations opened with different messages get distinct session_ids."""
    agent_result = {"final_response": "ok", "messages": [], "api_calls": 1}

    session_ids = []
    async with TestClient(TestServer(_create_app(adapter))) as client:
        for opening in ("Hello", "Goodbye"):
            request_json = {
                "model": "hermes-agent",
                "messages": [{"role": "user", "content": opening}],
            }
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                await client.post("/v1/chat/completions", json=request_json)
                session_ids.append(run_agent.call_args.kwargs["session_id"])

    assert session_ids[0] != session_ids[1]
# ---------------------------------------------------------------------------
# _derive_chat_session_id unit tests
# ---------------------------------------------------------------------------


class TestDeriveChatSessionId:
    """Direct checks of ``_derive_chat_session_id(system_prompt, first_message)``."""

    def test_deterministic(self):
        """Calling twice with identical arguments gives identical IDs."""
        first, second = (_derive_chat_session_id("sys", "hello") for _ in range(2))
        assert first == second

    def test_prefix(self):
        """Derived IDs start with ``api-``."""
        session_id = _derive_chat_session_id(None, "hi")
        assert session_id.startswith("api-")

    def test_different_system_prompt(self):
        """Changing only the system prompt changes the ID."""
        pirate = _derive_chat_session_id("You are a pirate.", "Hello")
        robot = _derive_chat_session_id("You are a robot.", "Hello")
        assert pirate != robot

    def test_different_first_message(self):
        """Changing only the first message changes the ID."""
        hello = _derive_chat_session_id(None, "Hello")
        goodbye = _derive_chat_session_id(None, "Goodbye")
        assert hello != goodbye

    def test_none_system_prompt(self):
        """A ``None`` system prompt is accepted and still yields a usable string."""
        session_id = _derive_chat_session_id(None, "test")
        assert isinstance(session_id, str) and len(session_id) > 4
# ---------------------------------------------------------------------------
# /v1/responses endpoint
# ---------------------------------------------------------------------------


class TestResponsesEndpoint:
    """Non-streaming behaviour of ``POST /v1/responses`` with a mocked agent."""

    @pytest.mark.asyncio
    async def test_missing_input_returns_400(self, adapter):
        """Omitting ``input`` gives a 400 whose message mentions ``input``."""
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post("/v1/responses", json={"model": "test"})
            assert response.status == 400
            error_body = await response.json()
            assert "input" in error_body["error"]["message"]

    @pytest.mark.asyncio
    async def test_invalid_json_returns_400(self, adapter):
        """A body that is not valid JSON is rejected with 400."""
        json_headers = {"Content-Type": "application/json"}
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post(
                "/v1/responses", data="not json", headers=json_headers
            )
            assert response.status == 400

    @pytest.mark.asyncio
    async def test_successful_response_with_string_input(self, adapter):
        """A plain string ``input`` produces a completed ``response`` object."""
        agent_result = {
            "final_response": "Paris is the capital of France.",
            "messages": [],
            "api_calls": 1,
        }
        request_json = {
            "model": "hermes-agent",
            "input": "What is the capital of France?",
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                response = await client.post("/v1/responses", json=request_json)

            assert response.status == 200
            data = await response.json()
            assert data["object"] == "response"
            assert data["id"].startswith("resp_")
            assert data["status"] == "completed"
            assert len(data["output"]) == 1
            message_item = data["output"][0]
            assert message_item["type"] == "message"
            text_part = message_item["content"][0]
            assert text_part["type"] == "output_text"
            assert text_part["text"] == "Paris is the capital of France."

    @pytest.mark.asyncio
    async def test_successful_response_with_array_input(self, adapter):
        """For a list ``input`` the last entry is the user message, the rest history."""
        agent_result = {"final_response": "Done", "messages": [], "api_calls": 1}
        request_json = {
            "model": "hermes-agent",
            "input": [
                {"role": "user", "content": "Hello"},
                {"role": "user", "content": "What is 2+2?"},
            ],
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                response = await client.post("/v1/responses", json=request_json)

            assert response.status == 200
            passed = run_agent.call_args.kwargs
            assert passed["user_message"] == "What is 2+2?"
            assert len(passed["conversation_history"]) == 1

    @pytest.mark.asyncio
    async def test_instructions_as_ephemeral_prompt(self, adapter):
        """``instructions`` is passed to the agent as ``ephemeral_system_prompt``."""
        agent_result = {"final_response": "Ahoy!", "messages": [], "api_calls": 1}
        request_json = {
            "model": "hermes-agent",
            "input": "Hello",
            "instructions": "Talk like a pirate.",
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                response = await client.post("/v1/responses", json=request_json)

            assert response.status == 200
            passed = run_agent.call_args.kwargs
            assert passed["ephemeral_system_prompt"] == "Talk like a pirate."

    @pytest.mark.asyncio
    async def test_previous_response_id_chaining(self, adapter):
        """A follow-up naming ``previous_response_id`` receives prior history."""
        first_result = {
            "final_response": "2",
            "messages": [{"role": "assistant", "content": "2"}],
            "api_calls": 1,
        }
        second_result = {
            "final_response": "3",
            "messages": [{"role": "assistant", "content": "3"}],
            "api_calls": 1,
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            # Opening request: its id is what the follow-up chains from.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    first_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                opening = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "What is 1+1?"},
                )

            assert opening.status == 200
            previous_id = (await opening.json())["id"]

            # Follow-up request chained onto the opening one.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    second_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                follow_up = await client.post(
                    "/v1/responses",
                    json={
                        "model": "hermes-agent",
                        "input": "Now add 1 more",
                        "previous_response_id": previous_id,
                    },
                )

            assert follow_up.status == 200
            # History carried over from the first response must be non-empty.
            passed = run_agent.call_args.kwargs
            assert len(passed["conversation_history"]) > 0
            assert passed["user_message"] == "Now add 1 more"

    @pytest.mark.asyncio
    async def test_previous_response_id_preserves_session(self, adapter):
        """A chained request runs under the session_id of the response it follows."""
        agent_result = {
            "final_response": "ok",
            "messages": [{"role": "assistant", "content": "ok"}],
            "api_calls": 1,
        }
        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            # Opening request establishes the session.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (agent_result, usage)
                opening = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "Hello"},
                )
            assert opening.status == 200
            opening_session = run_agent.call_args.kwargs["session_id"]
            previous_id = (await opening.json())["id"]

            # Follow-up chains from the opening response.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (agent_result, usage)
                follow_up = await client.post(
                    "/v1/responses",
                    json={
                        "model": "hermes-agent",
                        "input": "Follow up",
                        "previous_response_id": previous_id,
                    },
                )
            assert follow_up.status == 200
            follow_up_session = run_agent.call_args.kwargs["session_id"]

            # Both links of the chain must share one session.
            assert opening_session == follow_up_session

    @pytest.mark.asyncio
    async def test_invalid_previous_response_id_returns_404(self, adapter):
        """Chaining from an unknown response id gives 404."""
        request_json = {
            "model": "hermes-agent",
            "input": "follow up",
            "previous_response_id": "resp_nonexistent",
        }
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post("/v1/responses", json=request_json)
            assert response.status == 404

    @pytest.mark.asyncio
    async def test_store_false_does_not_store(self, adapter):
        """With ``store`` set to false the response is absent from the store."""
        agent_result = {"final_response": "OK", "messages": [], "api_calls": 1}
        request_json = {"model": "hermes-agent", "input": "Hello", "store": False}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                response = await client.post("/v1/responses", json=request_json)

            assert response.status == 200
            returned_id = (await response.json())["id"]
            # An id is returned, yet looking it up must find nothing.
            assert adapter._response_store.get(returned_id) is None

    @pytest.mark.asyncio
    async def test_instructions_inherited_from_previous(self, adapter):
        """A chained request without ``instructions`` reuses the previous ones."""
        agent_result = {"final_response": "Ahoy!", "messages": [], "api_calls": 1}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            # Opening request supplies the instructions.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                opening = await client.post(
                    "/v1/responses",
                    json={
                        "model": "hermes-agent",
                        "input": "Hello",
                        "instructions": "Be a pirate",
                    },
                )

            previous_id = (await opening.json())["id"]

            # Follow-up deliberately leaves ``instructions`` out.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (
                    agent_result,
                    dict(input_tokens=0, output_tokens=0, total_tokens=0),
                )
                follow_up = await client.post(
                    "/v1/responses",
                    json={
                        "model": "hermes-agent",
                        "input": "Tell me more",
                        "previous_response_id": previous_id,
                    },
                )

            assert follow_up.status == 200
            passed = run_agent.call_args.kwargs
            assert passed["ephemeral_system_prompt"] == "Be a pirate"

    @pytest.mark.asyncio
    async def test_agent_error_returns_500(self, adapter):
        """An exception raised by the agent is reported as a 500."""
        request_json = {"model": "hermes-agent", "input": "Hello"}
        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.side_effect = RuntimeError("Boom")
                response = await client.post("/v1/responses", json=request_json)

            assert response.status == 500

    @pytest.mark.asyncio
    async def test_invalid_input_type_returns_400(self, adapter):
        """An ``input`` that is a bare integer is rejected with 400."""
        request_json = {"model": "hermes-agent", "input": 42}
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post("/v1/responses", json=request_json)
            assert response.status == 400
class TestResponsesStreaming:
    """Streaming (SSE) behaviour of ``/v1/responses`` and its stored snapshots."""

    @pytest.mark.asyncio
    async def test_stream_true_returns_responses_sse(self, adapter):
        """``stream: true`` answers with an event stream of Responses events."""

        async def _fake_agent(**kwargs):
            emit_text = kwargs.get("stream_delta_callback")
            if emit_text:
                for piece in ("Hello", " world"):
                    emit_text(piece)
            result = {"final_response": "Hello world", "messages": [], "api_calls": 1}
            usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
            return (result, usage)

        request_json = {"model": "hermes-agent", "input": "hi", "stream": True}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", side_effect=_fake_agent):
                response = await client.post("/v1/responses", json=request_json)
                assert response.status == 200
                assert "text/event-stream" in response.headers.get("Content-Type", "")
                sse_text = await response.text()
                for expected in (
                    "event: response.created",
                    "event: response.output_text.delta",
                    "event: response.output_text.done",
                    "event: response.completed",
                    '"sequence_number":',
                    '"logprobs": []',
                    "Hello",
                    " world",
                ):
                    assert expected in sse_text

    @pytest.mark.asyncio
    async def test_stream_emits_function_call_and_output_items(self, adapter):
        """A tool start/complete pair is streamed as function-call output items."""
        tool_args = {"path": "/tmp/test.txt"}

        async def _fake_agent(**kwargs):
            on_tool_start = kwargs.get("tool_start_callback")
            on_tool_done = kwargs.get("tool_complete_callback")
            emit_text = kwargs.get("stream_delta_callback")
            if on_tool_start:
                on_tool_start("call_123", "read_file", {"path": "/tmp/test.txt"})
            if on_tool_done:
                on_tool_done("call_123", "read_file", {"path": "/tmp/test.txt"}, '{"content":"hello"}')
            if emit_text:
                emit_text("Done.")
            assistant_turn = {
                "role": "assistant",
                "tool_calls": [
                    {
                        "id": "call_123",
                        "function": {
                            "name": "read_file",
                            "arguments": '{"path":"/tmp/test.txt"}',
                        },
                    }
                ],
            }
            tool_turn = {
                "role": "tool",
                "tool_call_id": "call_123",
                "content": '{"content":"hello"}',
            }
            result = {
                "final_response": "Done.",
                "messages": [assistant_turn, tool_turn],
                "api_calls": 1,
            }
            usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
            return (result, usage)

        request_json = {"model": "hermes-agent", "input": "read the file", "stream": True}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", side_effect=_fake_agent):
                response = await client.post("/v1/responses", json=request_json)
                assert response.status == 200
                sse_text = await response.text()
                assert "event: response.output_item.added" in sse_text
                assert "event: response.output_item.done" in sse_text
                assert sse_text.count("event: response.output_item.done") >= 2
                for expected in (
                    '"type": "function_call"',
                    '"type": "function_call_output"',
                    '"call_id": "call_123"',
                    '"name": "read_file"',
                    '"output": [{"type": "input_text", "text": "{\\"content\\":\\"hello\\"}"}]',
                ):
                    assert expected in sse_text

    @pytest.mark.asyncio
    async def test_streamed_response_is_stored_for_get(self, adapter):
        """A streamed response can afterwards be fetched with GET by its id."""

        async def _fake_agent(**kwargs):
            emit_text = kwargs.get("stream_delta_callback")
            if emit_text:
                emit_text("Stored response")
            result = {"final_response": "Stored response", "messages": [], "api_calls": 1}
            usage = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
            return (result, usage)

        request_json = {"model": "hermes-agent", "input": "store this", "stream": True}
        data_prefix = "data: "

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", side_effect=_fake_agent):
                response = await client.post("/v1/responses", json=request_json)
                sse_text = await response.text()

                # Take the id from the first ``response.completed`` payload;
                # data lines that are not valid JSON are ignored.
                response_id = None
                for sse_line in sse_text.splitlines():
                    if not sse_line.startswith(data_prefix):
                        continue
                    try:
                        event = json.loads(sse_line[len(data_prefix):])
                    except json.JSONDecodeError:
                        continue
                    if event.get("type") == "response.completed":
                        response_id = event["response"]["id"]
                        break
                assert response_id

                fetched = await client.get(f"/v1/responses/{response_id}")
                assert fetched.status == 200
                data = await fetched.json()
                assert data["id"] == response_id
                assert data["status"] == "completed"
                assert data["output"][-1]["content"][0]["text"] == "Stored response"

    @pytest.mark.asyncio
    async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
        """A cancelled agent task leaves an ``incomplete`` snapshot in the store.

        Regression for the PR #15171 follow-up: after a server-side
        ``asyncio.CancelledError`` the response must stay retrievable (GET
        and ``previous_response_id`` chaining rely on it) and keep the text
        streamed before the cancel.

        ``_write_sse_responses`` is awaited directly instead of going through
        ``TestClient``, because a client disconnect races the server handler
        and would make the final-snapshot assertion flaky.
        """
        import gateway.platforms.api_server as api_mod
        import queue as _q

        # Minimal request stand-in; only ``headers`` is configured.
        request_stub = MagicMock()
        request_stub.headers = {}

        sent_chunks: list = []

        class _RecordingStream:
            """Stands in for ``web.StreamResponse`` and records every write."""

            async def prepare(self, req):
                pass

            async def write(self, payload):
                sent_chunks.append(payload)

        delta_queue: _q.Queue = _q.Queue()

        async def _cancelled_agent():
            # Queue one partial delta, yield briefly so the writer can drain
            # it, then simulate a server-side cancel.
            delta_queue.put("partial output")
            await asyncio.sleep(0.01)
            raise asyncio.CancelledError()

        agent_task = asyncio.ensure_future(_cancelled_agent())
        response_id = f"resp_{uuid.uuid4().hex[:28]}"

        # ``web.StreamResponse`` is swapped out only while the writer runs.
        stream_patch = patch.object(
            api_mod.web, "StreamResponse", return_value=_RecordingStream()
        )
        with stream_patch, pytest.raises(asyncio.CancelledError):
            await adapter._write_sse_responses(
                request=request_stub,
                response_id=response_id,
                model="hermes-agent",
                created_at=int(time.time()),
                stream_q=delta_queue,
                agent_task=agent_task,
                agent_ref=[None],
                conversation_history=[],
                user_message="will be cancelled",
                instructions=None,
                conversation=None,
                store=True,
                session_id=None,
            )

        # The stored snapshot must now read ``incomplete`` and still hold the
        # partial text seen before the cancel.
        snapshot = adapter._response_store.get(response_id)
        assert snapshot is not None, "snapshot must be retrievable after cancellation"
        assert snapshot["response"]["status"] == "incomplete"
        text_parts = []
        for item in snapshot["response"].get("output", []):
            if item.get("type") != "message":
                continue
            for part in item.get("content", []):
                text_parts.append(part.get("text", ""))
        assert "partial output" in "".join(text_parts)

    @pytest.mark.asyncio
    async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter):
        """A ``ConnectionResetError`` while writing leaves an ``incomplete`` snapshot.

        Regression for PR #15171: a client that disconnects mid-stream must
        not prevent the snapshot from being persisted.
        """
        import gateway.platforms.api_server as api_mod
        import queue as _q

        request_stub = MagicMock()
        request_stub.headers = {}

        class _DisconnectingStream:
            """Stream stand-in whose third and later writes fail."""

            writes = 0

            async def prepare(self, req):
                pass

            async def write(self, payload):
                # The first two writes go through; from the third one on the
                # "client" is gone, simulated with ConnectionResetError.
                self.writes += 1
                if self.writes >= 3:
                    raise ConnectionResetError("simulated client disconnect")

        delta_queue: _q.Queue = _q.Queue()
        for queued in ("some streamed text", None):  # None is the EOS sentinel
            delta_queue.put(queued)

        async def _finished_agent():
            await asyncio.sleep(0.01)
            result = {"final_response": "", "messages": [], "api_calls": 0}
            usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
            return (result, usage)

        agent_task = asyncio.ensure_future(_finished_agent())
        response_id = f"resp_{uuid.uuid4().hex[:28]}"

        with patch.object(
            api_mod.web, "StreamResponse", return_value=_DisconnectingStream()
        ):
            await adapter._write_sse_responses(
                request=request_stub,
                response_id=response_id,
                model="hermes-agent",
                created_at=int(time.time()),
                stream_q=delta_queue,
                agent_task=agent_task,
                agent_ref=[None],
                conversation_history=[],
                user_message="will disconnect",
                instructions=None,
                conversation=None,
                store=True,
                session_id=None,
            )

        snapshot = adapter._response_store.get(response_id)
        assert snapshot is not None, "snapshot must survive client disconnect"
        assert snapshot["response"]["status"] == "incomplete"
# ---------------------------------------------------------------------------
# Auth on endpoints
# ---------------------------------------------------------------------------


class TestEndpointAuth:
    """Requests sent without credentials to an adapter built by ``auth_adapter``."""

    @pytest.mark.asyncio
    async def test_chat_completions_requires_auth(self, auth_adapter):
        """``POST /v1/chat/completions`` without credentials gives 401."""
        request_json = {"model": "test", "messages": [{"role": "user", "content": "hi"}]}
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.post("/v1/chat/completions", json=request_json)
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_responses_requires_auth(self, auth_adapter):
        """``POST /v1/responses`` without credentials gives 401."""
        request_json = {"model": "test", "input": "hi"}
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.post("/v1/responses", json=request_json)
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_models_requires_auth(self, auth_adapter):
        """``GET /v1/models`` without credentials gives 401."""
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.get("/v1/models")
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_health_does_not_require_auth(self, auth_adapter):
        """``GET /health`` answers 200 even without credentials."""
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.get("/health")
            assert response.status == 200
monkeypatch.setenv("API_SERVER_ENABLED", "true") 1825 monkeypatch.setenv("API_SERVER_PORT", "9999") 1826 monkeypatch.setenv("API_SERVER_HOST", "0.0.0.0") 1827 from gateway.config import load_gateway_config 1828 config = load_gateway_config() 1829 assert config.platforms[Platform.API_SERVER].extra.get("port") == 9999 1830 assert config.platforms[Platform.API_SERVER].extra.get("host") == "0.0.0.0" 1831 1832 def test_env_override_cors_origins(self, monkeypatch): 1833 monkeypatch.setenv("API_SERVER_ENABLED", "true") 1834 monkeypatch.setenv( 1835 "API_SERVER_CORS_ORIGINS", 1836 "http://localhost:3000, http://127.0.0.1:3000", 1837 ) 1838 from gateway.config import load_gateway_config 1839 config = load_gateway_config() 1840 assert config.platforms[Platform.API_SERVER].extra.get("cors_origins") == [ 1841 "http://localhost:3000", 1842 "http://127.0.0.1:3000", 1843 ] 1844 1845 def test_api_server_in_connected_platforms(self): 1846 config = GatewayConfig() 1847 config.platforms[Platform.API_SERVER] = PlatformConfig(enabled=True) 1848 connected = config.get_connected_platforms() 1849 assert Platform.API_SERVER in connected 1850 1851 def test_api_server_not_in_connected_when_disabled(self): 1852 config = GatewayConfig() 1853 config.platforms[Platform.API_SERVER] = PlatformConfig(enabled=False) 1854 connected = config.get_connected_platforms() 1855 assert Platform.API_SERVER not in connected 1856 1857 1858 # --------------------------------------------------------------------------- 1859 # Multiple system messages 1860 # --------------------------------------------------------------------------- 1861 1862 1863 class TestMultipleSystemMessages: 1864 @pytest.mark.asyncio 1865 async def test_multiple_system_messages_concatenated(self, adapter): 1866 mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} 1867 1868 app = _create_app(adapter) 1869 async with TestClient(TestServer(app)) as cli: 1870 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as 
mock_run: 1871 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 1872 resp = await cli.post( 1873 "/v1/chat/completions", 1874 json={ 1875 "model": "hermes-agent", 1876 "messages": [ 1877 {"role": "system", "content": "You are helpful."}, 1878 {"role": "system", "content": "Be concise."}, 1879 {"role": "user", "content": "Hello"}, 1880 ], 1881 }, 1882 ) 1883 1884 assert resp.status == 200 1885 call_kwargs = mock_run.call_args.kwargs 1886 prompt = call_kwargs["ephemeral_system_prompt"] 1887 assert "You are helpful." in prompt 1888 assert "Be concise." in prompt 1889 1890 1891 # --------------------------------------------------------------------------- 1892 # send() method (not used but required by base) 1893 # --------------------------------------------------------------------------- 1894 1895 1896 class TestSendMethod: 1897 @pytest.mark.asyncio 1898 async def test_send_returns_not_supported(self): 1899 config = PlatformConfig(enabled=True) 1900 adapter = APIServerAdapter(config) 1901 result = await adapter.send("chat1", "hello") 1902 assert result.success is False 1903 assert "HTTP request/response" in result.error 1904 1905 1906 # --------------------------------------------------------------------------- 1907 # GET /v1/responses/{response_id} 1908 # --------------------------------------------------------------------------- 1909 1910 1911 class TestGetResponse: 1912 @pytest.mark.asyncio 1913 async def test_get_stored_response(self, adapter): 1914 """GET returns a previously stored response.""" 1915 mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} 1916 1917 app = _create_app(adapter) 1918 async with TestClient(TestServer(app)) as cli: 1919 # Create a response first 1920 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 1921 mock_run.return_value = (mock_result, {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}) 1922 resp = await cli.post( 1923 
"/v1/responses", 1924 json={"model": "hermes-agent", "input": "Hi"}, 1925 ) 1926 1927 assert resp.status == 200 1928 data = await resp.json() 1929 response_id = data["id"] 1930 1931 # Now GET it 1932 resp2 = await cli.get(f"/v1/responses/{response_id}") 1933 assert resp2.status == 200 1934 data2 = await resp2.json() 1935 assert data2["id"] == response_id 1936 assert data2["object"] == "response" 1937 assert data2["status"] == "completed" 1938 1939 @pytest.mark.asyncio 1940 async def test_get_not_found(self, adapter): 1941 app = _create_app(adapter) 1942 async with TestClient(TestServer(app)) as cli: 1943 resp = await cli.get("/v1/responses/resp_nonexistent") 1944 assert resp.status == 404 1945 1946 @pytest.mark.asyncio 1947 async def test_get_requires_auth(self, auth_adapter): 1948 app = _create_app(auth_adapter) 1949 async with TestClient(TestServer(app)) as cli: 1950 resp = await cli.get("/v1/responses/resp_any") 1951 assert resp.status == 401 1952 1953 1954 # --------------------------------------------------------------------------- 1955 # DELETE /v1/responses/{response_id} 1956 # --------------------------------------------------------------------------- 1957 1958 1959 class TestDeleteResponse: 1960 @pytest.mark.asyncio 1961 async def test_delete_stored_response(self, adapter): 1962 """DELETE removes a stored response and returns confirmation.""" 1963 mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} 1964 1965 app = _create_app(adapter) 1966 async with TestClient(TestServer(app)) as cli: 1967 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 1968 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 1969 resp = await cli.post( 1970 "/v1/responses", 1971 json={"model": "hermes-agent", "input": "Hi"}, 1972 ) 1973 1974 data = await resp.json() 1975 response_id = data["id"] 1976 1977 # Delete it 1978 resp2 = await cli.delete(f"/v1/responses/{response_id}") 1979 assert 
resp2.status == 200 1980 data2 = await resp2.json() 1981 assert data2["id"] == response_id 1982 assert data2["object"] == "response" 1983 assert data2["deleted"] is True 1984 1985 # Verify it's gone 1986 resp3 = await cli.get(f"/v1/responses/{response_id}") 1987 assert resp3.status == 404 1988 1989 @pytest.mark.asyncio 1990 async def test_delete_not_found(self, adapter): 1991 app = _create_app(adapter) 1992 async with TestClient(TestServer(app)) as cli: 1993 resp = await cli.delete("/v1/responses/resp_nonexistent") 1994 assert resp.status == 404 1995 1996 @pytest.mark.asyncio 1997 async def test_delete_requires_auth(self, auth_adapter): 1998 app = _create_app(auth_adapter) 1999 async with TestClient(TestServer(app)) as cli: 2000 resp = await cli.delete("/v1/responses/resp_any") 2001 assert resp.status == 401 2002 2003 2004 # --------------------------------------------------------------------------- 2005 # Tool calls in output 2006 # --------------------------------------------------------------------------- 2007 2008 2009 class TestToolCallsInOutput: 2010 @pytest.mark.asyncio 2011 async def test_tool_calls_in_output(self, adapter): 2012 """When agent returns tool calls, they appear as function_call items.""" 2013 mock_result = { 2014 "final_response": "The result is 42.", 2015 "messages": [ 2016 { 2017 "role": "assistant", 2018 "content": None, 2019 "tool_calls": [ 2020 { 2021 "id": "call_abc123", 2022 "function": { 2023 "name": "calculator", 2024 "arguments": '{"expression": "6*7"}', 2025 }, 2026 } 2027 ], 2028 }, 2029 { 2030 "role": "tool", 2031 "tool_call_id": "call_abc123", 2032 "content": "42", 2033 }, 2034 { 2035 "role": "assistant", 2036 "content": "The result is 42.", 2037 }, 2038 ], 2039 "api_calls": 2, 2040 } 2041 2042 app = _create_app(adapter) 2043 async with TestClient(TestServer(app)) as cli: 2044 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2045 mock_run.return_value = (mock_result, {"input_tokens": 0, 
"output_tokens": 0, "total_tokens": 0}) 2046 resp = await cli.post( 2047 "/v1/responses", 2048 json={"model": "hermes-agent", "input": "What is 6*7?"}, 2049 ) 2050 2051 assert resp.status == 200 2052 data = await resp.json() 2053 output = data["output"] 2054 2055 # Should have: function_call, function_call_output, message 2056 assert len(output) == 3 2057 assert output[0]["type"] == "function_call" 2058 assert output[0]["name"] == "calculator" 2059 assert output[0]["arguments"] == '{"expression": "6*7"}' 2060 assert output[0]["call_id"] == "call_abc123" 2061 assert output[1]["type"] == "function_call_output" 2062 assert output[1]["call_id"] == "call_abc123" 2063 assert output[1]["output"] == "42" 2064 assert output[2]["type"] == "message" 2065 assert output[2]["content"][0]["text"] == "The result is 42." 2066 2067 @pytest.mark.asyncio 2068 async def test_no_tool_calls_still_works(self, adapter): 2069 """Without tool calls, output is just a message.""" 2070 mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} 2071 2072 app = _create_app(adapter) 2073 async with TestClient(TestServer(app)) as cli: 2074 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2075 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2076 resp = await cli.post( 2077 "/v1/responses", 2078 json={"model": "hermes-agent", "input": "Hello"}, 2079 ) 2080 2081 assert resp.status == 200 2082 data = await resp.json() 2083 assert len(data["output"]) == 1 2084 assert data["output"][0]["type"] == "message" 2085 2086 2087 # --------------------------------------------------------------------------- 2088 # Usage / token counting 2089 # --------------------------------------------------------------------------- 2090 2091 2092 class TestUsageCounting: 2093 @pytest.mark.asyncio 2094 async def test_responses_usage(self, adapter): 2095 """Responses API returns real token counts.""" 2096 mock_result = 
{"final_response": "Done", "messages": [], "api_calls": 1} 2097 usage = {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150} 2098 2099 app = _create_app(adapter) 2100 async with TestClient(TestServer(app)) as cli: 2101 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2102 mock_run.return_value = (mock_result, usage) 2103 resp = await cli.post( 2104 "/v1/responses", 2105 json={"model": "hermes-agent", "input": "Hi"}, 2106 ) 2107 2108 assert resp.status == 200 2109 data = await resp.json() 2110 assert data["usage"]["input_tokens"] == 100 2111 assert data["usage"]["output_tokens"] == 50 2112 assert data["usage"]["total_tokens"] == 150 2113 2114 @pytest.mark.asyncio 2115 async def test_chat_completions_usage(self, adapter): 2116 """Chat completions returns real token counts.""" 2117 mock_result = {"final_response": "Done", "messages": [], "api_calls": 1} 2118 usage = {"input_tokens": 200, "output_tokens": 80, "total_tokens": 280} 2119 2120 app = _create_app(adapter) 2121 async with TestClient(TestServer(app)) as cli: 2122 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2123 mock_run.return_value = (mock_result, usage) 2124 resp = await cli.post( 2125 "/v1/chat/completions", 2126 json={ 2127 "model": "hermes-agent", 2128 "messages": [{"role": "user", "content": "Hi"}], 2129 }, 2130 ) 2131 2132 assert resp.status == 200 2133 data = await resp.json() 2134 assert data["usage"]["prompt_tokens"] == 200 2135 assert data["usage"]["completion_tokens"] == 80 2136 assert data["usage"]["total_tokens"] == 280 2137 2138 2139 # --------------------------------------------------------------------------- 2140 # Truncation 2141 # --------------------------------------------------------------------------- 2142 2143 2144 class TestTruncation: 2145 @pytest.mark.asyncio 2146 async def test_truncation_auto_limits_history(self, adapter): 2147 """With truncation=auto, history over 100 messages is trimmed.""" 2148 mock_result 
= {"final_response": "OK", "messages": [], "api_calls": 1} 2149 2150 # Pre-seed a stored response with a long history 2151 long_history = [{"role": "user", "content": f"msg {i}"} for i in range(150)] 2152 adapter._response_store.put("resp_prev", { 2153 "response": {"id": "resp_prev", "object": "response"}, 2154 "conversation_history": long_history, 2155 "instructions": None, 2156 }) 2157 2158 app = _create_app(adapter) 2159 async with TestClient(TestServer(app)) as cli: 2160 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2161 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2162 resp = await cli.post( 2163 "/v1/responses", 2164 json={ 2165 "model": "hermes-agent", 2166 "input": "follow up", 2167 "previous_response_id": "resp_prev", 2168 "truncation": "auto", 2169 }, 2170 ) 2171 2172 assert resp.status == 200 2173 call_kwargs = mock_run.call_args.kwargs 2174 # History should be truncated to 100 2175 assert len(call_kwargs["conversation_history"]) <= 100 2176 2177 @pytest.mark.asyncio 2178 async def test_no_truncation_keeps_full_history(self, adapter): 2179 """Without truncation=auto, long history is passed as-is.""" 2180 mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} 2181 2182 long_history = [{"role": "user", "content": f"msg {i}"} for i in range(150)] 2183 adapter._response_store.put("resp_prev2", { 2184 "response": {"id": "resp_prev2", "object": "response"}, 2185 "conversation_history": long_history, 2186 "instructions": None, 2187 }) 2188 2189 app = _create_app(adapter) 2190 async with TestClient(TestServer(app)) as cli: 2191 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2192 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2193 resp = await cli.post( 2194 "/v1/responses", 2195 json={ 2196 "model": "hermes-agent", 2197 "input": "follow up", 2198 "previous_response_id": 
"resp_prev2", 2199 }, 2200 ) 2201 2202 assert resp.status == 200 2203 call_kwargs = mock_run.call_args.kwargs 2204 assert len(call_kwargs["conversation_history"]) == 150 2205 2206 2207 # --------------------------------------------------------------------------- 2208 # CORS 2209 # --------------------------------------------------------------------------- 2210 2211 2212 class TestCORS: 2213 def test_origin_allowed_for_non_browser_client(self, adapter): 2214 assert adapter._origin_allowed("") is True 2215 2216 def test_origin_rejected_by_default(self, adapter): 2217 assert adapter._origin_allowed("http://evil.example") is False 2218 2219 def test_origin_allowed_for_allowlist_match(self): 2220 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2221 assert adapter._origin_allowed("http://localhost:3000") is True 2222 2223 def test_cors_headers_for_origin_disabled_by_default(self, adapter): 2224 assert adapter._cors_headers_for_origin("http://localhost:3000") is None 2225 2226 def test_cors_headers_for_origin_matches_allowlist(self): 2227 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2228 headers = adapter._cors_headers_for_origin("http://localhost:3000") 2229 assert headers is not None 2230 assert headers["Access-Control-Allow-Origin"] == "http://localhost:3000" 2231 assert "POST" in headers["Access-Control-Allow-Methods"] 2232 2233 def test_cors_headers_for_origin_rejects_unknown_origin(self): 2234 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2235 assert adapter._cors_headers_for_origin("http://evil.example") is None 2236 2237 @pytest.mark.asyncio 2238 async def test_cors_headers_not_present_by_default(self, adapter): 2239 """CORS is disabled unless explicitly configured.""" 2240 app = _create_app(adapter) 2241 async with TestClient(TestServer(app)) as cli: 2242 resp = await cli.get("/health") 2243 assert resp.status == 200 2244 assert resp.headers.get("Access-Control-Allow-Origin") is None 2245 2246 
@pytest.mark.asyncio 2247 async def test_browser_origin_rejected_by_default(self, adapter): 2248 """Browser-originated requests are rejected unless explicitly allowed.""" 2249 app = _create_app(adapter) 2250 async with TestClient(TestServer(app)) as cli: 2251 resp = await cli.get("/health", headers={"Origin": "http://evil.example"}) 2252 assert resp.status == 403 2253 assert resp.headers.get("Access-Control-Allow-Origin") is None 2254 2255 @pytest.mark.asyncio 2256 async def test_cors_options_preflight_rejected_by_default(self, adapter): 2257 """Browser preflight is rejected unless CORS is explicitly configured.""" 2258 app = _create_app(adapter) 2259 async with TestClient(TestServer(app)) as cli: 2260 resp = await cli.options( 2261 "/v1/chat/completions", 2262 headers={ 2263 "Origin": "http://evil.example", 2264 "Access-Control-Request-Method": "POST", 2265 }, 2266 ) 2267 assert resp.status == 403 2268 assert resp.headers.get("Access-Control-Allow-Origin") is None 2269 2270 @pytest.mark.asyncio 2271 async def test_cors_headers_present_for_allowed_origin(self): 2272 """Allowed origins receive explicit CORS headers.""" 2273 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2274 app = _create_app(adapter) 2275 async with TestClient(TestServer(app)) as cli: 2276 resp = await cli.get("/health", headers={"Origin": "http://localhost:3000"}) 2277 assert resp.status == 200 2278 assert resp.headers.get("Access-Control-Allow-Origin") == "http://localhost:3000" 2279 assert "POST" in resp.headers.get("Access-Control-Allow-Methods", "") 2280 assert "DELETE" in resp.headers.get("Access-Control-Allow-Methods", "") 2281 2282 @pytest.mark.asyncio 2283 async def test_cors_allows_idempotency_key_header(self): 2284 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2285 app = _create_app(adapter) 2286 async with TestClient(TestServer(app)) as cli: 2287 resp = await cli.options( 2288 "/v1/chat/completions", 2289 headers={ 2290 "Origin": 
"http://localhost:3000", 2291 "Access-Control-Request-Method": "POST", 2292 "Access-Control-Request-Headers": "Idempotency-Key", 2293 }, 2294 ) 2295 assert resp.status == 200 2296 assert "Idempotency-Key" in resp.headers.get("Access-Control-Allow-Headers", "") 2297 2298 @pytest.mark.asyncio 2299 async def test_cors_sets_vary_origin_header(self): 2300 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2301 app = _create_app(adapter) 2302 async with TestClient(TestServer(app)) as cli: 2303 resp = await cli.get("/health", headers={"Origin": "http://localhost:3000"}) 2304 assert resp.status == 200 2305 assert resp.headers.get("Vary") == "Origin" 2306 2307 @pytest.mark.asyncio 2308 async def test_cors_options_preflight_allowed_for_configured_origin(self): 2309 """Configured origins can complete browser preflight.""" 2310 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2311 app = _create_app(adapter) 2312 async with TestClient(TestServer(app)) as cli: 2313 resp = await cli.options( 2314 "/v1/chat/completions", 2315 headers={ 2316 "Origin": "http://localhost:3000", 2317 "Access-Control-Request-Method": "POST", 2318 "Access-Control-Request-Headers": "Authorization, Content-Type", 2319 }, 2320 ) 2321 assert resp.status == 200 2322 assert resp.headers.get("Access-Control-Allow-Origin") == "http://localhost:3000" 2323 assert "Authorization" in resp.headers.get("Access-Control-Allow-Headers", "") 2324 2325 2326 @pytest.mark.asyncio 2327 async def test_cors_preflight_sets_max_age(self): 2328 adapter = _make_adapter(cors_origins=["http://localhost:3000"]) 2329 app = _create_app(adapter) 2330 async with TestClient(TestServer(app)) as cli: 2331 resp = await cli.options( 2332 "/v1/chat/completions", 2333 headers={ 2334 "Origin": "http://localhost:3000", 2335 "Access-Control-Request-Method": "POST", 2336 "Access-Control-Request-Headers": "Authorization, Content-Type", 2337 }, 2338 ) 2339 assert resp.status == 200 2340 assert 
resp.headers.get("Access-Control-Max-Age") == "600" 2341 # --------------------------------------------------------------------------- 2342 # Conversation parameter 2343 # --------------------------------------------------------------------------- 2344 2345 2346 class TestConversationParameter: 2347 @pytest.mark.asyncio 2348 async def test_conversation_creates_new(self, adapter): 2349 """First request with a conversation name works (new conversation).""" 2350 app = _create_app(adapter) 2351 async with TestClient(TestServer(app)) as cli: 2352 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2353 mock_run.return_value = ( 2354 {"final_response": "Hello!", "messages": [], "api_calls": 1}, 2355 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 2356 ) 2357 resp = await cli.post("/v1/responses", json={ 2358 "input": "hi", 2359 "conversation": "my-chat", 2360 }) 2361 assert resp.status == 200 2362 data = await resp.json() 2363 assert data["status"] == "completed" 2364 # Conversation mapping should be set 2365 assert adapter._response_store.get_conversation("my-chat") is not None 2366 2367 @pytest.mark.asyncio 2368 async def test_conversation_chains_automatically(self, adapter): 2369 """Second request with same conversation name chains to first.""" 2370 app = _create_app(adapter) 2371 async with TestClient(TestServer(app)) as cli: 2372 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2373 mock_run.return_value = ( 2374 {"final_response": "First response", "messages": [], "api_calls": 1}, 2375 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 2376 ) 2377 # First request 2378 resp1 = await cli.post("/v1/responses", json={ 2379 "input": "hello", 2380 "conversation": "test-conv", 2381 }) 2382 assert resp1.status == 200 2383 data1 = await resp1.json() 2384 resp1_id = data1["id"] 2385 2386 # Second request — should chain 2387 mock_run.return_value = ( 2388 {"final_response": "Second response", 
"messages": [], "api_calls": 1}, 2389 {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30}, 2390 ) 2391 resp2 = await cli.post("/v1/responses", json={ 2392 "input": "follow up", 2393 "conversation": "test-conv", 2394 }) 2395 assert resp2.status == 200 2396 2397 # The second call should have received conversation history from the first 2398 assert mock_run.call_count == 2 2399 second_call_kwargs = mock_run.call_args_list[1] 2400 history = second_call_kwargs.kwargs.get("conversation_history", 2401 second_call_kwargs[1].get("conversation_history", []) if len(second_call_kwargs) > 1 else []) 2402 # History should be non-empty (contains messages from first response) 2403 assert len(history) > 0 2404 2405 @pytest.mark.asyncio 2406 async def test_conversation_and_previous_response_id_conflict(self, adapter): 2407 """Cannot use both conversation and previous_response_id.""" 2408 app = _create_app(adapter) 2409 async with TestClient(TestServer(app)) as cli: 2410 resp = await cli.post("/v1/responses", json={ 2411 "input": "hi", 2412 "conversation": "my-chat", 2413 "previous_response_id": "resp_abc123", 2414 }) 2415 assert resp.status == 400 2416 data = await resp.json() 2417 assert "Cannot use both" in data["error"]["message"] 2418 2419 @pytest.mark.asyncio 2420 async def test_separate_conversations_are_isolated(self, adapter): 2421 """Different conversation names have independent histories.""" 2422 app = _create_app(adapter) 2423 async with TestClient(TestServer(app)) as cli: 2424 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2425 mock_run.return_value = ( 2426 {"final_response": "Response A", "messages": [], "api_calls": 1}, 2427 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 2428 ) 2429 # Conversation A 2430 await cli.post("/v1/responses", json={"input": "conv-a msg", "conversation": "conv-a"}) 2431 # Conversation B 2432 mock_run.return_value = ( 2433 {"final_response": "Response B", "messages": [], "api_calls": 1}, 
2434 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 2435 ) 2436 await cli.post("/v1/responses", json={"input": "conv-b msg", "conversation": "conv-b"}) 2437 2438 # They should have different response IDs in the mapping 2439 assert adapter._response_store.get_conversation("conv-a") != adapter._response_store.get_conversation("conv-b") 2440 2441 @pytest.mark.asyncio 2442 async def test_conversation_store_false_no_mapping(self, adapter): 2443 """If store=false, conversation mapping is not updated.""" 2444 app = _create_app(adapter) 2445 async with TestClient(TestServer(app)) as cli: 2446 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2447 mock_run.return_value = ( 2448 {"final_response": "Ephemeral", "messages": [], "api_calls": 1}, 2449 {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, 2450 ) 2451 resp = await cli.post("/v1/responses", json={ 2452 "input": "hi", 2453 "conversation": "ephemeral-chat", 2454 "store": False, 2455 }) 2456 assert resp.status == 200 2457 # Conversation mapping should NOT be set since store=false 2458 assert adapter._response_store.get_conversation("ephemeral-chat") is None 2459 2460 2461 # --------------------------------------------------------------------------- 2462 # X-Hermes-Session-Id header (session continuity) 2463 # --------------------------------------------------------------------------- 2464 2465 2466 class TestSessionIdHeader: 2467 @pytest.mark.asyncio 2468 async def test_new_session_response_includes_session_id_header(self, adapter): 2469 """Without X-Hermes-Session-Id, a new session is created and returned in the header.""" 2470 mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} 2471 app = _create_app(adapter) 2472 async with TestClient(TestServer(app)) as cli: 2473 with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2474 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 
2475 resp = await cli.post( 2476 "/v1/chat/completions", 2477 json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]}, 2478 ) 2479 assert resp.status == 200 2480 assert resp.headers.get("X-Hermes-Session-Id") is not None 2481 2482 @pytest.mark.asyncio 2483 async def test_provided_session_id_is_used_and_echoed(self, auth_adapter): 2484 """When X-Hermes-Session-Id is provided, it's passed to the agent and echoed in the response.""" 2485 mock_result = {"final_response": "Continuing!", "messages": [], "api_calls": 1} 2486 mock_db = MagicMock() 2487 mock_db.get_messages_as_conversation.return_value = [ 2488 {"role": "user", "content": "previous message"}, 2489 {"role": "assistant", "content": "previous reply"}, 2490 ] 2491 auth_adapter._session_db = mock_db 2492 app = _create_app(auth_adapter) 2493 async with TestClient(TestServer(app)) as cli: 2494 with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2495 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2496 2497 resp = await cli.post( 2498 "/v1/chat/completions", 2499 headers={"X-Hermes-Session-Id": "my-session-123", "Authorization": "Bearer sk-secret"}, 2500 json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Continue"}]}, 2501 ) 2502 2503 assert resp.status == 200 2504 assert resp.headers.get("X-Hermes-Session-Id") == "my-session-123" 2505 call_kwargs = mock_run.call_args.kwargs 2506 assert call_kwargs["session_id"] == "my-session-123" 2507 2508 @pytest.mark.asyncio 2509 async def test_provided_session_id_loads_history_from_db(self, auth_adapter): 2510 """When X-Hermes-Session-Id is provided, history comes from SessionDB not request body.""" 2511 mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} 2512 db_history = [ 2513 {"role": "user", "content": "stored message 1"}, 2514 {"role": "assistant", "content": "stored reply 1"}, 2515 ] 2516 mock_db = MagicMock() 2517 
mock_db.get_messages_as_conversation.return_value = db_history 2518 auth_adapter._session_db = mock_db 2519 app = _create_app(auth_adapter) 2520 async with TestClient(TestServer(app)) as cli: 2521 with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run: 2522 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2523 2524 resp = await cli.post( 2525 "/v1/chat/completions", 2526 headers={"X-Hermes-Session-Id": "existing-session", "Authorization": "Bearer sk-secret"}, 2527 # Request body has different history — should be ignored 2528 json={ 2529 "model": "hermes-agent", 2530 "messages": [ 2531 {"role": "user", "content": "old msg from client"}, 2532 {"role": "assistant", "content": "old reply from client"}, 2533 {"role": "user", "content": "new question"}, 2534 ], 2535 }, 2536 ) 2537 2538 assert resp.status == 200 2539 call_kwargs = mock_run.call_args.kwargs 2540 # History must come from DB, not from the request body 2541 assert call_kwargs["conversation_history"] == db_history 2542 assert call_kwargs["user_message"] == "new question" 2543 2544 @pytest.mark.asyncio 2545 async def test_db_failure_falls_back_to_empty_history(self, auth_adapter): 2546 """If SessionDB raises, history falls back to empty and request still succeeds.""" 2547 mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} 2548 # Simulate DB failure: _session_db is None and SessionDB() constructor raises 2549 auth_adapter._session_db = None 2550 app = _create_app(auth_adapter) 2551 async with TestClient(TestServer(app)) as cli: 2552 with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \ 2553 patch("hermes_state.SessionDB", side_effect=Exception("DB unavailable")): 2554 mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) 2555 2556 resp = await cli.post( 2557 "/v1/chat/completions", 2558 headers={"X-Hermes-Session-Id": "some-session", 
"Authorization": "Bearer sk-secret"}, 2559 json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]}, 2560 ) 2561 2562 assert resp.status == 200 2563 call_kwargs = mock_run.call_args.kwargs 2564 assert call_kwargs["conversation_history"] == [] 2565 assert call_kwargs["session_id"] == "some-session"