/ tests / gateway / test_api_server.py
test_api_server.py
   1  """
   2  Tests for the OpenAI-compatible API server gateway adapter.
   3  
   4  Tests cover:
   5  - Chat Completions endpoint (request parsing, response format)
   6  - Responses API endpoint (request parsing, response format)
   7  - previous_response_id chaining (store/retrieve)
   8  - Auth (valid key, invalid key, no key configured)
   9  - /v1/models endpoint
  10  - /health endpoint
  11  - System prompt extraction
  12  - Error handling (invalid JSON, missing fields)
  13  """
  14  
  15  import asyncio
  16  import json
  17  import time
  18  import uuid
  19  from unittest.mock import AsyncMock, MagicMock, patch
  20  
  21  import pytest
  22  from aiohttp import web
  23  from aiohttp.test_utils import AioHTTPTestCase, TestClient, TestServer
  24  
  25  from gateway.config import GatewayConfig, Platform, PlatformConfig
  26  from gateway.platforms.api_server import (
  27      APIServerAdapter,
  28      ResponseStore,
  29      _IdempotencyCache,
  30      _CORS_HEADERS,
  31      _derive_chat_session_id,
  32      check_api_server_requirements,
  33      cors_middleware,
  34      security_headers_middleware,
  35  )
  36  
  37  
  38  # ---------------------------------------------------------------------------
  39  # check_api_server_requirements
  40  # ---------------------------------------------------------------------------
  41  
  42  
  43  class TestCheckRequirements:
  44      def test_returns_true_when_aiohttp_available(self):
  45          assert check_api_server_requirements() is True
  46  
  47      @patch("gateway.platforms.api_server.AIOHTTP_AVAILABLE", False)
  48      def test_returns_false_without_aiohttp(self):
  49          assert check_api_server_requirements() is False
  50  
  51  
  52  # ---------------------------------------------------------------------------
  53  # ResponseStore
  54  # ---------------------------------------------------------------------------
  55  
  56  
  57  class TestResponseStore:
  58      def test_put_and_get(self):
  59          store = ResponseStore(max_size=10)
  60          store.put("resp_1", {"output": "hello"})
  61          assert store.get("resp_1") == {"output": "hello"}
  62  
  63      def test_get_missing_returns_none(self):
  64          store = ResponseStore(max_size=10)
  65          assert store.get("resp_missing") is None
  66  
  67      def test_lru_eviction(self):
  68          store = ResponseStore(max_size=3)
  69          store.put("resp_1", {"output": "one"})
  70          store.put("resp_2", {"output": "two"})
  71          store.put("resp_3", {"output": "three"})
  72          # Adding a 4th should evict resp_1
  73          store.put("resp_4", {"output": "four"})
  74          assert store.get("resp_1") is None
  75          assert store.get("resp_2") is not None
  76          assert len(store) == 3
  77  
  78      def test_access_refreshes_lru(self):
  79          store = ResponseStore(max_size=3)
  80          store.put("resp_1", {"output": "one"})
  81          store.put("resp_2", {"output": "two"})
  82          store.put("resp_3", {"output": "three"})
  83          # Access resp_1 to move it to end
  84          store.get("resp_1")
  85          # Now resp_2 is the oldest — adding a 4th should evict resp_2
  86          store.put("resp_4", {"output": "four"})
  87          assert store.get("resp_2") is None
  88          assert store.get("resp_1") is not None
  89  
  90      def test_update_existing_key(self):
  91          store = ResponseStore(max_size=10)
  92          store.put("resp_1", {"output": "v1"})
  93          store.put("resp_1", {"output": "v2"})
  94          assert store.get("resp_1") == {"output": "v2"}
  95          assert len(store) == 1
  96  
  97      def test_delete_existing(self):
  98          store = ResponseStore(max_size=10)
  99          store.put("resp_1", {"output": "hello"})
 100          assert store.delete("resp_1") is True
 101          assert store.get("resp_1") is None
 102          assert len(store) == 0
 103  
 104      def test_delete_missing(self):
 105          store = ResponseStore(max_size=10)
 106          assert store.delete("resp_missing") is False
 107  
 108  
 109  # ---------------------------------------------------------------------------
 110  # _IdempotencyCache
 111  # ---------------------------------------------------------------------------
 112  
 113  
 114  class TestIdempotencyCache:
 115      @pytest.mark.asyncio
 116      async def test_concurrent_same_key_and_fingerprint_runs_once(self):
 117          cache = _IdempotencyCache()
 118          gate = asyncio.Event()
 119          started = asyncio.Event()
 120          calls = 0
 121  
 122          async def compute():
 123              nonlocal calls
 124              calls += 1
 125              started.set()
 126              await gate.wait()
 127              return ("response", {"total_tokens": 1})
 128  
 129          first = asyncio.create_task(cache.get_or_set("idem-key", "fp-1", compute))
 130          second = asyncio.create_task(cache.get_or_set("idem-key", "fp-1", compute))
 131  
 132          await started.wait()
 133          assert calls == 1
 134  
 135          gate.set()
 136          first_result, second_result = await asyncio.gather(first, second)
 137  
 138          assert first_result == second_result == ("response", {"total_tokens": 1})
 139  
 140      @pytest.mark.asyncio
 141      async def test_different_fingerprint_does_not_reuse_inflight_task(self):
 142          cache = _IdempotencyCache()
 143          gate = asyncio.Event()
 144          started = asyncio.Event()
 145          calls = 0
 146  
 147          async def compute():
 148              nonlocal calls
 149              calls += 1
 150              result = calls
 151              if calls == 2:
 152                  started.set()
 153              await gate.wait()
 154              return result
 155  
 156          first = asyncio.create_task(cache.get_or_set("idem-key", "fp-1", compute))
 157          second = asyncio.create_task(cache.get_or_set("idem-key", "fp-2", compute))
 158  
 159          await started.wait()
 160          assert calls == 2
 161  
 162          gate.set()
 163          results = await asyncio.gather(first, second)
 164  
 165          assert sorted(results) == [1, 2]
 166  
 167      @pytest.mark.asyncio
 168      async def test_cancelled_waiter_does_not_drop_shared_inflight_task(self):
 169          cache = _IdempotencyCache()
 170          gate = asyncio.Event()
 171          started = asyncio.Event()
 172          calls = 0
 173  
 174          async def compute():
 175              nonlocal calls
 176              calls += 1
 177              started.set()
 178              await gate.wait()
 179              return "response"
 180  
 181          first = asyncio.create_task(cache.get_or_set("idem-key", "fp-1", compute))
 182  
 183          await started.wait()
 184          assert calls == 1
 185  
 186          first.cancel()
 187          with pytest.raises(asyncio.CancelledError):
 188              await first
 189  
 190          second = asyncio.create_task(cache.get_or_set("idem-key", "fp-1", compute))
 191          await asyncio.sleep(0)
 192          assert calls == 1
 193  
 194          gate.set()
 195          assert await second == "response"
 196  
 197  
 198  # ---------------------------------------------------------------------------
 199  # Adapter initialization
 200  # ---------------------------------------------------------------------------
 201  
 202  
 203  class TestAdapterInit:
 204      def test_default_config(self):
 205          config = PlatformConfig(enabled=True)
 206          adapter = APIServerAdapter(config)
 207          assert adapter._host == "127.0.0.1"
 208          assert adapter._port == 8642
 209          assert adapter._api_key == ""
 210          assert adapter.platform == Platform.API_SERVER
 211  
 212      def test_custom_config_from_extra(self):
 213          config = PlatformConfig(
 214              enabled=True,
 215              extra={
 216                  "host": "0.0.0.0",
 217                  "port": 9999,
 218                  "key": "sk-test",
 219                  "cors_origins": ["http://localhost:3000"],
 220              },
 221          )
 222          adapter = APIServerAdapter(config)
 223          assert adapter._host == "0.0.0.0"
 224          assert adapter._port == 9999
 225          assert adapter._api_key == "sk-test"
 226          assert adapter._cors_origins == ("http://localhost:3000",)
 227  
 228      def test_config_from_env(self, monkeypatch):
 229          monkeypatch.setenv("API_SERVER_HOST", "10.0.0.1")
 230          monkeypatch.setenv("API_SERVER_PORT", "7777")
 231          monkeypatch.setenv("API_SERVER_KEY", "sk-env")
 232          monkeypatch.setenv("API_SERVER_CORS_ORIGINS", "http://localhost:3000, http://127.0.0.1:3000")
 233          config = PlatformConfig(enabled=True)
 234          adapter = APIServerAdapter(config)
 235          assert adapter._host == "10.0.0.1"
 236          assert adapter._port == 7777
 237          assert adapter._api_key == "sk-env"
 238          assert adapter._cors_origins == (
 239              "http://localhost:3000",
 240              "http://127.0.0.1:3000",
 241          )
 242  
 243      def test_invalid_port_from_env_falls_back_to_default(self, monkeypatch):
 244          monkeypatch.setenv("API_SERVER_PORT", "not-a-port")
 245          config = PlatformConfig(enabled=True)
 246          adapter = APIServerAdapter(config)
 247          assert adapter._port == 8642
 248  
 249      def test_create_agent_forwards_config_reasoning_effort(self, monkeypatch):
 250          captured = {}
 251  
 252          class FakeAgent:
 253              def __init__(self, **kwargs):
 254                  captured.update(kwargs)
 255  
 256          monkeypatch.setattr("run_agent.AIAgent", FakeAgent)
 257          monkeypatch.setattr(
 258              "gateway.run._resolve_runtime_agent_kwargs",
 259              lambda: {
 260                  "provider": "openai-codex",
 261                  "base_url": "https://example.test/v1",
 262                  "api_mode": "codex_responses",
 263              },
 264          )
 265          monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5.5")
 266          monkeypatch.setattr(
 267              "gateway.run._load_gateway_config",
 268              lambda: {"agent": {"reasoning_effort": "xhigh"}},
 269          )
 270          monkeypatch.setattr(
 271              "gateway.run.GatewayRunner._load_reasoning_config",
 272              staticmethod(lambda: {"enabled": True, "effort": "xhigh"}),
 273          )
 274          monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None))
 275          monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set())
 276  
 277          adapter = APIServerAdapter(PlatformConfig(enabled=True))
 278          monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)
 279  
 280          agent = adapter._create_agent(session_id="api-session")
 281  
 282          assert isinstance(agent, FakeAgent)
 283          assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"}
 284  
 285  
 286  # ---------------------------------------------------------------------------
 287  # Auth checking
 288  # ---------------------------------------------------------------------------
 289  
 290  
 291  class TestAuth:
 292      def test_no_key_configured_allows_all(self):
 293          config = PlatformConfig(enabled=True)
 294          adapter = APIServerAdapter(config)
 295          mock_request = MagicMock()
 296          mock_request.headers = {}
 297          assert adapter._check_auth(mock_request) is None
 298  
 299      def test_valid_key_passes(self):
 300          config = PlatformConfig(enabled=True, extra={"key": "sk-test123"})
 301          adapter = APIServerAdapter(config)
 302          mock_request = MagicMock()
 303          mock_request.headers = {"Authorization": "Bearer sk-test123"}
 304          assert adapter._check_auth(mock_request) is None
 305  
 306      def test_invalid_key_returns_401(self):
 307          config = PlatformConfig(enabled=True, extra={"key": "sk-test123"})
 308          adapter = APIServerAdapter(config)
 309          mock_request = MagicMock()
 310          mock_request.headers = {"Authorization": "Bearer wrong-key"}
 311          result = adapter._check_auth(mock_request)
 312          assert result is not None
 313          assert result.status == 401
 314  
 315      def test_missing_auth_header_returns_401(self):
 316          config = PlatformConfig(enabled=True, extra={"key": "sk-test123"})
 317          adapter = APIServerAdapter(config)
 318          mock_request = MagicMock()
 319          mock_request.headers = {}
 320          result = adapter._check_auth(mock_request)
 321          assert result is not None
 322          assert result.status == 401
 323  
 324      def test_malformed_auth_header_returns_401(self):
 325          config = PlatformConfig(enabled=True, extra={"key": "sk-test123"})
 326          adapter = APIServerAdapter(config)
 327          mock_request = MagicMock()
 328          mock_request.headers = {"Authorization": "Basic dXNlcjpwYXNz"}
 329          result = adapter._check_auth(mock_request)
 330          assert result is not None
 331          assert result.status == 401
 332  
 333  
 334  # ---------------------------------------------------------------------------
 335  # Helpers for HTTP tests
 336  # ---------------------------------------------------------------------------
 337  
 338  
 339  def _make_adapter(api_key: str = "", cors_origins=None) -> APIServerAdapter:
 340      """Create an adapter with optional API key."""
 341      extra = {}
 342      if api_key:
 343          extra["key"] = api_key
 344      if cors_origins is not None:
 345          extra["cors_origins"] = cors_origins
 346      config = PlatformConfig(enabled=True, extra=extra)
 347      return APIServerAdapter(config)
 348  
 349  
 350  def _create_app(adapter: APIServerAdapter) -> web.Application:
 351      """Create the aiohttp app from the adapter (without starting the full server)."""
 352      mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None]
 353      app = web.Application(middlewares=mws)
 354      app["api_server_adapter"] = adapter
 355      app.router.add_get("/health", adapter._handle_health)
 356      app.router.add_get("/health/detailed", adapter._handle_health_detailed)
 357      app.router.add_get("/v1/health", adapter._handle_health)
 358      app.router.add_get("/v1/models", adapter._handle_models)
 359      app.router.add_get("/v1/capabilities", adapter._handle_capabilities)
 360      app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
 361      app.router.add_post("/v1/responses", adapter._handle_responses)
 362      app.router.add_get("/v1/responses/{response_id}", adapter._handle_get_response)
 363      app.router.add_delete("/v1/responses/{response_id}", adapter._handle_delete_response)
 364      return app
 365  
 366  
 367  @pytest.fixture
 368  def adapter():
 369      return _make_adapter()
 370  
 371  
 372  @pytest.fixture
 373  def auth_adapter():
 374      return _make_adapter(api_key="sk-secret")
 375  
 376  
 377  # ---------------------------------------------------------------------------
 378  # Adapter internals
 379  # ---------------------------------------------------------------------------
 380  
 381  
 382  class TestAgentExecution:
 383      @pytest.mark.asyncio
 384      async def test_run_agent_uses_session_id_as_task_id(self, adapter):
 385          mock_agent = MagicMock()
 386          mock_agent.run_conversation.return_value = {"final_response": "ok"}
 387          mock_agent.session_prompt_tokens = 1
 388          mock_agent.session_completion_tokens = 2
 389          mock_agent.session_total_tokens = 3
 390  
 391          with patch.object(adapter, "_create_agent", return_value=mock_agent):
 392              result, usage = await adapter._run_agent(
 393                  user_message="hello",
 394                  conversation_history=[],
 395                  session_id="session-123",
 396              )
 397  
 398          assert result == {"final_response": "ok"}
 399          assert usage == {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
 400          mock_agent.run_conversation.assert_called_once_with(
 401              user_message="hello",
 402              conversation_history=[],
 403              task_id="session-123",
 404          )
 405  
 406  
 407  # ---------------------------------------------------------------------------
 408  # /health endpoint
 409  # ---------------------------------------------------------------------------
 410  
 411  
 412  class TestHealthEndpoint:
 413      @pytest.mark.asyncio
 414      async def test_security_headers_present(self, adapter):
 415          """Responses should include basic security headers."""
 416          app = _create_app(adapter)
 417          async with TestClient(TestServer(app)) as cli:
 418              resp = await cli.get("/health")
 419              assert resp.status == 200
 420              assert resp.headers.get("X-Content-Type-Options") == "nosniff"
 421              assert resp.headers.get("Referrer-Policy") == "no-referrer"
 422  
 423      @pytest.mark.asyncio
 424      async def test_health_returns_ok(self, adapter):
 425          app = _create_app(adapter)
 426          async with TestClient(TestServer(app)) as cli:
 427              resp = await cli.get("/health")
 428              assert resp.status == 200
 429              data = await resp.json()
 430              assert data["status"] == "ok"
 431              assert data["platform"] == "hermes-agent"
 432  
 433      @pytest.mark.asyncio
 434      async def test_v1_health_alias_returns_ok(self, adapter):
 435          """GET /v1/health should return the same response as /health."""
 436          app = _create_app(adapter)
 437          async with TestClient(TestServer(app)) as cli:
 438              resp = await cli.get("/v1/health")
 439              assert resp.status == 200
 440              data = await resp.json()
 441              assert data["status"] == "ok"
 442              assert data["platform"] == "hermes-agent"
 443  
 444  
 445  # ---------------------------------------------------------------------------
 446  # /health/detailed endpoint
 447  # ---------------------------------------------------------------------------
 448  
 449  
 450  class TestHealthDetailedEndpoint:
 451      @pytest.mark.asyncio
 452      async def test_health_detailed_returns_ok(self, adapter):
 453          """GET /health/detailed returns status, platform, and runtime fields."""
 454          app = _create_app(adapter)
 455          with patch("gateway.status.read_runtime_status", return_value={
 456              "gateway_state": "running",
 457              "platforms": {"telegram": {"state": "connected"}},
 458              "active_agents": 2,
 459              "exit_reason": None,
 460              "updated_at": "2026-04-14T00:00:00Z",
 461          }):
 462              async with TestClient(TestServer(app)) as cli:
 463                  resp = await cli.get("/health/detailed")
 464                  assert resp.status == 200
 465                  data = await resp.json()
 466                  assert data["status"] == "ok"
 467                  assert data["platform"] == "hermes-agent"
 468                  assert data["gateway_state"] == "running"
 469                  assert data["platforms"] == {"telegram": {"state": "connected"}}
 470                  assert data["active_agents"] == 2
 471                  assert isinstance(data["pid"], int)
 472                  assert "updated_at" in data
 473  
 474      @pytest.mark.asyncio
 475      async def test_health_detailed_no_runtime_status(self, adapter):
 476          """When gateway_state.json is missing, fields are None."""
 477          app = _create_app(adapter)
 478          with patch("gateway.status.read_runtime_status", return_value=None):
 479              async with TestClient(TestServer(app)) as cli:
 480                  resp = await cli.get("/health/detailed")
 481                  assert resp.status == 200
 482                  data = await resp.json()
 483                  assert data["status"] == "ok"
 484                  assert data["gateway_state"] is None
 485                  assert data["platforms"] == {}
 486  
 487      @pytest.mark.asyncio
 488      async def test_health_detailed_does_not_require_auth(self, auth_adapter):
 489          """Health detailed endpoint should be accessible without auth, like /health."""
 490          app = _create_app(auth_adapter)
 491          with patch("gateway.status.read_runtime_status", return_value=None):
 492              async with TestClient(TestServer(app)) as cli:
 493                  resp = await cli.get("/health/detailed")
 494                  assert resp.status == 200
 495  
 496  
 497  # ---------------------------------------------------------------------------
 498  # /v1/models endpoint
 499  # ---------------------------------------------------------------------------
 500  
 501  
 502  class TestModelsEndpoint:
 503      @pytest.mark.asyncio
 504      async def test_models_returns_hermes_agent(self, adapter):
 505          app = _create_app(adapter)
 506          async with TestClient(TestServer(app)) as cli:
 507              resp = await cli.get("/v1/models")
 508              assert resp.status == 200
 509              data = await resp.json()
 510              assert data["object"] == "list"
 511              assert len(data["data"]) == 1
 512              assert data["data"][0]["id"] == "hermes-agent"
 513              assert data["data"][0]["owned_by"] == "hermes"
 514  
 515      @pytest.mark.asyncio
 516      async def test_models_returns_profile_name(self):
 517          """When running under a named profile, /v1/models advertises the profile name."""
 518          with patch("gateway.platforms.api_server.APIServerAdapter._resolve_model_name", return_value="lucas"):
 519              adapter = _make_adapter()
 520          app = _create_app(adapter)
 521          async with TestClient(TestServer(app)) as cli:
 522              resp = await cli.get("/v1/models")
 523              assert resp.status == 200
 524              data = await resp.json()
 525              assert data["data"][0]["id"] == "lucas"
 526              assert data["data"][0]["root"] == "lucas"
 527  
 528      @pytest.mark.asyncio
 529      async def test_models_returns_explicit_model_name(self):
 530          """Explicit model_name in config overrides profile name."""
 531          extra = {"model_name": "my-custom-agent"}
 532          config = PlatformConfig(enabled=True, extra=extra)
 533          adapter = APIServerAdapter(config)
 534          assert adapter._model_name == "my-custom-agent"
 535  
 536      def test_resolve_model_name_explicit(self):
 537          assert APIServerAdapter._resolve_model_name("my-bot") == "my-bot"
 538  
 539      def test_resolve_model_name_default_profile(self):
 540          """Default profile falls back to 'hermes-agent'."""
 541          with patch("hermes_cli.profiles.get_active_profile_name", return_value="default"):
 542              assert APIServerAdapter._resolve_model_name("") == "hermes-agent"
 543  
 544      def test_resolve_model_name_named_profile(self):
 545          """Named profile uses the profile name as model name."""
 546          with patch("hermes_cli.profiles.get_active_profile_name", return_value="lucas"):
 547              assert APIServerAdapter._resolve_model_name("") == "lucas"
 548  
 549      @pytest.mark.asyncio
 550      async def test_models_requires_auth(self, auth_adapter):
 551          app = _create_app(auth_adapter)
 552          async with TestClient(TestServer(app)) as cli:
 553              resp = await cli.get("/v1/models")
 554              assert resp.status == 401
 555  
 556      @pytest.mark.asyncio
 557      async def test_models_with_valid_auth(self, auth_adapter):
 558          app = _create_app(auth_adapter)
 559          async with TestClient(TestServer(app)) as cli:
 560              resp = await cli.get(
 561                  "/v1/models",
 562                  headers={"Authorization": "Bearer sk-secret"},
 563              )
 564              assert resp.status == 200
 565  
 566  
 567  # ---------------------------------------------------------------------------
 568  # /v1/capabilities endpoint
 569  # ---------------------------------------------------------------------------
 570  
 571  
 572  class TestCapabilitiesEndpoint:
 573      @pytest.mark.asyncio
 574      async def test_capabilities_advertises_plugin_safe_contract(self, adapter):
 575          app = _create_app(adapter)
 576          async with TestClient(TestServer(app)) as cli:
 577              resp = await cli.get("/v1/capabilities")
 578              assert resp.status == 200
 579              data = await resp.json()
 580              assert data["object"] == "hermes.api_server.capabilities"
 581              assert data["platform"] == "hermes-agent"
 582              assert data["model"] == "hermes-agent"
 583              assert data["auth"]["type"] == "bearer"
 584              assert data["auth"]["required"] is False
 585              assert data["features"]["chat_completions"] is True
 586              assert data["features"]["run_status"] is True
 587              assert data["features"]["run_events_sse"] is True
 588              assert data["features"]["session_continuity_header"] == "X-Hermes-Session-Id"
 589              assert data["endpoints"]["run_status"]["path"] == "/v1/runs/{run_id}"
 590  
 591      @pytest.mark.asyncio
 592      async def test_capabilities_requires_auth_when_key_configured(self, auth_adapter):
 593          app = _create_app(auth_adapter)
 594          async with TestClient(TestServer(app)) as cli:
 595              resp = await cli.get("/v1/capabilities")
 596              assert resp.status == 401
 597  
 598              authed = await cli.get(
 599                  "/v1/capabilities",
 600                  headers={"Authorization": "Bearer sk-secret"},
 601              )
 602              assert authed.status == 200
 603              data = await authed.json()
 604              assert data["auth"]["required"] is True
 605  
 606  
 607  # ---------------------------------------------------------------------------
 608  # /v1/chat/completions endpoint
 609  # ---------------------------------------------------------------------------
 610  
 611  
 612  class TestChatCompletionsEndpoint:
 613      @pytest.mark.asyncio
 614      async def test_invalid_json_returns_400(self, adapter):
 615          app = _create_app(adapter)
 616          async with TestClient(TestServer(app)) as cli:
 617              resp = await cli.post(
 618                  "/v1/chat/completions",
 619                  data="not json",
 620                  headers={"Content-Type": "application/json"},
 621              )
 622              assert resp.status == 400
 623              data = await resp.json()
 624              assert "Invalid JSON" in data["error"]["message"]
 625  
 626      @pytest.mark.asyncio
 627      async def test_missing_messages_returns_400(self, adapter):
 628          app = _create_app(adapter)
 629          async with TestClient(TestServer(app)) as cli:
 630              resp = await cli.post("/v1/chat/completions", json={"model": "test"})
 631              assert resp.status == 400
 632              data = await resp.json()
 633              assert "messages" in data["error"]["message"]
 634  
 635      @pytest.mark.asyncio
 636      async def test_empty_messages_returns_400(self, adapter):
 637          app = _create_app(adapter)
 638          async with TestClient(TestServer(app)) as cli:
 639              resp = await cli.post("/v1/chat/completions", json={"model": "test", "messages": []})
 640              assert resp.status == 400
 641  
 642      @pytest.mark.asyncio
 643      async def test_stream_true_returns_sse(self, adapter):
 644          """stream=true returns SSE format with the full response."""
 645          app = _create_app(adapter)
 646          async with TestClient(TestServer(app)) as cli:
 647              async def _mock_run_agent(**kwargs):
 648                  # Simulate streaming: invoke stream_delta_callback with tokens
 649                  cb = kwargs.get("stream_delta_callback")
 650                  if cb:
 651                      cb("Hello!")
 652                      cb(None)  # End signal
 653                  return (
 654                      {"final_response": "Hello!", "messages": [], "api_calls": 1},
 655                      {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
 656                  )
 657  
 658              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent) as mock_run:
 659                  resp = await cli.post(
 660                      "/v1/chat/completions",
 661                      json={
 662                          "model": "test",
 663                          "messages": [{"role": "user", "content": "hi"}],
 664                          "stream": True,
 665                      },
 666                  )
 667                  assert resp.status == 200
 668                  assert "text/event-stream" in resp.headers.get("Content-Type", "")
 669                  assert resp.headers.get("X-Accel-Buffering") == "no"
 670                  body = await resp.text()
 671                  assert "data: " in body
 672                  assert "[DONE]" in body
 673                  assert "Hello!" in body
 674  
 675      @pytest.mark.asyncio
 676      async def test_stream_sends_keepalive_during_quiet_tool_gap(self, adapter):
 677          """Idle SSE streams should send keepalive comments while tools run silently."""
 678          import asyncio
 679          import gateway.platforms.api_server as api_server_mod
 680  
 681          app = _create_app(adapter)
 682          async with TestClient(TestServer(app)) as cli:
 683              async def _mock_run_agent(**kwargs):
 684                  cb = kwargs.get("stream_delta_callback")
 685                  if cb:
 686                      cb("Working")
 687                      await asyncio.sleep(0.65)
 688                      cb("...done")
 689                  return (
 690                      {"final_response": "Working...done", "messages": [], "api_calls": 1},
 691                      {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
 692                  )
 693  
 694              with (
 695                  patch.object(api_server_mod, "CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS", 0.01),
 696                  patch.object(adapter, "_run_agent", side_effect=_mock_run_agent),
 697              ):
 698                  resp = await cli.post(
 699                      "/v1/chat/completions",
 700                      json={
 701                          "model": "test",
 702                          "messages": [{"role": "user", "content": "do the thing"}],
 703                          "stream": True,
 704                      },
 705                  )
 706                  assert resp.status == 200
 707                  body = await resp.text()
 708                  assert ": keepalive" in body
 709                  assert "Working" in body
 710                  assert "...done" in body
 711                  assert "[DONE]" in body
 712  
 713      @pytest.mark.asyncio
 714      async def test_stream_survives_tool_call_none_sentinel(self, adapter):
 715          """stream_delta_callback(None) mid-stream (tool calls) must NOT kill the SSE stream.
 716  
 717          The agent fires stream_delta_callback(None) to tell the CLI display to
 718          close its response box before executing tool calls.  The API server's
 719          _on_delta must filter this out so the SSE response stays open and the
 720          final answer (streamed after tool execution) reaches the client.
 721          """
 722          import asyncio
 723  
 724          app = _create_app(adapter)
 725          async with TestClient(TestServer(app)) as cli:
 726              async def _mock_run_agent(**kwargs):
 727                  cb = kwargs.get("stream_delta_callback")
 728                  if cb:
 729                      # Simulate: agent streams partial text, then fires None
 730                      # (tool call box-close signal), then streams the final answer
 731                      cb("Thinking")
 732                      cb(None)          # mid-stream None from tool calls
 733                      await asyncio.sleep(0.05)  # simulate tool execution delay
 734                      cb(" about it...")
 735                      cb(None)          # another None (possible second tool round)
 736                      await asyncio.sleep(0.05)
 737                      cb(" The answer is 42.")
 738                  return (
 739                      {"final_response": "Thinking about it... The answer is 42.", "messages": [], "api_calls": 3},
 740                      {"input_tokens": 20, "output_tokens": 15, "total_tokens": 35},
 741                  )
 742  
 743              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
 744                  resp = await cli.post(
 745                      "/v1/chat/completions",
 746                      json={
 747                          "model": "test",
 748                          "messages": [{"role": "user", "content": "What is the answer?"}],
 749                          "stream": True,
 750                      },
 751                  )
 752                  assert resp.status == 200
 753                  body = await resp.text()
 754                  assert "[DONE]" in body
 755                  # The final answer text must appear in the SSE stream
 756                  assert "The answer is 42." in body
 757                  # All partial text must be present too
 758                  assert "Thinking" in body
 759                  assert " about it..." in body
 760  
 761      @pytest.mark.asyncio
 762      async def test_stream_includes_tool_progress(self, adapter):
 763          """tool_start_callback fires → progress appears as custom SSE event, not in delta.content."""
 764          import asyncio
 765  
 766          app = _create_app(adapter)
 767          async with TestClient(TestServer(app)) as cli:
 768              async def _mock_run_agent(**kwargs):
 769                  cb = kwargs.get("stream_delta_callback")
 770                  ts_cb = kwargs.get("tool_start_callback")
 771                  # Simulate the structured tool start the gateway now consumes.
 772                  if ts_cb:
 773                      ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
 774                  if cb:
 775                      await asyncio.sleep(0.05)
 776                      cb("Here are the files.")
 777                  return (
 778                      {"final_response": "Here are the files.", "messages": [], "api_calls": 1},
 779                      {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
 780                  )
 781  
 782              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
 783                  resp = await cli.post(
 784                      "/v1/chat/completions",
 785                      json={
 786                          "model": "test",
 787                          "messages": [{"role": "user", "content": "list files"}],
 788                          "stream": True,
 789                      },
 790                  )
 791                  assert resp.status == 200
 792                  body = await resp.text()
 793                  assert "[DONE]" in body
 794                  # Tool progress must appear as a custom SSE event, not in
 795                  # delta.content — prevents model from learning to imitate
 796                  # markers instead of calling tools (#6972).
 797                  assert "event: hermes.tool.progress" in body
 798                  assert '"tool": "terminal"' in body
 799                  # ``label`` is now derived by ``build_tool_preview`` from the
 800                  # tool args rather than passed by the caller, so we assert
 801                  # only that *some* label exists rather than a literal value.
 802                  assert '"label":' in body
 803                  # The progress marker must NOT appear inside any
 804                  # chat.completion.chunk delta.content field.
 805                  import json as _json
 806                  for line in body.splitlines():
 807                      if line.startswith("data: ") and line.strip() != "data: [DONE]":
 808                          try:
 809                              chunk = _json.loads(line[len("data: "):])
 810                          except _json.JSONDecodeError:
 811                              continue
 812                          if chunk.get("object") == "chat.completion.chunk":
 813                              for choice in chunk.get("choices", []):
 814                                  content = choice.get("delta", {}).get("content", "")
 815                                  # Tool emoji markers must never leak into content
 816                                  assert "ls -la" not in content or content == "Here are the files."
 817                  # Final content must also be present
 818                  assert "Here are the files." in body
 819  
 820      @pytest.mark.asyncio
 821      async def test_stream_tool_progress_skips_internal_events(self, adapter):
 822          """Internal tool calls (name starting with ``_``) are not streamed."""
 823          import asyncio
 824  
 825          app = _create_app(adapter)
 826          async with TestClient(TestServer(app)) as cli:
 827              async def _mock_run_agent(**kwargs):
 828                  cb = kwargs.get("stream_delta_callback")
 829                  ts_cb = kwargs.get("tool_start_callback")
 830                  if ts_cb:
 831                      ts_cb("call_internal_1", "_thinking", {"text": "some internal state"})
 832                      ts_cb("call_search_1", "web_search", {"query": "Python docs"})
 833                  if cb:
 834                      await asyncio.sleep(0.05)
 835                      cb("Found it.")
 836                  return (
 837                      {"final_response": "Found it.", "messages": [], "api_calls": 1},
 838                      {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
 839                  )
 840  
 841              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
 842                  resp = await cli.post(
 843                      "/v1/chat/completions",
 844                      json={
 845                          "model": "test",
 846                          "messages": [{"role": "user", "content": "search"}],
 847                          "stream": True,
 848                      },
 849                  )
 850                  assert resp.status == 200
 851                  body = await resp.text()
 852                  # Internal _thinking event should NOT appear anywhere
 853                  assert "some internal state" not in body
 854                  assert "call_internal_1" not in body
 855                  # Real tool progress should appear as custom SSE event
 856                  assert "event: hermes.tool.progress" in body
 857                  assert '"tool": "web_search"' in body
 858                  # Label is derived from the args dict by build_tool_preview;
 859                  # asserting on the structural fact (label exists, call id
 860                  # is correlated) rather than a literal preview string keeps
 861                  # the test robust against preview-formatter tweaks.
 862                  assert '"label":' in body
 863                  assert '"toolCallId": "call_search_1"' in body
 864  
 865      @pytest.mark.asyncio
 866      async def test_stream_emits_tool_lifecycle_with_call_id(self, adapter):
 867          """Regression for #16588.
 868  
 869          ``/v1/chat/completions`` streaming previously emitted only a
 870          ``tool.started``-style ``hermes.tool.progress`` event; clients
 871          rendering tool lifecycle UI had no way to mark a tool as finished
 872          because no matching ``status: completed`` event was emitted, and
 873          no ``toolCallId`` was carried for correlation.
 874  
 875          The fix adds ``tool_start_callback`` / ``tool_complete_callback``
 876          to the chat completions agent invocation and writes both halves
 877          of the lifecycle pair on the same ``event: hermes.tool.progress``
 878          SSE line, with stable ``toolCallId`` and ``status``.
 879          """
 880          import asyncio
 881          import json as _json
 882  
 883          app = _create_app(adapter)
 884          async with TestClient(TestServer(app)) as cli:
 885              async def _mock_run_agent(**kwargs):
 886                  cb = kwargs.get("stream_delta_callback")
 887                  ts_cb = kwargs.get("tool_start_callback")
 888                  tc_cb = kwargs.get("tool_complete_callback")
 889                  # The structured callbacks own the chat-completions SSE
 890                  # channel now; ``tool_progress_callback`` is intentionally
 891                  # not wired so each tool start emits exactly one event.
 892                  if ts_cb:
 893                      ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
 894                  if tc_cb:
 895                      tc_cb("call_terminal_1", "terminal", {"command": "ls -la"}, "ok")
 896                  if cb:
 897                      await asyncio.sleep(0.05)
 898                      cb("done.")
 899                  return (
 900                      {"final_response": "done.", "messages": [], "api_calls": 1},
 901                      {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
 902                  )
 903  
 904              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
 905                  resp = await cli.post(
 906                      "/v1/chat/completions",
 907                      json={
 908                          "model": "test",
 909                          "messages": [{"role": "user", "content": "list"}],
 910                          "stream": True,
 911                      },
 912                  )
 913                  assert resp.status == 200
 914                  body = await resp.text()
 915  
 916              # Walk the SSE body and collect *(status, toolCallId)* pairs
 917              # per event so the assertions verify per-event correlation —
 918              # an event missing ``toolCallId`` would not pass even if a
 919              # different event happens to carry the right id.
 920              pairs: list[tuple[str | None, str | None]] = []
 921              lines = body.splitlines()
 922              for i, line in enumerate(lines):
 923                  if line.strip() != "event: hermes.tool.progress":
 924                      continue
 925                  for follow in lines[i + 1: i + 4]:
 926                      if follow.startswith("data: "):
 927                          try:
 928                              payload = _json.loads(follow[len("data: "):])
 929                          except _json.JSONDecodeError:
 930                              break
 931                          pairs.append((payload.get("status"), payload.get("toolCallId")))
 932                          break
 933  
 934              # Each tool start must emit exactly one event (no duplicate
 935              # legacy + new emit), and each lifecycle pair must carry the
 936              # same toolCallId on every event — not just somewhere in the
 937              # aggregate.
 938              assert len(pairs) == 2, f"expected 2 events (running+completed), got {pairs}"
 939              assert pairs[0] == ("running", "call_terminal_1"), pairs
 940              assert pairs[1] == ("completed", "call_terminal_1"), pairs
 941  
 942      @pytest.mark.asyncio
 943      async def test_stream_tool_lifecycle_skips_internal_and_orphan_completes(self, adapter):
 944          """Internal tools (``_thinking``-style) and ``completed`` events
 945          without a prior matching ``running`` must produce no lifecycle
 946          events on the wire — otherwise clients would see orphaned
 947          ``status: completed`` updates they cannot correlate."""
 948          import asyncio
 949  
 950          app = _create_app(adapter)
 951          async with TestClient(TestServer(app)) as cli:
 952              async def _mock_run_agent(**kwargs):
 953                  cb = kwargs.get("stream_delta_callback")
 954                  ts_cb = kwargs.get("tool_start_callback")
 955                  tc_cb = kwargs.get("tool_complete_callback")
 956                  # Internal tool — must be filtered.
 957                  if ts_cb:
 958                      ts_cb("call_internal_1", "_thinking", {})
 959                  if tc_cb:
 960                      tc_cb("call_internal_1", "_thinking", {}, "")
 961                  # Completion without start — orphan, must be dropped.
 962                  if tc_cb:
 963                      tc_cb("call_orphan_1", "web_search", {}, "ok")
 964                  if cb:
 965                      await asyncio.sleep(0.05)
 966                      cb("ok.")
 967                  return (
 968                      {"final_response": "ok.", "messages": [], "api_calls": 1},
 969                      {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
 970                  )
 971  
 972              with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
 973                  resp = await cli.post(
 974                      "/v1/chat/completions",
 975                      json={
 976                          "model": "test",
 977                          "messages": [{"role": "user", "content": "ok"}],
 978                          "stream": True,
 979                      },
 980                  )
 981                  assert resp.status == 200
 982                  body = await resp.text()
 983  
 984              # Neither the internal call_id nor the orphan call_id should
 985              # surface as a lifecycle payload on the wire.
 986              assert "call_internal_1" not in body
 987              assert "call_orphan_1" not in body
 988              assert '"status": "running"' not in body
 989              assert '"status": "completed"' not in body
 990  
 991      @pytest.mark.asyncio
 992      async def test_no_user_message_returns_400(self, adapter):
 993          app = _create_app(adapter)
 994          async with TestClient(TestServer(app)) as cli:
 995              resp = await cli.post(
 996                  "/v1/chat/completions",
 997                  json={
 998                      "model": "test",
 999                      "messages": [{"role": "system", "content": "You are helpful."}],
1000                  },
1001              )
1002              assert resp.status == 400
1003  
1004      @pytest.mark.asyncio
1005      async def test_successful_completion(self, adapter):
1006          """Test a successful chat completion with mocked agent."""
1007          mock_result = {
1008              "final_response": "Hello! How can I help you today?",
1009              "messages": [],
1010              "api_calls": 1,
1011          }
1012  
1013          app = _create_app(adapter)
1014          async with TestClient(TestServer(app)) as cli:
1015              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1016                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1017                  resp = await cli.post(
1018                      "/v1/chat/completions",
1019                      json={
1020                          "model": "hermes-agent",
1021                          "messages": [{"role": "user", "content": "Hello"}],
1022                      },
1023                  )
1024  
1025              assert resp.status == 200
1026              data = await resp.json()
1027              assert data["object"] == "chat.completion"
1028              assert data["id"].startswith("chatcmpl-")
1029              assert data["model"] == "hermes-agent"
1030              assert len(data["choices"]) == 1
1031              assert data["choices"][0]["message"]["role"] == "assistant"
1032              assert data["choices"][0]["message"]["content"] == "Hello! How can I help you today?"
1033              assert data["choices"][0]["finish_reason"] == "stop"
1034              assert "usage" in data
1035  
1036      @pytest.mark.asyncio
1037      async def test_system_prompt_extracted(self, adapter):
1038          """System messages from the client are passed as ephemeral_system_prompt."""
1039          mock_result = {
1040              "final_response": "I am a pirate! Arrr!",
1041              "messages": [],
1042              "api_calls": 1,
1043          }
1044  
1045          app = _create_app(adapter)
1046          async with TestClient(TestServer(app)) as cli:
1047              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1048                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1049                  resp = await cli.post(
1050                      "/v1/chat/completions",
1051                      json={
1052                          "model": "hermes-agent",
1053                          "messages": [
1054                              {"role": "system", "content": "You are a pirate."},
1055                              {"role": "user", "content": "Hello"},
1056                          ],
1057                      },
1058                  )
1059  
1060              assert resp.status == 200
1061              # Check that _run_agent was called with the system prompt
1062              call_kwargs = mock_run.call_args
1063              assert call_kwargs.kwargs.get("ephemeral_system_prompt") == "You are a pirate."
1064              assert call_kwargs.kwargs.get("user_message") == "Hello"
1065  
1066      @pytest.mark.asyncio
1067      async def test_conversation_history_passed(self, adapter):
1068          """Previous user/assistant messages become conversation_history."""
1069          mock_result = {"final_response": "3", "messages": [], "api_calls": 1}
1070  
1071          app = _create_app(adapter)
1072          async with TestClient(TestServer(app)) as cli:
1073              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1074                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1075                  resp = await cli.post(
1076                      "/v1/chat/completions",
1077                      json={
1078                          "model": "hermes-agent",
1079                          "messages": [
1080                              {"role": "user", "content": "1+1=?"},
1081                              {"role": "assistant", "content": "2"},
1082                              {"role": "user", "content": "Now add 1 more"},
1083                          ],
1084                      },
1085                  )
1086  
1087              assert resp.status == 200
1088              call_kwargs = mock_run.call_args.kwargs
1089              assert call_kwargs["user_message"] == "Now add 1 more"
1090              assert len(call_kwargs["conversation_history"]) == 2
1091              assert call_kwargs["conversation_history"][0] == {"role": "user", "content": "1+1=?"}
1092              assert call_kwargs["conversation_history"][1] == {"role": "assistant", "content": "2"}
1093  
1094      @pytest.mark.asyncio
1095      async def test_agent_error_returns_500(self, adapter):
1096          """Agent exception returns 500."""
1097          app = _create_app(adapter)
1098          async with TestClient(TestServer(app)) as cli:
1099              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1100                  mock_run.side_effect = RuntimeError("Provider failed")
1101                  resp = await cli.post(
1102                      "/v1/chat/completions",
1103                      json={
1104                          "model": "hermes-agent",
1105                          "messages": [{"role": "user", "content": "Hello"}],
1106                      },
1107                  )
1108  
1109              assert resp.status == 500
1110              data = await resp.json()
1111              assert "Provider failed" in data["error"]["message"]
1112  
1113      @pytest.mark.asyncio
1114      async def test_stable_session_id_across_turns(self, adapter):
1115          """Same conversation (same first user message) produces the same session_id."""
1116          mock_result = {"final_response": "ok", "messages": [], "api_calls": 1}
1117  
1118          app = _create_app(adapter)
1119          session_ids = []
1120          async with TestClient(TestServer(app)) as cli:
1121              # Turn 1: single user message
1122              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1123                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1124                  await cli.post(
1125                      "/v1/chat/completions",
1126                      json={
1127                          "model": "hermes-agent",
1128                          "messages": [{"role": "user", "content": "Hello"}],
1129                      },
1130                  )
1131                  session_ids.append(mock_run.call_args.kwargs["session_id"])
1132  
1133              # Turn 2: same first message, conversation grew
1134              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1135                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1136                  await cli.post(
1137                      "/v1/chat/completions",
1138                      json={
1139                          "model": "hermes-agent",
1140                          "messages": [
1141                              {"role": "user", "content": "Hello"},
1142                              {"role": "assistant", "content": "Hi there!"},
1143                              {"role": "user", "content": "How are you?"},
1144                          ],
1145                      },
1146                  )
1147                  session_ids.append(mock_run.call_args.kwargs["session_id"])
1148  
1149          assert session_ids[0] == session_ids[1], "Session ID should be stable across turns"
1150          assert session_ids[0].startswith("api-"), "Derived session IDs should have api- prefix"
1151  
1152      @pytest.mark.asyncio
1153      async def test_different_conversations_get_different_session_ids(self, adapter):
1154          """Different first messages produce different session_ids."""
1155          mock_result = {"final_response": "ok", "messages": [], "api_calls": 1}
1156  
1157          app = _create_app(adapter)
1158          session_ids = []
1159          async with TestClient(TestServer(app)) as cli:
1160              for first_msg in ["Hello", "Goodbye"]:
1161                  with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1162                      mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1163                      await cli.post(
1164                          "/v1/chat/completions",
1165                          json={
1166                              "model": "hermes-agent",
1167                              "messages": [{"role": "user", "content": first_msg}],
1168                          },
1169                      )
1170                      session_ids.append(mock_run.call_args.kwargs["session_id"])
1171  
1172          assert session_ids[0] != session_ids[1]
1173  
1174  
1175  # ---------------------------------------------------------------------------
1176  # _derive_chat_session_id unit tests
1177  # ---------------------------------------------------------------------------
1178  
1179  
class TestDeriveChatSessionId:
    """Direct checks of ``_derive_chat_session_id`` without going through HTTP."""

    def test_deterministic(self):
        """Calling twice with identical arguments yields identical IDs."""
        first = _derive_chat_session_id("sys", "hello")
        second = _derive_chat_session_id("sys", "hello")
        assert first == second

    def test_prefix(self):
        """Derived IDs start with ``api-``."""
        session_id = _derive_chat_session_id(None, "hi")
        assert session_id.startswith("api-")

    def test_different_system_prompt(self):
        """Changing only the first argument changes the ID."""
        pirate_id = _derive_chat_session_id("You are a pirate.", "Hello")
        robot_id = _derive_chat_session_id("You are a robot.", "Hello")
        assert pirate_id != robot_id

    def test_different_first_message(self):
        """Changing only the second argument changes the ID."""
        hello_id = _derive_chat_session_id(None, "Hello")
        goodbye_id = _derive_chat_session_id(None, "Goodbye")
        assert hello_id != goodbye_id

    def test_none_system_prompt(self):
        """A ``None`` first argument still yields a string longer than four characters."""
        sid = _derive_chat_session_id(None, "test")
        assert isinstance(sid, str)
        assert len(sid) > 4
1204  
1205  
1206  # ---------------------------------------------------------------------------
1207  # /v1/responses endpoint
1208  # ---------------------------------------------------------------------------
1209  
1210  
1211  class TestResponsesEndpoint:
1212      @pytest.mark.asyncio
1213      async def test_missing_input_returns_400(self, adapter):
1214          app = _create_app(adapter)
1215          async with TestClient(TestServer(app)) as cli:
1216              resp = await cli.post("/v1/responses", json={"model": "test"})
1217              assert resp.status == 400
1218              data = await resp.json()
1219              assert "input" in data["error"]["message"]
1220  
1221      @pytest.mark.asyncio
1222      async def test_invalid_json_returns_400(self, adapter):
1223          app = _create_app(adapter)
1224          async with TestClient(TestServer(app)) as cli:
1225              resp = await cli.post(
1226                  "/v1/responses",
1227                  data="not json",
1228                  headers={"Content-Type": "application/json"},
1229              )
1230              assert resp.status == 400
1231  
1232      @pytest.mark.asyncio
1233      async def test_successful_response_with_string_input(self, adapter):
1234          """String input is wrapped in a user message."""
1235          mock_result = {
1236              "final_response": "Paris is the capital of France.",
1237              "messages": [],
1238              "api_calls": 1,
1239          }
1240  
1241          app = _create_app(adapter)
1242          async with TestClient(TestServer(app)) as cli:
1243              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1244                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1245                  resp = await cli.post(
1246                      "/v1/responses",
1247                      json={
1248                          "model": "hermes-agent",
1249                          "input": "What is the capital of France?",
1250                      },
1251                  )
1252  
1253              assert resp.status == 200
1254              data = await resp.json()
1255              assert data["object"] == "response"
1256              assert data["id"].startswith("resp_")
1257              assert data["status"] == "completed"
1258              assert len(data["output"]) == 1
1259              assert data["output"][0]["type"] == "message"
1260              assert data["output"][0]["content"][0]["type"] == "output_text"
1261              assert data["output"][0]["content"][0]["text"] == "Paris is the capital of France."
1262  
1263      @pytest.mark.asyncio
1264      async def test_successful_response_with_array_input(self, adapter):
1265          """Array input with role/content objects."""
1266          mock_result = {"final_response": "Done", "messages": [], "api_calls": 1}
1267  
1268          app = _create_app(adapter)
1269          async with TestClient(TestServer(app)) as cli:
1270              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1271                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1272                  resp = await cli.post(
1273                      "/v1/responses",
1274                      json={
1275                          "model": "hermes-agent",
1276                          "input": [
1277                              {"role": "user", "content": "Hello"},
1278                              {"role": "user", "content": "What is 2+2?"},
1279                          ],
1280                      },
1281                  )
1282  
1283              assert resp.status == 200
1284              call_kwargs = mock_run.call_args.kwargs
1285              # Last message is user_message, rest are history
1286              assert call_kwargs["user_message"] == "What is 2+2?"
1287              assert len(call_kwargs["conversation_history"]) == 1
1288  
1289      @pytest.mark.asyncio
1290      async def test_instructions_as_ephemeral_prompt(self, adapter):
1291          """The instructions field maps to ephemeral_system_prompt."""
1292          mock_result = {"final_response": "Ahoy!", "messages": [], "api_calls": 1}
1293  
1294          app = _create_app(adapter)
1295          async with TestClient(TestServer(app)) as cli:
1296              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1297                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1298                  resp = await cli.post(
1299                      "/v1/responses",
1300                      json={
1301                          "model": "hermes-agent",
1302                          "input": "Hello",
1303                          "instructions": "Talk like a pirate.",
1304                      },
1305                  )
1306  
1307              assert resp.status == 200
1308              call_kwargs = mock_run.call_args.kwargs
1309              assert call_kwargs["ephemeral_system_prompt"] == "Talk like a pirate."
1310  
1311      @pytest.mark.asyncio
1312      async def test_previous_response_id_chaining(self, adapter):
1313          """Test that responses can be chained via previous_response_id."""
1314          mock_result_1 = {
1315              "final_response": "2",
1316              "messages": [{"role": "assistant", "content": "2"}],
1317              "api_calls": 1,
1318          }
1319  
1320          app = _create_app(adapter)
1321          async with TestClient(TestServer(app)) as cli:
1322              # First request
1323              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1324                  mock_run.return_value = (mock_result_1, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1325                  resp1 = await cli.post(
1326                      "/v1/responses",
1327                      json={"model": "hermes-agent", "input": "What is 1+1?"},
1328                  )
1329  
1330              assert resp1.status == 200
1331              data1 = await resp1.json()
1332              response_id = data1["id"]
1333  
1334              # Second request chaining from the first
1335              mock_result_2 = {
1336                  "final_response": "3",
1337                  "messages": [{"role": "assistant", "content": "3"}],
1338                  "api_calls": 1,
1339              }
1340  
1341              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1342                  mock_run.return_value = (mock_result_2, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1343                  resp2 = await cli.post(
1344                      "/v1/responses",
1345                      json={
1346                          "model": "hermes-agent",
1347                          "input": "Now add 1 more",
1348                          "previous_response_id": response_id,
1349                      },
1350                  )
1351  
1352              assert resp2.status == 200
1353              # The conversation_history should contain the full history from the first response
1354              call_kwargs = mock_run.call_args.kwargs
1355              assert len(call_kwargs["conversation_history"]) > 0
1356              assert call_kwargs["user_message"] == "Now add 1 more"
1357  
1358      @pytest.mark.asyncio
1359      async def test_previous_response_id_preserves_session(self, adapter):
1360          """Chained responses via previous_response_id reuse the same session_id."""
1361          mock_result = {
1362              "final_response": "ok",
1363              "messages": [{"role": "assistant", "content": "ok"}],
1364              "api_calls": 1,
1365          }
1366          usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
1367  
1368          app = _create_app(adapter)
1369          async with TestClient(TestServer(app)) as cli:
1370              # First request — establishes a session
1371              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1372                  mock_run.return_value = (mock_result, usage)
1373                  resp1 = await cli.post(
1374                      "/v1/responses",
1375                      json={"model": "hermes-agent", "input": "Hello"},
1376                  )
1377              assert resp1.status == 200
1378              first_session_id = mock_run.call_args.kwargs["session_id"]
1379              data1 = await resp1.json()
1380              response_id = data1["id"]
1381  
1382              # Second request — chains from the first
1383              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1384                  mock_run.return_value = (mock_result, usage)
1385                  resp2 = await cli.post(
1386                      "/v1/responses",
1387                      json={
1388                          "model": "hermes-agent",
1389                          "input": "Follow up",
1390                          "previous_response_id": response_id,
1391                      },
1392                  )
1393              assert resp2.status == 200
1394              second_session_id = mock_run.call_args.kwargs["session_id"]
1395  
1396              # Session must be the same across the chain
1397              assert first_session_id == second_session_id
1398  
1399      @pytest.mark.asyncio
1400      async def test_invalid_previous_response_id_returns_404(self, adapter):
1401          app = _create_app(adapter)
1402          async with TestClient(TestServer(app)) as cli:
1403              resp = await cli.post(
1404                  "/v1/responses",
1405                  json={
1406                      "model": "hermes-agent",
1407                      "input": "follow up",
1408                      "previous_response_id": "resp_nonexistent",
1409                  },
1410              )
1411              assert resp.status == 404
1412  
1413      @pytest.mark.asyncio
1414      async def test_store_false_does_not_store(self, adapter):
1415          """When store=false, the response is NOT stored."""
1416          mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
1417  
1418          app = _create_app(adapter)
1419          async with TestClient(TestServer(app)) as cli:
1420              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1421                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1422                  resp = await cli.post(
1423                      "/v1/responses",
1424                      json={
1425                          "model": "hermes-agent",
1426                          "input": "Hello",
1427                          "store": False,
1428                      },
1429                  )
1430  
1431              assert resp.status == 200
1432              data = await resp.json()
1433              # The response has an ID but it shouldn't be retrievable
1434              assert adapter._response_store.get(data["id"]) is None
1435  
1436      @pytest.mark.asyncio
1437      async def test_instructions_inherited_from_previous(self, adapter):
1438          """If no instructions provided, carry forward from previous response."""
1439          mock_result = {"final_response": "Ahoy!", "messages": [], "api_calls": 1}
1440  
1441          app = _create_app(adapter)
1442          async with TestClient(TestServer(app)) as cli:
1443              # First request with instructions
1444              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1445                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1446                  resp1 = await cli.post(
1447                      "/v1/responses",
1448                      json={
1449                          "model": "hermes-agent",
1450                          "input": "Hello",
1451                          "instructions": "Be a pirate",
1452                      },
1453                  )
1454  
1455              data1 = await resp1.json()
1456              resp_id = data1["id"]
1457  
1458              # Second request without instructions
1459              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1460                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
1461                  resp2 = await cli.post(
1462                      "/v1/responses",
1463                      json={
1464                          "model": "hermes-agent",
1465                          "input": "Tell me more",
1466                          "previous_response_id": resp_id,
1467                      },
1468                  )
1469  
1470              assert resp2.status == 200
1471              call_kwargs = mock_run.call_args.kwargs
1472              assert call_kwargs["ephemeral_system_prompt"] == "Be a pirate"
1473  
1474      @pytest.mark.asyncio
1475      async def test_agent_error_returns_500(self, adapter):
1476          app = _create_app(adapter)
1477          async with TestClient(TestServer(app)) as cli:
1478              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1479                  mock_run.side_effect = RuntimeError("Boom")
1480                  resp = await cli.post(
1481                      "/v1/responses",
1482                      json={"model": "hermes-agent", "input": "Hello"},
1483                  )
1484  
1485              assert resp.status == 500
1486  
1487      @pytest.mark.asyncio
1488      async def test_invalid_input_type_returns_400(self, adapter):
1489          app = _create_app(adapter)
1490          async with TestClient(TestServer(app)) as cli:
1491              resp = await cli.post(
1492                  "/v1/responses",
1493                  json={"model": "hermes-agent", "input": 42},
1494              )
1495              assert resp.status == 400
1496  
1497  
class TestResponsesStreaming:
    """Streaming (``stream: true``) behaviour of the ``/v1/responses`` endpoint."""

    @pytest.mark.asyncio
    async def test_stream_true_returns_responses_sse(self, adapter):
        """``stream: true`` returns an event stream containing the expected events."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            async def _mock_run_agent(**kwargs):
                # Emit two text deltas through the callback the handler passed in.
                cb = kwargs.get("stream_delta_callback")
                if cb:
                    cb("Hello")
                    cb(" world")
                return (
                    {"final_response": "Hello world", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )

            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
                resp = await cli.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "hi", "stream": True},
                )
                assert resp.status == 200
                assert "text/event-stream" in resp.headers.get("Content-Type", "")
                body = await resp.text()
                assert "event: response.created" in body
                assert "event: response.output_text.delta" in body
                assert "event: response.output_text.done" in body
                assert "event: response.completed" in body
                assert '"sequence_number":' in body
                assert '"logprobs": []' in body
                assert "Hello" in body
                assert " world" in body

    @pytest.mark.asyncio
    async def test_stream_emits_function_call_and_output_items(self, adapter):
        """Tool start/complete callbacks surface as function_call(_output) items in the stream."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            async def _mock_run_agent(**kwargs):
                # Drive one tool call (start + completion) and then one text delta.
                start_cb = kwargs.get("tool_start_callback")
                complete_cb = kwargs.get("tool_complete_callback")
                text_cb = kwargs.get("stream_delta_callback")
                if start_cb:
                    start_cb("call_123", "read_file", {"path": "/tmp/test.txt"})
                if complete_cb:
                    complete_cb("call_123", "read_file", {"path": "/tmp/test.txt"}, '{"content":"hello"}')
                if text_cb:
                    text_cb("Done.")
                return (
                    {
                        "final_response": "Done.",
                        "messages": [
                            {
                                "role": "assistant",
                                "tool_calls": [
                                    {
                                        "id": "call_123",
                                        "function": {
                                            "name": "read_file",
                                            "arguments": '{"path":"/tmp/test.txt"}',
                                        },
                                    }
                                ],
                            },
                            {
                                "role": "tool",
                                "tool_call_id": "call_123",
                                "content": '{"content":"hello"}',
                            },
                        ],
                        "api_calls": 1,
                    },
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )

            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
                resp = await cli.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "read the file", "stream": True},
                )
                assert resp.status == 200
                body = await resp.text()
                assert "event: response.output_item.added" in body
                assert "event: response.output_item.done" in body
                # At least the function call and its output must each be closed.
                assert body.count("event: response.output_item.done") >= 2
                assert '"type": "function_call"' in body
                assert '"type": "function_call_output"' in body
                assert '"call_id": "call_123"' in body
                assert '"name": "read_file"' in body
                assert '"output": [{"type": "input_text", "text": "{\\"content\\":\\"hello\\"}"}]' in body

    @pytest.mark.asyncio
    async def test_streamed_response_is_stored_for_get(self, adapter):
        """A completed streamed response can afterwards be fetched with GET by its id."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            async def _mock_run_agent(**kwargs):
                cb = kwargs.get("stream_delta_callback")
                if cb:
                    cb("Stored response")
                return (
                    {"final_response": "Stored response", "messages": [], "api_calls": 1},
                    {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3},
                )

            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
                resp = await cli.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "store this", "stream": True},
                )
                # Surface a failed POST directly instead of as a missing
                # response.completed event further down.
                assert resp.status == 200
                body = await resp.text()

                # Scan the SSE data lines for the response.completed payload
                # and take the response id from it; non-JSON data lines are skipped.
                response_id = None
                for line in body.splitlines():
                    if line.startswith("data: "):
                        try:
                            payload = json.loads(line[len("data: "):])
                        except json.JSONDecodeError:
                            continue
                        if payload.get("type") == "response.completed":
                            response_id = payload["response"]["id"]
                            break
                assert response_id

                get_resp = await cli.get(f"/v1/responses/{response_id}")
                assert get_resp.status == 200
                data = await get_resp.json()
                assert data["id"] == response_id
                assert data["status"] == "completed"
                assert data["output"][-1]["content"][0]["text"] == "Stored response"

    @pytest.mark.asyncio
    async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
        """Server-side asyncio.CancelledError (shutdown, request timeout) must
        still leave an ``incomplete`` snapshot in ResponseStore so
        GET /v1/responses/{id} and previous_response_id chaining keep
        working.  Regression for PR #15171 follow-up.

        Calls _write_sse_responses directly so the test can await the
        handler to completion (TestClient disconnection races the server
        handler, which makes end-to-end assertion on the final stored
        snapshot flaky).
        """
        # Build a minimal fake request + stream queue the writer understands.
        fake_request = MagicMock()
        fake_request.headers = {}

        written_payloads: list = []

        class _FakeStreamResponse:
            """Stand-in for web.StreamResponse that records every written payload."""

            async def prepare(self, req):
                pass

            async def write(self, payload):
                written_payloads.append(payload)

        import gateway.platforms.api_server as api_mod
        import queue as _q

        stream_q: _q.Queue = _q.Queue()

        async def _agent_coro():
            # Feed one partial delta into the stream queue...
            stream_q.put("partial output")
            # ...then give the drain loop a moment to pick it up before
            # raising CancelledError to simulate a server-side cancel.
            await asyncio.sleep(0.01)
            raise asyncio.CancelledError()

        agent_task = asyncio.ensure_future(_agent_coro())
        response_id = f"resp_{uuid.uuid4().hex[:28]}"

        # Patch web.StreamResponse for the duration of the writer call.
        with patch.object(api_mod.web, "StreamResponse", return_value=_FakeStreamResponse()):
            with pytest.raises(asyncio.CancelledError):
                await adapter._write_sse_responses(
                    request=fake_request,
                    response_id=response_id,
                    model="hermes-agent",
                    created_at=int(time.time()),
                    stream_q=stream_q,
                    agent_task=agent_task,
                    agent_ref=[None],
                    conversation_history=[],
                    user_message="will be cancelled",
                    instructions=None,
                    conversation=None,
                    store=True,
                    session_id=None,
                )

        # Expected: a snapshot exists for this id and the cancellation path
        # left it in ``incomplete`` state with the partial text it saw.
        stored = adapter._response_store.get(response_id)
        assert stored is not None, "snapshot must be retrievable after cancellation"
        assert stored["response"]["status"] == "incomplete"
        # Partial text captured before cancel should be preserved.
        output_text = "".join(
            part.get("text", "")
            for item in stored["response"].get("output", [])
            if item.get("type") == "message"
            for part in item.get("content", [])
        )
        assert "partial output" in output_text

    @pytest.mark.asyncio
    async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter):
        """Client disconnect (ConnectionResetError) during streaming must
        persist an ``incomplete`` snapshot in ResponseStore.  Regression
        for PR #15171."""
        fake_request = MagicMock()
        fake_request.headers = {}

        write_call_count = {"n": 0}

        class _DisconnectingStreamResponse:
            """Stand-in for web.StreamResponse whose third write() fails."""

            async def prepare(self, req):
                # prepare() is not counted; only write() calls are.
                pass

            async def write(self, payload):
                # The first two write() calls succeed.  From the third call
                # on, the "client" is gone — simulated with
                # ConnectionResetError.  Which SSE event that third write
                # carries depends on the writer's event order
                # (NOTE(review): presumably the text delta — confirm).
                write_call_count["n"] += 1
                if write_call_count["n"] >= 3:
                    raise ConnectionResetError("simulated client disconnect")

        import gateway.platforms.api_server as api_mod
        import queue as _q

        stream_q: _q.Queue = _q.Queue()
        stream_q.put("some streamed text")
        stream_q.put(None)  # EOS sentinel

        async def _agent_coro():
            await asyncio.sleep(0.01)
            return ({"final_response": "", "messages": [], "api_calls": 0},
                    {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})

        agent_task = asyncio.ensure_future(_agent_coro())
        response_id = f"resp_{uuid.uuid4().hex[:28]}"

        # Unlike the cancellation case, the writer is expected to return
        # normally here: the call is not wrapped in pytest.raises.
        with patch.object(api_mod.web, "StreamResponse", return_value=_DisconnectingStreamResponse()):
            await adapter._write_sse_responses(
                request=fake_request,
                response_id=response_id,
                model="hermes-agent",
                created_at=int(time.time()),
                stream_q=stream_q,
                agent_task=agent_task,
                agent_ref=[None],
                conversation_history=[],
                user_message="will disconnect",
                instructions=None,
                conversation=None,
                store=True,
                session_id=None,
            )

        stored = adapter._response_store.get(response_id)
        assert stored is not None, "snapshot must survive client disconnect"
        assert stored["response"]["status"] == "incomplete"
1757  
1758  
1759  # ---------------------------------------------------------------------------
1760  # Auth on endpoints
1761  # ---------------------------------------------------------------------------
1762  
1763  
class TestEndpointAuth:
    """Requests without credentials against an adapter that has an API key configured."""

    @pytest.mark.asyncio
    async def test_chat_completions_requires_auth(self, auth_adapter):
        """POST /v1/chat/completions without credentials is rejected with 401."""
        request_body = {"model": "test", "messages": [{"role": "user", "content": "hi"}]}
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.post("/v1/chat/completions", json=request_body)
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_responses_requires_auth(self, auth_adapter):
        """POST /v1/responses without credentials is rejected with 401."""
        request_body = {"model": "test", "input": "hi"}
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.post("/v1/responses", json=request_body)
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_models_requires_auth(self, auth_adapter):
        """GET /v1/models without credentials is rejected with 401."""
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.get("/v1/models")
            assert response.status == 401

    @pytest.mark.asyncio
    async def test_health_does_not_require_auth(self, auth_adapter):
        """GET /health succeeds even though no credentials are sent."""
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.get("/health")
            assert response.status == 200
1798  
1799  
1800  # ---------------------------------------------------------------------------
1801  # Config integration
1802  # ---------------------------------------------------------------------------
1803  
1804  
class TestConfigIntegration:
    """How the API server platform shows up in ``GatewayConfig`` and env overrides."""

    def test_platform_enum_has_api_server(self):
        """The Platform enum exposes API_SERVER with value ``api_server``."""
        assert Platform.API_SERVER.value == "api_server"

    def test_env_override_enables_api_server(self, monkeypatch):
        """API_SERVER_ENABLED=true yields an enabled API_SERVER platform entry."""
        from gateway.config import load_gateway_config

        monkeypatch.setenv("API_SERVER_ENABLED", "true")
        loaded = load_gateway_config()
        assert Platform.API_SERVER in loaded.platforms
        assert loaded.platforms[Platform.API_SERVER].enabled is True

    def test_env_override_with_key(self, monkeypatch):
        """API_SERVER_KEY alone creates the platform entry and lands in ``extra['key']``."""
        from gateway.config import load_gateway_config

        monkeypatch.setenv("API_SERVER_KEY", "sk-mykey")
        loaded = load_gateway_config()
        assert Platform.API_SERVER in loaded.platforms
        assert loaded.platforms[Platform.API_SERVER].extra.get("key") == "sk-mykey"

    def test_env_override_port_and_host(self, monkeypatch):
        """API_SERVER_PORT / API_SERVER_HOST are stored as an int port and a string host."""
        from gateway.config import load_gateway_config

        for name, value in (
            ("API_SERVER_ENABLED", "true"),
            ("API_SERVER_PORT", "9999"),
            ("API_SERVER_HOST", "0.0.0.0"),
        ):
            monkeypatch.setenv(name, value)
        loaded = load_gateway_config()
        assert loaded.platforms[Platform.API_SERVER].extra.get("port") == 9999
        assert loaded.platforms[Platform.API_SERVER].extra.get("host") == "0.0.0.0"

    def test_env_override_cors_origins(self, monkeypatch):
        """A comma-separated API_SERVER_CORS_ORIGINS is split into a trimmed list."""
        from gateway.config import load_gateway_config

        monkeypatch.setenv("API_SERVER_ENABLED", "true")
        monkeypatch.setenv(
            "API_SERVER_CORS_ORIGINS",
            "http://localhost:3000, http://127.0.0.1:3000",
        )
        loaded = load_gateway_config()
        expected_origins = [
            "http://localhost:3000",
            "http://127.0.0.1:3000",
        ]
        assert loaded.platforms[Platform.API_SERVER].extra.get("cors_origins") == expected_origins

    def test_api_server_in_connected_platforms(self):
        """An enabled API_SERVER entry is reported by get_connected_platforms()."""
        gateway_config = GatewayConfig()
        gateway_config.platforms[Platform.API_SERVER] = PlatformConfig(enabled=True)
        assert Platform.API_SERVER in gateway_config.get_connected_platforms()

    def test_api_server_not_in_connected_when_disabled(self):
        """A disabled API_SERVER entry is left out of get_connected_platforms()."""
        gateway_config = GatewayConfig()
        gateway_config.platforms[Platform.API_SERVER] = PlatformConfig(enabled=False)
        assert Platform.API_SERVER not in gateway_config.get_connected_platforms()
1856  
1857  
1858  # ---------------------------------------------------------------------------
1859  # Multiple system messages
1860  # ---------------------------------------------------------------------------
1861  
1862  
class TestMultipleSystemMessages:
    """Chat Completions requests that carry more than one system message."""

    @pytest.mark.asyncio
    async def test_multiple_system_messages_concatenated(self, adapter):
        """Every system message's text appears in the ephemeral system prompt."""
        agent_result = {"final_response": "OK", "messages": [], "api_calls": 1}
        zero_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        request_body = {
            "model": "hermes-agent",
            "messages": [
                {"role": "system", "content": "You are helpful."},
                {"role": "system", "content": "Be concise."},
                {"role": "user", "content": "Hello"},
            ],
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (agent_result, zero_usage)
                response = await client.post("/v1/chat/completions", json=request_body)

            assert response.status == 200
            system_prompt = run_agent.call_args.kwargs["ephemeral_system_prompt"]
            assert "You are helpful." in system_prompt
            assert "Be concise." in system_prompt
1889  
1890  
1891  # ---------------------------------------------------------------------------
1892  # send() method (not used but required by base)
1893  # ---------------------------------------------------------------------------
1894  
1895  
class TestSendMethod:
    """The adapter's ``send()`` method."""

    @pytest.mark.asyncio
    async def test_send_returns_not_supported(self):
        """``send()`` reports failure with an error mentioning HTTP request/response."""
        server_adapter = APIServerAdapter(PlatformConfig(enabled=True))
        outcome = await server_adapter.send("chat1", "hello")
        assert outcome.success is False
        assert "HTTP request/response" in outcome.error
1904  
1905  
1906  # ---------------------------------------------------------------------------
1907  # GET /v1/responses/{response_id}
1908  # ---------------------------------------------------------------------------
1909  
1910  
class TestGetResponse:
    """GET ``/v1/responses/{response_id}``."""

    @pytest.mark.asyncio
    async def test_get_stored_response(self, adapter):
        """A response created via POST can be fetched again by id."""
        agent_result = {"final_response": "Hello!", "messages": [], "api_calls": 1}
        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}

        async with TestClient(TestServer(_create_app(adapter))) as client:
            # Create the response that will be looked up.
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_agent:
                run_agent.return_value = (agent_result, usage)
                created = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "Hi"},
                )

            assert created.status == 200
            stored_id = (await created.json())["id"]

            # Fetch it back and check the envelope fields.
            fetched = await client.get(f"/v1/responses/{stored_id}")
            assert fetched.status == 200
            fetched_payload = await fetched.json()
            assert fetched_payload["id"] == stored_id
            assert fetched_payload["object"] == "response"
            assert fetched_payload["status"] == "completed"

    @pytest.mark.asyncio
    async def test_get_not_found(self, adapter):
        """An id that was never stored yields 404."""
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.get("/v1/responses/resp_nonexistent")
            assert response.status == 404

    @pytest.mark.asyncio
    async def test_get_requires_auth(self, auth_adapter):
        """With an API key configured, an unauthenticated GET yields 401."""
        async with TestClient(TestServer(_create_app(auth_adapter))) as client:
            response = await client.get("/v1/responses/resp_any")
            assert response.status == 401
1952  
1953  
1954  # ---------------------------------------------------------------------------
1955  # DELETE /v1/responses/{response_id}
1956  # ---------------------------------------------------------------------------
1957  
1958  
class TestDeleteResponse:
    """DELETE ``/v1/responses/{response_id}``."""

    @pytest.mark.asyncio
    async def test_delete_stored_response(self, adapter):
        """DELETE removes a stored response and returns confirmation."""
        mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1}

        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
                resp = await cli.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "Hi"},
                )

            # Make a failed creation show up as a status mismatch here rather
            # than as a KeyError on data["id"] below.
            assert resp.status == 200
            data = await resp.json()
            response_id = data["id"]

            # Delete it
            resp2 = await cli.delete(f"/v1/responses/{response_id}")
            assert resp2.status == 200
            data2 = await resp2.json()
            assert data2["id"] == response_id
            assert data2["object"] == "response"
            assert data2["deleted"] is True

            # Verify it's gone
            resp3 = await cli.get(f"/v1/responses/{response_id}")
            assert resp3.status == 404

    @pytest.mark.asyncio
    async def test_delete_not_found(self, adapter):
        """Deleting an id that was never stored yields 404."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.delete("/v1/responses/resp_nonexistent")
            assert resp.status == 404

    @pytest.mark.asyncio
    async def test_delete_requires_auth(self, auth_adapter):
        """With an API key configured, an unauthenticated DELETE yields 401."""
        app = _create_app(auth_adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.delete("/v1/responses/resp_any")
            assert resp.status == 401
2002  
2003  
2004  # ---------------------------------------------------------------------------
2005  # Tool calls in output
2006  # ---------------------------------------------------------------------------
2007  
2008  
class TestToolCallsInOutput:
    """Check how agent tool activity is rendered in the Responses API ``output`` list."""

    @pytest.mark.asyncio
    async def test_tool_calls_in_output(self, adapter):
        """A tool call, its result and the final answer each become one output item."""
        tool_request = {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_abc123",
                    "function": {
                        "name": "calculator",
                        "arguments": '{"expression": "6*7"}',
                    },
                }
            ],
        }
        tool_result = {"role": "tool", "tool_call_id": "call_abc123", "content": "42"}
        final_answer = {"role": "assistant", "content": "The result is 42."}
        agent_reply = (
            {
                "final_response": "The result is 42.",
                "messages": [tool_request, tool_result, final_answer],
                "api_calls": 2,
            },
            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        )

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ):
                reply = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "What is 6*7?"},
                )

            assert reply.status == 200
            items = (await reply.json())["output"]

            # Exactly three items, in this order.
            assert [item["type"] for item in items] == [
                "function_call",
                "function_call_output",
                "message",
            ]
            call_item, result_item, message_item = items

            assert call_item["name"] == "calculator"
            assert call_item["arguments"] == '{"expression": "6*7"}'
            assert call_item["call_id"] == "call_abc123"

            assert result_item["call_id"] == "call_abc123"
            assert result_item["output"] == "42"

            assert message_item["content"][0]["text"] == "The result is 42."

    @pytest.mark.asyncio
    async def test_no_tool_calls_still_works(self, adapter):
        """With an empty agent message list the output holds a single message item."""
        agent_reply = (
            {"final_response": "Hello!", "messages": [], "api_calls": 1},
            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        )

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ):
                reply = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "Hello"},
                )

            assert reply.status == 200
            items = (await reply.json())["output"]
            assert len(items) == 1
            assert items[0]["type"] == "message"
2085  
2086  
2087  # ---------------------------------------------------------------------------
2088  # Usage / token counting
2089  # ---------------------------------------------------------------------------
2090  
2091  
class TestUsageCounting:
    """Verify that token usage returned by the agent run reaches the HTTP payload."""

    @pytest.mark.asyncio
    async def test_responses_usage(self, adapter):
        """The Responses endpoint reports the agent's token counts under the same keys."""
        agent_reply = (
            {"final_response": "Done", "messages": [], "api_calls": 1},
            {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150},
        )

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ):
                reply = await client.post(
                    "/v1/responses",
                    json={"model": "hermes-agent", "input": "Hi"},
                )

            assert reply.status == 200
            reported = (await reply.json())["usage"]
            assert reported["input_tokens"] == 100
            assert reported["output_tokens"] == 50
            assert reported["total_tokens"] == 150

    @pytest.mark.asyncio
    async def test_chat_completions_usage(self, adapter):
        """Chat completions reports the counts as prompt/completion/total tokens."""
        agent_reply = (
            {"final_response": "Done", "messages": [], "api_calls": 1},
            {"input_tokens": 200, "output_tokens": 80, "total_tokens": 280},
        )
        request_body = {
            "model": "hermes-agent",
            "messages": [{"role": "user", "content": "Hi"}],
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ):
                reply = await client.post("/v1/chat/completions", json=request_body)

            assert reply.status == 200
            reported = (await reply.json())["usage"]
            # input_tokens/output_tokens from the agent surface under the chat-style names.
            assert reported["prompt_tokens"] == 200
            assert reported["completion_tokens"] == 80
            assert reported["total_tokens"] == 280
2137  
2138  
2139  # ---------------------------------------------------------------------------
2140  # Truncation
2141  # ---------------------------------------------------------------------------
2142  
2143  
class TestTruncation:
    """Cover the ``truncation`` request field when chaining onto a long stored history."""

    @staticmethod
    def _seed_history(adapter, response_id, length=150):
        """Store a fake prior response carrying ``length`` user messages."""
        adapter._response_store.put(response_id, {
            "response": {"id": response_id, "object": "response"},
            "conversation_history": [
                {"role": "user", "content": f"msg {i}"} for i in range(length)
            ],
            "instructions": None,
        })

    @pytest.mark.asyncio
    async def test_truncation_auto_limits_history(self, adapter):
        """With truncation=auto the agent receives at most 100 history messages."""
        agent_reply = (
            {"final_response": "OK", "messages": [], "api_calls": 1},
            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        )
        # Seed 150 messages so the stored history exceeds the asserted bound.
        self._seed_history(adapter, "resp_prev")
        request_body = {
            "model": "hermes-agent",
            "input": "follow up",
            "previous_response_id": "resp_prev",
            "truncation": "auto",
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ) as run_agent:
                reply = await client.post("/v1/responses", json=request_body)

        assert reply.status == 200
        forwarded = run_agent.call_args.kwargs["conversation_history"]
        # History should be truncated to 100
        assert len(forwarded) <= 100

    @pytest.mark.asyncio
    async def test_no_truncation_keeps_full_history(self, adapter):
        """When ``truncation`` is omitted all 150 stored messages reach the agent."""
        agent_reply = (
            {"final_response": "OK", "messages": [], "api_calls": 1},
            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        )
        self._seed_history(adapter, "resp_prev2")
        request_body = {
            "model": "hermes-agent",
            "input": "follow up",
            "previous_response_id": "resp_prev2",
        }

        async with TestClient(TestServer(_create_app(adapter))) as client:
            with patch.object(
                adapter, "_run_agent", new_callable=AsyncMock, return_value=agent_reply
            ) as run_agent:
                reply = await client.post("/v1/responses", json=request_body)

        assert reply.status == 200
        forwarded = run_agent.call_args.kwargs["conversation_history"]
        assert len(forwarded) == 150
2205  
2206  
2207  # ---------------------------------------------------------------------------
2208  # CORS
2209  # ---------------------------------------------------------------------------
2210  
2211  
class TestCORS:
    """Origin allow-listing and CORS header behaviour, at helper level and over HTTP."""

    @staticmethod
    def _preflight_headers(origin, requested_headers=None):
        """Build the request headers of a POST preflight coming from ``origin``."""
        headers = {
            "Origin": origin,
            "Access-Control-Request-Method": "POST",
        }
        if requested_headers is not None:
            headers["Access-Control-Request-Headers"] = requested_headers
        return headers

    # -- adapter helper methods ------------------------------------------

    def test_origin_allowed_for_non_browser_client(self, adapter):
        """An empty Origin string is treated as allowed."""
        verdict = adapter._origin_allowed("")
        assert verdict is True

    def test_origin_rejected_by_default(self, adapter):
        """The default adapter fixture does not accept an arbitrary origin."""
        verdict = adapter._origin_allowed("http://evil.example")
        assert verdict is False

    def test_origin_allowed_for_allowlist_match(self):
        """An origin listed in ``cors_origins`` is accepted."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        assert configured._origin_allowed("http://localhost:3000") is True

    def test_cors_headers_for_origin_disabled_by_default(self, adapter):
        """The default adapter fixture yields no CORS headers for an origin."""
        produced = adapter._cors_headers_for_origin("http://localhost:3000")
        assert produced is None

    def test_cors_headers_for_origin_matches_allowlist(self):
        """A listed origin gets headers echoing it and permitting POST."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        produced = configured._cors_headers_for_origin("http://localhost:3000")
        assert produced is not None
        assert produced["Access-Control-Allow-Origin"] == "http://localhost:3000"
        assert "POST" in produced["Access-Control-Allow-Methods"]

    def test_cors_headers_for_origin_rejects_unknown_origin(self):
        """An origin outside the configured list gets no CORS headers."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        produced = configured._cors_headers_for_origin("http://evil.example")
        assert produced is None

    # -- behaviour over HTTP ---------------------------------------------

    @pytest.mark.asyncio
    async def test_cors_headers_not_present_by_default(self, adapter):
        """A request without an Origin header succeeds and carries no allow-origin header."""
        async with TestClient(TestServer(_create_app(adapter))) as client:
            reply = await client.get("/health")
            assert reply.status == 200
            assert reply.headers.get("Access-Control-Allow-Origin") is None

    @pytest.mark.asyncio
    async def test_browser_origin_rejected_by_default(self, adapter):
        """A request carrying an unlisted Origin is refused with 403."""
        async with TestClient(TestServer(_create_app(adapter))) as client:
            reply = await client.get("/health", headers={"Origin": "http://evil.example"})
            assert reply.status == 403
            assert reply.headers.get("Access-Control-Allow-Origin") is None

    @pytest.mark.asyncio
    async def test_cors_options_preflight_rejected_by_default(self, adapter):
        """A preflight from an unlisted Origin is refused with 403."""
        async with TestClient(TestServer(_create_app(adapter))) as client:
            reply = await client.options(
                "/v1/chat/completions",
                headers=self._preflight_headers("http://evil.example"),
            )
            assert reply.status == 403
            assert reply.headers.get("Access-Control-Allow-Origin") is None

    @pytest.mark.asyncio
    async def test_cors_headers_present_for_allowed_origin(self):
        """A listed origin gets its origin echoed plus POST and DELETE in the allowed methods."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        async with TestClient(TestServer(_create_app(configured))) as client:
            reply = await client.get("/health", headers={"Origin": "http://localhost:3000"})
            assert reply.status == 200
            assert reply.headers.get("Access-Control-Allow-Origin") == "http://localhost:3000"
            allowed_methods = reply.headers.get("Access-Control-Allow-Methods", "")
            assert "POST" in allowed_methods
            assert "DELETE" in allowed_methods

    @pytest.mark.asyncio
    async def test_cors_allows_idempotency_key_header(self):
        """The preflight answer lists ``Idempotency-Key`` among the allowed headers."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        async with TestClient(TestServer(_create_app(configured))) as client:
            reply = await client.options(
                "/v1/chat/completions",
                headers=self._preflight_headers("http://localhost:3000", "Idempotency-Key"),
            )
            assert reply.status == 200
            assert "Idempotency-Key" in reply.headers.get("Access-Control-Allow-Headers", "")

    @pytest.mark.asyncio
    async def test_cors_sets_vary_origin_header(self):
        """Responses to a listed origin carry ``Vary: Origin``."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        async with TestClient(TestServer(_create_app(configured))) as client:
            reply = await client.get("/health", headers={"Origin": "http://localhost:3000"})
            assert reply.status == 200
            assert reply.headers.get("Vary") == "Origin"

    @pytest.mark.asyncio
    async def test_cors_options_preflight_allowed_for_configured_origin(self):
        """A listed origin completes preflight and sees ``Authorization`` allowed."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        async with TestClient(TestServer(_create_app(configured))) as client:
            reply = await client.options(
                "/v1/chat/completions",
                headers=self._preflight_headers(
                    "http://localhost:3000", "Authorization, Content-Type"
                ),
            )
            assert reply.status == 200
            assert reply.headers.get("Access-Control-Allow-Origin") == "http://localhost:3000"
            assert "Authorization" in reply.headers.get("Access-Control-Allow-Headers", "")

    @pytest.mark.asyncio
    async def test_cors_preflight_sets_max_age(self):
        """A successful preflight advertises ``Access-Control-Max-Age`` of ``600``."""
        configured = _make_adapter(cors_origins=["http://localhost:3000"])
        async with TestClient(TestServer(_create_app(configured))) as client:
            reply = await client.options(
                "/v1/chat/completions",
                headers=self._preflight_headers(
                    "http://localhost:3000", "Authorization, Content-Type"
                ),
            )
            assert reply.status == 200
            assert reply.headers.get("Access-Control-Max-Age") == "600"
2341  # ---------------------------------------------------------------------------
2342  # Conversation parameter
2343  # ---------------------------------------------------------------------------
2344  
2345  
class TestConversationParameter:
    """Cover the ``conversation`` field of ``POST /v1/responses``.

    Each test drives the endpoint through the aiohttp test client with
    ``adapter._run_agent`` replaced by an ``AsyncMock`` and, where relevant,
    inspects ``adapter._response_store.get_conversation`` afterwards.
    """

    @pytest.mark.asyncio
    async def test_conversation_creates_new(self, adapter):
        """First request with a conversation name works (new conversation)."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (
                    {"final_response": "Hello!", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )
                resp = await cli.post("/v1/responses", json={
                    "input": "hi",
                    "conversation": "my-chat",
                })
                assert resp.status == 200
                data = await resp.json()
                assert data["status"] == "completed"
                # Conversation mapping should be set
                assert adapter._response_store.get_conversation("my-chat") is not None

    @pytest.mark.asyncio
    async def test_conversation_chains_automatically(self, adapter):
        """Second request with same conversation name chains to first."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (
                    {"final_response": "First response", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )
                # First request
                resp1 = await cli.post("/v1/responses", json={
                    "input": "hello",
                    "conversation": "test-conv",
                })
                assert resp1.status == 200
                # The first reply must carry an id.
                assert "id" in await resp1.json()

                # Second request — should chain
                mock_run.return_value = (
                    {"final_response": "Second response", "messages": [], "api_calls": 1},
                    {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30},
                )
                resp2 = await cli.post("/v1/responses", json={
                    "input": "follow up",
                    "conversation": "test-conv",
                })
                assert resp2.status == 200

                # The second call should have received conversation history from the first
                assert mock_run.call_count == 2
                # Entries of ``call_args_list`` are ``call`` objects; ``.kwargs`` is the
                # keyword-argument dict, so no positional-index fallback is needed.
                second_call = mock_run.call_args_list[1]
                history = second_call.kwargs.get("conversation_history", [])
                # History should be non-empty (contains messages from first response)
                assert len(history) > 0

    @pytest.mark.asyncio
    async def test_conversation_and_previous_response_id_conflict(self, adapter):
        """Cannot use both conversation and previous_response_id."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/responses", json={
                "input": "hi",
                "conversation": "my-chat",
                "previous_response_id": "resp_abc123",
            })
            assert resp.status == 400
            data = await resp.json()
            assert "Cannot use both" in data["error"]["message"]

    @pytest.mark.asyncio
    async def test_separate_conversations_are_isolated(self, adapter):
        """Different conversation names have independent histories."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (
                    {"final_response": "Response A", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )
                # Conversation A
                resp_a = await cli.post(
                    "/v1/responses",
                    json={"input": "conv-a msg", "conversation": "conv-a"},
                )
                assert resp_a.status == 200
                # Conversation B
                mock_run.return_value = (
                    {"final_response": "Response B", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )
                resp_b = await cli.post(
                    "/v1/responses",
                    json={"input": "conv-b msg", "conversation": "conv-b"},
                )
                assert resp_b.status == 200

                # Both mappings must exist; otherwise ``!=`` would also pass when
                # exactly one of them is missing.
                mapped_a = adapter._response_store.get_conversation("conv-a")
                mapped_b = adapter._response_store.get_conversation("conv-b")
                assert mapped_a is not None
                assert mapped_b is not None
                # They should have different response IDs in the mapping
                assert mapped_a != mapped_b

    @pytest.mark.asyncio
    async def test_conversation_store_false_no_mapping(self, adapter):
        """If store=false, conversation mapping is not updated."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (
                    {"final_response": "Ephemeral", "messages": [], "api_calls": 1},
                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
                )
                resp = await cli.post("/v1/responses", json={
                    "input": "hi",
                    "conversation": "ephemeral-chat",
                    "store": False,
                })
                assert resp.status == 200
                # Conversation mapping should NOT be set since store=false
                assert adapter._response_store.get_conversation("ephemeral-chat") is None
2459  
2460  
2461  # ---------------------------------------------------------------------------
2462  # X-Hermes-Session-Id header (session continuity)
2463  # ---------------------------------------------------------------------------
2464  
2465  
2466  class TestSessionIdHeader:
2467      @pytest.mark.asyncio
2468      async def test_new_session_response_includes_session_id_header(self, adapter):
2469          """Without X-Hermes-Session-Id, a new session is created and returned in the header."""
2470          mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1}
2471          app = _create_app(adapter)
2472          async with TestClient(TestServer(app)) as cli:
2473              with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
2474                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
2475                  resp = await cli.post(
2476                      "/v1/chat/completions",
2477                      json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
2478                  )
2479              assert resp.status == 200
2480              assert resp.headers.get("X-Hermes-Session-Id") is not None
2481  
2482      @pytest.mark.asyncio
2483      async def test_provided_session_id_is_used_and_echoed(self, auth_adapter):
2484          """When X-Hermes-Session-Id is provided, it's passed to the agent and echoed in the response."""
2485          mock_result = {"final_response": "Continuing!", "messages": [], "api_calls": 1}
2486          mock_db = MagicMock()
2487          mock_db.get_messages_as_conversation.return_value = [
2488              {"role": "user", "content": "previous message"},
2489              {"role": "assistant", "content": "previous reply"},
2490          ]
2491          auth_adapter._session_db = mock_db
2492          app = _create_app(auth_adapter)
2493          async with TestClient(TestServer(app)) as cli:
2494              with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
2495                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
2496  
2497                  resp = await cli.post(
2498                      "/v1/chat/completions",
2499                      headers={"X-Hermes-Session-Id": "my-session-123", "Authorization": "Bearer sk-secret"},
2500                      json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Continue"}]},
2501                  )
2502  
2503              assert resp.status == 200
2504              assert resp.headers.get("X-Hermes-Session-Id") == "my-session-123"
2505              call_kwargs = mock_run.call_args.kwargs
2506              assert call_kwargs["session_id"] == "my-session-123"
2507  
2508      @pytest.mark.asyncio
2509      async def test_provided_session_id_loads_history_from_db(self, auth_adapter):
2510          """When X-Hermes-Session-Id is provided, history comes from SessionDB not request body."""
2511          mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
2512          db_history = [
2513              {"role": "user", "content": "stored message 1"},
2514              {"role": "assistant", "content": "stored reply 1"},
2515          ]
2516          mock_db = MagicMock()
2517          mock_db.get_messages_as_conversation.return_value = db_history
2518          auth_adapter._session_db = mock_db
2519          app = _create_app(auth_adapter)
2520          async with TestClient(TestServer(app)) as cli:
2521              with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
2522                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
2523  
2524                  resp = await cli.post(
2525                      "/v1/chat/completions",
2526                      headers={"X-Hermes-Session-Id": "existing-session", "Authorization": "Bearer sk-secret"},
2527                      # Request body has different history — should be ignored
2528                      json={
2529                          "model": "hermes-agent",
2530                          "messages": [
2531                              {"role": "user", "content": "old msg from client"},
2532                              {"role": "assistant", "content": "old reply from client"},
2533                              {"role": "user", "content": "new question"},
2534                          ],
2535                      },
2536                  )
2537  
2538              assert resp.status == 200
2539              call_kwargs = mock_run.call_args.kwargs
2540              # History must come from DB, not from the request body
2541              assert call_kwargs["conversation_history"] == db_history
2542              assert call_kwargs["user_message"] == "new question"
2543  
2544      @pytest.mark.asyncio
2545      async def test_db_failure_falls_back_to_empty_history(self, auth_adapter):
2546          """If SessionDB raises, history falls back to empty and request still succeeds."""
2547          mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
2548          # Simulate DB failure: _session_db is None and SessionDB() constructor raises
2549          auth_adapter._session_db = None
2550          app = _create_app(auth_adapter)
2551          async with TestClient(TestServer(app)) as cli:
2552              with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
2553                   patch("hermes_state.SessionDB", side_effect=Exception("DB unavailable")):
2554                  mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
2555  
2556                  resp = await cli.post(
2557                      "/v1/chat/completions",
2558                      headers={"X-Hermes-Session-Id": "some-session", "Authorization": "Bearer sk-secret"},
2559                      json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
2560                  )
2561  
2562              assert resp.status == 200
2563              call_kwargs = mock_run.call_args.kwargs
2564              assert call_kwargs["conversation_history"] == []
2565              assert call_kwargs["session_id"] == "some-session"