# test_sequential_chats_live.py
"""Live regression guardrail for the keepalive/transport bug class (#10933).

AlexKucera reported on Discord (2026-04-16) that after ``hermes update`` pulled
#10933, the FIRST chat in a session worked and EVERY subsequent chat failed
with ``APIConnectionError('Connection error.')`` whose cause was
``RuntimeError: Cannot send a request, as the client has been closed``.

The companion ``test_create_openai_client_reuse.py`` pins this contract at
object level with mocked ``OpenAI``.  This file runs the same shape of
reproduction against a real provider so we have a true end-to-end smoke test
for any future keepalive / transport plumbing.

Opt-in — not part of default CI:
    HERMES_LIVE_TESTS=1 pytest tests/run_agent/test_sequential_chats_live.py -v

Requires ``OPENROUTER_API_KEY`` to be set (or sourced via ~/.hermes/.env).
"""
from __future__ import annotations

import os
from pathlib import Path

import pytest


# Load ~/.hermes/.env so live runs pick up OPENROUTER_API_KEY without
# needing the runner to shell-source it first.  Silent if the file is absent.
def _load_user_env(env_file: Path | None = None) -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Keys and values are whitespace-stripped, and surrounding single or double
    quotes are removed from values.  A leading ``export `` on the key is
    dropped, and entries whose key is empty are skipped.  Variables already
    present in the environment are never overwritten.

    Args:
        env_file: File to read.  Defaults to ``~/.hermes/.env``.

    Returns:
        None.  Returns without doing anything if the file does not exist.
    """
    if env_file is None:
        env_file = Path.home() / ".hermes" / ".env"
    if not env_file.exists():
        return
    # Decode explicitly instead of relying on the locale; "utf-8-sig" reads
    # plain UTF-8 and also drops a leading BOM so it cannot end up glued to
    # the first key.
    for raw in env_file.read_text(encoding="utf-8-sig").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        k = k.strip()
        # Accept shell-style ``export KEY=value`` lines.
        if k.startswith("export "):
            k = k[len("export "):].strip()
        # A line such as ``=value`` has no usable name; an empty variable
        # name is rejected by the OS, so skip it rather than crash at import.
        if not k:
            continue
        v = v.strip().strip('"').strip("'")
        # Don't clobber an already-set env var — lets the caller override.
        os.environ.setdefault(k, v)


_load_user_env()


# Both values are captured once, at import time, after the .env load above.
LIVE = os.environ.get("HERMES_LIVE_TESTS") == "1"
OR_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Module-wide skip markers: every test in this file is skipped unless live
# runs are enabled AND an OpenRouter key is available.
pytestmark = [
    pytest.mark.skipif(not LIVE, reason="live-only — set HERMES_LIVE_TESTS=1"),
    pytest.mark.skipif(not OR_KEY, reason="OPENROUTER_API_KEY not configured"),
]

# Cheap, fast, tool-capable. Swap if it ever goes dark.
LIVE_MODEL = "google/gemini-2.5-flash"


def _make_live_agent():
    """Build an ``AIAgent`` pointed at OpenRouter with all toolsets disabled.

    Returns:
        A new ``AIAgent`` configured with ``LIVE_MODEL`` and ``OR_KEY``.
    """
    # Imported inside the function, so merely collecting (and skipping) this
    # module never imports ``run_agent``.
    from run_agent import AIAgent

    return AIAgent(
        model=LIVE_MODEL,
        provider="openrouter",
        api_key=OR_KEY,
        base_url="https://openrouter.ai/api/v1",
        max_iterations=3,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        # All toolsets off so the agent just produces a single text reply
        # per turn — we want to test the HTTP client lifecycle, not tools.
        disabled_toolsets=["*"],
    )


def _looks_like_error_reply(reply: str) -> tuple[bool, str]:
    """AIAgent returns an error-sentinel string (not an exception) when the
    underlying API call fails past retries.  A naive ``assert reply and
    reply.strip()`` misses this because the sentinel is truthy.  This
    checker enumerates the known-bad shapes so the live test actually
    catches #10933 instead of rubber-stamping the error response.

    Args:
        reply: Text returned by the agent for one turn.

    Returns:
        ``(True, marker)`` for the first known-bad substring found in the
        lower-cased reply, otherwise ``(False, "")``.
    """
    lowered = reply.lower().strip()
    bad_substrings = (
        "api call failed",
        "connection error",
        "client has been closed",
        "cannot send a request",
        "max retries",
    )
    for marker in bad_substrings:
        if marker in lowered:
            return True, marker
    return False, ""


def _assert_healthy_reply(reply, turn_label: str) -> None:
    """Assert that ``reply`` is a non-empty string that is not an error sentinel.

    Args:
        reply: Value returned by ``agent.chat`` for one turn.
        turn_label: Human-readable turn name used in failure messages.

    Raises:
        AssertionError: If ``reply`` is not a string, is empty or
            whitespace-only, or contains one of the known error markers.
    """
    # Guard the type first: calling ``.strip()`` on a truthy non-string
    # would otherwise surface as an AttributeError with no turn label.
    assert isinstance(reply, str), (
        f"{turn_label} returned {type(reply).__name__} instead of str: {reply!r}"
    )
    assert reply.strip(), f"{turn_label} returned empty: {reply!r}"
    is_err, marker = _looks_like_error_reply(reply)
    assert not is_err, (
        f"{turn_label} returned an error-sentinel string instead of a real "
        f"model reply — matched marker {marker!r}. This is the exact shape "
        f"of #10933 (AlexKucera Discord report, 2026-04-16): the agent's "
        f"retry loop burned three attempts against a closed httpx transport "
        f"and surfaced 'API call failed after 3 retries: Connection error.' "
        f"to the user. Reply was: {reply!r}"
    )


def test_three_sequential_chats_across_client_rebuild():
    """Reproduces AlexKucera's exact failure shape end-to-end.

    Turn 1 always worked under #10933.  Turn 2 was the one that failed
    because the shared httpx transport had been torn down between turns.
    Turn 3 is here as extra insurance against any lazy-init shape where
    the failure only shows up on call N>=3.

    We also deliberately trigger ``_replace_primary_openai_client`` between
    turn 2 and turn 3 — that is the real rebuild entrypoint (401 refresh,
    credential rotation, model switch) and is the path that actually
    stored the closed transport into ``self._client_kwargs`` in #10933.
    """
    agent = _make_live_agent()

    r1 = agent.chat("Respond with only the word: ONE")
    _assert_healthy_reply(r1, "turn 1")

    r2 = agent.chat("Respond with only the word: TWO")
    _assert_healthy_reply(r2, "turn 2")

    # Force a client rebuild through the real path — mimics 401 refresh /
    # credential rotation / model switch lifecycle.
    rebuilt = agent._replace_primary_openai_client(reason="regression_test_rebuild")
    assert rebuilt, "rebuild via _replace_primary_openai_client returned False"

    r3 = agent.chat("Respond with only the word: THREE")
    _assert_healthy_reply(r3, "turn 3 (post-rebuild)")