Cradicle Explorer

/ tests / run_agent / test_sequential_chats_live.py
test_sequential_chats_live.py
  1  """Live regression guardrail for the keepalive/transport bug class (#10933).
  2  
  3  AlexKucera reported on Discord (2026-04-16) that after ``hermes update`` pulled
  4  #10933, the FIRST chat in a session worked and EVERY subsequent chat failed
  5  with ``APIConnectionError('Connection error.')`` whose cause was
  6  ``RuntimeError: Cannot send a request, as the client has been closed``.
  7  
  8  The companion ``test_create_openai_client_reuse.py`` pins this contract at
  9  object level with mocked ``OpenAI``. This file runs the same shape of
 10  reproduction against a real provider so we have a true end-to-end smoke test
 11  for any future keepalive / transport plumbing.
 12  
 13  Opt-in — not part of default CI:
 14      HERMES_LIVE_TESTS=1 pytest tests/run_agent/test_sequential_chats_live.py -v
 15  
 16  Requires ``OPENROUTER_API_KEY`` to be set (or sourced via ~/.hermes/.env).
 17  """
 18  from __future__ import annotations
 19  
 20  import os
 21  from pathlib import Path
 22  
 23  import pytest
 24  
 25  
 26  # Load ~/.hermes/.env so live runs pick up OPENROUTER_API_KEY without
 27  # needing the runner to shell-source it first. Silent if the file is absent.
 28  def _load_user_env() -> None:
 29      env_file = Path.home() / ".hermes" / ".env"
 30      if not env_file.exists():
 31          return
 32      for raw in env_file.read_text().splitlines():
 33          line = raw.strip()
 34          if not line or line.startswith("#") or "=" not in line:
 35              continue
 36          k, v = line.split("=", 1)
 37          k = k.strip()
 38          v = v.strip().strip('"').strip("'")
 39          # Don't clobber an already-set env var — lets the caller override.
 40          os.environ.setdefault(k, v)
 41  
 42  
# Runs at import time and must stay above the flag reads below, so that
# values from ~/.hermes/.env are visible to them.
_load_user_env()


# True only when the caller opted in with exactly HERMES_LIVE_TESTS=1.
LIVE = os.environ.get("HERMES_LIVE_TESTS") == "1"
# Falls back to "" when no key is set, which makes the second skipif fire.
OR_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Module-level marks: applied by pytest to every test in this file, so the
# whole module is skipped unless live mode is on AND a key is available.
pytestmark = [
    pytest.mark.skipif(not LIVE, reason="live-only — set HERMES_LIVE_TESTS=1"),
    pytest.mark.skipif(not OR_KEY, reason="OPENROUTER_API_KEY not configured"),
]

# Cheap, fast, tool-capable. Swap if it ever goes dark.
LIVE_MODEL = "google/gemini-2.5-flash"
 56  
 57  
 58  def _make_live_agent():
 59      from run_agent import AIAgent
 60  
 61      return AIAgent(
 62          model=LIVE_MODEL,
 63          provider="openrouter",
 64          api_key=OR_KEY,
 65          base_url="https://openrouter.ai/api/v1",
 66          max_iterations=3,
 67          quiet_mode=True,
 68          skip_context_files=True,
 69          skip_memory=True,
 70          # All toolsets off so the agent just produces a single text reply
 71          # per turn — we want to test the HTTP client lifecycle, not tools.
 72          disabled_toolsets=["*"],
 73      )
 74  
 75  
 76  def _looks_like_error_reply(reply: str) -> tuple[bool, str]:
 77      """AIAgent returns an error-sentinel string (not an exception) when the
 78      underlying API call fails past retries. A naive ``assert reply and
 79      reply.strip()`` misses this because the sentinel is truthy. This
 80      checker enumerates the known-bad shapes so the live test actually
 81      catches #10933 instead of rubber-stamping the error response.
 82      """
 83      lowered = reply.lower().strip()
 84      bad_substrings = (
 85          "api call failed",
 86          "connection error",
 87          "client has been closed",
 88          "cannot send a request",
 89          "max retries",
 90      )
 91      for marker in bad_substrings:
 92          if marker in lowered:
 93              return True, marker
 94      return False, ""
 95  
 96  
 97  def _assert_healthy_reply(reply, turn_label: str) -> None:
 98      assert reply and reply.strip(), f"{turn_label} returned empty: {reply!r}"
 99      is_err, marker = _looks_like_error_reply(reply)
100      assert not is_err, (
101          f"{turn_label} returned an error-sentinel string instead of a real "
102          f"model reply — matched marker {marker!r}. This is the exact shape "
103          f"of #10933 (AlexKucera Discord report, 2026-04-16): the agent's "
104          f"retry loop burned three attempts against a closed httpx transport "
105          f"and surfaced 'API call failed after 3 retries: Connection error.' "
106          f"to the user. Reply was: {reply!r}"
107      )
108  
109  
def test_three_sequential_chats_across_client_rebuild():
    """End-to-end reproduction of the failure shape AlexKucera reported.

    Under #10933 the first turn always succeeded; the second turn was the
    one that broke, because the shared httpx transport had been torn down
    between turns. A third turn is included as insurance against lazy-init
    variants where the failure only appears on call N>=3.

    Between turn 2 and turn 3 the test deliberately calls
    ``_replace_primary_openai_client`` — the real rebuild entrypoint (401
    refresh, credential rotation, model switch) and the path that actually
    stored the closed transport into ``self._client_kwargs`` in #10933.
    """
    live_agent = _make_live_agent()

    # Two ordinary back-to-back turns on the same agent.
    pre_rebuild_turns = (
        ("turn 1", "Respond with only the word: ONE"),
        ("turn 2", "Respond with only the word: TWO"),
    )
    for label, prompt in pre_rebuild_turns:
        _assert_healthy_reply(live_agent.chat(prompt), label)

    # Rebuild the client through the real path, as a 401 refresh /
    # credential rotation / model switch would.
    swapped = live_agent._replace_primary_openai_client(
        reason="regression_test_rebuild"
    )
    assert swapped, "rebuild via _replace_primary_openai_client returned False"

    # The rebuilt client must still be able to complete a turn.
    final_reply = live_agent.chat("Respond with only the word: THREE")
    _assert_healthy_reply(final_reply, "turn 3 (post-rebuild)")