/ tests / gateway / test_7100_transient_failure_transcript.py
test_7100_transient_failure_transcript.py
  1  """Tests for #7100 — transient failures (429/timeout) must not drop the
  2  user message from the transcript.
  3  
  4  The #1630 fix introduced a blanket skip of transcript writes on any
  5  ``failed`` agent result.  That was correct for context-overflow failures
  6  (which would otherwise cause a session-growth loop), but it also caused
  7  transient provider failures (rate limits, read timeouts, connection
  8  resets) to silently drop the user's message — so the agent had no memory
  9  of the last turn on the next attempt.
 10  
 11  The gateway classifier must distinguish:
 12  
 13  * ``compression_exhausted=True`` OR context-keyword errors OR a generic
 14    ``400`` on a long history  → context-overflow → skip transcript
 15  * everything else that fails → transient → persist the user message
 16  """
 17  
 18  import pytest
 19  
 20  
 21  def _classify(agent_result: dict, history_len: int) -> tuple[bool, bool]:
 22      """Replicate the gateway classifier from GatewayRunner._run_agent.
 23  
 24      Returns ``(agent_failed_early, is_context_overflow_failure)``.
 25      """
 26      agent_failed_early = bool(agent_result.get("failed"))
 27      err = str(agent_result.get("error", "")).lower()
 28      is_context_overflow_failure = agent_failed_early and (
 29          bool(agent_result.get("compression_exhausted"))
 30          or any(p in err for p in (
 31              "context length", "context size", "context window",
 32              "maximum context", "token limit", "too many tokens",
 33              "reduce the length", "exceeds the limit",
 34              "request entity too large", "prompt is too long",
 35              "payload too large", "input is too long",
 36          ))
 37          or ("400" in err and history_len > 50)
 38      )
 39      return agent_failed_early, is_context_overflow_failure
 40  
 41  
 42  class TestContextOverflowStillSkipsTranscript:
 43      """#1630 behavior must be preserved for real context-overflow cases."""
 44  
 45      def test_compression_exhausted_is_context_overflow(self):
 46          agent_result = {
 47              "failed": True,
 48              "compression_exhausted": True,
 49              "error": "Request payload too large: max compression attempts reached.",
 50          }
 51          failed, ctx_overflow = _classify(agent_result, history_len=100)
 52          assert failed
 53          assert ctx_overflow
 54  
 55      def test_explicit_context_length_error_is_context_overflow(self):
 56          agent_result = {
 57              "failed": True,
 58              "error": "prompt is too long: 250000 tokens > 200000 maximum",
 59          }
 60          failed, ctx_overflow = _classify(agent_result, history_len=10)
 61          assert failed
 62          assert ctx_overflow
 63  
 64      def test_generic_400_on_large_session_is_context_overflow(self):
 65          agent_result = {
 66              "failed": True,
 67              "error": "error code: 400 - {'type': 'error', 'message': 'Error'}",
 68          }
 69          failed, ctx_overflow = _classify(agent_result, history_len=100)
 70          assert failed
 71          assert ctx_overflow
 72  
 73  
 74  class TestTransientFailureKeepsUserMessage:
 75      """Transient provider failures must NOT skip the transcript — doing so
 76      drops the user message and the agent forgets the turn. (#7100)"""
 77  
 78      def test_rate_limit_429_is_not_context_overflow(self):
 79          agent_result = {
 80              "failed": True,
 81              "error": (
 82                  "API call failed after 3 retries: 429 Too Many Requests "
 83                  "— rate limit exceeded"
 84              ),
 85          }
 86          failed, ctx_overflow = _classify(agent_result, history_len=10)
 87          assert failed
 88          assert not ctx_overflow
 89  
 90      def test_read_timeout_is_not_context_overflow(self):
 91          agent_result = {
 92              "failed": True,
 93              "error": "ReadTimeout: HTTPSConnectionPool(host='api.z.ai'): Read timed out.",
 94          }
 95          failed, ctx_overflow = _classify(agent_result, history_len=10)
 96          assert failed
 97          assert not ctx_overflow
 98  
 99      def test_connection_reset_is_not_context_overflow(self):
100          agent_result = {
101              "failed": True,
102              "error": "ConnectionError: [Errno 54] Connection reset by peer",
103          }
104          failed, ctx_overflow = _classify(agent_result, history_len=10)
105          assert failed
106          assert not ctx_overflow
107  
108      def test_provider_500_is_not_context_overflow(self):
109          agent_result = {
110              "failed": True,
111              "error": "API call failed after 3 retries: 500 Internal Server Error",
112          }
113          failed, ctx_overflow = _classify(agent_result, history_len=10)
114          assert failed
115          assert not ctx_overflow
116  
117      def test_generic_400_on_short_session_is_not_context_overflow(self):
118          """A 400 on a short session is a real client error, not context
119          overflow — still not a reason to drop the user turn."""
120          agent_result = {
121              "failed": True,
122              "error": "error code: 400 - invalid model",
123          }
124          failed, ctx_overflow = _classify(agent_result, history_len=5)
125          assert failed
126          assert not ctx_overflow
127  
128  
class TestSuccessfulResultUnaffected:
    """A result without a ``failed`` flag is classified as neither failed
    nor context overflow."""

    def test_successful_result_neither_failed_nor_overflow(self):
        reply = "Hello!"
        result = dict(
            final_response=reply,
            messages=[dict(role="assistant", content=reply)],
        )
        failed_early, overflow = _classify(result, history_len=10)
        assert not failed_early
        assert not overflow