Cradicle Explorer

/ tests / agent / test_error_classifier.py
test_error_classifier.py
   1  """Tests for agent.error_classifier — structured API error classification."""
   2  
   3  import pytest
   4  from agent.error_classifier import (
   5      ClassifiedError,
   6      FailoverReason,
   7      classify_api_error,
   8      _extract_status_code,
   9      _extract_error_body,
  10      _extract_error_code,
  11      _classify_402,
  12  )
  13  
  14  
  15  # ── Helper: mock API errors ────────────────────────────────────────────
  16  
  17  class MockAPIError(Exception):
  18      """Simulates an OpenAI SDK APIStatusError."""
  19      def __init__(self, message, status_code=None, body=None):
  20          super().__init__(message)
  21          self.status_code = status_code
  22          self.body = body or {}
  23  
  24  
  25  class MockTransportError(Exception):
  26      """Simulates a transport-level error with a specific type name."""
  27      pass
  28  
  29  
  30  class ReadTimeout(MockTransportError):
  31      pass
  32  
  33  
  34  class ConnectError(MockTransportError):
  35      pass
  36  
  37  
  38  class RemoteProtocolError(MockTransportError):
  39      pass
  40  
  41  
  42  class ServerDisconnectedError(MockTransportError):
  43      pass
  44  
  45  
  46  # ── Test: FailoverReason enum ──────────────────────────────────────────
  47  
  48  class TestFailoverReason:
  49      def test_all_reasons_have_string_values(self):
  50          for reason in FailoverReason:
  51              assert isinstance(reason.value, str)
  52  
  53      def test_enum_members_exist(self):
  54          expected = {
  55              "auth", "auth_permanent", "billing", "rate_limit",
  56              "overloaded", "server_error", "timeout",
  57              "context_overflow", "payload_too_large", "image_too_large",
  58              "model_not_found", "format_error",
  59              "provider_policy_blocked",
  60              "thinking_signature", "long_context_tier",
  61              "oauth_long_context_beta_forbidden",
  62              "unknown",
  63          }
  64          actual = {r.value for r in FailoverReason}
  65          assert expected == actual
  66  
  67  
  68  # ── Test: ClassifiedError ──────────────────────────────────────────────
  69  
  70  class TestClassifiedError:
  71      def test_is_auth_property(self):
  72          e1 = ClassifiedError(reason=FailoverReason.auth)
  73          assert e1.is_auth is True
  74  
  75          e2 = ClassifiedError(reason=FailoverReason.auth_permanent)
  76          assert e2.is_auth is True
  77  
  78          e3 = ClassifiedError(reason=FailoverReason.billing)
  79          assert e3.is_auth is False
  80  
  81      def test_defaults(self):
  82          e = ClassifiedError(reason=FailoverReason.unknown)
  83          assert e.retryable is True
  84          assert e.should_compress is False
  85          assert e.should_rotate_credential is False
  86          assert e.should_fallback is False
  87          assert e.status_code is None
  88          assert e.message == ""
  89  
  90  
  91  # ── Test: Status code extraction ───────────────────────────────────────
  92  
  93  class TestExtractStatusCode:
  94      def test_from_status_code_attr(self):
  95          e = MockAPIError("fail", status_code=429)
  96          assert _extract_status_code(e) == 429
  97  
  98      def test_from_status_attr(self):
  99          class ErrWithStatus(Exception):
 100              status = 503
 101          assert _extract_status_code(ErrWithStatus()) == 503
 102  
 103      def test_from_cause_chain(self):
 104          inner = MockAPIError("inner", status_code=401)
 105          outer = Exception("outer")
 106          outer.__cause__ = inner
 107          assert _extract_status_code(outer) == 401
 108  
 109      def test_none_when_missing(self):
 110          assert _extract_status_code(Exception("generic")) is None
 111  
 112      def test_rejects_non_http_status(self):
 113          """Integers outside 100-599 on .status should be ignored."""
 114          class ErrWeirdStatus(Exception):
 115              status = 42
 116          assert _extract_status_code(ErrWeirdStatus()) is None
 117  
 118  
 119  # ── Test: Error body extraction ────────────────────────────────────────
 120  
 121  class TestExtractErrorBody:
 122      def test_from_body_attr(self):
 123          e = MockAPIError("fail", body={"error": {"message": "bad"}})
 124          assert _extract_error_body(e) == {"error": {"message": "bad"}}
 125  
 126      def test_empty_when_no_body(self):
 127          assert _extract_error_body(Exception("generic")) == {}
 128  
 129  
 130  # ── Test: Error code extraction ────────────────────────────────────────
 131  
 132  class TestExtractErrorCode:
 133      def test_from_nested_error_code(self):
 134          body = {"error": {"code": "rate_limit_exceeded"}}
 135          assert _extract_error_code(body) == "rate_limit_exceeded"
 136  
 137      def test_from_nested_error_type(self):
 138          body = {"error": {"type": "invalid_request_error"}}
 139          assert _extract_error_code(body) == "invalid_request_error"
 140  
 141      def test_from_top_level_code(self):
 142          body = {"code": "model_not_found"}
 143          assert _extract_error_code(body) == "model_not_found"
 144  
 145      def test_empty_when_no_code(self):
 146          assert _extract_error_code({}) == ""
 147          assert _extract_error_code({"error": {"message": "oops"}}) == ""
 148  
 149  
 150  # ── Test: 402 disambiguation ───────────────────────────────────────────
 151  
 152  class TestClassify402:
 153      """The critical 402 billing vs rate_limit disambiguation."""
 154  
 155      def test_billing_exhaustion(self):
 156          """Plain 402 = billing."""
 157          result = _classify_402(
 158              "payment required",
 159              lambda reason, **kw: ClassifiedError(reason=reason, **kw),
 160          )
 161          assert result.reason == FailoverReason.billing
 162          assert result.should_rotate_credential is True
 163  
 164      def test_transient_usage_limit(self):
 165          """402 with 'usage limit' + 'try again' = rate limit, not billing."""
 166          result = _classify_402(
 167              "usage limit exceeded. try again in 5 minutes",
 168              lambda reason, **kw: ClassifiedError(reason=reason, **kw),
 169          )
 170          assert result.reason == FailoverReason.rate_limit
 171          assert result.should_rotate_credential is True
 172  
 173      def test_quota_with_retry(self):
 174          """402 with 'quota' + 'retry' = rate limit."""
 175          result = _classify_402(
 176              "quota exceeded, please retry after the window resets",
 177              lambda reason, **kw: ClassifiedError(reason=reason, **kw),
 178          )
 179          assert result.reason == FailoverReason.rate_limit
 180  
 181      def test_quota_without_retry(self):
 182          """402 with just 'quota' but no transient signal = billing."""
 183          result = _classify_402(
 184              "quota exceeded",
 185              lambda reason, **kw: ClassifiedError(reason=reason, **kw),
 186          )
 187          assert result.reason == FailoverReason.billing
 188  
 189      def test_insufficient_credits(self):
 190          result = _classify_402(
 191              "insufficient credits to complete request",
 192              lambda reason, **kw: ClassifiedError(reason=reason, **kw),
 193          )
 194          assert result.reason == FailoverReason.billing
 195  
 196  
 197  # ── Test: Full classification pipeline ─────────────────────────────────
 198  
 199  class TestClassifyApiError:
 200      """End-to-end classification tests."""
 201  
 202      # ── Auth errors ──
 203  
 204      def test_401_classified_as_auth(self):
 205          e = MockAPIError("Unauthorized", status_code=401)
 206          result = classify_api_error(e, provider="openrouter")
 207          assert result.reason == FailoverReason.auth
 208          assert result.should_rotate_credential is True
 209          # 401 is non-retryable on its own — credential rotation runs
 210          # before the retryability check in the agent loop.
 211          assert result.retryable is False
 212          assert result.should_fallback is True
 213  
 214      def test_403_classified_as_auth(self):
 215          e = MockAPIError("Forbidden", status_code=403)
 216          result = classify_api_error(e, provider="anthropic")
 217          assert result.reason == FailoverReason.auth
 218          assert result.should_fallback is True
 219  
 220      def test_403_key_limit_classified_as_billing(self):
 221          """OpenRouter 403 'key limit exceeded' is billing, not auth."""
 222          e = MockAPIError("Key limit exceeded for this key", status_code=403)
 223          result = classify_api_error(e, provider="openrouter")
 224          assert result.reason == FailoverReason.billing
 225          assert result.should_rotate_credential is True
 226          assert result.should_fallback is True
 227  
 228      def test_403_spending_limit_classified_as_billing(self):
 229          e = MockAPIError("spending limit reached", status_code=403)
 230          result = classify_api_error(e, provider="openrouter")
 231          assert result.reason == FailoverReason.billing
 232  
 233      # ── Billing ──
 234  
 235      def test_402_plain_billing(self):
 236          e = MockAPIError("Payment Required", status_code=402)
 237          result = classify_api_error(e)
 238          assert result.reason == FailoverReason.billing
 239          assert result.retryable is False
 240  
 241      def test_402_transient_usage_limit(self):
 242          e = MockAPIError("usage limit exceeded, try again later", status_code=402)
 243          result = classify_api_error(e)
 244          assert result.reason == FailoverReason.rate_limit
 245          assert result.retryable is True
 246  
 247      # ── Rate limit ──
 248  
 249      def test_429_rate_limit(self):
 250          e = MockAPIError("Too Many Requests", status_code=429)
 251          result = classify_api_error(e)
 252          assert result.reason == FailoverReason.rate_limit
 253          assert result.should_fallback is True
 254  
 255      def test_alibaba_rate_increased_too_quickly(self):
 256          """Alibaba/DashScope returns a unique throttling message.
 257  
 258          Port from anomalyco/opencode#21355.
 259          """
 260          msg = (
 261              "Upstream error from Alibaba: Request rate increased too quickly. "
 262              "To ensure system stability, please adjust your client logic to "
 263              "scale requests more smoothly over time."
 264          )
 265          e = MockAPIError(msg, status_code=400)
 266          result = classify_api_error(e)
 267          assert result.reason == FailoverReason.rate_limit
 268          assert result.retryable is True
 269          assert result.should_rotate_credential is True
 270  
 271      # ── Server errors ──
 272  
 273      def test_500_server_error(self):
 274          e = MockAPIError("Internal Server Error", status_code=500)
 275          result = classify_api_error(e)
 276          assert result.reason == FailoverReason.server_error
 277          assert result.retryable is True
 278  
 279      def test_502_server_error(self):
 280          e = MockAPIError("Bad Gateway", status_code=502)
 281          result = classify_api_error(e)
 282          assert result.reason == FailoverReason.server_error
 283  
 284      def test_503_overloaded(self):
 285          e = MockAPIError("Service Unavailable", status_code=503)
 286          result = classify_api_error(e)
 287          assert result.reason == FailoverReason.overloaded
 288  
 289      def test_529_anthropic_overloaded(self):
 290          e = MockAPIError("Overloaded", status_code=529)
 291          result = classify_api_error(e)
 292          assert result.reason == FailoverReason.overloaded
 293  
 294      # ── Model not found ──
 295  
 296      def test_404_model_not_found(self):
 297          e = MockAPIError("model not found", status_code=404)
 298          result = classify_api_error(e)
 299          assert result.reason == FailoverReason.model_not_found
 300          assert result.should_fallback is True
 301          assert result.retryable is False
 302  
 303      def test_404_generic(self):
 304          # Generic 404 with no "model not found" signal — common for local
 305          # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths.  Treat
 306          # as unknown (retryable) so the real error surfaces, rather than
 307          # claiming the model is missing and silently falling back.
 308          e = MockAPIError("Not Found", status_code=404)
 309          result = classify_api_error(e)
 310          assert result.reason == FailoverReason.unknown
 311          assert result.retryable is True
 312          assert result.should_fallback is False
 313  
 314      # ── Provider policy-block (OpenRouter privacy/guardrail) ──
 315  
 316      def test_404_openrouter_policy_blocked(self):
 317          # Real OpenRouter error when the user's account privacy setting
 318          # excludes the only endpoint serving a model (e.g. DeepSeek V4 Pro
 319          # which is hosted only by DeepSeek, and their endpoint may log
 320          # inputs).  Must NOT classify as model_not_found — the model
 321          # exists, falling back won't help (same account setting applies),
 322          # and the error body already tells the user where to fix it.
 323          e = MockAPIError(
 324              "No endpoints available matching your guardrail restrictions "
 325              "and data policy. Configure: https://openrouter.ai/settings/privacy",
 326              status_code=404,
 327          )
 328          result = classify_api_error(e)
 329          assert result.reason == FailoverReason.provider_policy_blocked
 330          assert result.retryable is False
 331          assert result.should_fallback is False
 332  
 333      def test_400_openrouter_policy_blocked(self):
 334          # Defense-in-depth: if OpenRouter ever returns this as 400 instead
 335          # of 404, still classify it distinctly rather than as format_error
 336          # or model_not_found.
 337          e = MockAPIError(
 338              "No endpoints available matching your data policy",
 339              status_code=400,
 340          )
 341          result = classify_api_error(e)
 342          assert result.reason == FailoverReason.provider_policy_blocked
 343          assert result.retryable is False
 344          assert result.should_fallback is False
 345  
 346      def test_message_only_openrouter_policy_blocked(self):
 347          # No status code — classifier should still catch the fingerprint
 348          # via the message-pattern fallback.
 349          e = Exception(
 350              "No endpoints available matching your guardrail restrictions "
 351              "and data policy"
 352          )
 353          result = classify_api_error(e)
 354          assert result.reason == FailoverReason.provider_policy_blocked
 355  
 356      def test_404_model_not_found_still_works(self):
 357          # Regression guard: the new policy-block check must not swallow
 358          # genuine model_not_found 404s.
 359          e = MockAPIError(
 360              "openrouter/nonexistent-model is not a valid model ID",
 361              status_code=404,
 362          )
 363          result = classify_api_error(e)
 364          assert result.reason == FailoverReason.model_not_found
 365          assert result.should_fallback is True
 366  
 367      # ── Payload too large ──
 368  
 369      def test_413_payload_too_large(self):
 370          e = MockAPIError("Request Entity Too Large", status_code=413)
 371          result = classify_api_error(e)
 372          assert result.reason == FailoverReason.payload_too_large
 373          assert result.should_compress is True
 374  
 375      # ── Context overflow ──
 376  
 377      def test_400_context_length(self):
 378          e = MockAPIError("context length exceeded: 250000 > 200000", status_code=400)
 379          result = classify_api_error(e)
 380          assert result.reason == FailoverReason.context_overflow
 381          assert result.should_compress is True
 382  
 383      def test_400_too_many_tokens(self):
 384          e = MockAPIError("This model's maximum context is 128000 tokens, too many tokens", status_code=400)
 385          result = classify_api_error(e)
 386          assert result.reason == FailoverReason.context_overflow
 387  
 388      def test_400_prompt_too_long(self):
 389          e = MockAPIError("prompt is too long: 300000 tokens > 200000 maximum", status_code=400)
 390          result = classify_api_error(e)
 391          assert result.reason == FailoverReason.context_overflow
 392  
 393      def test_400_generic_large_session(self):
 394          """Generic 400 with large session → context overflow heuristic."""
 395          e = MockAPIError(
 396              "Error",
 397              status_code=400,
 398              body={"error": {"message": "Error"}},
 399          )
 400          result = classify_api_error(e, approx_tokens=100000, context_length=200000)
 401          assert result.reason == FailoverReason.context_overflow
 402  
 403      def test_400_generic_small_session_is_format_error(self):
 404          """Generic 400 with small session → format error, not context overflow."""
 405          e = MockAPIError(
 406              "Error",
 407              status_code=400,
 408              body={"error": {"message": "Error"}},
 409          )
 410          result = classify_api_error(e, approx_tokens=1000, context_length=200000)
 411          assert result.reason == FailoverReason.format_error
 412  
 413      def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self):
 414          """Large-context sessions should not overflow solely due to message count."""
 415          e = MockAPIError(
 416              "Error",
 417              status_code=400,
 418              body={"error": {"message": "Error"}},
 419          )
 420          result = classify_api_error(
 421              e,
 422              provider="openai-codex",
 423              model="gpt-5.5",
 424              approx_tokens=74320,
 425              context_length=1_000_000,
 426              num_messages=432,
 427          )
 428          assert result.reason == FailoverReason.format_error
 429          assert result.should_compress is False
 430  
 431      # ── Server disconnect + large session ──
 432  
 433      def test_disconnect_large_session_context_overflow(self):
 434          """Server disconnect with large session → context overflow."""
 435          e = Exception("server disconnected without sending complete message")
 436          result = classify_api_error(e, approx_tokens=150000, context_length=200000)
 437          assert result.reason == FailoverReason.context_overflow
 438          assert result.should_compress is True
 439  
 440      def test_disconnect_small_session_timeout(self):
 441          """Server disconnect with small session → timeout."""
 442          e = Exception("server disconnected without sending complete message")
 443          result = classify_api_error(e, approx_tokens=5000, context_length=200000)
 444          assert result.reason == FailoverReason.timeout
 445  
 446      def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self):
 447          """Large-context disconnects should not overflow solely due to message count."""
 448          e = Exception("server disconnected without sending complete message")
 449          result = classify_api_error(
 450              e,
 451              provider="openai-codex",
 452              model="gpt-5.5",
 453              approx_tokens=74320,
 454              context_length=1_000_000,
 455              num_messages=432,
 456          )
 457          assert result.reason == FailoverReason.timeout
 458          assert result.should_compress is False
 459  
 460      # ── Provider-specific: Anthropic thinking signature ──
 461  
 462      def test_anthropic_thinking_signature(self):
 463          e = MockAPIError(
 464              "thinking block has invalid signature",
 465              status_code=400,
 466          )
 467          result = classify_api_error(e, provider="anthropic")
 468          assert result.reason == FailoverReason.thinking_signature
 469          assert result.retryable is True
 470  
 471      def test_non_anthropic_400_with_signature_not_classified_as_thinking(self):
 472          """400 with 'signature' but from non-Anthropic → format error."""
 473          e = MockAPIError("invalid signature", status_code=400)
 474          result = classify_api_error(e, provider="openrouter", approx_tokens=0)
 475          # Without "thinking" in the message, it shouldn't be thinking_signature
 476          assert result.reason != FailoverReason.thinking_signature
 477  
 478      # ── Provider-specific: Anthropic long-context tier ──
 479  
 480      def test_anthropic_long_context_tier(self):
 481          e = MockAPIError(
 482              "Extra usage is required for long context requests over 200k tokens",
 483              status_code=429,
 484          )
 485          result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4")
 486          assert result.reason == FailoverReason.long_context_tier
 487          assert result.should_compress is True
 488  
 489      def test_normal_429_not_long_context(self):
 490          """Normal 429 without 'extra usage' + 'long context' → rate_limit."""
 491          e = MockAPIError("Too Many Requests", status_code=429)
 492          result = classify_api_error(e, provider="anthropic")
 493          assert result.reason == FailoverReason.rate_limit
 494  
 495      # ── Provider-specific: Anthropic OAuth 1M-context beta forbidden ──
 496  
 497      def test_anthropic_oauth_1m_beta_forbidden(self):
 498          """400 + 'long context beta is not yet available for this subscription'
 499          → oauth_long_context_beta_forbidden (retryable, no compression)."""
 500          e = MockAPIError(
 501              "The long context beta is not yet available for this subscription.",
 502              status_code=400,
 503          )
 504          result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
 505          assert result.reason == FailoverReason.oauth_long_context_beta_forbidden
 506          assert result.retryable is True
 507          assert result.should_compress is False
 508  
 509      def test_anthropic_oauth_1m_beta_forbidden_does_not_collide_with_tier_gate(self):
 510          """The 429 'extra usage' + 'long context' tier gate keeps its own
 511          classification even though its message mentions 'long context'."""
 512          e = MockAPIError(
 513              "Extra usage is required for long context requests over 200k tokens",
 514              status_code=429,
 515          )
 516          result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
 517          assert result.reason == FailoverReason.long_context_tier
 518  
 519      def test_400_without_beta_phrase_is_not_1m_beta_forbidden(self):
 520          """A generic 400 that happens to mention 'long context' but not the
 521          exact beta-availability phrase should not be misclassified."""
 522          e = MockAPIError(
 523              "long context window exceeded",
 524              status_code=400,
 525          )
 526          result = classify_api_error(e, provider="anthropic")
 527          assert result.reason != FailoverReason.oauth_long_context_beta_forbidden
 528  
 529      # ── Transport errors ──
 530  
 531      def test_read_timeout(self):
 532          e = ReadTimeout("Read timed out")
 533          result = classify_api_error(e)
 534          assert result.reason == FailoverReason.timeout
 535          assert result.retryable is True
 536  
 537      def test_connect_error(self):
 538          e = ConnectError("Connection refused")
 539          result = classify_api_error(e)
 540          assert result.reason == FailoverReason.timeout
 541  
 542      def test_connection_error_builtin(self):
 543          e = ConnectionError("Connection reset by peer")
 544          result = classify_api_error(e)
 545          assert result.reason == FailoverReason.timeout
 546  
 547      def test_timeout_error_builtin(self):
 548          e = TimeoutError("timed out")
 549          result = classify_api_error(e)
 550          assert result.reason == FailoverReason.timeout
 551  
 552      # ── Error code classification ──
 553  
 554      def test_error_code_resource_exhausted(self):
 555          e = MockAPIError(
 556              "Resource exhausted",
 557              body={"error": {"code": "resource_exhausted", "message": "Too many requests"}},
 558          )
 559          result = classify_api_error(e)
 560          assert result.reason == FailoverReason.rate_limit
 561  
 562      def test_error_code_model_not_found(self):
 563          e = MockAPIError(
 564              "Model not available",
 565              body={"error": {"code": "model_not_found"}},
 566          )
 567          result = classify_api_error(e)
 568          assert result.reason == FailoverReason.model_not_found
 569  
 570      def test_error_code_context_length_exceeded(self):
 571          e = MockAPIError(
 572              "Context too large",
 573              body={"error": {"code": "context_length_exceeded"}},
 574          )
 575          result = classify_api_error(e)
 576          assert result.reason == FailoverReason.context_overflow
 577  
 578      # ── Message-only patterns (no status code) ──
 579  
 580      def test_message_billing_pattern(self):
 581          e = Exception("insufficient credits to complete this request")
 582          result = classify_api_error(e)
 583          assert result.reason == FailoverReason.billing
 584  
 585      def test_message_rate_limit_pattern(self):
 586          e = Exception("rate limit reached for this model")
 587          result = classify_api_error(e)
 588          assert result.reason == FailoverReason.rate_limit
 589  
 590      def test_message_auth_pattern(self):
 591          e = Exception("invalid api key provided")
 592          result = classify_api_error(e)
 593          assert result.reason == FailoverReason.auth
 594  
 595      def test_message_model_not_found_pattern(self):
 596          e = Exception("gpt-99 is not a valid model")
 597          result = classify_api_error(e)
 598          assert result.reason == FailoverReason.model_not_found
 599  
 600      def test_message_context_overflow_pattern(self):
 601          e = Exception("maximum context length exceeded")
 602          result = classify_api_error(e)
 603          assert result.reason == FailoverReason.context_overflow
 604  
 605      # ── Message-only usage limit disambiguation (no status code) ──
 606  
 607      def test_message_usage_limit_transient_is_rate_limit(self):
 608          """'usage limit' + 'try again' with no status code → rate_limit, not billing."""
 609          e = Exception("usage limit exceeded, try again in 5 minutes")
 610          result = classify_api_error(e)
 611          assert result.reason == FailoverReason.rate_limit
 612          assert result.retryable is True
 613          assert result.should_rotate_credential is True
 614          assert result.should_fallback is True
 615  
 616      def test_message_usage_limit_no_retry_signal_is_billing(self):
 617          """'usage limit' with no transient signal and no status code → billing."""
 618          e = Exception("usage limit reached")
 619          result = classify_api_error(e)
 620          assert result.reason == FailoverReason.billing
 621          assert result.retryable is False
 622          assert result.should_rotate_credential is True
 623  
 624      def test_message_quota_with_reset_window_is_rate_limit(self):
 625          """'quota' + 'resets at' with no status code → rate_limit."""
 626          e = Exception("quota exceeded, resets at midnight UTC")
 627          result = classify_api_error(e)
 628          assert result.reason == FailoverReason.rate_limit
 629          assert result.retryable is True
 630  
 631      def test_message_limit_exceeded_with_wait_is_rate_limit(self):
 632          """'limit exceeded' + 'wait' with no status code → rate_limit."""
 633          e = Exception("key limit exceeded, please wait before retrying")
 634          result = classify_api_error(e)
 635          assert result.reason == FailoverReason.rate_limit
 636          assert result.retryable is True
 637  
 638      # ── Unknown / fallback ──
 639  
 640      def test_generic_exception_is_unknown(self):
 641          e = Exception("something weird happened")
 642          result = classify_api_error(e)
 643          assert result.reason == FailoverReason.unknown
 644          assert result.retryable is True
 645  
 646      # ── Format error ──
 647  
 648      def test_400_descriptive_format_error(self):
 649          """400 with descriptive message (not context overflow) → format error."""
 650          e = MockAPIError(
 651              "Invalid value for parameter 'temperature': must be between 0 and 2",
 652              status_code=400,
 653              body={"error": {"message": "Invalid value for parameter 'temperature': must be between 0 and 2"}},
 654          )
 655          result = classify_api_error(e, approx_tokens=1000)
 656          assert result.reason == FailoverReason.format_error
 657          assert result.retryable is False
 658  
 659      def test_422_format_error(self):
 660          e = MockAPIError("Unprocessable Entity", status_code=422)
 661          result = classify_api_error(e)
 662          assert result.reason == FailoverReason.format_error
 663          assert result.retryable is False
 664  
 665      def test_400_flat_body_descriptive_not_context_overflow(self):
 666          """Responses API flat body with descriptive error + large session → format error.
 667  
 668          The Codex Responses API returns errors in flat body format:
 669          {"message": "...", "type": "..."} without an "error" wrapper.
 670          A descriptive 400 must NOT be misclassified as context overflow
 671          just because the session is large.
 672          """
 673          e = MockAPIError(
 674              "Invalid 'input[index].name': string does not match pattern.",
 675              status_code=400,
 676              body={"message": "Invalid 'input[index].name': string does not match pattern.",
 677                    "type": "invalid_request_error"},
 678          )
 679          result = classify_api_error(e, approx_tokens=200000, context_length=400000, num_messages=500)
 680          assert result.reason == FailoverReason.format_error
 681          assert result.retryable is False
 682  
 683      def test_400_flat_body_generic_large_session_still_context_overflow(self):
 684          """Flat body with generic 'Error' message + large session → context overflow.
 685  
 686          Regression: the flat-body fallback must not break the existing heuristic
 687          for genuinely generic errors from providers that use flat bodies.
 688          """
 689          e = MockAPIError(
 690              "Error",
 691              status_code=400,
 692              body={"message": "Error"},
 693          )
 694          result = classify_api_error(e, approx_tokens=100000, context_length=200000)
 695          assert result.reason == FailoverReason.context_overflow
 696  
 697      # ── Peer closed + large session ──
 698  
 699      def test_peer_closed_large_session(self):
 700          e = Exception("peer closed connection without sending complete message")
 701          result = classify_api_error(e, approx_tokens=130000, context_length=200000)
 702          assert result.reason == FailoverReason.context_overflow
 703  
 704      # ── Chinese error messages ──
 705  
 706      def test_chinese_context_overflow(self):
 707          e = MockAPIError("超过最大长度限制", status_code=400)
 708          result = classify_api_error(e)
 709          assert result.reason == FailoverReason.context_overflow
 710  
 711      # ── vLLM / local inference server error messages ──
 712  
 713      def test_vllm_max_model_len_overflow(self):
 714          """vLLM's 'exceeds the max_model_len' error → context_overflow."""
 715          e = MockAPIError(
 716              "The engine prompt length 1327246 exceeds the max_model_len 131072. "
 717              "Please reduce prompt.",
 718              status_code=400,
 719          )
 720          result = classify_api_error(e)
 721          assert result.reason == FailoverReason.context_overflow
 722  
 723      def test_vllm_prompt_length_exceeds(self):
 724          """vLLM prompt length error → context_overflow."""
 725          e = MockAPIError(
 726              "prompt length 200000 exceeds maximum model length 131072",
 727              status_code=400,
 728          )
 729          result = classify_api_error(e)
 730          assert result.reason == FailoverReason.context_overflow
 731  
 732      def test_vllm_input_too_long(self):
 733          """vLLM 'input is too long' error → context_overflow."""
 734          e = MockAPIError("input is too long for model", status_code=400)
 735          result = classify_api_error(e)
 736          assert result.reason == FailoverReason.context_overflow
 737  
 738      def test_ollama_context_length_exceeded(self):
 739          """Ollama 'context length exceeded' error → context_overflow."""
 740          e = MockAPIError("context length exceeded", status_code=400)
 741          result = classify_api_error(e)
 742          assert result.reason == FailoverReason.context_overflow
 743  
 744      def test_llamacpp_slot_context(self):
 745          """llama.cpp / llama-server 'slot context' error → context_overflow."""
 746          e = MockAPIError(
 747              "slot context: 4096 tokens, prompt 8192 tokens — not enough space",
 748              status_code=400,
 749          )
 750          result = classify_api_error(e)
 751          assert result.reason == FailoverReason.context_overflow
 752  
 753      # ── Result metadata ──
 754  
 755      def test_provider_and_model_in_result(self):
 756          e = MockAPIError("fail", status_code=500)
 757          result = classify_api_error(e, provider="openrouter", model="gpt-5")
 758          assert result.provider == "openrouter"
 759          assert result.model == "gpt-5"
 760          assert result.status_code == 500
 761  
 762      def test_message_extracted(self):
 763          e = MockAPIError(
 764              "outer",
 765              status_code=500,
 766              body={"error": {"message": "Internal server error occurred"}},
 767          )
 768          result = classify_api_error(e)
 769          assert result.message == "Internal server error occurred"
 770  
 771  
 772  # ── Test: Adversarial / edge cases (from live testing) ─────────────────
 773  
 774  class TestAdversarialEdgeCases:
 775      """Edge cases discovered during live testing with real SDK objects."""
 776  
 777      def test_empty_exception_message(self):
 778          result = classify_api_error(Exception(""))
 779          assert result.reason == FailoverReason.unknown
 780          assert result.retryable is True
 781  
 782      def test_500_with_none_body(self):
 783          e = MockAPIError("fail", status_code=500, body=None)
 784          result = classify_api_error(e)
 785          assert result.reason == FailoverReason.server_error
 786  
 787      def test_non_dict_body(self):
 788          """Some providers return strings instead of JSON."""
 789          class StringBodyError(Exception):
 790              status_code = 400
 791              body = "just a string"
 792          result = classify_api_error(StringBodyError("bad"))
 793          assert result.reason == FailoverReason.format_error
 794  
 795      def test_list_body(self):
 796          class ListBodyError(Exception):
 797              status_code = 500
 798              body = [{"error": "something"}]
 799          result = classify_api_error(ListBodyError("server error"))
 800          assert result.reason == FailoverReason.server_error
 801  
 802      def test_circular_cause_chain(self):
 803          """Must not infinite-loop on circular __cause__."""
 804          e = Exception("circular")
 805          e.__cause__ = e
 806          result = classify_api_error(e)
 807          assert result.reason == FailoverReason.unknown
 808  
 809      def test_three_level_cause_chain(self):
 810          inner = MockAPIError("inner", status_code=429)
 811          middle = Exception("middle")
 812          middle.__cause__ = inner
 813          outer = RuntimeError("outer")
 814          outer.__cause__ = middle
 815          result = classify_api_error(outer)
 816          assert result.status_code == 429
 817          assert result.reason == FailoverReason.rate_limit
 818  
 819      def test_400_with_rate_limit_text(self):
 820          """Some providers send rate limits as 400 instead of 429."""
 821          e = MockAPIError(
 822              "rate limit policy",
 823              status_code=400,
 824              body={"error": {"message": "rate limit exceeded on this model"}},
 825          )
 826          result = classify_api_error(e, provider="openrouter")
 827          assert result.reason == FailoverReason.rate_limit
 828  
 829      def test_400_with_billing_text(self):
 830          """Some providers send billing errors as 400."""
 831          e = MockAPIError(
 832              "billing",
 833              status_code=400,
 834              body={"error": {"message": "insufficient credits for this request"}},
 835          )
 836          result = classify_api_error(e)
 837          assert result.reason == FailoverReason.billing
 838  
 839      def test_200_with_error_body(self):
 840          """200 status with error in body — should be unknown, not crash."""
 841          class WeirdSuccess(Exception):
 842              status_code = 200
 843              body = {"error": {"message": "loading"}}
 844          result = classify_api_error(WeirdSuccess("model loading"))
 845          assert result.reason == FailoverReason.unknown
 846  
 847      def test_ollama_context_size_exceeded(self):
 848          e = MockAPIError(
 849              "Error",
 850              status_code=400,
 851              body={"error": {"message": "context size has been exceeded"}},
 852          )
 853          result = classify_api_error(e, provider="ollama")
 854          assert result.reason == FailoverReason.context_overflow
 855  
 856      def test_connection_refused_error(self):
 857          e = ConnectionRefusedError("Connection refused: localhost:11434")
 858          result = classify_api_error(e, provider="ollama")
 859          assert result.reason == FailoverReason.timeout
 860  
 861      def test_body_message_enrichment(self):
 862          """Body message must be included in pattern matching even when
 863          str(error) doesn't contain it (OpenAI SDK APIStatusError)."""
 864          e = MockAPIError(
 865              "Usage limit",  # str(e) = "usage limit"
 866              status_code=402,
 867              body={"error": {"message": "Usage limit reached, try again in 5 minutes"}},
 868          )
 869          result = classify_api_error(e)
 870          # "try again" is only in body, not in str(e)
 871          assert result.reason == FailoverReason.rate_limit
 872  
 873      def test_disconnect_pattern_ordering(self):
 874          """Disconnect + large session must beat generic transport catch."""
 875          class FakeRemoteProtocol(Exception):
 876              pass
 877          # Type name isn't in _TRANSPORT_ERROR_TYPES but message has disconnect pattern
 878          e = Exception("peer closed connection without sending complete message")
 879          result = classify_api_error(e, approx_tokens=150000, context_length=200000)
 880          assert result.reason == FailoverReason.context_overflow
 881          assert result.should_compress is True
 882  
 883      def test_credit_balance_too_low(self):
 884          e = MockAPIError(
 885              "Credits low",
 886              status_code=402,
 887              body={"error": {"message": "Your credit balance is too low"}},
 888          )
 889          result = classify_api_error(e, provider="anthropic")
 890          assert result.reason == FailoverReason.billing
 891  
 892      def test_deepseek_402_chinese(self):
 893          """Chinese billing message should still match billing patterns."""
 894          # "余额不足" doesn't match English billing patterns, but 402 defaults to billing
 895          e = MockAPIError("余额不足", status_code=402)
 896          result = classify_api_error(e, provider="deepseek")
 897          assert result.reason == FailoverReason.billing
 898  
 899      def test_openrouter_wrapped_context_overflow_in_metadata_raw(self):
 900          """OpenRouter wraps provider errors in metadata.raw JSON string."""
 901          e = MockAPIError(
 902              "Provider returned error",
 903              status_code=400,
 904              body={
 905                  "error": {
 906                      "message": "Provider returned error",
 907                      "code": 400,
 908                      "metadata": {
 909                          "raw": '{"error":{"message":"context length exceeded: 50000 > 32768"}}'
 910                      }
 911                  }
 912              },
 913          )
 914          result = classify_api_error(e, provider="openrouter", approx_tokens=10000)
 915          assert result.reason == FailoverReason.context_overflow
 916          assert result.should_compress is True
 917  
 918      def test_openrouter_wrapped_rate_limit_in_metadata_raw(self):
 919          e = MockAPIError(
 920              "Provider returned error",
 921              status_code=400,
 922              body={
 923                  "error": {
 924                      "message": "Provider returned error",
 925                      "metadata": {
 926                          "raw": '{"error":{"message":"Rate limit exceeded. Please retry after 30s."}}'
 927                      }
 928                  }
 929              },
 930          )
 931          result = classify_api_error(e, provider="openrouter")
 932          assert result.reason == FailoverReason.rate_limit
 933  
 934      def test_thinking_signature_via_openrouter(self):
 935          """Thinking signature errors proxied through OpenRouter must be caught."""
 936          e = MockAPIError(
 937              "thinking block has invalid signature",
 938              status_code=400,
 939          )
 940          # provider is openrouter, not anthropic — old code missed this
 941          result = classify_api_error(e, provider="openrouter", model="anthropic/claude-sonnet-4")
 942          assert result.reason == FailoverReason.thinking_signature
 943  
 944      def test_generic_400_large_by_message_count(self):
 945          """Many small messages (>80) should trigger context overflow heuristic."""
 946          e = MockAPIError(
 947              "Error",
 948              status_code=400,
 949              body={"error": {"message": "Error"}},
 950          )
 951          # Low token count but high message count
 952          result = classify_api_error(
 953              e, approx_tokens=5000, context_length=200000, num_messages=100,
 954          )
 955          assert result.reason == FailoverReason.context_overflow
 956  
 957      def test_disconnect_large_by_message_count(self):
 958          """Server disconnect with 200+ messages should trigger context overflow."""
 959          e = Exception("server disconnected without sending complete message")
 960          result = classify_api_error(
 961              e, approx_tokens=5000, context_length=200000, num_messages=250,
 962          )
 963          assert result.reason == FailoverReason.context_overflow
 964  
 965      def test_openrouter_wrapped_model_not_found_in_metadata_raw(self):
 966          e = MockAPIError(
 967              "Provider returned error",
 968              status_code=400,
 969              body={
 970                  "error": {
 971                      "message": "Provider returned error",
 972                      "metadata": {
 973                          "raw": '{"error":{"message":"The model gpt-99 does not exist"}}'
 974                      }
 975                  }
 976              },
 977          )
 978          result = classify_api_error(e, provider="openrouter")
 979          assert result.reason == FailoverReason.model_not_found
 980  
 981      # ── Regression: dict-typed message field (Issue #11233) ──
 982  
 983      def test_pydantic_dict_message_no_crash(self):
 984          """Pydantic validation errors return message as dict, not string.
 985  
 986          Regression: classify_api_error must not crash when body['message']
 987          is a dict (e.g. {"detail": [...]} from FastAPI/Pydantic). The
 988          'or ""' fallback only handles None/falsy values — a non-empty
 989          dict is truthy and passed to .lower(), causing AttributeError.
 990          """
 991          e = MockAPIError(
 992              "Unprocessable Entity",
 993              status_code=422,
 994              body={
 995                  "object": "error",
 996                  "message": {
 997                      "detail": [
 998                          {
 999                              "type": "extra_forbidden",
1000                              "loc": ["body", "think"],
1001                              "msg": "Extra inputs are not permitted",
1002                          }
1003                      ]
1004                  },
1005              },
1006          )
1007          result = classify_api_error(e)
1008          assert result.reason == FailoverReason.format_error
1009          assert result.status_code == 422
1010          assert result.retryable is False
1011  
1012      def test_nested_error_dict_message_no_crash(self):
1013          """Nested body['error']['message'] as dict must not crash.
1014  
1015          Some providers wrap Pydantic errors in an 'error' object.
1016          """
1017          e = MockAPIError(
1018              "Validation error",
1019              status_code=400,
1020              body={
1021                  "error": {
1022                      "message": {
1023                          "detail": [
1024                              {"type": "missing", "loc": ["body", "required"]}
1025                          ]
1026                      }
1027                  }
1028              },
1029          )
1030          result = classify_api_error(e, approx_tokens=1000)
1031          assert result.reason == FailoverReason.format_error
1032          assert result.status_code == 400
1033  
1034      def test_metadata_raw_dict_message_no_crash(self):
1035          """OpenRouter metadata.raw with dict message must not crash."""
1036          e = MockAPIError(
1037              "Provider error",
1038              status_code=400,
1039              body={
1040                  "error": {
1041                      "message": "Provider error",
1042                      "metadata": {
1043                          "raw": '{"error":{"message":{"detail":[{"type":"invalid"}]}}}'
1044                      }
1045                  }
1046              },
1047          )
1048          result = classify_api_error(e)
1049          assert result.reason == FailoverReason.format_error
1050  
1051      # Broader non-string type guards — defense against other provider quirks.
1052  
1053      def test_list_message_no_crash(self):
1054          """Some providers return message as a list of error entries."""
1055          e = MockAPIError(
1056              "validation",
1057              status_code=400,
1058              body={"message": [{"msg": "field required"}]},
1059          )
1060          result = classify_api_error(e)
1061          assert result is not None
1062  
1063      def test_int_message_no_crash(self):
1064          """Any non-string type must be coerced safely."""
1065          e = MockAPIError("server error", status_code=500, body={"message": 42})
1066          result = classify_api_error(e)
1067          assert result is not None
1068  
1069      def test_none_message_still_works(self):
1070          """Regression: None fallback (the 'or \"\"' path) must still work."""
1071          e = MockAPIError("server error", status_code=500, body={"message": None})
1072          result = classify_api_error(e)
1073          assert result is not None
1074  
1075  
1076  # ── Test: SSL/TLS transient errors ─────────────────────────────────────
1077  
class TestSSLTransientPatterns:
    """Mid-stream SSL/TLS alerts are retryable timeouts, never compression triggers.

    These errors must classify as timeout rather than unknown, and must not
    request context compression even when the session is large.

    Background: OpenSSL 3.x renamed its TLS alert error codes
    (`SSLV3_ALERT_BAD_RECORD_MAC` became `SSL/TLS_ALERT_BAD_RECORD_MAC`),
    which broke exact-string matching in downstream retry logic.  Stable
    substrings are matched instead.
    """

    def test_bad_record_mac_classifies_as_timeout(self):
        """Bad record mac alert raised mid-stream (OpenSSL 3.x)."""
        alert = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)")
        outcome = classify_api_error(alert)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True
        assert outcome.should_compress is False

    def test_openssl_3x_format_classifies_as_timeout(self):
        """The newer `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` spelling matches too,
        since both the space-separated and the underscore-separated form of
        the stable `bad_record_mac` token are keyed on."""
        alert = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming")
        outcome = classify_api_error(alert)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True
        assert outcome.should_compress is False

    def test_tls_alert_internal_error_classifies_as_timeout(self):
        """A TLSv1 'alert internal error' is a retryable timeout without compression."""
        alert = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error")
        outcome = classify_api_error(alert)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True
        assert outcome.should_compress is False

    def test_ssl_handshake_failure_classifies_as_timeout(self):
        """An 'ssl handshake failure' message is a retryable timeout."""
        alert = Exception("ssl handshake failure during mid-stream")
        outcome = classify_api_error(alert)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True

    def test_ssl_prefix_classifies_as_timeout(self):
        """The generic '[SSL: XYZ]' prefix emitted by Python's ssl module."""
        alert = Exception(
            "[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol"
        )
        outcome = classify_api_error(alert)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True

    def test_ssl_alert_on_large_session_does_not_compress(self):
        """Critical: an SSL alert on a big context must NOT request compression.

        Compression is expensive and cannot repair a transport hiccup, which
        is the reason _SSL_TRANSIENT_PATTERNS is kept apart from
        _SERVER_DISCONNECT_PATTERNS.
        """
        alert = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac")
        large_session = {
            "approx_tokens": 180000,      # 90% of a 200k-context window
            "context_length": 200000,
            "num_messages": 300,
        }
        outcome = classify_api_error(alert, **large_session)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.should_compress is False

    def test_plain_disconnect_on_large_session_still_compresses(self):
        """Regression guard for the context-overflow-via-disconnect path.

        Non-SSL disconnects on large sessions must keep requesting
        compression; only SSL-specific disconnects are exempt.
        """
        disconnect = Exception("Server disconnected without sending a response")
        large_session = {
            "approx_tokens": 180000,
            "context_length": 200000,
            "num_messages": 300,
        }
        outcome = classify_api_error(disconnect, **large_session)
        assert outcome.reason == FailoverReason.context_overflow
        assert outcome.should_compress is True

    def test_real_ssl_error_type_classifies_as_timeout(self):
        """A genuine ssl.SSLError instance: the type name alone, not the
        message, should send it to the transport bucket."""
        import ssl
        ssl_failure = ssl.SSLError("arbitrary ssl error")
        outcome = classify_api_error(ssl_failure)
        assert outcome.reason == FailoverReason.timeout
        assert outcome.retryable is True
1165  
1166  # ── Test: RateLimitError without status_code (Copilot/GitHub Models) ──────────
1167  
class TestRateLimitErrorWithoutStatusCode:
    """Regression coverage for the Copilot/GitHub Models edge case in which
    the OpenAI SDK raises RateLimitError without populating .status_code."""

    def _make_rate_limit_error(self, status_code=None):
        """Build an exception whose class is named 'RateLimitError'.

        The instance's status_code attribute is set to the given value;
        the default of None mirrors the SDK shape seen with Copilot.
        """
        rate_limit_cls = type("RateLimitError", (Exception,), {})
        error = rate_limit_cls("You have exceeded your rate limit.")
        error.status_code = status_code  # None simulates the Copilot case
        return error

    def test_rate_limit_error_without_status_code_classified_as_rate_limit(self):
        """A RateLimitError carrying status_code=None must still be rate_limit."""
        error = self._make_rate_limit_error(status_code=None)
        outcome = classify_api_error(error, provider="copilot", model="gpt-4o")
        assert outcome.reason == FailoverReason.rate_limit

    def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self):
        """A RateLimitError that does carry status_code=429 is rate_limit as well."""
        error = self._make_rate_limit_error(status_code=429)
        outcome = classify_api_error(error, provider="copilot", model="gpt-4o")
        assert outcome.reason == FailoverReason.rate_limit

    def test_other_error_without_status_code_not_forced_to_rate_limit(self):
        """An error of another class with no status_code must NOT become rate_limit."""
        api_error_cls = type("APIError", (Exception,), {})
        error = api_error_cls("something went wrong")
        error.status_code = None
        outcome = classify_api_error(error, provider="copilot", model="gpt-4o")
        assert outcome.reason != FailoverReason.rate_limit