test_error_classifier.py
"""Tests for agent.error_classifier — structured API error classification."""

import pytest
from agent.error_classifier import (
    ClassifiedError,
    FailoverReason,
    classify_api_error,
    _extract_status_code,
    _extract_error_body,
    _extract_error_code,
    _classify_402,
)


# ── Helper: mock API errors ────────────────────────────────────────────

class MockAPIError(Exception):
    """Simulates an OpenAI SDK APIStatusError.

    Carries ``status_code`` and ``body`` attributes; a falsy ``body``
    (including ``None``) is stored as an empty dict.
    """
    def __init__(self, message, status_code=None, body=None):
        super().__init__(message)
        self.status_code = status_code
        self.body = body or {}


class MockTransportError(Exception):
    """Simulates a transport-level error with a specific type name."""
    pass


# The subclasses below carry no behavior of their own; only their class
# names differ.

class ReadTimeout(MockTransportError):
    pass


class ConnectError(MockTransportError):
    pass


class RemoteProtocolError(MockTransportError):
    pass


class ServerDisconnectedError(MockTransportError):
    pass


# ── Test: FailoverReason enum ──────────────────────────────────────────

class TestFailoverReason:
    """Checks on the ``FailoverReason`` enum's members and value types."""

    def test_all_reasons_have_string_values(self):
        """Every enum member's ``value`` is a ``str``."""
        for reason in FailoverReason:
            assert isinstance(reason.value, str)

    def test_enum_members_exist(self):
        """The set of enum values equals the expected set exactly."""
        expected = {
            "auth", "auth_permanent", "billing", "rate_limit",
            "overloaded", "server_error", "timeout",
            "context_overflow", "payload_too_large", "image_too_large",
            "model_not_found", "format_error",
            "provider_policy_blocked",
            "thinking_signature", "long_context_tier",
            "oauth_long_context_beta_forbidden",
            "unknown",
        }
        actual = {r.value for r in FailoverReason}
        assert expected == actual


# ── Test: ClassifiedError ──────────────────────────────────────────────

class TestClassifiedError:
    """Checks on ``ClassifiedError`` defaults and the ``is_auth`` property."""

    def test_is_auth_property(self):
        """``is_auth`` is True for auth/auth_permanent and False for billing."""
        e1 = ClassifiedError(reason=FailoverReason.auth)
        assert e1.is_auth is True

        e2 = ClassifiedError(reason=FailoverReason.auth_permanent)
        assert e2.is_auth is True

        e3 = ClassifiedError(reason=FailoverReason.billing)
        assert e3.is_auth is False

    def test_defaults(self):
        """Constructing with only ``reason`` yields the expected defaults."""
        e = ClassifiedError(reason=FailoverReason.unknown)
        assert e.retryable is True
        assert e.should_compress is False
        assert e.should_rotate_credential is False
        assert e.should_fallback is False
        assert e.status_code is None
        assert e.message == ""


# ── Test: Status code extraction ───────────────────────────────────────

class TestExtractStatusCode:
    """Checks on ``_extract_status_code``."""

    def test_from_status_code_attr(self):
        """Reads the code from a ``status_code`` attribute."""
        e = MockAPIError("fail", status_code=429)
        assert _extract_status_code(e) == 429

    def test_from_status_attr(self):
        """Reads the code from a ``status`` attribute."""
        class ErrWithStatus(Exception):
            status = 503
        assert _extract_status_code(ErrWithStatus()) == 503

    def test_from_cause_chain(self):
        """Finds the code on ``__cause__`` when the outer error has none."""
        inner = MockAPIError("inner", status_code=401)
        outer = Exception("outer")
        outer.__cause__ = inner
        assert _extract_status_code(outer) == 401

    def test_none_when_missing(self):
        """A plain exception yields ``None``."""
        assert _extract_status_code(Exception("generic")) is None

    def test_rejects_non_http_status(self):
        """Integers outside 100-599 on .status should be ignored."""
        class ErrWeirdStatus(Exception):
            status = 42
        assert _extract_status_code(ErrWeirdStatus()) is None


# ── Test: Error body extraction ────────────────────────────────────────

class TestExtractErrorBody:
    """Checks on ``_extract_error_body``."""

    def test_from_body_attr(self):
        """Returns the dict stored on the ``body`` attribute."""
        e = MockAPIError("fail", body={"error": {"message": "bad"}})
        assert _extract_error_body(e) == {"error": {"message": "bad"}}

    def test_empty_when_no_body(self):
        """A plain exception yields an empty dict."""
        assert _extract_error_body(Exception("generic")) == {}


# ── Test: Error code extraction ────────────────────────────────────────

class TestExtractErrorCode:
    """Checks on ``_extract_error_code``."""

    def test_from_nested_error_code(self):
        """Reads ``body["error"]["code"]``."""
        body = {"error": {"code": "rate_limit_exceeded"}}
        assert _extract_error_code(body) == "rate_limit_exceeded"

    def test_from_nested_error_type(self):
        """Reads ``body["error"]["type"]`` when no code is present."""
        body = {"error": {"type": "invalid_request_error"}}
        assert _extract_error_code(body) == "invalid_request_error"

    def test_from_top_level_code(self):
        """Reads a top-level ``code`` key."""
        body = {"code": "model_not_found"}
        assert _extract_error_code(body) == "model_not_found"

    def test_empty_when_no_code(self):
        """Returns an empty string when neither code nor type is present."""
        assert _extract_error_code({}) == ""
        assert _extract_error_code({"error": {"message": "oops"}}) == ""


# ── Test: 402 disambiguation ───────────────────────────────────────────

class TestClassify402:
    """The critical 402 billing vs rate_limit disambiguation."""

    def test_billing_exhaustion(self):
        """Plain 402 = billing."""
        result = _classify_402(
            "payment required",
            lambda reason, **kw: ClassifiedError(reason=reason, **kw),
        )
        assert result.reason == FailoverReason.billing
        assert result.should_rotate_credential is True

    def test_transient_usage_limit(self):
        """402 with 'usage limit' + 'try again' = rate limit, not billing."""
        result = _classify_402(
            "usage limit exceeded. try again in 5 minutes",
            lambda reason, **kw: ClassifiedError(reason=reason, **kw),
        )
        assert result.reason == FailoverReason.rate_limit
        assert result.should_rotate_credential is True

    def test_quota_with_retry(self):
        """402 with 'quota' + 'retry' = rate limit."""
        result = _classify_402(
            "quota exceeded, please retry after the window resets",
            lambda reason, **kw: ClassifiedError(reason=reason, **kw),
        )
        assert result.reason == FailoverReason.rate_limit

    def test_quota_without_retry(self):
        """402 with just 'quota' but no transient signal = billing."""
        result = _classify_402(
            "quota exceeded",
            lambda reason, **kw: ClassifiedError(reason=reason, **kw),
        )
        assert result.reason == FailoverReason.billing

    def test_insufficient_credits(self):
        """'insufficient credits' message is classified as billing."""
        result = _classify_402(
            "insufficient credits to complete request",
            lambda reason, **kw: ClassifiedError(reason=reason, **kw),
        )
        assert result.reason == FailoverReason.billing


# ── Test: Full classification pipeline ─────────────────────────────────

class TestClassifyApiError:
    """End-to-end classification tests."""

    # ── Auth errors ──

    def test_401_classified_as_auth(self):
        """401 → auth, with credential rotation and fallback flagged."""
        e = MockAPIError("Unauthorized", status_code=401)
        result = classify_api_error(e, provider="openrouter")
        assert result.reason == FailoverReason.auth
        assert result.should_rotate_credential is True
        # 401 is non-retryable on its own — credential rotation runs
        # before the retryability check in the agent loop.
        assert result.retryable is False
        assert result.should_fallback is True

    def test_403_classified_as_auth(self):
        """Plain 403 → auth with fallback flagged."""
        e = MockAPIError("Forbidden", status_code=403)
        result = classify_api_error(e, provider="anthropic")
        assert result.reason == FailoverReason.auth
        assert result.should_fallback is True

    def test_403_key_limit_classified_as_billing(self):
        """OpenRouter 403 'key limit exceeded' is billing, not auth."""
        e = MockAPIError("Key limit exceeded for this key", status_code=403)
        result = classify_api_error(e, provider="openrouter")
        assert result.reason == FailoverReason.billing
        assert result.should_rotate_credential is True
        assert result.should_fallback is True

    def test_403_spending_limit_classified_as_billing(self):
        """403 'spending limit reached' → billing."""
        e = MockAPIError("spending limit reached", status_code=403)
        result = classify_api_error(e, provider="openrouter")
        assert result.reason == FailoverReason.billing

    # ── Billing ──

    def test_402_plain_billing(self):
        """Plain 402 → billing, not retryable."""
        e = MockAPIError("Payment Required", status_code=402)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.billing
        assert result.retryable is False

    def test_402_transient_usage_limit(self):
        """402 with 'usage limit' + 'try again' → retryable rate_limit."""
        e = MockAPIError("usage limit exceeded, try again later", status_code=402)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.retryable is True

    # ── Rate limit ──

    def test_429_rate_limit(self):
        """429 → rate_limit with fallback flagged."""
        e = MockAPIError("Too Many Requests", status_code=429)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.should_fallback is True

    def test_alibaba_rate_increased_too_quickly(self):
        """Alibaba/DashScope returns a unique throttling message.

        Port from anomalyco/opencode#21355.
        """
        msg = (
            "Upstream error from Alibaba: Request rate increased too quickly. "
            "To ensure system stability, please adjust your client logic to "
            "scale requests more smoothly over time."
        )
        e = MockAPIError(msg, status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.retryable is True
        assert result.should_rotate_credential is True

    # ── Server errors ──

    def test_500_server_error(self):
        """500 → retryable server_error."""
        e = MockAPIError("Internal Server Error", status_code=500)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.server_error
        assert result.retryable is True

    def test_502_server_error(self):
        """502 → server_error."""
        e = MockAPIError("Bad Gateway", status_code=502)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.server_error

    def test_503_overloaded(self):
        """503 → overloaded."""
        e = MockAPIError("Service Unavailable", status_code=503)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.overloaded

    def test_529_anthropic_overloaded(self):
        """529 → overloaded."""
        e = MockAPIError("Overloaded", status_code=529)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.overloaded

    # ── Model not found ──

    def test_404_model_not_found(self):
        """404 'model not found' → model_not_found, fallback, not retryable."""
        e = MockAPIError("model not found", status_code=404)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.model_not_found
        assert result.should_fallback is True
        assert result.retryable is False

    def test_404_generic(self):
        # Generic 404 with no "model not found" signal — common for local
        # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths. Treat
        # as unknown (retryable) so the real error surfaces, rather than
        # claiming the model is missing and silently falling back.
        e = MockAPIError("Not Found", status_code=404)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.unknown
        assert result.retryable is True
        assert result.should_fallback is False

    # ── Provider policy-block (OpenRouter privacy/guardrail) ──

    def test_404_openrouter_policy_blocked(self):
        # Real OpenRouter error when the user's account privacy setting
        # excludes the only endpoint serving a model (e.g. DeepSeek V4 Pro
        # which is hosted only by DeepSeek, and their endpoint may log
        # inputs). Must NOT classify as model_not_found — the model
        # exists, falling back won't help (same account setting applies),
        # and the error body already tells the user where to fix it.
        e = MockAPIError(
            "No endpoints available matching your guardrail restrictions "
            "and data policy. Configure: https://openrouter.ai/settings/privacy",
            status_code=404,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.provider_policy_blocked
        assert result.retryable is False
        assert result.should_fallback is False

    def test_400_openrouter_policy_blocked(self):
        # Defense-in-depth: if OpenRouter ever returns this as 400 instead
        # of 404, still classify it distinctly rather than as format_error
        # or model_not_found.
        e = MockAPIError(
            "No endpoints available matching your data policy",
            status_code=400,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.provider_policy_blocked
        assert result.retryable is False
        assert result.should_fallback is False

    def test_message_only_openrouter_policy_blocked(self):
        # No status code — classifier should still catch the fingerprint
        # via the message-pattern fallback.
        e = Exception(
            "No endpoints available matching your guardrail restrictions "
            "and data policy"
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.provider_policy_blocked

    def test_404_model_not_found_still_works(self):
        # Regression guard: the new policy-block check must not swallow
        # genuine model_not_found 404s.
        e = MockAPIError(
            "openrouter/nonexistent-model is not a valid model ID",
            status_code=404,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.model_not_found
        assert result.should_fallback is True

    # ── Payload too large ──

    def test_413_payload_too_large(self):
        """413 → payload_too_large with compression flagged."""
        e = MockAPIError("Request Entity Too Large", status_code=413)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.payload_too_large
        assert result.should_compress is True

    # ── Context overflow ──

    def test_400_context_length(self):
        """400 'context length exceeded' → context_overflow + compress."""
        e = MockAPIError("context length exceeded: 250000 > 200000", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow
        assert result.should_compress is True

    def test_400_too_many_tokens(self):
        """400 mentioning 'too many tokens' → context_overflow."""
        e = MockAPIError("This model's maximum context is 128000 tokens, too many tokens", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_400_prompt_too_long(self):
        """400 'prompt is too long' → context_overflow."""
        e = MockAPIError("prompt is too long: 300000 tokens > 200000 maximum", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_400_generic_large_session(self):
        """Generic 400 with large session → context overflow heuristic."""
        e = MockAPIError(
            "Error",
            status_code=400,
            body={"error": {"message": "Error"}},
        )
        result = classify_api_error(e, approx_tokens=100000, context_length=200000)
        assert result.reason == FailoverReason.context_overflow

    def test_400_generic_small_session_is_format_error(self):
        """Generic 400 with small session → format error, not context overflow."""
        e = MockAPIError(
            "Error",
            status_code=400,
            body={"error": {"message": "Error"}},
        )
        result = classify_api_error(e, approx_tokens=1000, context_length=200000)
        assert result.reason == FailoverReason.format_error

    def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self):
        """Large-context sessions should not overflow solely due to message count."""
        e = MockAPIError(
            "Error",
            status_code=400,
            body={"error": {"message": "Error"}},
        )
        result = classify_api_error(
            e,
            provider="openai-codex",
            model="gpt-5.5",
            approx_tokens=74320,
            context_length=1_000_000,
            num_messages=432,
        )
        assert result.reason == FailoverReason.format_error
        assert result.should_compress is False

    # ── Server disconnect + large session ──

    def test_disconnect_large_session_context_overflow(self):
        """Server disconnect with large session → context overflow."""
        e = Exception("server disconnected without sending complete message")
        result = classify_api_error(e, approx_tokens=150000, context_length=200000)
        assert result.reason == FailoverReason.context_overflow
        assert result.should_compress is True

    def test_disconnect_small_session_timeout(self):
        """Server disconnect with small session → timeout."""
        e = Exception("server disconnected without sending complete message")
        result = classify_api_error(e, approx_tokens=5000, context_length=200000)
        assert result.reason == FailoverReason.timeout

    def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self):
        """Large-context disconnects should not overflow solely due to message count."""
        e = Exception("server disconnected without sending complete message")
        result = classify_api_error(
            e,
            provider="openai-codex",
            model="gpt-5.5",
            approx_tokens=74320,
            context_length=1_000_000,
            num_messages=432,
        )
        assert result.reason == FailoverReason.timeout
        assert result.should_compress is False

    # ── Provider-specific: Anthropic thinking signature ──

    def test_anthropic_thinking_signature(self):
        """Anthropic 400 'thinking block has invalid signature' → retryable thinking_signature."""
        e = MockAPIError(
            "thinking block has invalid signature",
            status_code=400,
        )
        result = classify_api_error(e, provider="anthropic")
        assert result.reason == FailoverReason.thinking_signature
        assert result.retryable is True

    def test_non_anthropic_400_with_signature_not_classified_as_thinking(self):
        """400 with 'signature' but from non-Anthropic → format error."""
        e = MockAPIError("invalid signature", status_code=400)
        result = classify_api_error(e, provider="openrouter", approx_tokens=0)
        # Without "thinking" in the message, it shouldn't be thinking_signature
        assert result.reason != FailoverReason.thinking_signature

    # ── Provider-specific: Anthropic long-context tier ──

    def test_anthropic_long_context_tier(self):
        """429 'extra usage' + 'long context' → long_context_tier + compress."""
        e = MockAPIError(
            "Extra usage is required for long context requests over 200k tokens",
            status_code=429,
        )
        result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4")
        assert result.reason == FailoverReason.long_context_tier
        assert result.should_compress is True

    def test_normal_429_not_long_context(self):
        """Normal 429 without 'extra usage' + 'long context' → rate_limit."""
        e = MockAPIError("Too Many Requests", status_code=429)
        result = classify_api_error(e, provider="anthropic")
        assert result.reason == FailoverReason.rate_limit

    # ── Provider-specific: Anthropic OAuth 1M-context beta forbidden ──

    def test_anthropic_oauth_1m_beta_forbidden(self):
        """400 + 'long context beta is not yet available for this subscription'
        → oauth_long_context_beta_forbidden (retryable, no compression)."""
        e = MockAPIError(
            "The long context beta is not yet available for this subscription.",
            status_code=400,
        )
        result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
        assert result.reason == FailoverReason.oauth_long_context_beta_forbidden
        assert result.retryable is True
        assert result.should_compress is False

    def test_anthropic_oauth_1m_beta_forbidden_does_not_collide_with_tier_gate(self):
        """The 429 'extra usage' + 'long context' tier gate keeps its own
        classification even though its message mentions 'long context'."""
        e = MockAPIError(
            "Extra usage is required for long context requests over 200k tokens",
            status_code=429,
        )
        result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
        assert result.reason == FailoverReason.long_context_tier

    def test_400_without_beta_phrase_is_not_1m_beta_forbidden(self):
        """A generic 400 that happens to mention 'long context' but not the
        exact beta-availability phrase should not be misclassified."""
        e = MockAPIError(
            "long context window exceeded",
            status_code=400,
        )
        result = classify_api_error(e, provider="anthropic")
        assert result.reason != FailoverReason.oauth_long_context_beta_forbidden

    # ── Transport errors ──

    def test_read_timeout(self):
        """An exception type named ReadTimeout → retryable timeout."""
        e = ReadTimeout("Read timed out")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.timeout
        assert result.retryable is True

    def test_connect_error(self):
        """An exception type named ConnectError → timeout."""
        e = ConnectError("Connection refused")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.timeout

    def test_connection_error_builtin(self):
        """Builtin ConnectionError → timeout."""
        e = ConnectionError("Connection reset by peer")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.timeout

    def test_timeout_error_builtin(self):
        """Builtin TimeoutError → timeout."""
        e = TimeoutError("timed out")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.timeout

    # ── Error code classification ──

    def test_error_code_resource_exhausted(self):
        """Body code 'resource_exhausted' with no status code → rate_limit."""
        e = MockAPIError(
            "Resource exhausted",
            body={"error": {"code": "resource_exhausted", "message": "Too many requests"}},
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit

    def test_error_code_model_not_found(self):
        """Body code 'model_not_found' with no status code → model_not_found."""
        e = MockAPIError(
            "Model not available",
            body={"error": {"code": "model_not_found"}},
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.model_not_found

    def test_error_code_context_length_exceeded(self):
        """Body code 'context_length_exceeded' with no status code → context_overflow."""
        e = MockAPIError(
            "Context too large",
            body={"error": {"code": "context_length_exceeded"}},
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    # ── Message-only patterns (no status code) ──

    def test_message_billing_pattern(self):
        """'insufficient credits' message alone → billing."""
        e = Exception("insufficient credits to complete this request")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.billing

    def test_message_rate_limit_pattern(self):
        """'rate limit' message alone → rate_limit."""
        e = Exception("rate limit reached for this model")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit

    def test_message_auth_pattern(self):
        """'invalid api key' message alone → auth."""
        e = Exception("invalid api key provided")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.auth

    def test_message_model_not_found_pattern(self):
        """'is not a valid model' message alone → model_not_found."""
        e = Exception("gpt-99 is not a valid model")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.model_not_found

    def test_message_context_overflow_pattern(self):
        """'maximum context length exceeded' message alone → context_overflow."""
        e = Exception("maximum context length exceeded")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    # ── Message-only usage limit disambiguation (no status code) ──

    def test_message_usage_limit_transient_is_rate_limit(self):
        """'usage limit' + 'try again' with no status code → rate_limit, not billing."""
        e = Exception("usage limit exceeded, try again in 5 minutes")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.retryable is True
        assert result.should_rotate_credential is True
        assert result.should_fallback is True

    def test_message_usage_limit_no_retry_signal_is_billing(self):
        """'usage limit' with no transient signal and no status code → billing."""
        e = Exception("usage limit reached")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.billing
        assert result.retryable is False
        assert result.should_rotate_credential is True

    def test_message_quota_with_reset_window_is_rate_limit(self):
        """'quota' + 'resets at' with no status code → rate_limit."""
        e = Exception("quota exceeded, resets at midnight UTC")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.retryable is True

    def test_message_limit_exceeded_with_wait_is_rate_limit(self):
        """'limit exceeded' + 'wait' with no status code → rate_limit."""
        e = Exception("key limit exceeded, please wait before retrying")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.rate_limit
        assert result.retryable is True

    # ── Unknown / fallback ──

    def test_generic_exception_is_unknown(self):
        """An unrecognised message with no status code → retryable unknown."""
        e = Exception("something weird happened")
        result = classify_api_error(e)
        assert result.reason == FailoverReason.unknown
        assert result.retryable is True

    # ── Format error ──

    def test_400_descriptive_format_error(self):
        """400 with descriptive message (not context overflow) → format error."""
        e = MockAPIError(
            "Invalid value for parameter 'temperature': must be between 0 and 2",
            status_code=400,
            body={"error": {"message": "Invalid value for parameter 'temperature': must be between 0 and 2"}},
        )
        result = classify_api_error(e, approx_tokens=1000)
        assert result.reason == FailoverReason.format_error
        assert result.retryable is False

    def test_422_format_error(self):
        """422 → format_error, not retryable."""
        e = MockAPIError("Unprocessable Entity", status_code=422)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.format_error
        assert result.retryable is False

    def test_400_flat_body_descriptive_not_context_overflow(self):
        """Responses API flat body with descriptive error + large session → format error.

        The Codex Responses API returns errors in flat body format:
        {"message": "...", "type": "..."} without an "error" wrapper.
        A descriptive 400 must NOT be misclassified as context overflow
        just because the session is large.
        """
        e = MockAPIError(
            "Invalid 'input[index].name': string does not match pattern.",
            status_code=400,
            body={"message": "Invalid 'input[index].name': string does not match pattern.",
                  "type": "invalid_request_error"},
        )
        result = classify_api_error(e, approx_tokens=200000, context_length=400000, num_messages=500)
        assert result.reason == FailoverReason.format_error
        assert result.retryable is False

    def test_400_flat_body_generic_large_session_still_context_overflow(self):
        """Flat body with generic 'Error' message + large session → context overflow.

        Regression: the flat-body fallback must not break the existing heuristic
        for genuinely generic errors from providers that use flat bodies.
        """
        e = MockAPIError(
            "Error",
            status_code=400,
            body={"message": "Error"},
        )
        result = classify_api_error(e, approx_tokens=100000, context_length=200000)
        assert result.reason == FailoverReason.context_overflow

    # ── Peer closed + large session ──

    def test_peer_closed_large_session(self):
        """'peer closed connection' with a large session → context_overflow."""
        e = Exception("peer closed connection without sending complete message")
        result = classify_api_error(e, approx_tokens=130000, context_length=200000)
        assert result.reason == FailoverReason.context_overflow

    # ── Chinese error messages ──

    def test_chinese_context_overflow(self):
        """400 with the Chinese message used here → context_overflow."""
        e = MockAPIError("超过最大长度限制", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    # ── vLLM / local inference server error messages ──

    def test_vllm_max_model_len_overflow(self):
        """vLLM's 'exceeds the max_model_len' error → context_overflow."""
        e = MockAPIError(
            "The engine prompt length 1327246 exceeds the max_model_len 131072. "
            "Please reduce prompt.",
            status_code=400,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_vllm_prompt_length_exceeds(self):
        """vLLM prompt length error → context_overflow."""
        e = MockAPIError(
            "prompt length 200000 exceeds maximum model length 131072",
            status_code=400,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_vllm_input_too_long(self):
        """vLLM 'input is too long' error → context_overflow."""
        e = MockAPIError("input is too long for model", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_ollama_context_length_exceeded(self):
        """Ollama 'context length exceeded' error → context_overflow."""
        e = MockAPIError("context length exceeded", status_code=400)
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    def test_llamacpp_slot_context(self):
        """llama.cpp / llama-server 'slot context' error → context_overflow."""
        e = MockAPIError(
            "slot context: 4096 tokens, prompt 8192 tokens — not enough space",
            status_code=400,
        )
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

    # ── Result metadata ──

    def test_provider_and_model_in_result(self):
        """Provider, model and status code are echoed on the result."""
        e = MockAPIError("fail", status_code=500)
        result = classify_api_error(e, provider="openrouter", model="gpt-5")
        assert result.provider == "openrouter"
        assert result.model == "gpt-5"
        assert result.status_code == 500

    def test_message_extracted(self):
        """``result.message`` is the body's error message, not ``str(e)``."""
        e = MockAPIError(
            "outer",
            status_code=500,
            body={"error": {"message": "Internal server error occurred"}},
        )
        result = classify_api_error(e)
        assert result.message == "Internal server error occurred"


# ── Test: Adversarial / edge cases (from
live testing) ───────────────── 773 774 class TestAdversarialEdgeCases: 775 """Edge cases discovered during live testing with real SDK objects.""" 776 777 def test_empty_exception_message(self): 778 result = classify_api_error(Exception("")) 779 assert result.reason == FailoverReason.unknown 780 assert result.retryable is True 781 782 def test_500_with_none_body(self): 783 e = MockAPIError("fail", status_code=500, body=None) 784 result = classify_api_error(e) 785 assert result.reason == FailoverReason.server_error 786 787 def test_non_dict_body(self): 788 """Some providers return strings instead of JSON.""" 789 class StringBodyError(Exception): 790 status_code = 400 791 body = "just a string" 792 result = classify_api_error(StringBodyError("bad")) 793 assert result.reason == FailoverReason.format_error 794 795 def test_list_body(self): 796 class ListBodyError(Exception): 797 status_code = 500 798 body = [{"error": "something"}] 799 result = classify_api_error(ListBodyError("server error")) 800 assert result.reason == FailoverReason.server_error 801 802 def test_circular_cause_chain(self): 803 """Must not infinite-loop on circular __cause__.""" 804 e = Exception("circular") 805 e.__cause__ = e 806 result = classify_api_error(e) 807 assert result.reason == FailoverReason.unknown 808 809 def test_three_level_cause_chain(self): 810 inner = MockAPIError("inner", status_code=429) 811 middle = Exception("middle") 812 middle.__cause__ = inner 813 outer = RuntimeError("outer") 814 outer.__cause__ = middle 815 result = classify_api_error(outer) 816 assert result.status_code == 429 817 assert result.reason == FailoverReason.rate_limit 818 819 def test_400_with_rate_limit_text(self): 820 """Some providers send rate limits as 400 instead of 429.""" 821 e = MockAPIError( 822 "rate limit policy", 823 status_code=400, 824 body={"error": {"message": "rate limit exceeded on this model"}}, 825 ) 826 result = classify_api_error(e, provider="openrouter") 827 assert result.reason == 
FailoverReason.rate_limit 828 829 def test_400_with_billing_text(self): 830 """Some providers send billing errors as 400.""" 831 e = MockAPIError( 832 "billing", 833 status_code=400, 834 body={"error": {"message": "insufficient credits for this request"}}, 835 ) 836 result = classify_api_error(e) 837 assert result.reason == FailoverReason.billing 838 839 def test_200_with_error_body(self): 840 """200 status with error in body — should be unknown, not crash.""" 841 class WeirdSuccess(Exception): 842 status_code = 200 843 body = {"error": {"message": "loading"}} 844 result = classify_api_error(WeirdSuccess("model loading")) 845 assert result.reason == FailoverReason.unknown 846 847 def test_ollama_context_size_exceeded(self): 848 e = MockAPIError( 849 "Error", 850 status_code=400, 851 body={"error": {"message": "context size has been exceeded"}}, 852 ) 853 result = classify_api_error(e, provider="ollama") 854 assert result.reason == FailoverReason.context_overflow 855 856 def test_connection_refused_error(self): 857 e = ConnectionRefusedError("Connection refused: localhost:11434") 858 result = classify_api_error(e, provider="ollama") 859 assert result.reason == FailoverReason.timeout 860 861 def test_body_message_enrichment(self): 862 """Body message must be included in pattern matching even when 863 str(error) doesn't contain it (OpenAI SDK APIStatusError).""" 864 e = MockAPIError( 865 "Usage limit", # str(e) = "usage limit" 866 status_code=402, 867 body={"error": {"message": "Usage limit reached, try again in 5 minutes"}}, 868 ) 869 result = classify_api_error(e) 870 # "try again" is only in body, not in str(e) 871 assert result.reason == FailoverReason.rate_limit 872 873 def test_disconnect_pattern_ordering(self): 874 """Disconnect + large session must beat generic transport catch.""" 875 class FakeRemoteProtocol(Exception): 876 pass 877 # Type name isn't in _TRANSPORT_ERROR_TYPES but message has disconnect pattern 878 e = Exception("peer closed connection without 
sending complete message") 879 result = classify_api_error(e, approx_tokens=150000, context_length=200000) 880 assert result.reason == FailoverReason.context_overflow 881 assert result.should_compress is True 882 883 def test_credit_balance_too_low(self): 884 e = MockAPIError( 885 "Credits low", 886 status_code=402, 887 body={"error": {"message": "Your credit balance is too low"}}, 888 ) 889 result = classify_api_error(e, provider="anthropic") 890 assert result.reason == FailoverReason.billing 891 892 def test_deepseek_402_chinese(self): 893 """Chinese billing message should still match billing patterns.""" 894 # "余额不足" doesn't match English billing patterns, but 402 defaults to billing 895 e = MockAPIError("余额不足", status_code=402) 896 result = classify_api_error(e, provider="deepseek") 897 assert result.reason == FailoverReason.billing 898 899 def test_openrouter_wrapped_context_overflow_in_metadata_raw(self): 900 """OpenRouter wraps provider errors in metadata.raw JSON string.""" 901 e = MockAPIError( 902 "Provider returned error", 903 status_code=400, 904 body={ 905 "error": { 906 "message": "Provider returned error", 907 "code": 400, 908 "metadata": { 909 "raw": '{"error":{"message":"context length exceeded: 50000 > 32768"}}' 910 } 911 } 912 }, 913 ) 914 result = classify_api_error(e, provider="openrouter", approx_tokens=10000) 915 assert result.reason == FailoverReason.context_overflow 916 assert result.should_compress is True 917 918 def test_openrouter_wrapped_rate_limit_in_metadata_raw(self): 919 e = MockAPIError( 920 "Provider returned error", 921 status_code=400, 922 body={ 923 "error": { 924 "message": "Provider returned error", 925 "metadata": { 926 "raw": '{"error":{"message":"Rate limit exceeded. 
Please retry after 30s."}}' 927 } 928 } 929 }, 930 ) 931 result = classify_api_error(e, provider="openrouter") 932 assert result.reason == FailoverReason.rate_limit 933 934 def test_thinking_signature_via_openrouter(self): 935 """Thinking signature errors proxied through OpenRouter must be caught.""" 936 e = MockAPIError( 937 "thinking block has invalid signature", 938 status_code=400, 939 ) 940 # provider is openrouter, not anthropic — old code missed this 941 result = classify_api_error(e, provider="openrouter", model="anthropic/claude-sonnet-4") 942 assert result.reason == FailoverReason.thinking_signature 943 944 def test_generic_400_large_by_message_count(self): 945 """Many small messages (>80) should trigger context overflow heuristic.""" 946 e = MockAPIError( 947 "Error", 948 status_code=400, 949 body={"error": {"message": "Error"}}, 950 ) 951 # Low token count but high message count 952 result = classify_api_error( 953 e, approx_tokens=5000, context_length=200000, num_messages=100, 954 ) 955 assert result.reason == FailoverReason.context_overflow 956 957 def test_disconnect_large_by_message_count(self): 958 """Server disconnect with 200+ messages should trigger context overflow.""" 959 e = Exception("server disconnected without sending complete message") 960 result = classify_api_error( 961 e, approx_tokens=5000, context_length=200000, num_messages=250, 962 ) 963 assert result.reason == FailoverReason.context_overflow 964 965 def test_openrouter_wrapped_model_not_found_in_metadata_raw(self): 966 e = MockAPIError( 967 "Provider returned error", 968 status_code=400, 969 body={ 970 "error": { 971 "message": "Provider returned error", 972 "metadata": { 973 "raw": '{"error":{"message":"The model gpt-99 does not exist"}}' 974 } 975 } 976 }, 977 ) 978 result = classify_api_error(e, provider="openrouter") 979 assert result.reason == FailoverReason.model_not_found 980 981 # ── Regression: dict-typed message field (Issue #11233) ── 982 983 def 
test_pydantic_dict_message_no_crash(self): 984 """Pydantic validation errors return message as dict, not string. 985 986 Regression: classify_api_error must not crash when body['message'] 987 is a dict (e.g. {"detail": [...]} from FastAPI/Pydantic). The 988 'or ""' fallback only handles None/falsy values — a non-empty 989 dict is truthy and passed to .lower(), causing AttributeError. 990 """ 991 e = MockAPIError( 992 "Unprocessable Entity", 993 status_code=422, 994 body={ 995 "object": "error", 996 "message": { 997 "detail": [ 998 { 999 "type": "extra_forbidden", 1000 "loc": ["body", "think"], 1001 "msg": "Extra inputs are not permitted", 1002 } 1003 ] 1004 }, 1005 }, 1006 ) 1007 result = classify_api_error(e) 1008 assert result.reason == FailoverReason.format_error 1009 assert result.status_code == 422 1010 assert result.retryable is False 1011 1012 def test_nested_error_dict_message_no_crash(self): 1013 """Nested body['error']['message'] as dict must not crash. 1014 1015 Some providers wrap Pydantic errors in an 'error' object. 1016 """ 1017 e = MockAPIError( 1018 "Validation error", 1019 status_code=400, 1020 body={ 1021 "error": { 1022 "message": { 1023 "detail": [ 1024 {"type": "missing", "loc": ["body", "required"]} 1025 ] 1026 } 1027 } 1028 }, 1029 ) 1030 result = classify_api_error(e, approx_tokens=1000) 1031 assert result.reason == FailoverReason.format_error 1032 assert result.status_code == 400 1033 1034 def test_metadata_raw_dict_message_no_crash(self): 1035 """OpenRouter metadata.raw with dict message must not crash.""" 1036 e = MockAPIError( 1037 "Provider error", 1038 status_code=400, 1039 body={ 1040 "error": { 1041 "message": "Provider error", 1042 "metadata": { 1043 "raw": '{"error":{"message":{"detail":[{"type":"invalid"}]}}}' 1044 } 1045 } 1046 }, 1047 ) 1048 result = classify_api_error(e) 1049 assert result.reason == FailoverReason.format_error 1050 1051 # Broader non-string type guards — defense against other provider quirks. 
1052 1053 def test_list_message_no_crash(self): 1054 """Some providers return message as a list of error entries.""" 1055 e = MockAPIError( 1056 "validation", 1057 status_code=400, 1058 body={"message": [{"msg": "field required"}]}, 1059 ) 1060 result = classify_api_error(e) 1061 assert result is not None 1062 1063 def test_int_message_no_crash(self): 1064 """Any non-string type must be coerced safely.""" 1065 e = MockAPIError("server error", status_code=500, body={"message": 42}) 1066 result = classify_api_error(e) 1067 assert result is not None 1068 1069 def test_none_message_still_works(self): 1070 """Regression: None fallback (the 'or \"\"' path) must still work.""" 1071 e = MockAPIError("server error", status_code=500, body={"message": None}) 1072 result = classify_api_error(e) 1073 assert result is not None 1074 1075 1076 # ── Test: SSL/TLS transient errors ───────────────────────────────────── 1077 1078 class TestSSLTransientPatterns: 1079 """SSL/TLS alerts mid-stream should retry as timeout, not unknown, and 1080 should NOT trigger context compression even on a large session. 1081 1082 Motivation: OpenSSL 3.x changed TLS alert error code format 1083 (`SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`), 1084 breaking string-exact matching in downstream retry logic. We match 1085 stable substrings instead. 
1086 """ 1087 1088 def test_bad_record_mac_classifies_as_timeout(self): 1089 """OpenSSL 3.x mid-stream bad record mac alert.""" 1090 e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)") 1091 result = classify_api_error(e) 1092 assert result.reason == FailoverReason.timeout 1093 assert result.retryable is True 1094 assert result.should_compress is False 1095 1096 def test_openssl_3x_format_classifies_as_timeout(self): 1097 """New format `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` still matches 1098 because we key on both space- and underscore-separated forms of 1099 the stable `bad_record_mac` token.""" 1100 e = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming") 1101 result = classify_api_error(e) 1102 assert result.reason == FailoverReason.timeout 1103 assert result.retryable is True 1104 assert result.should_compress is False 1105 1106 def test_tls_alert_internal_error_classifies_as_timeout(self): 1107 e = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error") 1108 result = classify_api_error(e) 1109 assert result.reason == FailoverReason.timeout 1110 assert result.retryable is True 1111 assert result.should_compress is False 1112 1113 def test_ssl_handshake_failure_classifies_as_timeout(self): 1114 e = Exception("ssl handshake failure during mid-stream") 1115 result = classify_api_error(e) 1116 assert result.reason == FailoverReason.timeout 1117 assert result.retryable is True 1118 1119 def test_ssl_prefix_classifies_as_timeout(self): 1120 """Python's generic '[SSL: XYZ]' prefix from the ssl module.""" 1121 e = Exception("[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol") 1122 result = classify_api_error(e) 1123 assert result.reason == FailoverReason.timeout 1124 assert result.retryable is True 1125 1126 def test_ssl_alert_on_large_session_does_not_compress(self): 1127 """Critical: SSL alerts on big contexts must NOT trigger context 1128 compression — compression is expensive and 
won't fix a transport 1129 hiccup. This is why _SSL_TRANSIENT_PATTERNS is separate from 1130 _SERVER_DISCONNECT_PATTERNS. 1131 """ 1132 e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac") 1133 result = classify_api_error( 1134 e, 1135 approx_tokens=180000, # 90% of a 200k-context window 1136 context_length=200000, 1137 num_messages=300, 1138 ) 1139 assert result.reason == FailoverReason.timeout 1140 assert result.should_compress is False 1141 1142 def test_plain_disconnect_on_large_session_still_compresses(self): 1143 """Regression guard: the context-overflow-via-disconnect path 1144 (non-SSL disconnects on large sessions) must still trigger 1145 compression. Only SSL-specific disconnects skip it. 1146 """ 1147 e = Exception("Server disconnected without sending a response") 1148 result = classify_api_error( 1149 e, 1150 approx_tokens=180000, 1151 context_length=200000, 1152 num_messages=300, 1153 ) 1154 assert result.reason == FailoverReason.context_overflow 1155 assert result.should_compress is True 1156 1157 def test_real_ssl_error_type_classifies_as_timeout(self): 1158 """Real ssl.SSLError instance — the type name alone (not message) 1159 should route to the transport bucket.""" 1160 import ssl 1161 e = ssl.SSLError("arbitrary ssl error") 1162 result = classify_api_error(e) 1163 assert result.reason == FailoverReason.timeout 1164 assert result.retryable is True 1165 1166 # ── Test: RateLimitError without status_code (Copilot/GitHub Models) ────────── 1167 1168 class TestRateLimitErrorWithoutStatusCode: 1169 """Regression tests for the Copilot/GitHub Models edge case where the 1170 OpenAI SDK raises RateLimitError but does not populate .status_code.""" 1171 1172 def _make_rate_limit_error(self, status_code=None): 1173 """Create an exception whose class name is 'RateLimitError' with 1174 an optionally missing status_code, mirroring the OpenAI SDK shape.""" 1175 cls = type("RateLimitError", (Exception,), {}) 1176 e = cls("You have exceeded your rate 
limit.") 1177 e.status_code = status_code # None simulates the Copilot case 1178 return e 1179 1180 def test_rate_limit_error_without_status_code_classified_as_rate_limit(self): 1181 """RateLimitError with status_code=None must classify as rate_limit.""" 1182 e = self._make_rate_limit_error(status_code=None) 1183 result = classify_api_error(e, provider="copilot", model="gpt-4o") 1184 assert result.reason == FailoverReason.rate_limit 1185 1186 def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self): 1187 """RateLimitError that does set status_code=429 still classifies correctly.""" 1188 e = self._make_rate_limit_error(status_code=429) 1189 result = classify_api_error(e, provider="copilot", model="gpt-4o") 1190 assert result.reason == FailoverReason.rate_limit 1191 1192 def test_other_error_without_status_code_not_forced_to_rate_limit(self): 1193 """A non-RateLimitError with missing status_code must NOT be forced to 429.""" 1194 cls = type("APIError", (Exception,), {}) 1195 e = cls("something went wrong") 1196 e.status_code = None 1197 result = classify_api_error(e, provider="copilot", model="gpt-4o") 1198 assert result.reason != FailoverReason.rate_limit