# test_long_context_tier_429.py
"""Tests for Anthropic Sonnet long-context tier 429 handling.

When Claude Max users without "extra usage" hit the 1M context tier
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
context requests." This is NOT a transient rate limit — the agent should
reduce context_length to 200k and compress instead of retrying.

Only Sonnet is affected — Opus 1M is general access.
"""

import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch


# Context window (in tokens) the tests expect after a long-context tier
# rejection.
_REDUCED_CONTEXT_LENGTH = 200_000


def _matches_long_context_tier_error(status_code, error_msg, model):
    """Return True when a response looks like the long-context tier 429.

    This is the single copy of the heuristic used by every test in this
    module, so the detection tests and the error-path tests cannot drift
    apart.

    Args:
        status_code: HTTP status code of the failed request.
        error_msg: Error text; matched case-insensitively.
        model: Model identifier; matched case-insensitively.

    Returns:
        bool: True only for status 429 whose message contains both
        "extra usage" and "long context" and whose model id contains
        "sonnet".
    """
    message = error_msg.lower()
    return (
        status_code == 429
        and "extra usage" in message
        and "long context" in message
        and "sonnet" in model.lower()
    )


def _reduce_context_for_long_context_tier(
    compressor, reduced_ctx=_REDUCED_CONTEXT_LENGTH
):
    """Shrink *compressor* to *reduced_ctx* tokens if it is currently larger.

    Args:
        compressor: Object exposing ``context_length``,
            ``threshold_percent``, ``threshold_tokens``,
            ``_context_probed`` and ``_context_probe_persistable``.
        reduced_ctx: Target context length in tokens.

    Returns:
        bool: True when the compressor was modified, False when it was
        already at or below *reduced_ctx* and was left untouched.
    """
    if compressor.context_length <= reduced_ctx:
        return False

    compressor.context_length = reduced_ctx
    compressor.threshold_tokens = int(reduced_ctx * compressor.threshold_percent)
    compressor._context_probed = True
    # Must NOT persist — subscription tier, not model capability
    compressor._context_probe_persistable = False
    return True


# ---------------------------------------------------------------------------
# Detection logic
# ---------------------------------------------------------------------------


class TestLongContextTierDetection:
    """Verify the detection heuristic matches the Anthropic error."""

    @staticmethod
    def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
        """Apply the shared heuristic, defaulting to a Sonnet model id."""
        return _matches_long_context_tier_error(status_code, error_msg, model)

    def test_matches_anthropic_error(self):
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
        )

    def test_matches_lowercase(self):
        assert self._is_long_context_tier_error(
            429,
            "extra usage is required for long context requests.",
        )

    def test_matches_openrouter_model_id(self):
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="anthropic/claude-sonnet-4.6",
        )

    def test_matches_nous_model_id(self):
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="claude-sonnet-4-6",
        )

    def test_rejects_opus(self):
        """Opus 1M is general access — should NOT trigger reduction."""
        assert not self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="claude-opus-4.6",
        )

    def test_rejects_opus_openrouter(self):
        assert not self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="anthropic/claude-opus-4.6",
        )

    def test_rejects_normal_429(self):
        assert not self._is_long_context_tier_error(
            429,
            "Rate limit exceeded. Please retry after 30 seconds.",
        )

    def test_rejects_wrong_status(self):
        assert not self._is_long_context_tier_error(
            400,
            "Extra usage is required for long context requests.",
        )

    def test_rejects_partial_match(self):
        """Both 'extra usage' AND 'long context' must be present."""
        assert not self._is_long_context_tier_error(
            429, "extra usage required"
        )
        assert not self._is_long_context_tier_error(
            429, "long context requests not supported"
        )


# ---------------------------------------------------------------------------
# Context reduction
# ---------------------------------------------------------------------------


class TestContextReduction:
    """When the long-context tier error fires, context_length should
    drop to 200k and the reduced flag should be set correctly."""

    def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
        """Build a stand-in compressor with un-probed context state."""
        return SimpleNamespace(
            context_length=context_length,
            threshold_percent=threshold_percent,
            threshold_tokens=int(context_length * threshold_percent),
            _context_probed=False,
            _context_probe_persistable=False,
        )

    def test_reduces_1m_to_200k(self):
        comp = self._make_compressor(1_000_000)

        assert _reduce_context_for_long_context_tier(comp)

        assert comp.context_length == 200_000
        assert comp.threshold_tokens == 100_000
        assert comp._context_probed is True
        # Must NOT persist — subscription tier, not model capability
        assert comp._context_probe_persistable is False

    def test_no_reduction_when_already_200k(self):
        comp = self._make_compressor(200_000)
        original = comp.context_length
        state_before = dict(vars(comp))

        assert not _reduce_context_for_long_context_tier(comp)

        assert comp.context_length == original  # unchanged
        # The no-op path must leave the threshold and probe flags alone too.
        assert vars(comp) == state_before

    def test_no_reduction_when_below_200k(self):
        comp = self._make_compressor(128_000)
        original = comp.context_length
        state_before = dict(vars(comp))

        assert not _reduce_context_for_long_context_tier(comp)

        assert comp.context_length == original  # unchanged
        # The no-op path must leave the threshold and probe flags alone too.
        assert vars(comp) == state_before


# ---------------------------------------------------------------------------
# Integration: agent error handler path
# ---------------------------------------------------------------------------


class TestAgentErrorPath:
    """Verify the long-context 429 doesn't hit the generic rate-limit
    or client-error handlers.

    NOTE(review): no agent module is imported in this file, so these
    tests exercise the local heuristic only, not the real handler.
    """

    def test_long_context_429_not_treated_as_rate_limit(self):
        """The error should be intercepted before the generic
        is_rate_limited check fires a fallback switch."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-sonnet-4.6"

        assert _matches_long_context_tier_error(status_code, error_msg, model)

    def test_opus_429_falls_through_to_rate_limit(self):
        """Opus should NOT match — falls through to generic rate-limit."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-opus-4.6"

        assert not _matches_long_context_tier_error(status_code, error_msg, model)

    def test_normal_429_still_treated_as_rate_limit(self):
        """A normal 429 should NOT match the long-context check."""
        error_msg = "rate limit exceeded"
        status_code = 429
        model = "claude-sonnet-4.6"

        assert not _matches_long_context_tier_error(status_code, error_msg, model)

        is_rate_limited = (
            status_code == 429
            or "rate limit" in error_msg
        )
        assert is_rate_limited