/ tests / run_agent / test_long_context_tier_429.py
test_long_context_tier_429.py
  1  """Tests for Anthropic Sonnet long-context tier 429 handling.
  2  
  3  When Claude Max users without "extra usage" hit the 1M context tier
  4  on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
  5  context requests."  This is NOT a transient rate limit — the agent should
  6  reduce context_length to 200k and compress instead of retrying.
  7  
  8  Only Sonnet is affected — Opus 1M is general access.
  9  """
 10  
 11  import pytest
 12  from types import SimpleNamespace
 13  from unittest.mock import MagicMock, patch
 14  
 15  
 16  # ---------------------------------------------------------------------------
 17  # Detection logic
 18  # ---------------------------------------------------------------------------
 19  
 20  
 21  class TestLongContextTierDetection:
 22      """Verify the detection heuristic matches the Anthropic error."""
 23  
 24      @staticmethod
 25      def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
 26          error_msg = error_msg.lower()
 27          return (
 28              status_code == 429
 29              and "extra usage" in error_msg
 30              and "long context" in error_msg
 31              and "sonnet" in model.lower()
 32          )
 33  
 34      def test_matches_anthropic_error(self):
 35          assert self._is_long_context_tier_error(
 36              429,
 37              "Extra usage is required for long context requests.",
 38          )
 39  
 40      def test_matches_lowercase(self):
 41          assert self._is_long_context_tier_error(
 42              429,
 43              "extra usage is required for long context requests.",
 44          )
 45  
 46      def test_matches_openrouter_model_id(self):
 47          assert self._is_long_context_tier_error(
 48              429,
 49              "Extra usage is required for long context requests.",
 50              model="anthropic/claude-sonnet-4.6",
 51          )
 52  
 53      def test_matches_nous_model_id(self):
 54          assert self._is_long_context_tier_error(
 55              429,
 56              "Extra usage is required for long context requests.",
 57              model="claude-sonnet-4-6",
 58          )
 59  
 60      def test_rejects_opus(self):
 61          """Opus 1M is general access — should NOT trigger reduction."""
 62          assert not self._is_long_context_tier_error(
 63              429,
 64              "Extra usage is required for long context requests.",
 65              model="claude-opus-4.6",
 66          )
 67  
 68      def test_rejects_opus_openrouter(self):
 69          assert not self._is_long_context_tier_error(
 70              429,
 71              "Extra usage is required for long context requests.",
 72              model="anthropic/claude-opus-4.6",
 73          )
 74  
 75      def test_rejects_normal_429(self):
 76          assert not self._is_long_context_tier_error(
 77              429,
 78              "Rate limit exceeded. Please retry after 30 seconds.",
 79          )
 80  
 81      def test_rejects_wrong_status(self):
 82          assert not self._is_long_context_tier_error(
 83              400,
 84              "Extra usage is required for long context requests.",
 85          )
 86  
 87      def test_rejects_partial_match(self):
 88          """Both 'extra usage' AND 'long context' must be present."""
 89          assert not self._is_long_context_tier_error(
 90              429, "extra usage required"
 91          )
 92          assert not self._is_long_context_tier_error(
 93              429, "long context requests not supported"
 94          )
 95  
 96  
 97  # ---------------------------------------------------------------------------
 98  # Context reduction
 99  # ---------------------------------------------------------------------------
100  
101  
class TestContextReduction:
    """When the long-context tier error fires, context_length should
    drop to 200k and the reduced flag should be set correctly.

    The reduction step is modelled locally in ``_apply_tier_reduction`` so
    that every test exercises the same branch; this file does not import
    the agent's own implementation.
    """

    def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
        """Build a stand-in compressor exposing only the fields the tests touch.

        Args:
            context_length: Starting context window, in tokens.
            threshold_percent: Fraction of the window used to derive
                ``threshold_tokens``.

        Returns:
            A ``SimpleNamespace`` with both probe flags initialised to False.
        """
        return SimpleNamespace(
            context_length=context_length,
            threshold_percent=threshold_percent,
            threshold_tokens=int(context_length * threshold_percent),
            _context_probed=False,
            _context_probe_persistable=False,
        )

    @staticmethod
    def _apply_tier_reduction(comp, reduced_ctx=200_000):
        """Shrink ``comp`` to ``reduced_ctx`` if its window is larger.

        Args:
            comp: Object with the attributes created by ``_make_compressor``.
            reduced_ctx: Target context window, in tokens.

        Returns:
            True when the compressor was modified, False when it was left
            untouched because its window was already at or below the target.
        """
        if comp.context_length <= reduced_ctx:
            return False
        comp.context_length = reduced_ctx
        comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
        comp._context_probed = True
        # Never persisted: the limit comes from the subscription tier,
        # not from what the model can do.
        comp._context_probe_persistable = False
        return True

    @staticmethod
    def _snapshot(comp):
        """Capture every field the reduction step is allowed to change."""
        return (
            comp.context_length,
            comp.threshold_tokens,
            comp._context_probed,
            comp._context_probe_persistable,
        )

    def test_reduces_1m_to_200k(self):
        comp = self._make_compressor(1_000_000)

        assert self._apply_tier_reduction(comp) is True

        assert comp.context_length == 200_000
        assert comp.threshold_tokens == 100_000
        assert comp._context_probed is True
        # Must NOT persist — subscription tier, not model capability
        assert comp._context_probe_persistable is False

    def test_no_reduction_when_already_200k(self):
        comp = self._make_compressor(200_000)
        before = self._snapshot(comp)

        assert self._apply_tier_reduction(comp) is False

        # Nothing may move: window, threshold, and both flags stay as built.
        assert self._snapshot(comp) == before
        assert comp.context_length == 200_000

    def test_no_reduction_when_below_200k(self):
        comp = self._make_compressor(128_000)
        before = self._snapshot(comp)

        assert self._apply_tier_reduction(comp) is False

        # Nothing may move: window, threshold, and both flags stay as built.
        assert self._snapshot(comp) == before
        assert comp.context_length == 128_000
151  
152  
153  # ---------------------------------------------------------------------------
154  # Integration: agent error handler path
155  # ---------------------------------------------------------------------------
156  
157  
class TestAgentErrorPath:
    """Verify the long-context 429 doesn't hit the generic rate-limit
    or client-error handlers.

    Both checks are modelled locally as static helpers; the agent's real
    error handler is not imported here, so these tests pin the intended
    classification rather than the handler's wiring.
    """

    @staticmethod
    def _is_long_context_tier_error(status_code, error_msg, model):
        """Return True for a 429 whose message names the long-context tier.

        The message and model id are lower-cased before matching, so the
        result does not depend on the caller normalising them first.
        """
        error_msg = error_msg.lower()
        return (
            status_code == 429
            and "extra usage" in error_msg
            and "long context" in error_msg
            and "sonnet" in model.lower()
        )

    @staticmethod
    def _is_rate_limited(status_code, error_msg):
        """Return True for the generic rate-limit condition modelled here."""
        return status_code == 429 or "rate limit" in error_msg.lower()

    def test_long_context_429_not_treated_as_rate_limit(self):
        """The error should be intercepted before the generic
        is_rate_limited check fires a fallback switch."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-sonnet-4.6"

        assert self._is_long_context_tier_error(status_code, error_msg, model)
        # The generic check matches this response too, which is why the
        # tier check has to be evaluated first.
        assert self._is_rate_limited(status_code, error_msg)

    def test_opus_429_falls_through_to_rate_limit(self):
        """Opus should NOT match — falls through to generic rate-limit."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-opus-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)
        assert self._is_rate_limited(status_code, error_msg)

    def test_normal_429_still_treated_as_rate_limit(self):
        """A normal 429 should NOT match the long-context check."""
        error_msg = "rate limit exceeded"
        status_code = 429
        model = "claude-sonnet-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)
        assert self._is_rate_limited(status_code, error_msg)