/ tests / agent / test_nous_rate_guard.py
test_nous_rate_guard.py
  1  """Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
  2  
  3  import json
  4  import os
  5  import time
  6  
  7  import pytest
  8  
  9  
 10  @pytest.fixture
 11  def rate_guard_env(tmp_path, monkeypatch):
 12      """Isolate rate guard state to a temp directory."""
 13      hermes_home = str(tmp_path / ".hermes")
 14      os.makedirs(hermes_home, exist_ok=True)
 15      monkeypatch.setenv("HERMES_HOME", hermes_home)
 16      # Clear any cached module-level imports
 17      return hermes_home
 18  
 19  
 20  class TestRecordNousRateLimit:
 21      """Test recording rate limit state."""
 22  
 23      def test_records_with_header_reset(self, rate_guard_env):
 24          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 25  
 26          headers = {"x-ratelimit-reset-requests-1h": "1800"}
 27          record_nous_rate_limit(headers=headers)
 28  
 29          path = _state_path()
 30          assert os.path.exists(path)
 31          with open(path) as f:
 32              state = json.load(f)
 33          assert state["reset_seconds"] == pytest.approx(1800, abs=2)
 34          assert state["reset_at"] > time.time()
 35  
 36      def test_records_with_per_minute_header(self, rate_guard_env):
 37          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 38  
 39          headers = {"x-ratelimit-reset-requests": "45"}
 40          record_nous_rate_limit(headers=headers)
 41  
 42          with open(_state_path()) as f:
 43              state = json.load(f)
 44          assert state["reset_seconds"] == pytest.approx(45, abs=2)
 45  
 46      def test_records_with_retry_after_header(self, rate_guard_env):
 47          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 48  
 49          headers = {"retry-after": "60"}
 50          record_nous_rate_limit(headers=headers)
 51  
 52          with open(_state_path()) as f:
 53              state = json.load(f)
 54          assert state["reset_seconds"] == pytest.approx(60, abs=2)
 55  
 56      def test_prefers_hourly_over_per_minute(self, rate_guard_env):
 57          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 58  
 59          headers = {
 60              "x-ratelimit-reset-requests-1h": "1800",
 61              "x-ratelimit-reset-requests": "45",
 62          }
 63          record_nous_rate_limit(headers=headers)
 64  
 65          with open(_state_path()) as f:
 66              state = json.load(f)
 67          # Should use the hourly value, not the per-minute one
 68          assert state["reset_seconds"] == pytest.approx(1800, abs=2)
 69  
 70      def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
 71          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 72  
 73          future_reset = time.time() + 900
 74          record_nous_rate_limit(
 75              headers=None,
 76              error_context={"reset_at": future_reset},
 77          )
 78  
 79          with open(_state_path()) as f:
 80              state = json.load(f)
 81          assert state["reset_at"] == pytest.approx(future_reset, abs=1)
 82  
 83      def test_falls_back_to_default_cooldown(self, rate_guard_env):
 84          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 85  
 86          record_nous_rate_limit(headers=None)
 87  
 88          with open(_state_path()) as f:
 89              state = json.load(f)
 90          # Default is 300 seconds (5 minutes)
 91          assert state["reset_seconds"] == pytest.approx(300, abs=2)
 92  
 93      def test_custom_default_cooldown(self, rate_guard_env):
 94          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
 95  
 96          record_nous_rate_limit(headers=None, default_cooldown=120.0)
 97  
 98          with open(_state_path()) as f:
 99              state = json.load(f)
100          assert state["reset_seconds"] == pytest.approx(120, abs=2)
101  
102      def test_creates_directory_if_missing(self, rate_guard_env):
103          from agent.nous_rate_guard import record_nous_rate_limit, _state_path
104  
105          record_nous_rate_limit(headers={"retry-after": "10"})
106          assert os.path.exists(_state_path())
107  
108  
class TestNousRateLimitRemaining:
    """Exercise ``nous_rate_limit_remaining`` with absent, live, stale and broken state."""

    @staticmethod
    def _ensure_state_parent():
        """Create the state file's parent directory and return the state file path."""
        from agent.nous_rate_guard import _state_path

        os.makedirs(os.path.dirname(_state_path()), exist_ok=True)
        return _state_path()

    def test_returns_none_when_no_file(self, rate_guard_env):
        """No state file means no active limit."""
        from agent.nous_rate_guard import nous_rate_limit_remaining

        assert nous_rate_limit_remaining() is None

    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        """A freshly recorded 600s limit reports roughly 600s remaining."""
        from agent.nous_rate_guard import nous_rate_limit_remaining, record_nous_rate_limit

        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})

        seconds_left = nous_rate_limit_remaining()
        assert seconds_left is not None
        # ~600 seconds, with slack for the time the test itself takes.
        assert 595 < seconds_left <= 605

    def test_returns_none_when_expired(self, rate_guard_env):
        """A state whose ``reset_at`` is in the past reads as None and is deleted."""
        from agent.nous_rate_guard import _state_path, nous_rate_limit_remaining

        # Hand-write a state that expired ten seconds ago.
        state_file = self._ensure_state_parent()
        with open(state_file, "w") as handle:
            json.dump({"reset_at": time.time() - 10, "recorded_at": time.time() - 100}, handle)

        assert nous_rate_limit_remaining() is None
        # The stale file is expected to be removed by the call above.
        assert not os.path.exists(_state_path())

    def test_handles_corrupt_file(self, rate_guard_env):
        """Unparseable state file content is reported as no active limit."""
        from agent.nous_rate_guard import nous_rate_limit_remaining

        state_file = self._ensure_state_parent()
        with open(state_file, "w") as handle:
            handle.write("not valid json{{{")

        assert nous_rate_limit_remaining() is None
147  
148  
class TestClearNousRateLimit:
    """Check that ``clear_nous_rate_limit`` removes recorded state."""

    def test_clears_existing_file(self, rate_guard_env):
        """Clearing an active limit removes both the limit and its state file."""
        import agent.nous_rate_guard as guard

        guard.record_nous_rate_limit(headers={"retry-after": "600"})
        assert guard.nous_rate_limit_remaining() is not None

        guard.clear_nous_rate_limit()

        assert guard.nous_rate_limit_remaining() is None
        assert not os.path.exists(guard._state_path())

    def test_clear_when_no_file(self, rate_guard_env):
        """Clearing with nothing recorded must not raise."""
        import agent.nous_rate_guard as guard

        guard.clear_nous_rate_limit()
172  
173  
class TestFormatRemaining:
    """Check the human-readable strings produced by ``format_remaining``."""

    def test_seconds(self):
        """Below one minute only seconds are shown."""
        import agent.nous_rate_guard as guard

        rendered = guard.format_remaining(30)
        assert rendered == "30s"

    def test_minutes(self):
        """Minutes and leftover seconds are both shown."""
        import agent.nous_rate_guard as guard

        rendered = guard.format_remaining(125)
        assert rendered == "2m 5s"

    def test_exact_minutes(self):
        """A whole number of minutes omits the seconds part."""
        import agent.nous_rate_guard as guard

        rendered = guard.format_remaining(120)
        assert rendered == "2m"

    def test_hours(self):
        """Above one hour the output is hours and minutes."""
        import agent.nous_rate_guard as guard

        rendered = guard.format_remaining(3720)
        assert rendered == "1h 2m"
196  
197  
class TestParseResetSeconds:
    """Check how ``_parse_reset_seconds`` reads reset times from headers."""

    def test_case_insensitive_headers(self, rate_guard_env):
        """Header names are matched regardless of letter case."""
        from agent.nous_rate_guard import _parse_reset_seconds

        mixed_case = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        parsed = _parse_reset_seconds(mixed_case)
        assert parsed == 1200.0

    def test_returns_none_for_empty_headers(self):
        """Both ``None`` and an empty mapping give ``None``."""
        from agent.nous_rate_guard import _parse_reset_seconds

        for empty in (None, {}):
            assert _parse_reset_seconds(empty) is None

    def test_ignores_zero_values(self):
        """A reset value of zero is treated as absent."""
        from agent.nous_rate_guard import _parse_reset_seconds

        zero_reset = {"x-ratelimit-reset-requests-1h": "0"}
        assert _parse_reset_seconds(zero_reset) is None

    def test_ignores_invalid_values(self):
        """A non-numeric reset value is treated as absent."""
        from agent.nous_rate_guard import _parse_reset_seconds

        garbage_reset = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        assert _parse_reset_seconds(garbage_reset) is None
224  
225  
class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard.

    Both tests end with the same ``(None, None)`` result from
    ``_try_nous``, so each one first pins the guard state it relies on;
    otherwise a broken guard could not be told apart from a working one.
    """

    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        """With an active limit, ``_try_nous`` returns nothing despite valid creds."""
        from agent.nous_rate_guard import nous_rate_limit_remaining, record_nous_rate_limit

        # Record a rate limit
        record_nous_rate_limit(headers={"retry-after": "600"})
        # Precondition: the guard must actually be active, so the empty
        # result below can be attributed to it rather than to a failed write.
        assert nous_rate_limit_remaining() is not None

        # Mock _read_nous_auth to return valid creds (would normally succeed)
        import agent.auxiliary_client as aux
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })

        result = aux._try_nous()
        assert result == (None, None)

    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        """With no limit recorded, ``_try_nous`` runs and finds no credentials."""
        from agent.nous_rate_guard import nous_rate_limit_remaining

        import agent.auxiliary_client as aux

        # Precondition: nothing has been recorded in this isolated
        # HERMES_HOME, so the rate guard is not what produces the result.
        assert nous_rate_limit_remaining() is None

        # No rate limit recorded — _try_nous should proceed normally
        # (will return None because no real creds, but won't be blocked
        # by the rate guard)
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        result = aux._try_nous()
        assert result == (None, None)
254  
255  
class TestIsGenuineNousRateLimit:
    """Separate a real account-level 429 from an upstream-capacity 429.

    Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
    When one of those upstreams runs out of capacity the resulting 429
    should NOT trip the cross-session breaker; a 429 caused by the
    user's own quota should.
    """

    @staticmethod
    def _bucket(name, limit, remaining, reset):
        """Return the limit/remaining/reset header triple for bucket *name*, in that order."""
        return {
            f"x-ratelimit-limit-{name}": limit,
            f"x-ratelimit-remaining-{name}": remaining,
            f"x-ratelimit-reset-{name}": reset,
        }

    def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
        """An empty hourly request bucket on the 429 itself is a genuine limit."""
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            **self._bucket("requests-1h", "800", "0", "3100"),
            **self._bucket("requests", "200", "198", "40"),
        }
        verdict = is_genuine_nous_rate_limit(headers=headers)
        assert verdict is True

    def test_exhausted_tokens_bucket_is_genuine(self):
        """An empty hourly token bucket counts even if the short one would not."""
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            # reset 45 is < 60s threshold -> not genuine on its own
            **self._bucket("tokens", "800000", "0", "45"),
            # reset 1800 is >= 60s threshold -> genuine
            **self._bucket("tokens-1h", "8000000", "0", "1800"),
        }
        verdict = is_genuine_nous_rate_limit(headers=headers)
        assert verdict is True

    def test_healthy_headers_on_429_are_upstream_capacity(self):
        """Plenty of headroom in every bucket means the 429 came from upstream."""
        # Classic upstream-capacity symptom: the Nous edge reports headroom
        # on every bucket yet still answers 429 because the upstream
        # (DeepSeek / Kimi / ...) is out of capacity.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            **self._bucket("requests", "200", "198", "40"),
            **self._bucket("requests-1h", "800", "750", "3100"),
            **self._bucket("tokens", "800000", "790000", "40"),
            **self._bucket("tokens-1h", "8000000", "7800000", "3100"),
        }
        verdict = is_genuine_nous_rate_limit(headers=headers)
        assert verdict is False

    def test_bare_429_with_no_headers_is_upstream(self):
        """Missing, empty, or rate-limit-free headers are never genuine."""
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        uninformative = (None, {}, {"content-type": "application/json"})
        for headers in uninformative:
            assert is_genuine_nous_rate_limit(headers=headers) is False

    def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
        """An empty bucket that resets in under 60s does not trip the breaker."""
        # remaining == 0 with a reset below 60s is almost certainly a
        # secondary per-minute throttle that clears right away, so it is
        # not worth tripping the cross-session breaker.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = self._bucket("requests", "200", "0", "30")
        verdict = is_genuine_nous_rate_limit(headers=headers)
        assert verdict is False

    def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
        """A header-less 429 is genuine when the prior state showed an empty hourly bucket."""
        # The 429 carries no rate-limit info, but the previous successful
        # response already had the hourly bucket at zero, so the 429 is
        # almost certainly that same limit continuing.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit
        from agent.rate_limit_tracker import parse_rate_limit_headers

        prior_headers = {
            **self._bucket("requests-1h", "800", "0", "2000"),
            **self._bucket("requests", "200", "100", "30"),
            **self._bucket("tokens", "800000", "700000", "30"),
            **self._bucket("tokens-1h", "8000000", "7000000", "2000"),
        }
        snapshot = parse_rate_limit_headers(prior_headers, provider="nous")

        verdict = is_genuine_nous_rate_limit(headers=None, last_known_state=snapshot)
        assert verdict is True

    def test_last_known_state_all_healthy_stays_upstream(self):
        """A header-less 429 after a healthy prior state is upstream capacity."""
        from agent.nous_rate_guard import is_genuine_nous_rate_limit
        from agent.rate_limit_tracker import parse_rate_limit_headers

        prior_headers = {
            **self._bucket("requests-1h", "800", "750", "2000"),
            **self._bucket("requests", "200", "180", "30"),
            **self._bucket("tokens", "800000", "790000", "30"),
            **self._bucket("tokens-1h", "8000000", "7900000", "2000"),
        }
        snapshot = parse_rate_limit_headers(prior_headers, provider="nous")

        verdict = is_genuine_nous_rate_limit(headers=None, last_known_state=snapshot)
        assert verdict is False

    def test_none_last_state_and_no_headers_is_upstream(self):
        """With neither headers nor prior state the 429 is treated as upstream."""
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        verdict = is_genuine_nous_rate_limit(headers=None, last_known_state=None)
        assert verdict is False