# test_nous_rate_guard.py
"""Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""

import json
import os
import time

import pytest


@pytest.fixture
def rate_guard_env(tmp_path, monkeypatch):
    """Isolate rate guard state to a temp directory.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it via
    ``monkeypatch`` and returns the directory path as a string.
    """
    hermes_home = str(tmp_path / ".hermes")
    os.makedirs(hermes_home, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", hermes_home)
    return hermes_home


def _read_state():
    """Load and return the JSON document stored at ``_state_path()``."""
    # Imported lazily, like every other import of the module under test
    # in this file.
    from agent.nous_rate_guard import _state_path

    with open(_state_path(), encoding="utf-8") as f:
        return json.load(f)


class TestRecordNousRateLimit:
    """Test recording rate limit state."""

    def test_records_with_header_reset(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path

        headers = {"x-ratelimit-reset-requests-1h": "1800"}
        record_nous_rate_limit(headers=headers)

        assert os.path.exists(_state_path())
        state = _read_state()
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)
        assert state["reset_at"] > time.time()

    def test_records_with_per_minute_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        headers = {"x-ratelimit-reset-requests": "45"}
        record_nous_rate_limit(headers=headers)

        state = _read_state()
        assert state["reset_seconds"] == pytest.approx(45, abs=2)

    def test_records_with_retry_after_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        headers = {"retry-after": "60"}
        record_nous_rate_limit(headers=headers)

        state = _read_state()
        assert state["reset_seconds"] == pytest.approx(60, abs=2)

    def test_prefers_hourly_over_per_minute(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        headers = {
            "x-ratelimit-reset-requests-1h": "1800",
            "x-ratelimit-reset-requests": "45",
        }
        record_nous_rate_limit(headers=headers)

        state = _read_state()
        # Should use the hourly value, not the per-minute one
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)

    def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        future_reset = time.time() + 900
        record_nous_rate_limit(
            headers=None,
            error_context={"reset_at": future_reset},
        )

        state = _read_state()
        assert state["reset_at"] == pytest.approx(future_reset, abs=1)

    def test_falls_back_to_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers=None)

        state = _read_state()
        # Default is 300 seconds (5 minutes)
        assert state["reset_seconds"] == pytest.approx(300, abs=2)

    def test_custom_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers=None, default_cooldown=120.0)

        state = _read_state()
        assert state["reset_seconds"] == pytest.approx(120, abs=2)

    def test_creates_directory_if_missing(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path

        record_nous_rate_limit(headers={"retry-after": "10"})
        assert os.path.exists(_state_path())


class TestNousRateLimitRemaining:
    """Test checking remaining rate limit time."""

    def test_returns_none_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining

        assert nous_rate_limit_remaining() is None

    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        from agent.nous_rate_guard import (
            record_nous_rate_limit,
            nous_rate_limit_remaining,
        )

        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})
        remaining = nous_rate_limit_remaining()
        assert remaining is not None
        # ~600 seconds, allowing for test execution time
        assert 595 < remaining <= 605

    def test_returns_none_when_expired(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path

        # Write an already-expired state
        state_dir = os.path.dirname(_state_path())
        os.makedirs(state_dir, exist_ok=True)
        with open(_state_path(), "w", encoding="utf-8") as f:
            json.dump(
                {"reset_at": time.time() - 10, "recorded_at": time.time() - 100},
                f,
            )

        assert nous_rate_limit_remaining() is None
        # File should be cleaned up
        assert not os.path.exists(_state_path())

    def test_handles_corrupt_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path

        state_dir = os.path.dirname(_state_path())
        os.makedirs(state_dir, exist_ok=True)
        with open(_state_path(), "w", encoding="utf-8") as f:
            f.write("not valid json{{{")

        assert nous_rate_limit_remaining() is None


class TestClearNousRateLimit:
    """Test clearing rate limit state."""

    def test_clears_existing_file(self, rate_guard_env):
        from agent.nous_rate_guard import (
            record_nous_rate_limit,
            clear_nous_rate_limit,
            nous_rate_limit_remaining,
            _state_path,
        )

        record_nous_rate_limit(headers={"retry-after": "600"})
        assert nous_rate_limit_remaining() is not None

        clear_nous_rate_limit()
        assert nous_rate_limit_remaining() is None
        assert not os.path.exists(_state_path())

    def test_clear_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import clear_nous_rate_limit

        # Should not raise
        clear_nous_rate_limit()


class TestFormatRemaining:
    """Test human-readable duration formatting."""

    def test_seconds(self):
        from agent.nous_rate_guard import format_remaining

        assert format_remaining(30) == "30s"

    def test_minutes(self):
        from agent.nous_rate_guard import format_remaining

        assert format_remaining(125) == "2m 5s"

    def test_exact_minutes(self):
        from agent.nous_rate_guard import format_remaining

        assert format_remaining(120) == "2m"

    def test_hours(self):
        from agent.nous_rate_guard import format_remaining

        assert format_remaining(3720) == "1h 2m"


class TestParseResetSeconds:
    """Test header parsing for reset times."""

    def test_case_insensitive_headers(self, rate_guard_env):
        from agent.nous_rate_guard import _parse_reset_seconds

        headers = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        assert _parse_reset_seconds(headers) == 1200.0

    def test_returns_none_for_empty_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds

        assert _parse_reset_seconds(None) is None
        assert _parse_reset_seconds({}) is None

    def test_ignores_zero_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds

        headers = {"x-ratelimit-reset-requests-1h": "0"}
        assert _parse_reset_seconds(headers) is None

    def test_ignores_invalid_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds

        headers = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        assert _parse_reset_seconds(headers) is None


class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard."""

    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        from agent.nous_rate_guard import record_nous_rate_limit

        # Record a rate limit
        record_nous_rate_limit(headers={"retry-after": "600"})

        # Mock _read_nous_auth to return valid creds (would normally succeed)
        import agent.auxiliary_client as aux

        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })

        result = aux._try_nous()
        assert result == (None, None)

    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        import agent.auxiliary_client as aux

        # No rate limit recorded — _try_nous should proceed normally
        # (will return None because no real creds, but won't be blocked
        # by the rate guard)
        # NOTE(review): (None, None) is also what the rate-limited test
        # above expects, so this assertion alone cannot tell the two
        # paths apart; it mainly proves the call does not raise.
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        result = aux._try_nous()
        assert result == (None, None)


class TestIsGenuineNousRateLimit:
    """Tell a real account-level 429 apart from an upstream-capacity 429.

    Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
    A 429 from an upstream out of capacity should NOT trip the
    cross-session breaker; a real user-quota 429 should.
    """

    def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            "x-ratelimit-limit-requests-1h": "800",
            "x-ratelimit-remaining-requests-1h": "0",
            "x-ratelimit-reset-requests-1h": "3100",
            "x-ratelimit-limit-requests": "200",
            "x-ratelimit-remaining-requests": "198",
            "x-ratelimit-reset-requests": "40",
        }
        assert is_genuine_nous_rate_limit(headers=headers) is True

    def test_exhausted_tokens_bucket_is_genuine(self):
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            "x-ratelimit-limit-tokens": "800000",
            "x-ratelimit-remaining-tokens": "0",
            "x-ratelimit-reset-tokens": "45",  # < 60s threshold -> not genuine
            "x-ratelimit-limit-tokens-1h": "8000000",
            "x-ratelimit-remaining-tokens-1h": "0",
            "x-ratelimit-reset-tokens-1h": "1800",  # >= 60s threshold -> genuine
        }
        assert is_genuine_nous_rate_limit(headers=headers) is True

    def test_healthy_headers_on_429_are_upstream_capacity(self):
        # Classic upstream-capacity symptom: Nous edge reports plenty of
        # headroom on every bucket, but returns 429 anyway because
        # upstream (DeepSeek / Kimi / ...) is out of capacity.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            "x-ratelimit-limit-requests": "200",
            "x-ratelimit-remaining-requests": "198",
            "x-ratelimit-reset-requests": "40",
            "x-ratelimit-limit-requests-1h": "800",
            "x-ratelimit-remaining-requests-1h": "750",
            "x-ratelimit-reset-requests-1h": "3100",
            "x-ratelimit-limit-tokens": "800000",
            "x-ratelimit-remaining-tokens": "790000",
            "x-ratelimit-reset-tokens": "40",
            "x-ratelimit-limit-tokens-1h": "8000000",
            "x-ratelimit-remaining-tokens-1h": "7800000",
            "x-ratelimit-reset-tokens-1h": "3100",
        }
        assert is_genuine_nous_rate_limit(headers=headers) is False

    def test_bare_429_with_no_headers_is_upstream(self):
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        assert is_genuine_nous_rate_limit(headers=None) is False
        assert is_genuine_nous_rate_limit(headers={}) is False
        assert is_genuine_nous_rate_limit(
            headers={"content-type": "application/json"}
        ) is False

    def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
        # remaining == 0 but reset in < 60s: almost certainly a
        # secondary per-minute throttle that will clear immediately --
        # not worth tripping the cross-session breaker.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        headers = {
            "x-ratelimit-limit-requests": "200",
            "x-ratelimit-remaining-requests": "0",
            "x-ratelimit-reset-requests": "30",
        }
        assert is_genuine_nous_rate_limit(headers=headers) is False

    def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
        # Headers on the 429 lack rate-limit info, but the previous
        # successful response already showed the hourly bucket
        # exhausted -- the 429 is almost certainly that limit
        # continuing.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit
        from agent.rate_limit_tracker import parse_rate_limit_headers

        prior_headers = {
            "x-ratelimit-limit-requests-1h": "800",
            "x-ratelimit-remaining-requests-1h": "0",
            "x-ratelimit-reset-requests-1h": "2000",
            "x-ratelimit-limit-requests": "200",
            "x-ratelimit-remaining-requests": "100",
            "x-ratelimit-reset-requests": "30",
            "x-ratelimit-limit-tokens": "800000",
            "x-ratelimit-remaining-tokens": "700000",
            "x-ratelimit-reset-tokens": "30",
            "x-ratelimit-limit-tokens-1h": "8000000",
            "x-ratelimit-remaining-tokens-1h": "7000000",
            "x-ratelimit-reset-tokens-1h": "2000",
        }
        last_state = parse_rate_limit_headers(prior_headers, provider="nous")
        assert is_genuine_nous_rate_limit(
            headers=None, last_known_state=last_state
        ) is True

    def test_last_known_state_all_healthy_stays_upstream(self):
        # Prior state was healthy; bare 429 arrives; should be treated
        # as upstream capacity.
        from agent.nous_rate_guard import is_genuine_nous_rate_limit
        from agent.rate_limit_tracker import parse_rate_limit_headers

        prior_headers = {
            "x-ratelimit-limit-requests-1h": "800",
            "x-ratelimit-remaining-requests-1h": "750",
            "x-ratelimit-reset-requests-1h": "2000",
            "x-ratelimit-limit-requests": "200",
            "x-ratelimit-remaining-requests": "180",
            "x-ratelimit-reset-requests": "30",
            "x-ratelimit-limit-tokens": "800000",
            "x-ratelimit-remaining-tokens": "790000",
            "x-ratelimit-reset-tokens": "30",
            "x-ratelimit-limit-tokens-1h": "8000000",
            "x-ratelimit-remaining-tokens-1h": "7900000",
            "x-ratelimit-reset-tokens-1h": "2000",
        }
        last_state = parse_rate_limit_headers(prior_headers, provider="nous")
        assert is_genuine_nous_rate_limit(
            headers=None, last_known_state=last_state
        ) is False

    def test_none_last_state_and_no_headers_is_upstream(self):
        from agent.nous_rate_guard import is_genuine_nous_rate_limit

        assert is_genuine_nous_rate_limit(
            headers=None, last_known_state=None
        ) is False