# test_compression_trigger_excludes_reasoning.py
"""Verify compression trigger excludes reasoning/completion tokens (#12026).

Thinking models (GLM-5.1, QwQ, DeepSeek R1) inflate completion_tokens with
reasoning tokens that don't consume context window space.  The compression
trigger must use only prompt_tokens so sessions aren't prematurely split.
"""

import types
import pytest
from unittest.mock import MagicMock, patch


def _make_agent_stub(prompt_tokens, completion_tokens, threshold_tokens):
    """Create a minimal stub that exercises the compression check path.

    Args:
        prompt_tokens: Value stored as ``last_prompt_tokens`` on the stub.
        completion_tokens: Value stored as ``last_completion_tokens``; it is
            recorded on the stub but deliberately NOT added to the result.
        threshold_tokens: Value stored as ``threshold_tokens`` on the stub.

    Returns:
        A ``(real_tokens, compressor)`` pair.  ``real_tokens`` equals
        ``prompt_tokens`` when that is positive and ``0`` otherwise;
        ``compressor`` is the ``types.SimpleNamespace`` holding the three
        inputs.
    """
    compressor = types.SimpleNamespace(
        last_prompt_tokens=prompt_tokens,
        last_completion_tokens=completion_tokens,
        threshold_tokens=threshold_tokens,
    )
    # Replicate the fixed logic from run_agent.py ~line 11273
    # NOTE(review): this mirrors the production branch instead of importing
    # it, so these tests will not notice if run_agent.py drifts -- confirm
    # whether the real check can be exercised directly.
    if compressor.last_prompt_tokens > 0:
        real_tokens = compressor.last_prompt_tokens  # Fixed: no completion
    else:
        real_tokens = 0
    return real_tokens, compressor


class TestCompressionTriggerExcludesReasoning:
    """Checks that only prompt tokens are compared against the threshold."""

    def test_high_reasoning_tokens_should_not_trigger_compression(self):
        """With the old bug, 40k prompt + 80k reasoning = 120k > 100k threshold.
        After the fix, only 40k prompt is compared — no compression."""
        real_tokens, comp = _make_agent_stub(
            prompt_tokens=40_000,
            completion_tokens=80_000,  # reasoning-heavy model
            threshold_tokens=100_000,
        )
        assert real_tokens == 40_000
        assert real_tokens < comp.threshold_tokens, (
            "Should NOT trigger compression — only prompt tokens matter"
        )

    def test_high_prompt_tokens_should_trigger_compression(self):
        """When prompt tokens genuinely exceed the threshold, compress."""
        real_tokens, comp = _make_agent_stub(
            prompt_tokens=110_000,
            completion_tokens=5_000,
            threshold_tokens=100_000,
        )
        assert real_tokens == 110_000
        assert real_tokens >= comp.threshold_tokens, (
            "Should trigger compression — prompt tokens exceed threshold"
        )

    def test_zero_prompt_tokens_falls_back(self):
        """When provider returns 0 prompt tokens, real_tokens is 0 (fallback path)."""
        real_tokens, _ = _make_agent_stub(
            prompt_tokens=0,
            completion_tokens=50_000,
            threshold_tokens=100_000,
        )
        assert real_tokens == 0