# Cradicle Explorer
#
# / tests / run_agent / test_compression_trigger_excludes_reasoning.py
# test_compression_trigger_excludes_reasoning.py
"""Verify compression trigger excludes reasoning/completion tokens (#12026).

Thinking models (GLM-5.1, QwQ, DeepSeek R1) inflate completion_tokens with
reasoning tokens that don't consume context window space.  The compression
trigger must use only prompt_tokens so sessions aren't prematurely split.
"""
 7  
 8  import types
 9  import pytest
10  from unittest.mock import MagicMock, patch
11  
12  
13  def _make_agent_stub(prompt_tokens, completion_tokens, threshold_tokens):
14      """Create a minimal stub that exercises the compression check path."""
15      compressor = types.SimpleNamespace(
16          last_prompt_tokens=prompt_tokens,
17          last_completion_tokens=completion_tokens,
18          threshold_tokens=threshold_tokens,
19      )
20      # Replicate the fixed logic from run_agent.py ~line 11273
21      if compressor.last_prompt_tokens > 0:
22          real_tokens = compressor.last_prompt_tokens  # Fixed: no completion
23      else:
24          real_tokens = 0
25      return real_tokens, compressor
26  
27  
class TestCompressionTriggerExcludesReasoning:
    """Regression tests for the compression trigger token count (#12026).

    Each case feeds usage numbers through ``_make_agent_stub`` and compares
    the resulting ``real_tokens`` with the stub compressor's threshold.
    """

    def test_high_reasoning_tokens_should_not_trigger_compression(self):
        """With the old bug, 40k prompt + 80k reasoning = 120k > 100k threshold.
        After the fix, only 40k prompt is compared — no compression."""
        real_tokens, comp = _make_agent_stub(
            prompt_tokens=40_000,
            completion_tokens=80_000,  # reasoning-heavy model
            threshold_tokens=100_000,
        )
        # Guard the premise: if prompt + completion did not reach the
        # threshold, this test would pass even with the old summing logic.
        assert (
            comp.last_prompt_tokens + comp.last_completion_tokens
            >= comp.threshold_tokens
        ), "Fixture must cross the threshold when completion tokens are added"
        assert real_tokens == 40_000
        assert real_tokens < comp.threshold_tokens, (
            "Should NOT trigger compression — only prompt tokens matter"
        )

    def test_high_prompt_tokens_should_trigger_compression(self):
        """When prompt tokens genuinely exceed the threshold, compress."""
        real_tokens, comp = _make_agent_stub(
            prompt_tokens=110_000,
            completion_tokens=5_000,
            threshold_tokens=100_000,
        )
        assert real_tokens == 110_000
        assert real_tokens >= comp.threshold_tokens, (
            "Should trigger compression — prompt tokens exceed threshold"
        )

    def test_zero_prompt_tokens_falls_back(self):
        """When provider returns 0 prompt tokens, real_tokens is 0 (fallback path)."""
        real_tokens, _ = _make_agent_stub(
            prompt_tokens=0,
            completion_tokens=50_000,
            threshold_tokens=100_000,
        )
        # Completion tokens must not be substituted for the missing prompt count.
        assert real_tokens == 0