# tests/agent/test_usage_pricing.py

from types import SimpleNamespace

from agent.usage_pricing import (
    CanonicalUsage,
    estimate_usage_cost,
    get_pricing_entry,
    normalize_usage,
)


def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
    usage = SimpleNamespace(
        input_tokens=1000,
        output_tokens=500,
        cache_read_input_tokens=2000,
        cache_creation_input_tokens=400,
    )

    normalized = normalize_usage(usage, provider="anthropic", api_mode="anthropic_messages")

    assert normalized.input_tokens == 1000
    assert normalized.output_tokens == 500
    assert normalized.cache_read_tokens == 2000
    assert normalized.cache_write_tokens == 400
    assert normalized.prompt_tokens == 3400


def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    usage = SimpleNamespace(
        prompt_tokens=3000,
        completion_tokens=700,
        prompt_tokens_details=SimpleNamespace(cached_tokens=1800),
    )

    normalized = normalize_usage(usage, provider="openai", api_mode="chat_completions")

    assert normalized.input_tokens == 1200
    assert normalized.cache_read_tokens == 1800
    assert normalized.output_tokens == 700

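# For reference, the two accounting conventions pinned down above (descriptive
# comment only; normalize_usage() is the code under test): Anthropic's
# messages API reports input_tokens excluding cache traffic, so the prompt
# total is reconstructed by addition (1000 + 2000 + 400 = 3400), while
# OpenAI-style chat completions report prompt_tokens including cached tokens,
# so the uncached share is recovered by subtraction (3000 - 1800 = 1200).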

def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
    """Some OpenAI-compatible proxies (OpenRouter, Vercel AI Gateway, Cline) expose
    Anthropic-style cache token counts at the top level of the usage object when
    routing Claude models, instead of nesting them in prompt_tokens_details.

    Regression guard for the bug fixed in cline/cline#10266: before that fix,
    the chat-completions branch of normalize_usage() only read
    prompt_tokens_details.cache_write_tokens and missed the top-level
    cache_creation_input_tokens case entirely, so cache writes showed as 0 and
    the reported inputTokens were overstated by the cache-write amount.
    """
    usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=500),
        cache_creation_input_tokens=300,
    )

    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")

    # Expected: cache read from prompt_tokens_details.cached_tokens (preferred),
    # cache write from top-level cache_creation_input_tokens (fallback).
    assert normalized.cache_read_tokens == 500
    assert normalized.cache_write_tokens == 300
    # input_tokens = prompt_total - cache_read - cache_write = 1000 - 500 - 300 = 200
    assert normalized.input_tokens == 200
    assert normalized.output_tokens == 200


def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
    """Some proxies expose only top-level Anthropic-style fields with no
    prompt_tokens_details object. Regression guard for cline/cline#10266.
    """
    usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        cache_read_input_tokens=500,
        cache_creation_input_tokens=300,
    )

    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")

    assert normalized.cache_read_tokens == 500
    assert normalized.cache_write_tokens == 300
    assert normalized.input_tokens == 200


def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    """When both prompt_tokens_details and top-level Anthropic fields are
    present, we prefer the OpenAI-standard nested fields. Top-level Anthropic
    fields are only a fallback when the nested ones are absent or zero.
    """
    usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=600, cache_write_tokens=150),
        # Intentionally different values, proving these are ignored when details exist.
        cache_read_input_tokens=999,
        cache_creation_input_tokens=999,
    )

    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")

    assert normalized.cache_read_tokens == 600
    assert normalized.cache_write_tokens == 150

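
# A minimal sketch of the read order the three tests above pin down. This is
# an illustration only, not the real normalize_usage() internals; the helper
# name _cache_buckets_sketch is hypothetical.
def _cache_buckets_sketch(usage):
    details = getattr(usage, "prompt_tokens_details", None)
    # Prefer the OpenAI-standard nested fields; fall back to the Anthropic-style
    # top-level fields only when the nested ones are absent or zero.
    cache_read = getattr(details, "cached_tokens", 0) or getattr(
        usage, "cache_read_input_tokens", 0
    )
    cache_write = getattr(details, "cache_write_tokens", 0) or getattr(
        usage, "cache_creation_input_tokens", 0
    )
    return cache_read, cache_write
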

def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        lambda: {
            "anthropic/claude-opus-4.6": {
                "pricing": {
                    "prompt": "0.000005",
                    "completion": "0.000025",
                    "input_cache_read": "0.0000005",
                    "input_cache_write": "0.00000625",
                }
            }
        },
    )

    entry = get_pricing_entry(
        "anthropic/claude-opus-4.6",
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert float(entry.input_cost_per_million) == 5.0
    assert float(entry.output_cost_per_million) == 25.0
    assert float(entry.cache_read_cost_per_million) == 0.5
    assert float(entry.cache_write_cost_per_million) == 6.25

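
# The conversion exercised above is plain scaling: OpenRouter's /models API
# quotes prices in USD per token, while the pricing entry stores USD per
# million tokens. A sketch; the helper name is hypothetical, not part of
# agent.usage_pricing:
def _per_token_to_per_million_sketch(per_token: str) -> float:
    from decimal import Decimal

    # e.g. "0.000005" USD/token * 1_000_000 -> 5.0 USD per million tokens.
    return float(Decimal(per_token) * 1_000_000)
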

def test_estimate_usage_cost_marks_subscription_routes_included():
    result = estimate_usage_cost(
        "gpt-5.3-codex",
        CanonicalUsage(input_tokens=1000, output_tokens=500),
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert result.status == "included"
    assert float(result.amount_usd) == 0.0

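
# Subscription-billed routes (here the ChatGPT-plan Codex backend) carry no
# per-token price, so the estimator reports "included" at $0.00 rather than
# pretending to meter them. How the route is detected is an implementation
# detail of estimate_usage_cost() and is deliberately not asserted here.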

def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        lambda: {
            "google/gemini-2.5-pro": {
                "pricing": {
                    "prompt": "0.00000125",
                    "completion": "0.00001",
                }
            }
        },
    )

    result = estimate_usage_cost(
        "google/gemini-2.5-pro",
        CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100),
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert result.status == "unknown"

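
# A minimal sketch of the refusal rule asserted above (illustration only; the
# names are hypothetical and the real guard lives inside estimate_usage_cost):
def _cost_status_sketch(cache_read_tokens: int, cache_read_rate) -> str:
    # With cache traffic but no official cache rate, refuse to guess a total
    # rather than silently pricing cached tokens at some made-up rate.
    if cache_read_tokens and cache_read_rate is None:
        return "unknown"
    return "estimated"
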

def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        lambda base_url, api_key=None: {
            "zai-org/GLM-5-TEE": {
                "pricing": {
                    "prompt": "0.0000005",
                    "completion": "0.000002",
                }
            }
        },
    )

    entry = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert float(entry.input_cost_per_million) == 0.5
    assert float(entry.output_cost_per_million) == 2.0