"""Unit tests for agent.usage_pricing.

Covers usage normalization across provider API shapes (Anthropic messages,
OpenAI chat completions, OpenAI-compatible proxies) and pricing-entry /
cost-estimation lookups.
"""

from types import SimpleNamespace

from agent.usage_pricing import (
    CanonicalUsage,
    estimate_usage_cost,
    get_pricing_entry,
    normalize_usage,
)


def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
    """Anthropic-native usage reports cache read and cache write counts
    separately; normalization must keep them in distinct buckets and fold
    them all into prompt_tokens."""
    raw = SimpleNamespace(
        input_tokens=1000,
        output_tokens=500,
        cache_read_input_tokens=2000,
        cache_creation_input_tokens=400,
    )

    canon = normalize_usage(raw, provider="anthropic", api_mode="anthropic_messages")

    assert canon.input_tokens == 1000
    assert canon.output_tokens == 500
    assert canon.cache_read_tokens == 2000
    assert canon.cache_write_tokens == 400
    # prompt total = input + cache read + cache write = 1000 + 2000 + 400
    assert canon.prompt_tokens == 3400


def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    """OpenAI's prompt_tokens is inclusive of cached tokens; normalization
    subtracts the cached portion to recover the uncached input count."""
    raw = SimpleNamespace(
        prompt_tokens=3000,
        completion_tokens=700,
        prompt_tokens_details=SimpleNamespace(cached_tokens=1800),
    )

    canon = normalize_usage(raw, provider="openai", api_mode="chat_completions")

    assert canon.input_tokens == 1200  # 3000 total - 1800 cached
    assert canon.cache_read_tokens == 1800
    assert canon.output_tokens == 700


def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
    """Proxies such as OpenRouter, Vercel AI Gateway, and Cline can surface
    Anthropic-style cache token counters at the top level of the usage
    object when routing Claude models, rather than nesting them inside
    prompt_tokens_details.

    Regression guard for cline/cline#10266: before that fix the
    chat-completions branch of normalize_usage() read only
    prompt_tokens_details.cache_write_tokens and never consulted the
    top-level cache_creation_input_tokens field, so cache writes reported
    as 0 and input tokens were overstated by the cache-write amount.
    """
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=500),
        cache_creation_input_tokens=300,
    )

    canon = normalize_usage(raw, provider="openrouter", api_mode="chat_completions")

    # Cache read comes from prompt_tokens_details.cached_tokens (preferred);
    # cache write falls back to the top-level cache_creation_input_tokens.
    assert canon.cache_read_tokens == 500
    assert canon.cache_write_tokens == 300
    # input = prompt total - cache read - cache write = 1000 - 500 - 300
    assert canon.input_tokens == 200
    assert canon.output_tokens == 200


def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
    """Some proxies omit prompt_tokens_details entirely and expose only the
    top-level Anthropic-style counters. Regression guard for
    cline/cline#10266."""
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        cache_read_input_tokens=500,
        cache_creation_input_tokens=300,
    )

    canon = normalize_usage(raw, provider="openrouter", api_mode="chat_completions")

    assert canon.cache_read_tokens == 500
    assert canon.cache_write_tokens == 300
    assert canon.input_tokens == 200


def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    """When both the nested OpenAI-standard fields and the top-level
    Anthropic-style fields are present, the nested ones win; the top-level
    values are only a fallback for when the nested ones are absent/zero."""
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=600, cache_write_tokens=150),
        # Deliberately conflicting values — these must be ignored while the
        # nested details object exists.
        cache_read_input_tokens=999,
        cache_creation_input_tokens=999,
    )

    canon = normalize_usage(raw, provider="openrouter", api_mode="chat_completions")

    assert canon.cache_read_tokens == 600
    assert canon.cache_write_tokens == 150


def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    """OpenRouter's models API quotes USD per single token; pricing entries
    are stored per million tokens."""
    metadata = {
        "anthropic/claude-opus-4.6": {
            "pricing": {
                "prompt": "0.000005",
                "completion": "0.000025",
                "input_cache_read": "0.0000005",
                "input_cache_write": "0.00000625",
            }
        }
    }
    monkeypatch.setattr("agent.usage_pricing.fetch_model_metadata", lambda: metadata)

    entry = get_pricing_entry(
        "anthropic/claude-opus-4.6",
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert float(entry.input_cost_per_million) == 5.0
    assert float(entry.output_cost_per_million) == 25.0
    assert float(entry.cache_read_cost_per_million) == 0.5
    assert float(entry.cache_write_cost_per_million) == 6.25


def test_estimate_usage_cost_marks_subscription_routes_included():
    """Subscription-backed routes carry no marginal token cost: the status
    is 'included' and the dollar amount is zero."""
    result = estimate_usage_cost(
        "gpt-5.3-codex",
        CanonicalUsage(input_tokens=1000, output_tokens=500),
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert result.status == "included"
    assert float(result.amount_usd) == 0.0


def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
    """When usage includes cache reads but the provider publishes no cache
    rate, the estimate must come back 'unknown' rather than guessing."""
    metadata = {
        "google/gemini-2.5-pro": {
            "pricing": {
                "prompt": "0.00000125",
                "completion": "0.00001",
            }
        }
    }
    monkeypatch.setattr("agent.usage_pricing.fetch_model_metadata", lambda: metadata)

    result = estimate_usage_cost(
        "google/gemini-2.5-pro",
        CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100),
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert result.status == "unknown"


def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    """Custom OpenAI-compatible endpoints that expose a models/pricing
    payload get the same per-token -> per-million conversion."""
    metadata = {
        "zai-org/GLM-5-TEE": {
            "pricing": {
                "prompt": "0.0000005",
                "completion": "0.000002",
            }
        }
    }
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        lambda base_url, api_key=None: metadata,
    )

    entry = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert float(entry.input_cost_per_million) == 0.5
    assert float(entry.output_cost_per_million) == 2.0