// modelCost.ts
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from 'src/services/analytics/index.js'
import { logEvent } from 'src/services/analytics/index.js'
import { setHasUnknownModelCost } from '../bootstrap/state.js'
import { isFastModeEnabled } from './fastMode.js'
import {
  CLAUDE_3_5_HAIKU_CONFIG,
  CLAUDE_3_5_V2_SONNET_CONFIG,
  CLAUDE_3_7_SONNET_CONFIG,
  CLAUDE_HAIKU_4_5_CONFIG,
  CLAUDE_OPUS_4_1_CONFIG,
  CLAUDE_OPUS_4_5_CONFIG,
  CLAUDE_OPUS_4_6_CONFIG,
  CLAUDE_OPUS_4_CONFIG,
  CLAUDE_SONNET_4_5_CONFIG,
  CLAUDE_SONNET_4_6_CONFIG,
  CLAUDE_SONNET_4_CONFIG,
} from './model/configs.js'
import {
  firstPartyNameToCanonical,
  getCanonicalName,
  getDefaultMainLoopModelSetting,
  type ModelShortName,
} from './model/model.js'

/**
 * Price table for a single model.
 *
 * The four token fields are USD per million tokens (Mtok);
 * `webSearchRequests` is USD per individual web search request.
 *
 * @see https://platform.claude.com/docs/en/about-claude/pricing
 */
export type ModelCosts = {
  inputTokens: number
  outputTokens: number
  promptCacheWriteTokens: number
  promptCacheReadTokens: number
  webSearchRequests: number
}

// Standard pricing tier for Sonnet models: $3 input / $15 output per Mtok
export const COST_TIER_3_15 = {
  inputTokens: 3,
  outputTokens: 15,
  promptCacheWriteTokens: 3.75,
  promptCacheReadTokens: 0.3,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Pricing tier for Opus 4/4.1: $15 input / $75 output per Mtok
export const COST_TIER_15_75 = {
  inputTokens: 15,
  outputTokens: 75,
  promptCacheWriteTokens: 18.75,
  promptCacheReadTokens: 1.5,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Pricing tier for Opus 4.5 and standard-speed Opus 4.6:
// $5 input / $25 output per Mtok
export const COST_TIER_5_25 = {
  inputTokens: 5,
  outputTokens: 25,
  promptCacheWriteTokens: 6.25,
  promptCacheReadTokens: 0.5,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Fast mode pricing for Opus 4.6: $30 input / $150 output per Mtok
export const COST_TIER_30_150 = {
  inputTokens: 30,
  outputTokens: 150,
  promptCacheWriteTokens: 37.5,
  promptCacheReadTokens: 3,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Pricing for Haiku 3.5: $0.80 input / $4 output per Mtok
export const COST_HAIKU_35 = {
  inputTokens: 0.8,
  outputTokens: 4,
  promptCacheWriteTokens: 1,
  promptCacheReadTokens: 0.08,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Pricing for Haiku 4.5: $1 input / $5 output per Mtok
export const COST_HAIKU_45 = {
  inputTokens: 1,
  outputTokens: 5,
  promptCacheWriteTokens: 1.25,
  promptCacheReadTokens: 0.1,
  webSearchRequests: 0.01,
} as const satisfies ModelCosts

// Last-resort price table used when neither the requested model nor the
// default main-loop model has a pricing entry.
const DEFAULT_UNKNOWN_MODEL_COST = COST_TIER_5_25

/**
 * Get the cost tier for Opus 4.6 based on fast mode.
 *
 * @param fastMode - Whether the request being priced ran in fast mode.
 * @returns `COST_TIER_30_150` only when `isFastModeEnabled()` is true AND
 *   `fastMode` is true; otherwise the standard `COST_TIER_5_25`.
 */
export function getOpus46CostTier(fastMode: boolean): ModelCosts {
  if (isFastModeEnabled() && fastMode) {
    return COST_TIER_30_150
  }
  return COST_TIER_5_25
}

// @[MODEL LAUNCH]: Add a pricing entry for the new model below.
// Costs from https://platform.claude.com/docs/en/about-claude/pricing
// Web search cost: $10 per 1000 requests = $0.01 per request
// NOTE: the Opus 4.6 entry holds the standard tier only; getModelCosts()
// special-cases that model to apply fast-mode pricing.
export const MODEL_COSTS: Record<ModelShortName, ModelCosts> = {
  [firstPartyNameToCanonical(CLAUDE_3_5_HAIKU_CONFIG.firstParty)]:
    COST_HAIKU_35,
  [firstPartyNameToCanonical(CLAUDE_HAIKU_4_5_CONFIG.firstParty)]:
    COST_HAIKU_45,
  [firstPartyNameToCanonical(CLAUDE_3_5_V2_SONNET_CONFIG.firstParty)]:
    COST_TIER_3_15,
  [firstPartyNameToCanonical(CLAUDE_3_7_SONNET_CONFIG.firstParty)]:
    COST_TIER_3_15,
  [firstPartyNameToCanonical(CLAUDE_SONNET_4_CONFIG.firstParty)]:
    COST_TIER_3_15,
  [firstPartyNameToCanonical(CLAUDE_SONNET_4_5_CONFIG.firstParty)]:
    COST_TIER_3_15,
  [firstPartyNameToCanonical(CLAUDE_SONNET_4_6_CONFIG.firstParty)]:
    COST_TIER_3_15,
  [firstPartyNameToCanonical(CLAUDE_OPUS_4_CONFIG.firstParty)]: COST_TIER_15_75,
  [firstPartyNameToCanonical(CLAUDE_OPUS_4_1_CONFIG.firstParty)]:
    COST_TIER_15_75,
  [firstPartyNameToCanonical(CLAUDE_OPUS_4_5_CONFIG.firstParty)]:
    COST_TIER_5_25,
  [firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]:
    COST_TIER_5_25,
}

/**
 * Calculates the USD cost based on token usage and model cost configuration.
 *
 * Token prices are per million tokens, so each count is divided by 1,000,000.
 * Web search is priced per request and is not scaled. Cache token counts and
 * the web search count are treated as 0 when null/undefined.
 *
 * @param modelCosts - Price table to apply.
 * @param usage - Usage record for a single response.
 * @returns Total cost in USD.
 */
function tokensToUSDCost(modelCosts: ModelCosts, usage: Usage): number {
  return (
    (usage.input_tokens / 1_000_000) * modelCosts.inputTokens +
    (usage.output_tokens / 1_000_000) * modelCosts.outputTokens +
    ((usage.cache_read_input_tokens ?? 0) / 1_000_000) *
      modelCosts.promptCacheReadTokens +
    ((usage.cache_creation_input_tokens ?? 0) / 1_000_000) *
      modelCosts.promptCacheWriteTokens +
    (usage.server_tool_use?.web_search_requests ?? 0) *
      modelCosts.webSearchRequests
  )
}

/**
 * Resolve the price table for a model.
 *
 * Resolution order:
 * 1. Opus 4.6 is priced via `getOpus46CostTier`, using `usage.speed === 'fast'`
 *    as the fast-mode signal.
 * 2. Any other model is looked up in `MODEL_COSTS` by canonical name.
 * 3. If there is no entry, the miss is reported via `trackUnknownModelCost`
 *    and the default main-loop model's costs are used, falling back to
 *    `DEFAULT_UNKNOWN_MODEL_COST` if that model has no entry either.
 *
 * @param model - Model name; canonicalized with `getCanonicalName`.
 * @param usage - Usage record; only `speed` is read here.
 * @returns The price table to apply. Never undefined.
 */
export function getModelCosts(model: string, usage: Usage): ModelCosts {
  const shortName = getCanonicalName(model)

  // Check if this is an Opus 4.6 model with fast mode active.
  if (
    shortName === firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)
  ) {
    const isFastMode = usage.speed === 'fast'
    return getOpus46CostTier(isFastMode)
  }

  const costs = MODEL_COSTS[shortName]
  if (!costs) {
    trackUnknownModelCost(model, shortName)
    return (
      MODEL_COSTS[getCanonicalName(getDefaultMainLoopModelSetting())] ??
      DEFAULT_UNKNOWN_MODEL_COST
    )
  }
  return costs
}

/**
 * Report a model that has no pricing entry.
 *
 * Logs a `tengu_unknown_model_cost` analytics event carrying both the raw and
 * canonical model names, then calls `setHasUnknownModelCost()`.
 * NOTE(review): presumably that call records a flag so the UI can warn that
 * cost figures are estimates — confirm in bootstrap/state.
 */
function trackUnknownModelCost(model: string, shortName: ModelShortName): void {
  logEvent('tengu_unknown_model_cost', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    shortName:
      shortName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  setHasUnknownModelCost()
}

// Calculate the cost of a query in US dollars.
// If the model's costs are not found, use the default model's costs.
export function calculateUSDCost(resolvedModel: string, usage: Usage): number {
  const modelCosts = getModelCosts(resolvedModel, usage)
  return tokensToUSDCost(modelCosts, usage)
}

/**
 * Calculate cost from raw token counts without requiring a full BetaUsage object.
 * Useful for side queries (e.g. classifier) that track token counts independently.
 *
 * Only the four token fields are populated on the synthesized usage object, so
 * web search requests contribute 0 and `speed` is unset (Opus 4.6 is therefore
 * priced at the standard tier).
 *
 * @param model - Model name used to select the price table.
 * @param tokens - Raw token counts for the query.
 * @returns Total cost in USD.
 */
export function calculateCostFromTokens(
  model: string,
  tokens: {
    inputTokens: number
    outputTokens: number
    cacheReadInputTokens: number
    cacheCreationInputTokens: number
  },
): number {
  // The cast is deliberate: this is a partial Usage carrying only the fields
  // that getModelCosts/tokensToUSDCost read without a nullish fallback.
  const usage: Usage = {
    input_tokens: tokens.inputTokens,
    output_tokens: tokens.outputTokens,
    cache_read_input_tokens: tokens.cacheReadInputTokens,
    cache_creation_input_tokens: tokens.cacheCreationInputTokens,
  } as Usage
  return calculateUSDCost(model, usage)
}

/**
 * Format a single USD price for display.
 *
 * @param price - Price in USD.
 * @returns `$N` for integers, otherwise `$N.NN` (rounded to 2 decimals).
 */
function formatPrice(price: number): string {
  // Format price: integers without decimals, others with 2 decimal places
  // e.g., 3 -> "$3", 0.8 -> "$0.80", 22.5 -> "$22.50"
  if (Number.isInteger(price)) {
    return `$${price}`
  }
  return `$${price.toFixed(2)}`
}

/**
 * Format model costs as a pricing string for display
 * e.g., "$3/$15 per Mtok"
 *
 * Only the input and output token prices are shown.
 */
export function formatModelPricing(costs: ModelCosts): string {
  return `${formatPrice(costs.inputTokens)}/${formatPrice(costs.outputTokens)} per Mtok`
}

/**
 * Get formatted pricing string for a model
 * Accepts either a short name or full model name
 * Returns undefined if model is not found
 *
 * Reads `MODEL_COSTS` directly, so Opus 4.6 is reported at its standard tier
 * regardless of fast mode, and unknown models are not logged.
 */
export function getModelPricingString(model: string): string | undefined {
  const shortName = getCanonicalName(model)
  const costs = MODEL_COSTS[shortName]
  if (!costs) return undefined
  return formatModelPricing(costs)
}