// llm-usage-tracker.js
/**
 * LLM Usage Tracker
 * Logs token usage and costs to database for analytics and forecasting
 */

import { run, getOne, getAll } from './db.js';
import './load-env.js';

/**
 * Model pricing in USD per million tokens
 * Updated 2026-03-01
 * Sources: https://openrouter.ai/pricing, https://www.anthropic.com/api
 */
const MODEL_PRICING = Object.freeze({
  // OpenAI models via OpenRouter
  'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
  'openai/gpt-4o': { input: 2.5, output: 10 },
  'openai/gpt-5.2': { input: 1.75, output: 14.0 },

  // DeepSeek via OpenRouter
  'deepseek/deepseek-v3.2': { input: 0.24, output: 0.38 },

  // Google via OpenRouter
  'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
  'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },

  // Anthropic models (direct)
  'claude-3-5-sonnet-20241022': { input: 3.0, output: 15.0 },
  'claude-3-5-haiku-20241022': { input: 0.8, output: 4.0 },
  'claude-3-opus-20240229': { input: 15.0, output: 75.0 },

  // Anthropic via OpenRouter
  'anthropic/claude-3.5-sonnet': { input: 3.0, output: 15.0 },
  'anthropic/claude-3.5-haiku': { input: 0.8, output: 4.0 },
  'anthropic/claude-haiku-4-5': { input: 0.8, output: 4.0 },
  'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
  'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
});

// Fallback per-million-token rates for models missing from MODEL_PRICING.
const DEFAULT_INPUT_RATE = 0.5;
const DEFAULT_OUTPUT_RATE = 1.5;

/**
 * Calculate cost for token usage.
 * @param {string} model - Model name (key into MODEL_PRICING)
 * @param {number} promptTokens - Input tokens
 * @param {number} completionTokens - Output tokens
 * @returns {number} - Estimated cost in USD, rounded to 6 decimal places
 */
export function calculateCost(model, promptTokens, completionTokens) {
  // eslint-disable-next-line security/detect-object-injection -- Safe: model from our config
  const pricing = MODEL_PRICING[model];

  if (!pricing) {
    console.warn(`Unknown model pricing: ${model}, using default`);
    // BUG FIX: this branch previously returned the STRING produced by
    // .toFixed(6), while the known-model path returned a number — callers
    // summing or comparing costs would silently concatenate/compare strings.
    const fallback =
      (promptTokens * DEFAULT_INPUT_RATE + completionTokens * DEFAULT_OUTPUT_RATE) / 1_000_000;
    return parseFloat(fallback.toFixed(6));
  }

  const inputCost = (promptTokens / 1_000_000) * pricing.input;
  const outputCost = (completionTokens / 1_000_000) * pricing.output;

  return parseFloat((inputCost + outputCost).toFixed(6));
}

/**
 * Log LLM usage to database.
 * @param {Object} params - Usage data
 * @param {number|null} [params.siteId] - Site ID (optional)
 * @param {string} params.stage - Pipeline stage (scoring, rescoring, proposals, enrichment, other)
 * @param {string} params.provider - Provider (anthropic, openrouter)
 * @param {string} params.model - Model name
 * @param {number} params.promptTokens - Input tokens
 * @param {number} params.completionTokens - Output tokens
 * @param {string|null} [params.requestId] - Optional request ID for debugging
 * @param {number|null} [params.costOverride] - Optional cost override in USD
 * @returns {Promise<void>}
 */
export async function logLLMUsage({
  siteId = null,
  stage,
  provider,
  model,
  promptTokens,
  completionTokens,
  requestId = null,
  costOverride = null,
}) {
  const totalTokens = promptTokens + completionTokens;
  // ?? (not ||) so an explicit override of 0 is honored.
  const estimatedCost = costOverride ?? calculateCost(model, promptTokens, completionTokens);

  await run(
    `INSERT INTO tel.llm_usage (
      site_id, stage, provider, model,
      prompt_tokens, completion_tokens, total_tokens,
      estimated_cost, request_id
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
    [siteId, stage, provider, model, promptTokens, completionTokens, totalTokens, estimatedCost, requestId]
  );
}

/**
 * Get total cost for a specific site.
 * @param {number} siteId - Site ID
 * @returns {Promise<number>} - Total cost in USD (0 when the site has no usage rows)
 */
export async function getSiteCost(siteId) {
  const result = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total_cost
     FROM tel.llm_usage
     WHERE site_id = $1`,
    [siteId]
  );

  // SUM(numeric) arrives as a string from the driver; normalize to number.
  return parseFloat(result.total_cost);
}

/**
 * Get total cost breakdown by stage.
 * @returns {Promise<Array>} - Cost breakdown [{stage, total_cost, total_tokens, request_count}]
 */
export async function getCostByStage() {
  return await getAll(
    `SELECT
      stage,
      SUM(estimated_cost) AS total_cost,
      SUM(total_tokens) AS total_tokens,
      COUNT(*) AS request_count
    FROM tel.llm_usage
    GROUP BY stage
    ORDER BY total_cost DESC`
  );
}

/**
 * Check cost variance against budgets for the last hour.
 * Returns any stages where actual cost exceeds the per-call budget cap or
 * where the model used doesn't match the budgeted model.
 * @returns {Promise<Array>} - Variance alerts [{stage, avgCost, expectedCost, model, expectedModel, type}]
 */
export async function checkBudgetVariance() {
  // Budget table is optional; skip the check entirely when it doesn't exist.
  const tableExists = await getOne(
    `SELECT table_name
     FROM information_schema.tables
     WHERE table_schema = 'ops'
       AND table_name = 'llm_cost_budgets'`
  );

  if (!tableExists) return [];

  // BUG FIX: the existence check looks in the 'ops' schema, but the lookup
  // query was previously unqualified (resolution depended on search_path).
  // Also: fetch all budgets once instead of one query per stage (was N+1).
  const budgets = await getAll('SELECT * FROM ops.llm_cost_budgets');
  const budgetByStage = new Map(budgets.map((b) => [b.call_type, b]));

  // Last hour's average cost per (stage, model) pair.
  const hourlyStats = await getAll(
    `SELECT stage, model,
       AVG(estimated_cost) AS avg_cost,
       COUNT(*) AS call_count
     FROM tel.llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'
     GROUP BY stage, model`
  );

  const alerts = [];

  for (const stat of hourlyStats) {
    const budget = budgetByStage.get(stat.stage);
    if (!budget) continue;

    // BUG FIX: numeric columns come back as strings (see the parseFloat
    // normalization used throughout this file); the previous `>` comparison
    // on raw values was lexicographic ("9" > "10"). Coerce before comparing.
    const avgCost = Number(stat.avg_cost);
    const maxCost = Number(budget.max_cost_per_call);
    const callCount = Number(stat.call_count);

    // Cost variance check
    if (avgCost > maxCost) {
      alerts.push({
        stage: stat.stage,
        type: 'cost_variance',
        avgCost,
        expectedCost: Number(budget.expected_cost_per_call),
        maxCost,
        callCount,
        model: stat.model,
      });
    }

    // Model mismatch check
    if (stat.model !== budget.expected_model) {
      alerts.push({
        stage: stat.stage,
        type: 'model_mismatch',
        actualModel: stat.model,
        expectedModel: budget.expected_model,
        avgCost,
        callCount,
      });
    }
  }

  return alerts;
}

/**
 * Get total LLM spend for today (UTC).
 * @returns {Promise<number>} - Today's total spend in USD
 */
export async function getDailySpend() {
  const result = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total
     FROM tel.llm_usage
     WHERE created_at::date = CURRENT_DATE`
  );

  return parseFloat(result.total);
}

/**
 * Get total LLM spend for the current hour (rolling 60-minute window).
 * @returns {Promise<number>} - Current hour's total spend in USD
 */
export async function getHourlySpend() {
  const result = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total
     FROM tel.llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'`
  );

  return parseFloat(result.total);
}