// src/utils/llm-usage-tracker.js
  1  /**
  2   * LLM Usage Tracker
  3   * Logs token usage and costs to database for analytics and forecasting
  4   */
  5  
  6  import { run, getOne, getAll } from './db.js';
  7  import './load-env.js';
  8  
  9  /**
 10   * Model pricing in USD per million tokens
 11   * Updated 2026-03-01
 12   * Sources: https://openrouter.ai/pricing, https://www.anthropic.com/api
 13   */
 14  const MODEL_PRICING = {
 15    // OpenAI models via OpenRouter
 16    'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
 17    'openai/gpt-4o': { input: 2.5, output: 10 },
 18    'openai/gpt-5.2': { input: 1.75, output: 14.0 },
 19  
 20    // DeepSeek via OpenRouter
 21    'deepseek/deepseek-v3.2': { input: 0.24, output: 0.38 },
 22  
 23    // Google via OpenRouter
 24    'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
 25    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
 26  
 27    // Anthropic models (direct)
 28    'claude-3-5-sonnet-20241022': { input: 3.0, output: 15.0 },
 29    'claude-3-5-haiku-20241022': { input: 0.8, output: 4.0 },
 30    'claude-3-opus-20240229': { input: 15.0, output: 75.0 },
 31  
 32    // Anthropic via OpenRouter
 33    'anthropic/claude-3.5-sonnet': { input: 3.0, output: 15.0 },
 34    'anthropic/claude-3.5-haiku': { input: 0.8, output: 4.0 },
 35    'anthropic/claude-haiku-4-5': { input: 0.8, output: 4.0 },
 36    'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
 37    'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
 38  };
 39  
 40  /**
 41   * Calculate cost for token usage
 42   * @param {string} model - Model name
 43   * @param {number} promptTokens - Input tokens
 44   * @param {number} completionTokens - Output tokens
 45   * @returns {number} - Estimated cost in USD
 46   */
 47  export function calculateCost(model, promptTokens, completionTokens) {
 48    // eslint-disable-next-line security/detect-object-injection -- Safe: model from our config
 49    const pricing = MODEL_PRICING[model];
 50  
 51    if (!pricing) {
 52      console.warn(`Unknown model pricing: ${model}, using default`);
 53      return ((promptTokens * 0.5 + completionTokens * 1.5) / 1_000_000).toFixed(6);
 54    }
 55  
 56    const inputCost = (promptTokens / 1_000_000) * pricing.input;
 57    const outputCost = (completionTokens / 1_000_000) * pricing.output;
 58  
 59    return parseFloat((inputCost + outputCost).toFixed(6));
 60  }
 61  
 62  /**
 63   * Log LLM usage to database
 64   * @param {Object} params - Usage data
 65   * @param {number} params.siteId - Site ID (optional)
 66   * @param {string} params.stage - Pipeline stage (scoring, rescoring, proposals, enrichment, other)
 67   * @param {string} params.provider - Provider (anthropic, openrouter)
 68   * @param {string} params.model - Model name
 69   * @param {number} params.promptTokens - Input tokens
 70   * @param {number} params.completionTokens - Output tokens
 71   * @param {string} params.requestId - Optional request ID for debugging
 72   * @param {number|null} params.costOverride - Optional cost override in USD
 73   * @returns {Promise<void>}
 74   */
 75  export async function logLLMUsage({
 76    siteId = null,
 77    stage,
 78    provider,
 79    model,
 80    promptTokens,
 81    completionTokens,
 82    requestId = null,
 83    costOverride = null,
 84  }) {
 85    const totalTokens = promptTokens + completionTokens;
 86    const estimatedCost = costOverride ?? calculateCost(model, promptTokens, completionTokens);
 87  
 88    await run(
 89      `INSERT INTO tel.llm_usage (
 90        site_id, stage, provider, model,
 91        prompt_tokens, completion_tokens, total_tokens,
 92        estimated_cost, request_id
 93      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
 94      [siteId, stage, provider, model, promptTokens, completionTokens, totalTokens, estimatedCost, requestId]
 95    );
 96  }
 97  
 98  /**
 99   * Get total cost for a specific site
100   * @param {number} siteId - Site ID
101   * @returns {Promise<number>} - Total cost in USD
102   */
/**
 * Sum the estimated cost of every logged LLM call for one site.
 *
 * @param {number} siteId - Site ID
 * @returns {Promise<number>} - Total cost in USD (0 if the site has no usage)
 */
export async function getSiteCost(siteId) {
  const row = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total_cost
     FROM tel.llm_usage
     WHERE site_id = $1`,
    [siteId]
  );
  // SUM over a numeric column comes back as a string from the driver;
  // convert before returning.
  return parseFloat(row.total_cost);
}
113  
114  /**
115   * Get total cost breakdown by stage
116   * @returns {Promise<Array>} - Cost breakdown [{stage, total_cost, total_tokens, request_count}]
117   */
/**
 * Aggregate lifetime LLM spend per pipeline stage, most expensive first.
 *
 * @returns {Promise<Array>} - Cost breakdown rows:
 *   [{stage, total_cost, total_tokens, request_count}]
 */
export async function getCostByStage() {
  const breakdown = await getAll(
    `SELECT
      stage,
      SUM(estimated_cost) AS total_cost,
      SUM(total_tokens) AS total_tokens,
      COUNT(*) AS request_count
     FROM tel.llm_usage
     GROUP BY stage
     ORDER BY total_cost DESC`
  );
  return breakdown;
}
130  
131  /**
132   * Check cost variance against budgets for the last hour
133   * Returns any stages where actual cost exceeds 3x expected or model doesn't match
134   * @returns {Promise<Array>} - Variance alerts [{stage, avgCost, expectedCost, model, expectedModel, type}]
135   */
/**
 * Check last-hour LLM costs against the per-stage budgets in
 * ops.llm_cost_budgets.
 *
 * Emits a 'cost_variance' alert when a (stage, model) pair's average
 * per-call cost exceeds the budgeted maximum, and a 'model_mismatch'
 * alert when the model used differs from the budgeted model.
 *
 * @returns {Promise<Array>} - Variance alerts:
 *   {stage, type: 'cost_variance', avgCost, expectedCost, maxCost, callCount, model}
 *   {stage, type: 'model_mismatch', actualModel, expectedModel, avgCost, callCount}
 *   Empty array when the budget table does not exist.
 */
export async function checkBudgetVariance() {
  // Budgets are optional infrastructure — bail out quietly when the
  // table has not been created in this environment.
  const tableExists = await getOne(
    `SELECT table_name
     FROM information_schema.tables
     WHERE table_schema = 'ops'
       AND table_name = 'llm_cost_budgets'`
  );

  if (!tableExists) return [];

  // Fetch all budgets once (the original issued one query per stat row).
  // The table is schema-qualified: the existence check above looked in
  // 'ops', so an unqualified lookup could resolve to a different table
  // via search_path — or fail — despite the check passing.
  const budgetRows = await getAll('SELECT * FROM ops.llm_cost_budgets');
  const budgetByStage = new Map(budgetRows.map((b) => [b.call_type, b]));

  // Average cost per (stage, model) over the trailing hour.
  const hourlyStats = await getAll(
    `SELECT stage, model,
            AVG(estimated_cost) AS avg_cost,
            COUNT(*) AS call_count
     FROM tel.llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'
     GROUP BY stage, model`
  );

  const alerts = [];

  for (const stat of hourlyStats) {
    const budget = budgetByStage.get(stat.stage);
    if (!budget) continue;

    // NUMERIC values arrive from the driver as strings (this file
    // parseFloat()s aggregates everywhere else), so compare as numbers —
    // string comparison would make '9.0' > '10.0' true.
    if (parseFloat(stat.avg_cost) > parseFloat(budget.max_cost_per_call)) {
      alerts.push({
        stage: stat.stage,
        type: 'cost_variance',
        avgCost: stat.avg_cost,
        expectedCost: budget.expected_cost_per_call,
        maxCost: budget.max_cost_per_call,
        callCount: stat.call_count,
        model: stat.model,
      });
    }

    // Model mismatch check — alerts when a stage silently switched models.
    if (stat.model !== budget.expected_model) {
      alerts.push({
        stage: stat.stage,
        type: 'model_mismatch',
        actualModel: stat.model,
        expectedModel: budget.expected_model,
        avgCost: stat.avg_cost,
        callCount: stat.call_count,
      });
    }
  }

  return alerts;
}
195  
196  /**
197   * Get total LLM spend for today (UTC)
198   * @returns {Promise<number>} - Today's total spend in USD
199   */
/**
 * Total LLM spend logged today (server CURRENT_DATE).
 *
 * @returns {Promise<number>} - Today's total spend in USD (0 when none)
 */
export async function getDailySpend() {
  const row = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total
     FROM tel.llm_usage
     WHERE created_at::date = CURRENT_DATE`
  );
  // Numeric aggregate arrives as a string; convert before returning.
  return parseFloat(row.total);
}
209  
210  /**
211   * Get total LLM spend for the current hour (UTC)
212   * @returns {Promise<number>} - Current hour's total spend in USD
213   */
/**
 * Total LLM spend logged over the trailing hour (rolling window, not
 * the clock hour).
 *
 * @returns {Promise<number>} - Trailing hour's total spend in USD (0 when none)
 */
export async function getHourlySpend() {
  const row = await getOne(
    `SELECT COALESCE(SUM(estimated_cost), 0) AS total
     FROM tel.llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'`
  );
  // Numeric aggregate arrives as a string; convert before returning.
  return parseFloat(row.total);
}