llm-usage-tracker.test.js
1 /** 2 * Tests for src/utils/llm-usage-tracker.js 3 * 4 * Covers: 5 * - calculateCost() — known models, unknown models, edge cases 6 * - logLLMUsage() — inserts row, computes cost correctly 7 * - getSiteCost() — sums per site 8 * - getCostByStage() — group-by aggregation 9 * - getDailySpend() — today filter 10 * - getHourlySpend() — 1-hour window 11 * - checkBudgetVariance() — no budget table → empty; with table → alerts 12 * 13 * Uses pg-mock / better-sqlite3 in-memory DB. 14 */ 15 16 import { describe, test, mock, beforeEach } from 'node:test'; 17 import assert from 'node:assert/strict'; 18 import Database from 'better-sqlite3'; 19 import { createPgMock } from '../helpers/pg-mock.js'; 20 21 // ── DB setup ────────────────────────────────────────────────────────────────── 22 23 const db = new Database(':memory:'); 24 db.exec(` 25 CREATE TABLE IF NOT EXISTS llm_usage ( 26 id INTEGER PRIMARY KEY AUTOINCREMENT, 27 site_id INTEGER, 28 stage TEXT NOT NULL, 29 provider TEXT NOT NULL, 30 model TEXT NOT NULL, 31 prompt_tokens INTEGER NOT NULL, 32 completion_tokens INTEGER NOT NULL, 33 total_tokens INTEGER NOT NULL, 34 cost_usd REAL, 35 estimated_cost REAL, 36 request_id TEXT, 37 created_at DATETIME DEFAULT (datetime('now')) 38 ); 39 40 -- information_schema.tables stub for checkBudgetVariance 41 CREATE TABLE IF NOT EXISTS information_schema_tables ( 42 table_schema TEXT, 43 table_name TEXT 44 ); 45 46 CREATE TABLE IF NOT EXISTS llm_cost_budgets ( 47 id INTEGER PRIMARY KEY AUTOINCREMENT, 48 call_type TEXT NOT NULL UNIQUE, 49 expected_cost_per_call REAL NOT NULL, 50 max_cost_per_call REAL NOT NULL, 51 expected_model TEXT NOT NULL 52 ); 53 `); 54 55 // Patch pg-mock's getOne so `information_schema.tables` queries work in SQLite. 56 // The real query is: 57 // SELECT table_name FROM information_schema.tables 58 // WHERE table_schema='ops' AND table_name='llm_cost_budgets' 59 // We rewrite it to query our stub table instead. 60 const pgMock = createPgMock(db); 61 const origGetOne = pgMock.getOne.bind(pgMock); 62 pgMock.getOne = async (sql, params = []) => { 63 if (/information_schema/i.test(sql)) { 64 // Return truthy when we want the budget table to "exist" 65 const row = db.prepare('SELECT * FROM information_schema_tables WHERE table_name = ?') 66 .get('llm_cost_budgets'); 67 return row || null; 68 } 69 return origGetOne(sql, params); 70 }; 71 72 mock.module('../../src/utils/db.js', { namedExports: pgMock }); 73 mock.module('../../src/utils/logger.js', { 74 defaultExport: class { info(){} warn(){} error(){} debug(){} success(){} }, 75 }); 76 mock.module('../../src/utils/load-env.js', { defaultExport: {} }); 77 78 const { 79 calculateCost, 80 logLLMUsage, 81 getSiteCost, 82 getCostByStage, 83 getDailySpend, 84 getHourlySpend, 85 checkBudgetVariance, 86 } = await import('../../src/utils/llm-usage-tracker.js'); 87 88 // ── helpers ─────────────────────────────────────────────────────────────────── 89 90 function clearUsage() { 91 db.exec('DELETE FROM llm_usage'); 92 } 93 94 /** 95 * Format a Date as SQLite-compatible datetime string (YYYY-MM-DD HH:MM:SS). 96 * This ensures comparisons against datetime('now') and date('now') work correctly, 97 * since SQLite string-compares datetimes and 'T' > ' ' breaks ISO 8601 ordering. 98 */ 99 function toSqliteDate(d = new Date()) { 100 return d.toISOString().replace('T', ' ').replace('Z', ''); 101 } 102 103 function insertUsage(overrides = {}) { 104 const defaults = { 105 site_id: 1, 106 stage: 'scoring', 107 provider: 'openrouter', 108 model: 'openai/gpt-4o-mini', 109 prompt_tokens: 1000, 110 completion_tokens: 200, 111 total_tokens: 1200, 112 estimated_cost: 0.000270, 113 request_id: null, 114 created_at: toSqliteDate(), 115 }; 116 const d = { ...defaults, ...overrides }; 117 // Allow caller to pass a Date object for created_at 118 const createdAt = d.created_at instanceof Date ? toSqliteDate(d.created_at) : d.created_at; 119 db.prepare( 120 `INSERT INTO llm_usage (site_id, stage, provider, model, 121 prompt_tokens, completion_tokens, total_tokens, estimated_cost, request_id, created_at) 122 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` 123 ).run(d.site_id, d.stage, d.provider, d.model, 124 d.prompt_tokens, d.completion_tokens, d.total_tokens, 125 d.estimated_cost, d.request_id, createdAt); 126 } 127 128 // ═══════════════════════════════════════════════════════════════════════════════ 129 // calculateCost 130 // ═══════════════════════════════════════════════════════════════════════════════ 131 132 describe('calculateCost', () => { 133 test('calculates cost for openai/gpt-4o-mini correctly', () => { 134 // $0.15/M input, $0.60/M output 135 // 1000 prompt + 500 completion 136 const cost = calculateCost('openai/gpt-4o-mini', 1_000, 500); 137 // 1000 * 0.15/1e6 + 500 * 0.60/1e6 = 0.000150 + 0.000300 = 0.000450 138 assert.equal(cost, 0.000450); 139 }); 140 141 test('calculates cost for anthropic/claude-opus-4.6', () => { 142 // $5.0/M input, $25.0/M output 143 const cost = calculateCost('anthropic/claude-opus-4.6', 2_000, 1_000); 144 // 2000 * 5.0/1e6 + 1000 * 25.0/1e6 = 0.010000 + 0.025000 = 0.035000 145 assert.equal(cost, 0.035000); 146 }); 147 148 test('uses default pricing for unknown model and returns string from toFixed', () => { 149 // Unknown model uses: (promptTokens * 0.5 + completionTokens * 1.5) / 1_000_000 150 const cost = calculateCost('unknown/model-xyz', 1_000_000, 0); 151 // 1000000 * 0.5 / 1000000 = 0.5 152 assert.equal(parseFloat(cost), 0.5); 153 }); 154 155 test('returns 0 cost for zero tokens', () => { 156 const cost = calculateCost('openai/gpt-4o-mini', 0, 0); 157 assert.equal(cost, 0); 158 }); 159 160 test('calculates cost for claude-3-opus-20240229', () => { 161 // $15/M input, $75/M output 162 const cost = calculateCost('claude-3-opus-20240229', 100_000, 10_000); 163 // 100000 * 15/1e6 + 10000 * 75/1e6 = 1.5 + 0.75 = 2.25 164 assert.equal(cost, 2.25); 165 }); 166 167 test('calculates cost for google/gemini-2.5-flash', () => { 168 // $0.3/M input, $2.5/M output 169 const cost = calculateCost('google/gemini-2.5-flash', 500_000, 100_000); 170 // 500000 * 0.3/1e6 + 100000 * 2.5/1e6 = 0.15 + 0.25 = 0.4 171 assert.equal(cost, 0.4); 172 }); 173 174 test('calculates cost for deepseek/deepseek-v3.2', () => { 175 // $0.24/M input, $0.38/M output 176 const cost = calculateCost('deepseek/deepseek-v3.2', 1_000_000, 1_000_000); 177 // 1 * 0.24 + 1 * 0.38 = 0.62 178 assert.equal(cost, 0.62); 179 }); 180 181 test('calculates cost for openai/gpt-4o', () => { 182 // $2.5/M input, $10/M output 183 const cost = calculateCost('openai/gpt-4o', 2_000, 500); 184 // 2000 * 2.5/1e6 + 500 * 10/1e6 = 0.005 + 0.005 = 0.01 185 assert.equal(cost, 0.01); 186 }); 187 188 test('result is a number with up to 6 decimal places', () => { 189 const cost = calculateCost('anthropic/claude-3.5-haiku', 123, 456); 190 assert.equal(typeof cost, 'number'); 191 // Verify max 6 decimal places 192 const decimals = cost.toString().split('.')[1]?.length ?? 0; 193 assert.ok(decimals <= 6, `Expected ≤6 decimals, got ${decimals}`); 194 }); 195 }); 196 197 // ═══════════════════════════════════════════════════════════════════════════════ 198 // logLLMUsage 199 // ═══════════════════════════════════════════════════════════════════════════════ 200 201 describe('logLLMUsage', () => { 202 beforeEach(() => clearUsage()); 203 204 test('inserts a row into llm_usage', async () => { 205 await logLLMUsage({ 206 siteId: 42, 207 stage: 'scoring', 208 provider: 'openrouter', 209 model: 'openai/gpt-4o-mini', 210 promptTokens: 1000, 211 completionTokens: 200, 212 }); 213 214 const row = db.prepare('SELECT * FROM llm_usage WHERE site_id = 42').get(); 215 assert.ok(row, 'Row should exist'); 216 assert.equal(row.stage, 'scoring'); 217 assert.equal(row.provider, 'openrouter'); 218 assert.equal(row.model, 'openai/gpt-4o-mini'); 219 assert.equal(row.prompt_tokens, 1000); 220 assert.equal(row.completion_tokens, 200); 221 assert.equal(row.total_tokens, 1200); 222 }); 223 224 test('computes estimated_cost from calculateCost when no costOverride', async () => { 225 await logLLMUsage({ 226 siteId: 1, 227 stage: 'proposals', 228 provider: 'openrouter', 229 model: 'openai/gpt-4o-mini', 230 promptTokens: 1_000_000, 231 completionTokens: 0, 232 }); 233 234 const row = db.prepare('SELECT estimated_cost FROM llm_usage LIMIT 1').get(); 235 // 1M input * $0.15/M = 0.15 236 assert.equal(parseFloat(row.estimated_cost), 0.15); 237 }); 238 239 test('uses costOverride when provided', async () => { 240 await logLLMUsage({ 241 siteId: 1, 242 stage: 'enrichment', 243 provider: 'anthropic', 244 model: 'claude-3-5-sonnet-20241022', 245 promptTokens: 500, 246 completionTokens: 100, 247 costOverride: 0.999, 248 }); 249 250 const row = db.prepare('SELECT estimated_cost FROM llm_usage LIMIT 1').get(); 251 assert.equal(parseFloat(row.estimated_cost), 0.999); 252 }); 253 254 test('stores null siteId when not provided', async () => { 255 await logLLMUsage({ 256 stage: 'other', 257 provider: 'openrouter', 258 model: 'openai/gpt-4o-mini', 259 promptTokens: 100, 260 completionTokens: 50, 261 }); 262 263 const row = db.prepare('SELECT * FROM llm_usage LIMIT 1').get(); 264 assert.equal(row.site_id, null); 265 }); 266 267 test('stores requestId when provided', async () => { 268 await logLLMUsage({ 269 siteId: 5, 270 stage: 'replies', 271 provider: 'openrouter', 272 model: 'anthropic/claude-sonnet-4.6', 273 promptTokens: 200, 274 completionTokens: 100, 275 requestId: 'req-abc-123', 276 }); 277 278 const row = db.prepare('SELECT request_id FROM llm_usage WHERE site_id = 5').get(); 279 assert.equal(row.request_id, 'req-abc-123'); 280 }); 281 282 test('total_tokens equals promptTokens + completionTokens', async () => { 283 await logLLMUsage({ 284 siteId: 10, 285 stage: 'scoring', 286 provider: 'openrouter', 287 model: 'openai/gpt-4o-mini', 288 promptTokens: 3000, 289 completionTokens: 700, 290 }); 291 292 const row = db.prepare('SELECT total_tokens FROM llm_usage WHERE site_id = 10').get(); 293 assert.equal(row.total_tokens, 3700); 294 }); 295 }); 296 297 // ═══════════════════════════════════════════════════════════════════════════════ 298 // getSiteCost 299 // ═══════════════════════════════════════════════════════════════════════════════ 300 301 describe('getSiteCost', () => { 302 beforeEach(() => clearUsage()); 303 304 test('returns 0 for a site with no usage', async () => { 305 const cost = await getSiteCost(999); 306 assert.equal(cost, 0); 307 }); 308 309 test('returns sum of costs for a specific site', async () => { 310 insertUsage({ site_id: 1, estimated_cost: 0.01 }); 311 insertUsage({ site_id: 1, estimated_cost: 0.02 }); 312 insertUsage({ site_id: 2, estimated_cost: 0.50 }); 313 314 const cost = await getSiteCost(1); 315 assert.ok(Math.abs(cost - 0.03) < 0.000001, `Expected ~0.03, got ${cost}`); 316 }); 317 318 test('does not include other sites in sum', async () => { 319 insertUsage({ site_id: 10, estimated_cost: 1.00 }); 320 insertUsage({ site_id: 20, estimated_cost: 2.00 }); 321 322 const cost = await getSiteCost(10); 323 assert.ok(Math.abs(cost - 1.00) < 0.000001, `Expected 1.00, got ${cost}`); 324 }); 325 326 test('returns a number (not a string)', async () => { 327 insertUsage({ site_id: 7, estimated_cost: 0.005 }); 328 const cost = await getSiteCost(7); 329 assert.equal(typeof cost, 'number'); 330 }); 331 }); 332 333 // ═══════════════════════════════════════════════════════════════════════════════ 334 // getCostByStage 335 // ═══════════════════════════════════════════════════════════════════════════════ 336 337 describe('getCostByStage', () => { 338 beforeEach(() => clearUsage()); 339 340 test('returns empty array when no usage exists', async () => { 341 const result = await getCostByStage(); 342 assert.deepEqual(result, []); 343 }); 344 345 test('groups costs by stage', async () => { 346 insertUsage({ stage: 'scoring', estimated_cost: 0.10, total_tokens: 100 }); 347 insertUsage({ stage: 'scoring', estimated_cost: 0.20, total_tokens: 200 }); 348 insertUsage({ stage: 'proposals', estimated_cost: 0.50, total_tokens: 500 }); 349 350 const result = await getCostByStage(); 351 assert.equal(result.length, 2); 352 353 const scoring = result.find(r => r.stage === 'scoring'); 354 assert.ok(scoring, 'should have scoring stage'); 355 assert.equal(Number(scoring.request_count), 2); 356 assert.ok(Math.abs(Number(scoring.total_tokens) - 300) < 1); 357 358 const proposals = result.find(r => r.stage === 'proposals'); 359 assert.ok(proposals, 'should have proposals stage'); 360 assert.equal(Number(proposals.request_count), 1); 361 }); 362 363 test('orders by total_cost descending', async () => { 364 insertUsage({ stage: 'enrichment', estimated_cost: 0.01, total_tokens: 10 }); 365 insertUsage({ stage: 'scoring', estimated_cost: 5.00, total_tokens: 1000 }); 366 insertUsage({ stage: 'proposals', estimated_cost: 1.00, total_tokens: 500 }); 367 368 const result = await getCostByStage(); 369 assert.equal(result[0].stage, 'scoring'); 370 assert.equal(result[1].stage, 'proposals'); 371 assert.equal(result[2].stage, 'enrichment'); 372 }); 373 }); 374 375 // ═══════════════════════════════════════════════════════════════════════════════ 376 // getDailySpend 377 // ═══════════════════════════════════════════════════════════════════════════════ 378 379 describe('getDailySpend', () => { 380 beforeEach(() => clearUsage()); 381 382 test('returns 0 when no usage today', async () => { 383 const spend = await getDailySpend(); 384 assert.equal(spend, 0); 385 }); 386 387 test('sums only today\'s records (created_at = current date)', async () => { 388 // pg-mock translates created_at::date = CURRENT_DATE → created_at = date('now') 389 // So we store created_at as a bare date string to match this comparison. 390 const today = new Date().toISOString().slice(0, 10); // 'YYYY-MM-DD' 391 const yesterday = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString().slice(0, 10); 392 insertUsage({ estimated_cost: 0.05, created_at: today }); 393 insertUsage({ estimated_cost: 0.10, created_at: today }); 394 insertUsage({ estimated_cost: 1.00, created_at: yesterday }); 395 396 const spend = await getDailySpend(); 397 assert.ok(Math.abs(spend - 0.15) < 0.0001, `Expected ~0.15, got ${spend}`); 398 }); 399 400 test('returns a number', async () => { 401 const spend = await getDailySpend(); 402 assert.equal(typeof spend, 'number'); 403 }); 404 }); 405 406 // ═══════════════════════════════════════════════════════════════════════════════ 407 // getHourlySpend 408 // ═══════════════════════════════════════════════════════════════════════════════ 409 410 describe('getHourlySpend', () => { 411 beforeEach(() => clearUsage()); 412 413 test('returns 0 when no usage in last hour', async () => { 414 const spend = await getHourlySpend(); 415 assert.equal(spend, 0); 416 }); 417 418 test('sums only records in the last hour', async () => { 419 // Within last hour 420 insertUsage({ estimated_cost: 0.03 }); 421 insertUsage({ estimated_cost: 0.07 }); 422 // Older than 1 hour 423 const twoHoursAgo = toSqliteDate(new Date(Date.now() - 2 * 60 * 60 * 1000)); 424 insertUsage({ estimated_cost: 0.50, created_at: twoHoursAgo }); 425 426 const spend = await getHourlySpend(); 427 assert.ok(Math.abs(spend - 0.10) < 0.0001, `Expected ~0.10, got ${spend}`); 428 }); 429 430 test('returns a number', async () => { 431 const spend = await getHourlySpend(); 432 assert.equal(typeof spend, 'number'); 433 }); 434 }); 435 436 // ═══════════════════════════════════════════════════════════════════════════════ 437 // checkBudgetVariance 438 // ═══════════════════════════════════════════════════════════════════════════════ 439 440 describe('checkBudgetVariance', () => { 441 beforeEach(() => { 442 clearUsage(); 443 db.exec('DELETE FROM llm_cost_budgets; DELETE FROM information_schema_tables'); 444 }); 445 446 test('returns empty array when budget table does not exist', async () => { 447 // information_schema_tables is empty → tableExists returns null → returns [] 448 const alerts = await checkBudgetVariance(); 449 assert.deepEqual(alerts, []); 450 }); 451 452 test('returns empty array when budget table exists but no usage in last hour', async () => { 453 // Mark budget table as existing 454 db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)') 455 .run('ops', 'llm_cost_budgets'); 456 457 const alerts = await checkBudgetVariance(); 458 assert.deepEqual(alerts, []); 459 }); 460 461 test('returns cost_variance alert when avg_cost exceeds max_cost_per_call', async () => { 462 db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)') 463 .run('ops', 'llm_cost_budgets'); 464 465 db.prepare( 466 `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model) 467 VALUES (?, ?, ?, ?)` 468 ).run('scoring', 0.001, 0.005, 'openai/gpt-4o-mini'); 469 470 // Insert usage in the last hour with cost above max 471 insertUsage({ 472 stage: 'scoring', 473 model: 'openai/gpt-4o-mini', 474 estimated_cost: 0.010, // > max 0.005 475 created_at: toSqliteDate(), 476 }); 477 478 const alerts = await checkBudgetVariance(); 479 const costAlert = alerts.find(a => a.type === 'cost_variance'); 480 assert.ok(costAlert, 'Should have a cost_variance alert'); 481 assert.equal(costAlert.stage, 'scoring'); 482 assert.equal(costAlert.maxCost, 0.005); 483 }); 484 485 test('returns model_mismatch alert when model differs from expected', async () => { 486 db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)') 487 .run('ops', 'llm_cost_budgets'); 488 489 db.prepare( 490 `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model) 491 VALUES (?, ?, ?, ?)` 492 ).run('proposals', 0.01, 0.10, 'openai/gpt-4o-mini'); 493 494 // Insert usage with a different model 495 insertUsage({ 496 stage: 'proposals', 497 model: 'anthropic/claude-opus-4.6', // different from expected 498 estimated_cost: 0.001, // within budget 499 created_at: toSqliteDate(), 500 }); 501 502 const alerts = await checkBudgetVariance(); 503 const mismatch = alerts.find(a => a.type === 'model_mismatch'); 504 assert.ok(mismatch, 'Should have a model_mismatch alert'); 505 assert.equal(mismatch.stage, 'proposals'); 506 assert.equal(mismatch.actualModel, 'anthropic/claude-opus-4.6'); 507 assert.equal(mismatch.expectedModel, 'openai/gpt-4o-mini'); 508 }); 509 510 test('returns no alerts when cost is within budget and model matches', async () => { 511 db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)') 512 .run('ops', 'llm_cost_budgets'); 513 514 db.prepare( 515 `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model) 516 VALUES (?, ?, ?, ?)` 517 ).run('enrichment', 0.001, 0.010, 'openai/gpt-4o-mini'); 518 519 insertUsage({ 520 stage: 'enrichment', 521 model: 'openai/gpt-4o-mini', 522 estimated_cost: 0.002, // within budget 523 created_at: toSqliteDate(), 524 }); 525 526 const alerts = await checkBudgetVariance(); 527 assert.equal(alerts.length, 0); 528 }); 529 530 test('skips stages with no matching budget entry', async () => { 531 db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)') 532 .run('ops', 'llm_cost_budgets'); 533 534 // Budget only for 'scoring', but usage is for 'replies' 535 db.prepare( 536 `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model) 537 VALUES (?, ?, ?, ?)` 538 ).run('scoring', 0.001, 0.005, 'openai/gpt-4o-mini'); 539 540 insertUsage({ 541 stage: 'replies', 542 model: 'anthropic/claude-opus-4.6', 543 estimated_cost: 9.99, 544 created_at: toSqliteDate(), 545 }); 546 547 const alerts = await checkBudgetVariance(); 548 assert.equal(alerts.length, 0, 'No alerts for stages without a budget entry'); 549 }); 550 });