Cradicle Explorer

/ tests / utils / llm-usage-tracker.test.js
llm-usage-tracker.test.js
  1  /**
  2   * Tests for src/utils/llm-usage-tracker.js
  3   *
  4   * Covers:
  5   *   - calculateCost() — known models, unknown models, edge cases
  6   *   - logLLMUsage()   — inserts row, computes cost correctly
  7   *   - getSiteCost()   — sums per site
  8   *   - getCostByStage() — group-by aggregation
  9   *   - getDailySpend() — today filter
 10   *   - getHourlySpend() — 1-hour window
 11   *   - checkBudgetVariance() — no budget table → empty; with table → alerts
 12   *
 13   * Uses pg-mock / better-sqlite3 in-memory DB.
 14   */
 15  
 16  import { describe, test, mock, beforeEach } from 'node:test';
 17  import assert from 'node:assert/strict';
 18  import Database from 'better-sqlite3';
 19  import { createPgMock } from '../helpers/pg-mock.js';
 20  
 21  // ── DB setup ──────────────────────────────────────────────────────────────────
 22  
 23  const db = new Database(':memory:');
 24  db.exec(`
 25    CREATE TABLE IF NOT EXISTS llm_usage (
 26      id INTEGER PRIMARY KEY AUTOINCREMENT,
 27      site_id INTEGER,
 28      stage TEXT NOT NULL,
 29      provider TEXT NOT NULL,
 30      model TEXT NOT NULL,
 31      prompt_tokens INTEGER NOT NULL,
 32      completion_tokens INTEGER NOT NULL,
 33      total_tokens INTEGER NOT NULL,
 34      cost_usd REAL,
 35      estimated_cost REAL,
 36      request_id TEXT,
 37      created_at DATETIME DEFAULT (datetime('now'))
 38    );
 39  
 40    -- information_schema.tables stub for checkBudgetVariance
 41    CREATE TABLE IF NOT EXISTS information_schema_tables (
 42      table_schema TEXT,
 43      table_name TEXT
 44    );
 45  
 46    CREATE TABLE IF NOT EXISTS llm_cost_budgets (
 47      id INTEGER PRIMARY KEY AUTOINCREMENT,
 48      call_type TEXT NOT NULL UNIQUE,
 49      expected_cost_per_call REAL NOT NULL,
 50      max_cost_per_call REAL NOT NULL,
 51      expected_model TEXT NOT NULL
 52    );
 53  `);
 54  
 55  // Patch pg-mock's getOne so `information_schema.tables` queries work in SQLite.
 56  // The real query is:
 57  //   SELECT table_name FROM information_schema.tables
 58  //   WHERE table_schema='ops' AND table_name='llm_cost_budgets'
 59  // We rewrite it to query our stub table instead.
 60  const pgMock = createPgMock(db);
 61  const origGetOne = pgMock.getOne.bind(pgMock);
 62  pgMock.getOne = async (sql, params = []) => {
 63    if (/information_schema/i.test(sql)) {
 64      // Return truthy when we want the budget table to "exist"
 65      const row = db.prepare('SELECT * FROM information_schema_tables WHERE table_name = ?')
 66        .get('llm_cost_budgets');
 67      return row || null;
 68    }
 69    return origGetOne(sql, params);
 70  };
 71  
 72  mock.module('../../src/utils/db.js', { namedExports: pgMock });
 73  mock.module('../../src/utils/logger.js', {
 74    defaultExport: class { info(){} warn(){} error(){} debug(){} success(){} },
 75  });
 76  mock.module('../../src/utils/load-env.js', { defaultExport: {} });
 77  
 78  const {
 79    calculateCost,
 80    logLLMUsage,
 81    getSiteCost,
 82    getCostByStage,
 83    getDailySpend,
 84    getHourlySpend,
 85    checkBudgetVariance,
 86  } = await import('../../src/utils/llm-usage-tracker.js');
 87  
 88  // ── helpers ───────────────────────────────────────────────────────────────────
 89  
 90  function clearUsage() {
 91    db.exec('DELETE FROM llm_usage');
 92  }
 93  
 94  /**
 95   * Format a Date as SQLite-compatible datetime string (YYYY-MM-DD HH:MM:SS).
 96   * This ensures comparisons against datetime('now') and date('now') work correctly,
 97   * since SQLite string-compares datetimes and 'T' > ' ' breaks ISO 8601 ordering.
 98   */
 99  function toSqliteDate(d = new Date()) {
100    return d.toISOString().replace('T', ' ').replace('Z', '');
101  }
102  
103  function insertUsage(overrides = {}) {
104    const defaults = {
105      site_id: 1,
106      stage: 'scoring',
107      provider: 'openrouter',
108      model: 'openai/gpt-4o-mini',
109      prompt_tokens: 1000,
110      completion_tokens: 200,
111      total_tokens: 1200,
112      estimated_cost: 0.000270,
113      request_id: null,
114      created_at: toSqliteDate(),
115    };
116    const d = { ...defaults, ...overrides };
117    // Allow caller to pass a Date object for created_at
118    const createdAt = d.created_at instanceof Date ? toSqliteDate(d.created_at) : d.created_at;
119    db.prepare(
120      `INSERT INTO llm_usage (site_id, stage, provider, model,
121        prompt_tokens, completion_tokens, total_tokens, estimated_cost, request_id, created_at)
122       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
123    ).run(d.site_id, d.stage, d.provider, d.model,
124      d.prompt_tokens, d.completion_tokens, d.total_tokens,
125      d.estimated_cost, d.request_id, createdAt);
126  }
127  
128  // ═══════════════════════════════════════════════════════════════════════════════
129  // calculateCost
130  // ═══════════════════════════════════════════════════════════════════════════════
131  
132  describe('calculateCost', () => {
133    test('calculates cost for openai/gpt-4o-mini correctly', () => {
134      // $0.15/M input, $0.60/M output
135      // 1000 prompt + 500 completion
136      const cost = calculateCost('openai/gpt-4o-mini', 1_000, 500);
137      // 1000 * 0.15/1e6 + 500 * 0.60/1e6 = 0.000150 + 0.000300 = 0.000450
138      assert.equal(cost, 0.000450);
139    });
140  
141    test('calculates cost for anthropic/claude-opus-4.6', () => {
142      // $5.0/M input, $25.0/M output
143      const cost = calculateCost('anthropic/claude-opus-4.6', 2_000, 1_000);
144      // 2000 * 5.0/1e6 + 1000 * 25.0/1e6 = 0.010000 + 0.025000 = 0.035000
145      assert.equal(cost, 0.035000);
146    });
147  
148    test('uses default pricing for unknown model and returns string from toFixed', () => {
149      // Unknown model uses: (promptTokens * 0.5 + completionTokens * 1.5) / 1_000_000
150      const cost = calculateCost('unknown/model-xyz', 1_000_000, 0);
151      // 1000000 * 0.5 / 1000000 = 0.5
152      assert.equal(parseFloat(cost), 0.5);
153    });
154  
155    test('returns 0 cost for zero tokens', () => {
156      const cost = calculateCost('openai/gpt-4o-mini', 0, 0);
157      assert.equal(cost, 0);
158    });
159  
160    test('calculates cost for claude-3-opus-20240229', () => {
161      // $15/M input, $75/M output
162      const cost = calculateCost('claude-3-opus-20240229', 100_000, 10_000);
163      // 100000 * 15/1e6 + 10000 * 75/1e6 = 1.5 + 0.75 = 2.25
164      assert.equal(cost, 2.25);
165    });
166  
167    test('calculates cost for google/gemini-2.5-flash', () => {
168      // $0.3/M input, $2.5/M output
169      const cost = calculateCost('google/gemini-2.5-flash', 500_000, 100_000);
170      // 500000 * 0.3/1e6 + 100000 * 2.5/1e6 = 0.15 + 0.25 = 0.4
171      assert.equal(cost, 0.4);
172    });
173  
174    test('calculates cost for deepseek/deepseek-v3.2', () => {
175      // $0.24/M input, $0.38/M output
176      const cost = calculateCost('deepseek/deepseek-v3.2', 1_000_000, 1_000_000);
177      // 1 * 0.24 + 1 * 0.38 = 0.62
178      assert.equal(cost, 0.62);
179    });
180  
181    test('calculates cost for openai/gpt-4o', () => {
182      // $2.5/M input, $10/M output
183      const cost = calculateCost('openai/gpt-4o', 2_000, 500);
184      // 2000 * 2.5/1e6 + 500 * 10/1e6 = 0.005 + 0.005 = 0.01
185      assert.equal(cost, 0.01);
186    });
187  
188    test('result is a number with up to 6 decimal places', () => {
189      const cost = calculateCost('anthropic/claude-3.5-haiku', 123, 456);
190      assert.equal(typeof cost, 'number');
191      // Verify max 6 decimal places
192      const decimals = cost.toString().split('.')[1]?.length ?? 0;
193      assert.ok(decimals <= 6, `Expected ≤6 decimals, got ${decimals}`);
194    });
195  });
196  
197  // ═══════════════════════════════════════════════════════════════════════════════
198  // logLLMUsage
199  // ═══════════════════════════════════════════════════════════════════════════════
200  
201  describe('logLLMUsage', () => {
202    beforeEach(() => clearUsage());
203  
204    test('inserts a row into llm_usage', async () => {
205      await logLLMUsage({
206        siteId: 42,
207        stage: 'scoring',
208        provider: 'openrouter',
209        model: 'openai/gpt-4o-mini',
210        promptTokens: 1000,
211        completionTokens: 200,
212      });
213  
214      const row = db.prepare('SELECT * FROM llm_usage WHERE site_id = 42').get();
215      assert.ok(row, 'Row should exist');
216      assert.equal(row.stage, 'scoring');
217      assert.equal(row.provider, 'openrouter');
218      assert.equal(row.model, 'openai/gpt-4o-mini');
219      assert.equal(row.prompt_tokens, 1000);
220      assert.equal(row.completion_tokens, 200);
221      assert.equal(row.total_tokens, 1200);
222    });
223  
224    test('computes estimated_cost from calculateCost when no costOverride', async () => {
225      await logLLMUsage({
226        siteId: 1,
227        stage: 'proposals',
228        provider: 'openrouter',
229        model: 'openai/gpt-4o-mini',
230        promptTokens: 1_000_000,
231        completionTokens: 0,
232      });
233  
234      const row = db.prepare('SELECT estimated_cost FROM llm_usage LIMIT 1').get();
235      // 1M input * $0.15/M = 0.15
236      assert.equal(parseFloat(row.estimated_cost), 0.15);
237    });
238  
239    test('uses costOverride when provided', async () => {
240      await logLLMUsage({
241        siteId: 1,
242        stage: 'enrichment',
243        provider: 'anthropic',
244        model: 'claude-3-5-sonnet-20241022',
245        promptTokens: 500,
246        completionTokens: 100,
247        costOverride: 0.999,
248      });
249  
250      const row = db.prepare('SELECT estimated_cost FROM llm_usage LIMIT 1').get();
251      assert.equal(parseFloat(row.estimated_cost), 0.999);
252    });
253  
254    test('stores null siteId when not provided', async () => {
255      await logLLMUsage({
256        stage: 'other',
257        provider: 'openrouter',
258        model: 'openai/gpt-4o-mini',
259        promptTokens: 100,
260        completionTokens: 50,
261      });
262  
263      const row = db.prepare('SELECT * FROM llm_usage LIMIT 1').get();
264      assert.equal(row.site_id, null);
265    });
266  
267    test('stores requestId when provided', async () => {
268      await logLLMUsage({
269        siteId: 5,
270        stage: 'replies',
271        provider: 'openrouter',
272        model: 'anthropic/claude-sonnet-4.6',
273        promptTokens: 200,
274        completionTokens: 100,
275        requestId: 'req-abc-123',
276      });
277  
278      const row = db.prepare('SELECT request_id FROM llm_usage WHERE site_id = 5').get();
279      assert.equal(row.request_id, 'req-abc-123');
280    });
281  
282    test('total_tokens equals promptTokens + completionTokens', async () => {
283      await logLLMUsage({
284        siteId: 10,
285        stage: 'scoring',
286        provider: 'openrouter',
287        model: 'openai/gpt-4o-mini',
288        promptTokens: 3000,
289        completionTokens: 700,
290      });
291  
292      const row = db.prepare('SELECT total_tokens FROM llm_usage WHERE site_id = 10').get();
293      assert.equal(row.total_tokens, 3700);
294    });
295  });
296  
297  // ═══════════════════════════════════════════════════════════════════════════════
298  // getSiteCost
299  // ═══════════════════════════════════════════════════════════════════════════════
300  
301  describe('getSiteCost', () => {
302    beforeEach(() => clearUsage());
303  
304    test('returns 0 for a site with no usage', async () => {
305      const cost = await getSiteCost(999);
306      assert.equal(cost, 0);
307    });
308  
309    test('returns sum of costs for a specific site', async () => {
310      insertUsage({ site_id: 1, estimated_cost: 0.01 });
311      insertUsage({ site_id: 1, estimated_cost: 0.02 });
312      insertUsage({ site_id: 2, estimated_cost: 0.50 });
313  
314      const cost = await getSiteCost(1);
315      assert.ok(Math.abs(cost - 0.03) < 0.000001, `Expected ~0.03, got ${cost}`);
316    });
317  
318    test('does not include other sites in sum', async () => {
319      insertUsage({ site_id: 10, estimated_cost: 1.00 });
320      insertUsage({ site_id: 20, estimated_cost: 2.00 });
321  
322      const cost = await getSiteCost(10);
323      assert.ok(Math.abs(cost - 1.00) < 0.000001, `Expected 1.00, got ${cost}`);
324    });
325  
326    test('returns a number (not a string)', async () => {
327      insertUsage({ site_id: 7, estimated_cost: 0.005 });
328      const cost = await getSiteCost(7);
329      assert.equal(typeof cost, 'number');
330    });
331  });
332  
333  // ═══════════════════════════════════════════════════════════════════════════════
334  // getCostByStage
335  // ═══════════════════════════════════════════════════════════════════════════════
336  
337  describe('getCostByStage', () => {
338    beforeEach(() => clearUsage());
339  
340    test('returns empty array when no usage exists', async () => {
341      const result = await getCostByStage();
342      assert.deepEqual(result, []);
343    });
344  
345    test('groups costs by stage', async () => {
346      insertUsage({ stage: 'scoring', estimated_cost: 0.10, total_tokens: 100 });
347      insertUsage({ stage: 'scoring', estimated_cost: 0.20, total_tokens: 200 });
348      insertUsage({ stage: 'proposals', estimated_cost: 0.50, total_tokens: 500 });
349  
350      const result = await getCostByStage();
351      assert.equal(result.length, 2);
352  
353      const scoring = result.find(r => r.stage === 'scoring');
354      assert.ok(scoring, 'should have scoring stage');
355      assert.equal(Number(scoring.request_count), 2);
356      assert.ok(Math.abs(Number(scoring.total_tokens) - 300) < 1);
357  
358      const proposals = result.find(r => r.stage === 'proposals');
359      assert.ok(proposals, 'should have proposals stage');
360      assert.equal(Number(proposals.request_count), 1);
361    });
362  
363    test('orders by total_cost descending', async () => {
364      insertUsage({ stage: 'enrichment', estimated_cost: 0.01, total_tokens: 10 });
365      insertUsage({ stage: 'scoring', estimated_cost: 5.00, total_tokens: 1000 });
366      insertUsage({ stage: 'proposals', estimated_cost: 1.00, total_tokens: 500 });
367  
368      const result = await getCostByStage();
369      assert.equal(result[0].stage, 'scoring');
370      assert.equal(result[1].stage, 'proposals');
371      assert.equal(result[2].stage, 'enrichment');
372    });
373  });
374  
375  // ═══════════════════════════════════════════════════════════════════════════════
376  // getDailySpend
377  // ═══════════════════════════════════════════════════════════════════════════════
378  
379  describe('getDailySpend', () => {
380    beforeEach(() => clearUsage());
381  
382    test('returns 0 when no usage today', async () => {
383      const spend = await getDailySpend();
384      assert.equal(spend, 0);
385    });
386  
387    test('sums only today\'s records (created_at = current date)', async () => {
388      // pg-mock translates created_at::date = CURRENT_DATE → created_at = date('now')
389      // So we store created_at as a bare date string to match this comparison.
390      const today = new Date().toISOString().slice(0, 10);        // 'YYYY-MM-DD'
391      const yesterday = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString().slice(0, 10);
392      insertUsage({ estimated_cost: 0.05, created_at: today });
393      insertUsage({ estimated_cost: 0.10, created_at: today });
394      insertUsage({ estimated_cost: 1.00, created_at: yesterday });
395  
396      const spend = await getDailySpend();
397      assert.ok(Math.abs(spend - 0.15) < 0.0001, `Expected ~0.15, got ${spend}`);
398    });
399  
400    test('returns a number', async () => {
401      const spend = await getDailySpend();
402      assert.equal(typeof spend, 'number');
403    });
404  });
405  
406  // ═══════════════════════════════════════════════════════════════════════════════
407  // getHourlySpend
408  // ═══════════════════════════════════════════════════════════════════════════════
409  
410  describe('getHourlySpend', () => {
411    beforeEach(() => clearUsage());
412  
413    test('returns 0 when no usage in last hour', async () => {
414      const spend = await getHourlySpend();
415      assert.equal(spend, 0);
416    });
417  
418    test('sums only records in the last hour', async () => {
419      // Within last hour
420      insertUsage({ estimated_cost: 0.03 });
421      insertUsage({ estimated_cost: 0.07 });
422      // Older than 1 hour
423      const twoHoursAgo = toSqliteDate(new Date(Date.now() - 2 * 60 * 60 * 1000));
424      insertUsage({ estimated_cost: 0.50, created_at: twoHoursAgo });
425  
426      const spend = await getHourlySpend();
427      assert.ok(Math.abs(spend - 0.10) < 0.0001, `Expected ~0.10, got ${spend}`);
428    });
429  
430    test('returns a number', async () => {
431      const spend = await getHourlySpend();
432      assert.equal(typeof spend, 'number');
433    });
434  });
435  
436  // ═══════════════════════════════════════════════════════════════════════════════
437  // checkBudgetVariance
438  // ═══════════════════════════════════════════════════════════════════════════════
439  
440  describe('checkBudgetVariance', () => {
441    beforeEach(() => {
442      clearUsage();
443      db.exec('DELETE FROM llm_cost_budgets; DELETE FROM information_schema_tables');
444    });
445  
446    test('returns empty array when budget table does not exist', async () => {
447      // information_schema_tables is empty → tableExists returns null → returns []
448      const alerts = await checkBudgetVariance();
449      assert.deepEqual(alerts, []);
450    });
451  
452    test('returns empty array when budget table exists but no usage in last hour', async () => {
453      // Mark budget table as existing
454      db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)')
455        .run('ops', 'llm_cost_budgets');
456  
457      const alerts = await checkBudgetVariance();
458      assert.deepEqual(alerts, []);
459    });
460  
461    test('returns cost_variance alert when avg_cost exceeds max_cost_per_call', async () => {
462      db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)')
463        .run('ops', 'llm_cost_budgets');
464  
465      db.prepare(
466        `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model)
467         VALUES (?, ?, ?, ?)`
468      ).run('scoring', 0.001, 0.005, 'openai/gpt-4o-mini');
469  
470      // Insert usage in the last hour with cost above max
471      insertUsage({
472        stage: 'scoring',
473        model: 'openai/gpt-4o-mini',
474        estimated_cost: 0.010,  // > max 0.005
475        created_at: toSqliteDate(),
476      });
477  
478      const alerts = await checkBudgetVariance();
479      const costAlert = alerts.find(a => a.type === 'cost_variance');
480      assert.ok(costAlert, 'Should have a cost_variance alert');
481      assert.equal(costAlert.stage, 'scoring');
482      assert.equal(costAlert.maxCost, 0.005);
483    });
484  
485    test('returns model_mismatch alert when model differs from expected', async () => {
486      db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)')
487        .run('ops', 'llm_cost_budgets');
488  
489      db.prepare(
490        `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model)
491         VALUES (?, ?, ?, ?)`
492      ).run('proposals', 0.01, 0.10, 'openai/gpt-4o-mini');
493  
494      // Insert usage with a different model
495      insertUsage({
496        stage: 'proposals',
497        model: 'anthropic/claude-opus-4.6',  // different from expected
498        estimated_cost: 0.001,              // within budget
499        created_at: toSqliteDate(),
500      });
501  
502      const alerts = await checkBudgetVariance();
503      const mismatch = alerts.find(a => a.type === 'model_mismatch');
504      assert.ok(mismatch, 'Should have a model_mismatch alert');
505      assert.equal(mismatch.stage, 'proposals');
506      assert.equal(mismatch.actualModel, 'anthropic/claude-opus-4.6');
507      assert.equal(mismatch.expectedModel, 'openai/gpt-4o-mini');
508    });
509  
510    test('returns no alerts when cost is within budget and model matches', async () => {
511      db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)')
512        .run('ops', 'llm_cost_budgets');
513  
514      db.prepare(
515        `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model)
516         VALUES (?, ?, ?, ?)`
517      ).run('enrichment', 0.001, 0.010, 'openai/gpt-4o-mini');
518  
519      insertUsage({
520        stage: 'enrichment',
521        model: 'openai/gpt-4o-mini',
522        estimated_cost: 0.002,  // within budget
523        created_at: toSqliteDate(),
524      });
525  
526      const alerts = await checkBudgetVariance();
527      assert.equal(alerts.length, 0);
528    });
529  
530    test('skips stages with no matching budget entry', async () => {
531      db.prepare('INSERT INTO information_schema_tables (table_schema, table_name) VALUES (?, ?)')
532        .run('ops', 'llm_cost_budgets');
533  
534      // Budget only for 'scoring', but usage is for 'replies'
535      db.prepare(
536        `INSERT INTO llm_cost_budgets (call_type, expected_cost_per_call, max_cost_per_call, expected_model)
537         VALUES (?, ?, ?, ?)`
538      ).run('scoring', 0.001, 0.005, 'openai/gpt-4o-mini');
539  
540      insertUsage({
541        stage: 'replies',
542        model: 'anthropic/claude-opus-4.6',
543        estimated_cost: 9.99,
544        created_at: toSqliteDate(),
545      });
546  
547      const alerts = await checkBudgetVariance();
548      assert.equal(alerts.length, 0, 'No alerts for stages without a budget entry');
549    });
550  });