// tests/utils/llm-provider.test.js
  1  /**
  2   * Integration Tests for LLM Provider Module
  3   *
  4   * Note: These tests verify provider initialization and basic functionality.
  5   * Full API call testing requires real API keys and would incur costs.
  6   */
  7  
  8  import { describe, it, test, mock, beforeEach } from 'node:test';
  9  import assert from 'node:assert';
 10  
// ---------------------------------------------------------------------------
// Mocks for callLLM tests — must be set up before module imports
// (mock.module only affects modules resolved *after* registration, so this
// section has to run before the dynamic import at the bottom).
// ---------------------------------------------------------------------------

// Shared spy for the Anthropic SDK's messages.create endpoint.
const mockCreate = mock.fn();

// Minimal stand-in for the Anthropic SDK client class; exposes only the
// surface the provider module touches (client.messages.create).
class MockAnthropic {
  constructor() {
    this.messages = { create: mockCreate };
  }
}

mock.module('@anthropic-ai/sdk', {
  defaultExport: MockAnthropic,
});

// Spy for axios.post — the OpenRouter code path issues its HTTP calls
// through axios, so this intercepts all outbound requests in tests.
const mockAxiosPost = mock.fn();
mock.module('axios', {
  defaultExport: { post: mockAxiosPost },
});

// Mock llm-usage-tracker to control getDailySpend / getHourlySpend / logLLMUsage
// (lets tests simulate budget states and observe usage logging without a DB).
const mockGetDailySpend = mock.fn(() => 0);
const mockGetHourlySpend = mock.fn(() => 0);
const mockLogLLMUsage = mock.fn();

mock.module('../../src/utils/llm-usage-tracker.js', {
  namedExports: {
    getDailySpend: mockGetDailySpend,
    getHourlySpend: mockGetHourlySpend,
    logLLMUsage: mockLogLLMUsage,
  },
});

// ---------------------------------------------------------------------------
// Import mocked modules for callLLM tests
// ---------------------------------------------------------------------------

// Force the OpenRouter provider for the callLLM suites below by priming the
// environment before the module is evaluated.
process.env.OPENROUTER_API_KEY = 'test-or-key';
delete process.env.ANTHROPIC_API_KEY;

// Cache-busting query string gives a fresh module instance that sees the
// env vars and mock registrations configured above.
const { callLLM: orCallLLM } = await import('../../src/utils/llm-provider.js?mocked=or');
 53  
 54  describe('LLM Provider Module - OpenRouter Provider', () => {
 55    it('should initialize with OpenRouter provider when only OPENROUTER_API_KEY is set', async () => {
 56      // Set OpenRouter API key only
 57      const originalAnthropicKey = process.env.ANTHROPIC_API_KEY;
 58      const originalOpenRouterKey = process.env.OPENROUTER_API_KEY;
 59  
 60      delete process.env.ANTHROPIC_API_KEY;
 61      process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock';
 62  
 63      // Import with cache busting
 64      const timestamp = Date.now();
 65      const module = await import(`../../src/utils/llm-provider.js?t=${timestamp}`);
 66  
 67      assert.strictEqual(module.getProvider(), 'openrouter', 'Provider should be openrouter');
 68      assert.strictEqual(
 69        module.getProviderDisplayName(),
 70        'OpenRouter',
 71        'Display name should be OpenRouter'
 72      );
 73  
 74      // Restore original env vars
 75      if (originalAnthropicKey) process.env.ANTHROPIC_API_KEY = originalAnthropicKey;
 76      if (originalOpenRouterKey) {
 77        process.env.OPENROUTER_API_KEY = originalOpenRouterKey;
 78      } else {
 79        delete process.env.OPENROUTER_API_KEY;
 80      }
 81    });
 82  });
 83  
 84  describe('LLM Provider Module - Provider Priority', () => {
 85    it('should prefer OpenRouter when both API keys are set (cost efficiency)', async () => {
 86      // Set both API keys
 87      const originalAnthropicKey = process.env.ANTHROPIC_API_KEY;
 88      const originalOpenRouterKey = process.env.OPENROUTER_API_KEY;
 89  
 90      process.env.ANTHROPIC_API_KEY = 'sk-ant-test-key-mock';
 91      process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock';
 92  
 93      // Import with cache busting
 94      const timestamp = Date.now();
 95      const module = await import(`../../src/utils/llm-provider.js?t=${timestamp}`);
 96  
 97      // OpenRouter is preferred over Anthropic for cost efficiency (see llm-provider.js)
 98      assert.strictEqual(
 99        module.getProvider(),
100        'openrouter',
101        'Should prefer OpenRouter when both keys are set (cost efficiency)'
102      );
103  
104      // Restore original env vars
105      if (originalAnthropicKey) {
106        process.env.ANTHROPIC_API_KEY = originalAnthropicKey;
107      } else {
108        delete process.env.ANTHROPIC_API_KEY;
109      }
110      if (originalOpenRouterKey) {
111        process.env.OPENROUTER_API_KEY = originalOpenRouterKey;
112      } else {
113        delete process.env.OPENROUTER_API_KEY;
114      }
115    });
116  });
117  
describe('LLM Provider Module - API Call Structure', () => {
  /**
   * Imports a fresh copy of llm-provider.js with OPENROUTER_API_KEY
   * guaranteed to be set, restoring the env var afterwards — even if the
   * import itself throws (the original inline version leaked the key on
   * failure).
   * @returns {Promise<object>} The freshly evaluated module namespace.
   */
  async function loadProviderModule() {
    const origKey = process.env.OPENROUTER_API_KEY;
    if (!origKey) process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock';
    try {
      // Cache-busting query string forces a fresh module evaluation.
      return await import(`../../src/utils/llm-provider.js?t=${Date.now()}`);
    } finally {
      if (!origKey) delete process.env.OPENROUTER_API_KEY;
    }
  }

  it('should have callLLM function with correct signature', async () => {
    const module = await loadProviderModule();

    assert.strictEqual(typeof module.callLLM, 'function', 'callLLM should be a function');
    assert.ok(
      module.callLLM.constructor.name === 'AsyncFunction' ||
        module.callLLM.constructor.name === 'Function',
      'callLLM should be a function'
    );
  });

  it('should export getProvider function', async () => {
    const module = await loadProviderModule();

    assert.strictEqual(typeof module.getProvider, 'function', 'getProvider should be a function');
    const provider = module.getProvider();
    assert.strictEqual(provider, 'openrouter', 'Provider should be openrouter');
  });

  it('should export getProviderDisplayName function', async () => {
    const module = await loadProviderModule();

    assert.strictEqual(
      typeof module.getProviderDisplayName,
      'function',
      'getProviderDisplayName should be a function'
    );
    const displayName = module.getProviderDisplayName();
    assert.strictEqual(displayName, 'OpenRouter', 'Display name should be OpenRouter');
  });
});
159  
160  /*
161   * NOTE: Full API integration tests are not included because they would:
162   * 1. Require real API keys
163   * 2. Make actual API calls that cost money
164   * 3. Depend on external service availability
165   * 4. Be slow (network latency)
166   *
167   * The module is tested in practice through:
168   * - src/score.js (vision scoring)
169   * - src/proposal-generator-v2.js (text generation)
170   * - Integration tests for these modules
171   *
172   * To manually test API calls:
173   * 1. Set ANTHROPIC_API_KEY or OPENROUTER_API_KEY in .env
174   * 2. Run: node -e "import('./src/utils/llm-provider.js').then(m => m.callLLM({model: 'openai/gpt-4o-mini', messages: [{role: 'user', content: 'Hello'}]}).then(console.log))"
175   */
176  
177  // ---------------------------------------------------------------------------
178  // Budget enforcement tests (callLLM — OpenRouter path)
179  // ---------------------------------------------------------------------------
180  
describe('callLLM - budget enforcement (OpenRouter)', () => {
  beforeEach(() => {
    mockGetDailySpend.mock.resetCalls();
    mockGetHourlySpend.mock.resetCalls();
    mockLogLLMUsage.mock.resetCalls();
    mockAxiosPost.mock.resetCalls();
    // Default: within budget
    mockGetDailySpend.mock.mockImplementation(() => 0);
    mockGetHourlySpend.mock.mockImplementation(() => 0);
  });

  test('throws when daily spend meets or exceeds LLM_DAILY_BUDGET', async () => {
    process.env.LLM_DAILY_BUDGET = '10';
    mockGetDailySpend.mock.mockImplementation(() => 10.0); // exactly at limit

    try {
      await assert.rejects(
        () =>
          orCallLLM({
            model: 'openai/gpt-4o-mini',
            messages: [{ role: 'user', content: 'hello' }],
          }),
        err => {
          assert.ok(err.message.includes('daily budget exceeded'), `Got: ${err.message}`);
          return true;
        }
      );
    } finally {
      // Clean up even on assertion failure so the budget cap cannot leak
      // into subsequent tests.
      delete process.env.LLM_DAILY_BUDGET;
    }
  });

  test('throws when daily spend exceeds LLM_DAILY_BUDGET', async () => {
    process.env.LLM_DAILY_BUDGET = '5';
    mockGetDailySpend.mock.mockImplementation(() => 6.5);

    try {
      await assert.rejects(
        () =>
          orCallLLM({
            model: 'openai/gpt-4o-mini',
            messages: [{ role: 'user', content: 'hello' }],
          }),
        err => {
          assert.ok(err.message.includes('daily budget exceeded'));
          return true;
        }
      );
    } finally {
      delete process.env.LLM_DAILY_BUDGET;
    }
  });

  test('logs warning when hourly spend meets LLM_HOURLY_ALERT threshold', async () => {
    process.env.LLM_HOURLY_ALERT = '2';
    mockGetHourlySpend.mock.mockImplementation(() => 2.0);
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'ok' } }],
        usage: { prompt_tokens: 5, completion_tokens: 3 },
      },
    }));

    try {
      // Should not throw — just warns
      const result = await orCallLLM({
        model: 'openai/gpt-4o-mini',
        messages: [{ role: 'user', content: 'hello' }],
      });
      assert.equal(result.content, 'ok');
    } finally {
      delete process.env.LLM_HOURLY_ALERT;
    }
  });

  test('proceeds normally when within budget', async () => {
    mockGetDailySpend.mock.mockImplementation(() => 0.5);
    mockGetHourlySpend.mock.mockImplementation(() => 0.1);
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'within budget' } }],
        usage: { prompt_tokens: 10, completion_tokens: 5 },
      },
    }));

    const result = await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
    });
    assert.equal(result.content, 'within budget');
  });

  test('continues when budget DB check throws non-budget error', async () => {
    mockGetDailySpend.mock.mockImplementation(() => {
      throw new Error('DB connection failed');
    });
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'db error handled' } }],
        usage: { prompt_tokens: 5, completion_tokens: 3 },
      },
    }));

    // Should swallow non-budget errors and proceed
    const result = await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
    });
    assert.equal(result.content, 'db error handled');
  });

  test('uses default daily budget of 50 when LLM_DAILY_BUDGET not set', async () => {
    delete process.env.LLM_DAILY_BUDGET;
    mockGetDailySpend.mock.mockImplementation(() => 49.99);
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'under default budget' } }],
        usage: { prompt_tokens: 5, completion_tokens: 3 },
      },
    }));

    const result = await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
    });
    assert.equal(result.content, 'under default budget');
  });
});
300  
301  // ---------------------------------------------------------------------------
302  // Usage auto-tracking tests
303  // ---------------------------------------------------------------------------
304  
describe('callLLM - usage auto-tracking (OpenRouter)', () => {
  beforeEach(() => {
    mockGetDailySpend.mock.resetCalls();
    mockGetHourlySpend.mock.resetCalls();
    mockLogLLMUsage.mock.resetCalls();
    mockAxiosPost.mock.resetCalls();
    mockGetDailySpend.mock.mockImplementation(() => 0);
    mockGetHourlySpend.mock.mockImplementation(() => 0);
    // resetCalls() does NOT undo mockImplementation(); the
    // "does not throw when logLLMUsage fails" test below installs a throwing
    // implementation, so reinstall a no-op here to keep tests order-independent.
    mockLogLLMUsage.mock.mockImplementation(() => {});
  });

  test('calls logLLMUsage when stage is provided and usage data present', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'tracked' } }],
        usage: { prompt_tokens: 100, completion_tokens: 50 },
      },
    }));

    await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
      stage: 'scoring',
      siteId: 42,
    });

    assert.equal(mockLogLLMUsage.mock.callCount(), 1);
    const trackArgs = mockLogLLMUsage.mock.calls[0].arguments[0];
    assert.equal(trackArgs.stage, 'scoring');
    assert.equal(trackArgs.siteId, 42);
    assert.equal(trackArgs.promptTokens, 100);
    assert.equal(trackArgs.completionTokens, 50);
  });

  test('does not call logLLMUsage when stage is null', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'ok' } }],
        usage: { prompt_tokens: 100, completion_tokens: 50 },
      },
    }));

    await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
      // no stage
    });

    assert.equal(mockLogLLMUsage.mock.callCount(), 0);
  });

  test('does not call logLLMUsage when usage tokens are both zero', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'ok' } }],
        usage: { prompt_tokens: 0, completion_tokens: 0 },
      },
    }));

    await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
      stage: 'scoring',
    });

    assert.equal(mockLogLLMUsage.mock.callCount(), 0);
  });

  test('does not throw when logLLMUsage fails (tracking errors are non-blocking)', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'tracking will fail' } }],
        usage: { prompt_tokens: 100, completion_tokens: 50 },
      },
    }));
    // NOTE: this throwing implementation persists past this test; the
    // beforeEach above reinstalls a no-op so later tests are unaffected.
    mockLogLLMUsage.mock.mockImplementation(() => {
      throw new Error('DB write error');
    });

    // Must not throw
    const result = await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
      stage: 'scoring',
    });
    assert.equal(result.content, 'tracking will fail');
  });

  test('calls logLLMUsage when only promptTokens > 0', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'prompt only' } }],
        usage: { prompt_tokens: 50, completion_tokens: 0 },
      },
    }));

    await orCallLLM({
      model: 'openai/gpt-4o-mini',
      messages: [{ role: 'user', content: 'hello' }],
      stage: 'enrichment',
    });

    assert.equal(mockLogLLMUsage.mock.callCount(), 1);
  });
});
409  
410  // ---------------------------------------------------------------------------
411  // callLLM OpenRouter: finish_reason=length truncation warning
412  // ---------------------------------------------------------------------------
413  
describe('callLLM - OpenRouter truncation warning', () => {
  beforeEach(() => {
    mockGetDailySpend.mock.resetCalls();
    mockGetHourlySpend.mock.resetCalls();
    mockAxiosPost.mock.resetCalls();
    mockLogLLMUsage.mock.resetCalls();
    mockGetDailySpend.mock.mockImplementation(() => 0);
    mockGetHourlySpend.mock.mockImplementation(() => 0);
    // The auto-tracking suite installs a throwing logLLMUsage implementation
    // that resetCalls() does not undo; reinstall a no-op for isolation.
    mockLogLLMUsage.mock.mockImplementation(() => {});
  });

  /**
   * Temporarily replaces console.warn with a collector.
   * @returns {{ warnings: string[], restore: () => void }} Captured warning
   *   strings and a function that reinstates the original console.warn.
   */
  function captureWarnings() {
    const warnings = [];
    const origWarn = console.warn;
    console.warn = (...args) => warnings.push(args.join(' '));
    return {
      warnings,
      restore: () => {
        console.warn = origWarn;
      },
    };
  }

  test('logs console.warn when finish_reason is "length"', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'truncated response...' }, finish_reason: 'length' }],
        usage: { prompt_tokens: 10, completion_tokens: 2000 },
      },
    }));

    const { warnings, restore } = captureWarnings();
    try {
      const result = await orCallLLM({
        model: 'openai/gpt-4o-mini',
        messages: [{ role: 'user', content: 'hello' }],
      });
      assert.equal(result.content, 'truncated response...');
      assert.ok(
        warnings.some(w => w.includes('finish_reason=length')),
        'Should warn about truncation'
      );
      assert.ok(
        warnings.some(w => w.includes('openai/gpt-4o-mini')),
        'Should include model name'
      );
    } finally {
      restore();
    }
  });

  test('does not warn when finish_reason is "stop"', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'complete' }, finish_reason: 'stop' }],
        usage: { prompt_tokens: 10, completion_tokens: 50 },
      },
    }));

    const { warnings, restore } = captureWarnings();
    try {
      await orCallLLM({
        model: 'openai/gpt-4o-mini',
        messages: [{ role: 'user', content: 'hello' }],
      });
      assert.ok(
        !warnings.some(w => w.includes('finish_reason=length')),
        'Should not warn for stop finish_reason'
      );
    } finally {
      restore();
    }
  });

  test('finish_reason=length with unknown completion_tokens reports "?"', async () => {
    mockAxiosPost.mock.mockImplementation(async () => ({
      data: {
        choices: [{ message: { content: 'truncated' }, finish_reason: 'length' }],
        // no usage field
      },
    }));

    const { warnings, restore } = captureWarnings();
    try {
      await orCallLLM({
        model: 'openai/gpt-4o-mini',
        messages: [{ role: 'user', content: 'hello' }],
      });
      assert.ok(
        warnings.some(w => w.includes('completion_tokens=?')),
        'Should report ? for unknown completion tokens'
      );
    } finally {
      restore();
    }
  });
});
507  
508  // NOTE: Anthropic provider path was removed in the source code refactor.
509  // All LLM calls now go through OpenRouter only. The Anthropic-specific tests
510  // (model mapping, system message extraction, image_url conversion, etc.)
511  // have been removed since those code paths no longer exist.