llm-provider.test.js
1 /** 2 * Integration Tests for LLM Provider Module 3 * 4 * Note: These tests verify provider initialization and basic functionality. 5 * Full API call testing requires real API keys and would incur costs. 6 */ 7 8 import { describe, it, test, mock, beforeEach } from 'node:test'; 9 import assert from 'node:assert'; 10 11 // --------------------------------------------------------------------------- 12 // Mocks for callLLM tests — must be set up before module imports 13 // --------------------------------------------------------------------------- 14 15 const mockCreate = mock.fn(); 16 17 class MockAnthropic { 18 constructor() { 19 this.messages = { create: mockCreate }; 20 } 21 } 22 23 mock.module('@anthropic-ai/sdk', { 24 defaultExport: MockAnthropic, 25 }); 26 27 const mockAxiosPost = mock.fn(); 28 mock.module('axios', { 29 defaultExport: { post: mockAxiosPost }, 30 }); 31 32 // Mock llm-usage-tracker to control getDailySpend / getHourlySpend / logLLMUsage 33 const mockGetDailySpend = mock.fn(() => 0); 34 const mockGetHourlySpend = mock.fn(() => 0); 35 const mockLogLLMUsage = mock.fn(); 36 37 mock.module('../../src/utils/llm-usage-tracker.js', { 38 namedExports: { 39 getDailySpend: mockGetDailySpend, 40 getHourlySpend: mockGetHourlySpend, 41 logLLMUsage: mockLogLLMUsage, 42 }, 43 }); 44 45 // --------------------------------------------------------------------------- 46 // Import mocked modules for callLLM tests 47 // --------------------------------------------------------------------------- 48 49 process.env.OPENROUTER_API_KEY = 'test-or-key'; 50 delete process.env.ANTHROPIC_API_KEY; 51 52 const { callLLM: orCallLLM } = await import('../../src/utils/llm-provider.js?mocked=or'); 53 54 describe('LLM Provider Module - OpenRouter Provider', () => { 55 it('should initialize with OpenRouter provider when only OPENROUTER_API_KEY is set', async () => { 56 // Set OpenRouter API key only 57 const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; 58 const originalOpenRouterKey = process.env.OPENROUTER_API_KEY; 59 60 delete process.env.ANTHROPIC_API_KEY; 61 process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock'; 62 63 // Import with cache busting 64 const timestamp = Date.now(); 65 const module = await import(`../../src/utils/llm-provider.js?t=${timestamp}`); 66 67 assert.strictEqual(module.getProvider(), 'openrouter', 'Provider should be openrouter'); 68 assert.strictEqual( 69 module.getProviderDisplayName(), 70 'OpenRouter', 71 'Display name should be OpenRouter' 72 ); 73 74 // Restore original env vars 75 if (originalAnthropicKey) process.env.ANTHROPIC_API_KEY = originalAnthropicKey; 76 if (originalOpenRouterKey) { 77 process.env.OPENROUTER_API_KEY = originalOpenRouterKey; 78 } else { 79 delete process.env.OPENROUTER_API_KEY; 80 } 81 }); 82 }); 83 84 describe('LLM Provider Module - Provider Priority', () => { 85 it('should prefer OpenRouter when both API keys are set (cost efficiency)', async () => { 86 // Set both API keys 87 const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; 88 const originalOpenRouterKey = process.env.OPENROUTER_API_KEY; 89 90 process.env.ANTHROPIC_API_KEY = 'sk-ant-test-key-mock'; 91 process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock'; 92 93 // Import with cache busting 94 const timestamp = Date.now(); 95 const module = await import(`../../src/utils/llm-provider.js?t=${timestamp}`); 96 97 // OpenRouter is preferred over Anthropic for cost efficiency (see llm-provider.js) 98 assert.strictEqual( 99 module.getProvider(), 100 'openrouter', 101 'Should prefer OpenRouter when both keys are set (cost efficiency)' 102 ); 103 104 // Restore original env vars 105 if (originalAnthropicKey) { 106 process.env.ANTHROPIC_API_KEY = originalAnthropicKey; 107 } else { 108 delete process.env.ANTHROPIC_API_KEY; 109 } 110 if (originalOpenRouterKey) { 111 process.env.OPENROUTER_API_KEY = originalOpenRouterKey; 112 } else { 113 delete process.env.OPENROUTER_API_KEY; 114 } 115 }); 116 }); 117 118 describe('LLM Provider Module - API Call Structure', () => { 119 it('should have callLLM function with correct signature', async () => { 120 const origKey = process.env.OPENROUTER_API_KEY; 121 if (!origKey) process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock'; 122 const module = await import(`../../src/utils/llm-provider.js?t=${Date.now()}`); 123 if (!origKey) delete process.env.OPENROUTER_API_KEY; 124 125 assert.strictEqual(typeof module.callLLM, 'function', 'callLLM should be a function'); 126 assert.ok( 127 module.callLLM.constructor.name === 'AsyncFunction' || 128 module.callLLM.constructor.name === 'Function', 129 'callLLM should be a function' 130 ); 131 }); 132 133 it('should export getProvider function', async () => { 134 const origKey = process.env.OPENROUTER_API_KEY; 135 if (!origKey) process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock'; 136 const module = await import(`../../src/utils/llm-provider.js?t=${Date.now()}`); 137 if (!origKey) delete process.env.OPENROUTER_API_KEY; 138 139 assert.strictEqual(typeof module.getProvider, 'function', 'getProvider should be a function'); 140 const provider = module.getProvider(); 141 assert.strictEqual(provider, 'openrouter', 'Provider should be openrouter'); 142 }); 143 144 it('should export getProviderDisplayName function', async () => { 145 const origKey = process.env.OPENROUTER_API_KEY; 146 if (!origKey) process.env.OPENROUTER_API_KEY = 'sk-or-test-key-mock'; 147 const module = await import(`../../src/utils/llm-provider.js?t=${Date.now()}`); 148 if (!origKey) delete process.env.OPENROUTER_API_KEY; 149 150 assert.strictEqual( 151 typeof module.getProviderDisplayName, 152 'function', 153 'getProviderDisplayName should be a function' 154 ); 155 const displayName = module.getProviderDisplayName(); 156 assert.strictEqual(displayName, 'OpenRouter', 'Display name should be OpenRouter'); 157 }); 158 }); 159 160 /* 161 * NOTE: Full API integration tests are not included because they would: 162 * 1. Require real API keys 163 * 2. Make actual API calls that cost money 164 * 3. Depend on external service availability 165 * 4. Be slow (network latency) 166 * 167 * The module is tested in practice through: 168 * - src/score.js (vision scoring) 169 * - src/proposal-generator-v2.js (text generation) 170 * - Integration tests for these modules 171 * 172 * To manually test API calls: 173 * 1. Set ANTHROPIC_API_KEY or OPENROUTER_API_KEY in .env 174 * 2. Run: node -e "import('./src/utils/llm-provider.js').then(m => m.callLLM({model: 'openai/gpt-4o-mini', messages: [{role: 'user', content: 'Hello'}]}).then(console.log))" 175 */ 176 177 // --------------------------------------------------------------------------- 178 // Budget enforcement tests (callLLM — OpenRouter path) 179 // --------------------------------------------------------------------------- 180 181 describe('callLLM - budget enforcement (OpenRouter)', () => { 182 beforeEach(() => { 183 mockGetDailySpend.mock.resetCalls(); 184 mockGetHourlySpend.mock.resetCalls(); 185 mockLogLLMUsage.mock.resetCalls(); 186 mockAxiosPost.mock.resetCalls(); 187 // Default: within budget 188 mockGetDailySpend.mock.mockImplementation(() => 0); 189 mockGetHourlySpend.mock.mockImplementation(() => 0); 190 }); 191 192 test('throws when daily spend meets or exceeds LLM_DAILY_BUDGET', async () => { 193 process.env.LLM_DAILY_BUDGET = '10'; 194 mockGetDailySpend.mock.mockImplementation(() => 10.0); // exactly at limit 195 196 await assert.rejects( 197 () => 198 orCallLLM({ 199 model: 'openai/gpt-4o-mini', 200 messages: [{ role: 'user', content: 'hello' }], 201 }), 202 err => { 203 assert.ok(err.message.includes('daily budget exceeded'), `Got: ${err.message}`); 204 return true; 205 } 206 ); 207 delete process.env.LLM_DAILY_BUDGET; 208 }); 209 210 test('throws when daily spend exceeds LLM_DAILY_BUDGET', async () => { 211 process.env.LLM_DAILY_BUDGET = '5'; 212 mockGetDailySpend.mock.mockImplementation(() => 6.5); 213 214 await assert.rejects( 215 () => 216 orCallLLM({ 217 model: 'openai/gpt-4o-mini', 218 messages: [{ role: 'user', content: 'hello' }], 219 }), 220 err => { 221 assert.ok(err.message.includes('daily budget exceeded')); 222 return true; 223 } 224 ); 225 delete process.env.LLM_DAILY_BUDGET; 226 }); 227 228 test('logs warning when hourly spend meets LLM_HOURLY_ALERT threshold', async () => { 229 process.env.LLM_HOURLY_ALERT = '2'; 230 mockGetHourlySpend.mock.mockImplementation(() => 2.0); 231 mockAxiosPost.mock.mockImplementation(async () => ({ 232 data: { 233 choices: [{ message: { content: 'ok' } }], 234 usage: { prompt_tokens: 5, completion_tokens: 3 }, 235 }, 236 })); 237 238 // Should not throw — just warns 239 const result = await orCallLLM({ 240 model: 'openai/gpt-4o-mini', 241 messages: [{ role: 'user', content: 'hello' }], 242 }); 243 assert.equal(result.content, 'ok'); 244 delete process.env.LLM_HOURLY_ALERT; 245 }); 246 247 test('proceeds normally when within budget', async () => { 248 mockGetDailySpend.mock.mockImplementation(() => 0.5); 249 mockGetHourlySpend.mock.mockImplementation(() => 0.1); 250 mockAxiosPost.mock.mockImplementation(async () => ({ 251 data: { 252 choices: [{ message: { content: 'within budget' } }], 253 usage: { prompt_tokens: 10, completion_tokens: 5 }, 254 }, 255 })); 256 257 const result = await orCallLLM({ 258 model: 'openai/gpt-4o-mini', 259 messages: [{ role: 'user', content: 'hello' }], 260 }); 261 assert.equal(result.content, 'within budget'); 262 }); 263 264 test('continues when budget DB check throws non-budget error', async () => { 265 mockGetDailySpend.mock.mockImplementation(() => { 266 throw new Error('DB connection failed'); 267 }); 268 mockAxiosPost.mock.mockImplementation(async () => ({ 269 data: { 270 choices: [{ message: { content: 'db error handled' } }], 271 usage: { prompt_tokens: 5, completion_tokens: 3 }, 272 }, 273 })); 274 275 // Should swallow non-budget errors and proceed 276 const result = await orCallLLM({ 277 model: 'openai/gpt-4o-mini', 278 messages: [{ role: 'user', content: 'hello' }], 279 }); 280 assert.equal(result.content, 'db error handled'); 281 }); 282 283 test('uses default daily budget of 50 when LLM_DAILY_BUDGET not set', async () => { 284 delete process.env.LLM_DAILY_BUDGET; 285 mockGetDailySpend.mock.mockImplementation(() => 49.99); 286 mockAxiosPost.mock.mockImplementation(async () => ({ 287 data: { 288 choices: [{ message: { content: 'under default budget' } }], 289 usage: { prompt_tokens: 5, completion_tokens: 3 }, 290 }, 291 })); 292 293 const result = await orCallLLM({ 294 model: 'openai/gpt-4o-mini', 295 messages: [{ role: 'user', content: 'hello' }], 296 }); 297 assert.equal(result.content, 'under default budget'); 298 }); 299 }); 300 301 // --------------------------------------------------------------------------- 302 // Usage auto-tracking tests 303 // --------------------------------------------------------------------------- 304 305 describe('callLLM - usage auto-tracking (OpenRouter)', () => { 306 beforeEach(() => { 307 mockGetDailySpend.mock.resetCalls(); 308 mockGetHourlySpend.mock.resetCalls(); 309 mockLogLLMUsage.mock.resetCalls(); 310 mockAxiosPost.mock.resetCalls(); 311 mockGetDailySpend.mock.mockImplementation(() => 0); 312 mockGetHourlySpend.mock.mockImplementation(() => 0); 313 }); 314 315 test('calls logLLMUsage when stage is provided and usage data present', async () => { 316 mockAxiosPost.mock.mockImplementation(async () => ({ 317 data: { 318 choices: [{ message: { content: 'tracked' } }], 319 usage: { prompt_tokens: 100, completion_tokens: 50 }, 320 }, 321 })); 322 323 await orCallLLM({ 324 model: 'openai/gpt-4o-mini', 325 messages: [{ role: 'user', content: 'hello' }], 326 stage: 'scoring', 327 siteId: 42, 328 }); 329 330 assert.equal(mockLogLLMUsage.mock.callCount(), 1); 331 const trackArgs = mockLogLLMUsage.mock.calls[0].arguments[0]; 332 assert.equal(trackArgs.stage, 'scoring'); 333 assert.equal(trackArgs.siteId, 42); 334 assert.equal(trackArgs.promptTokens, 100); 335 assert.equal(trackArgs.completionTokens, 50); 336 }); 337 338 test('does not call logLLMUsage when stage is null', async () => { 339 mockAxiosPost.mock.mockImplementation(async () => ({ 340 data: { 341 choices: [{ message: { content: 'ok' } }], 342 usage: { prompt_tokens: 100, completion_tokens: 50 }, 343 }, 344 })); 345 346 await orCallLLM({ 347 model: 'openai/gpt-4o-mini', 348 messages: [{ role: 'user', content: 'hello' }], 349 // no stage 350 }); 351 352 assert.equal(mockLogLLMUsage.mock.callCount(), 0); 353 }); 354 355 test('does not call logLLMUsage when usage tokens are both zero', async () => { 356 mockAxiosPost.mock.mockImplementation(async () => ({ 357 data: { 358 choices: [{ message: { content: 'ok' } }], 359 usage: { prompt_tokens: 0, completion_tokens: 0 }, 360 }, 361 })); 362 363 await orCallLLM({ 364 model: 'openai/gpt-4o-mini', 365 messages: [{ role: 'user', content: 'hello' }], 366 stage: 'scoring', 367 }); 368 369 assert.equal(mockLogLLMUsage.mock.callCount(), 0); 370 }); 371 372 test('does not throw when logLLMUsage fails (tracking errors are non-blocking)', async () => { 373 mockAxiosPost.mock.mockImplementation(async () => ({ 374 data: { 375 choices: [{ message: { content: 'tracking will fail' } }], 376 usage: { prompt_tokens: 100, completion_tokens: 50 }, 377 }, 378 })); 379 mockLogLLMUsage.mock.mockImplementation(() => { 380 throw new Error('DB write error'); 381 }); 382 383 // Must not throw 384 const result = await orCallLLM({ 385 model: 'openai/gpt-4o-mini', 386 messages: [{ role: 'user', content: 'hello' }], 387 stage: 'scoring', 388 }); 389 assert.equal(result.content, 'tracking will fail'); 390 }); 391 392 test('calls logLLMUsage when only promptTokens > 0', async () => { 393 mockAxiosPost.mock.mockImplementation(async () => ({ 394 data: { 395 choices: [{ message: { content: 'prompt only' } }], 396 usage: { prompt_tokens: 50, completion_tokens: 0 }, 397 }, 398 })); 399 400 await orCallLLM({ 401 model: 'openai/gpt-4o-mini', 402 messages: [{ role: 'user', content: 'hello' }], 403 stage: 'enrichment', 404 }); 405 406 assert.equal(mockLogLLMUsage.mock.callCount(), 1); 407 }); 408 }); 409 410 // --------------------------------------------------------------------------- 411 // callLLM OpenRouter: finish_reason=length truncation warning 412 // --------------------------------------------------------------------------- 413 414 describe('callLLM - OpenRouter truncation warning', () => { 415 beforeEach(() => { 416 mockGetDailySpend.mock.resetCalls(); 417 mockGetHourlySpend.mock.resetCalls(); 418 mockAxiosPost.mock.resetCalls(); 419 mockLogLLMUsage.mock.resetCalls(); 420 mockGetDailySpend.mock.mockImplementation(() => 0); 421 mockGetHourlySpend.mock.mockImplementation(() => 0); 422 }); 423 424 test('logs console.warn when finish_reason is "length"', async () => { 425 mockAxiosPost.mock.mockImplementation(async () => ({ 426 data: { 427 choices: [{ message: { content: 'truncated response...' }, finish_reason: 'length' }], 428 usage: { prompt_tokens: 10, completion_tokens: 2000 }, 429 }, 430 })); 431 432 const warnings = []; 433 const origWarn = console.warn; 434 console.warn = (...args) => warnings.push(args.join(' ')); 435 436 try { 437 const result = await orCallLLM({ 438 model: 'openai/gpt-4o-mini', 439 messages: [{ role: 'user', content: 'hello' }], 440 }); 441 assert.equal(result.content, 'truncated response...'); 442 assert.ok( 443 warnings.some(w => w.includes('finish_reason=length')), 444 'Should warn about truncation' 445 ); 446 assert.ok( 447 warnings.some(w => w.includes('openai/gpt-4o-mini')), 448 'Should include model name' 449 ); 450 } finally { 451 console.warn = origWarn; 452 } 453 }); 454 455 test('does not warn when finish_reason is "stop"', async () => { 456 mockAxiosPost.mock.mockImplementation(async () => ({ 457 data: { 458 choices: [{ message: { content: 'complete' }, finish_reason: 'stop' }], 459 usage: { prompt_tokens: 10, completion_tokens: 50 }, 460 }, 461 })); 462 463 const warnings = []; 464 const origWarn = console.warn; 465 console.warn = (...args) => warnings.push(args.join(' ')); 466 467 try { 468 await orCallLLM({ 469 model: 'openai/gpt-4o-mini', 470 messages: [{ role: 'user', content: 'hello' }], 471 }); 472 assert.ok( 473 !warnings.some(w => w.includes('finish_reason=length')), 474 'Should not warn for stop finish_reason' 475 ); 476 } finally { 477 console.warn = origWarn; 478 } 479 }); 480 481 test('finish_reason=length with unknown completion_tokens reports "?"', async () => { 482 mockAxiosPost.mock.mockImplementation(async () => ({ 483 data: { 484 choices: [{ message: { content: 'truncated' }, finish_reason: 'length' }], 485 // no usage field 486 }, 487 })); 488 489 const warnings = []; 490 const origWarn = console.warn; 491 console.warn = (...args) => warnings.push(args.join(' ')); 492 493 try { 494 await orCallLLM({ 495 model: 'openai/gpt-4o-mini', 496 messages: [{ role: 'user', content: 'hello' }], 497 }); 498 assert.ok( 499 warnings.some(w => w.includes('completion_tokens=?')), 500 'Should report ? for unknown completion tokens' 501 ); 502 } finally { 503 console.warn = origWarn; 504 } 505 }); 506 }); 507 508 // NOTE: Anthropic provider path was removed in the source code refactor. 509 // All LLM calls now go through OpenRouter only. The Anthropic-specific tests 510 // (model mapping, system message extraction, image_url conversion, etc.) 511 // have been removed since those code paths no longer exist.