// agent-claude-api.test.js
1 /** 2 * Tests for Agent-Claude API Module 3 * 4 * Uses mock.module() to mock llm-provider and better-sqlite3 so no real API 5 * calls or database files are needed. Covers: 6 * - selectModel: all task-type/complexity/env-var branches 7 * - calculateCost / budget limits (daily + emergency) 8 * - validateApiKey (no keys configured) 9 * - getTodaySpending / getHourlySpending / getUsageStats 10 * - logUsage: tracked vs untracked agents 11 * - callClaude: success, API error, explicit model, taskType/complexity routing 12 * - analyzeCode, generateCode, generateTests, reviewArchitecture, 13 * suggestRefactoring, classifyIssue, simpleLLMCall, 14 * analyzeCodeSecurity, generateSecureFix, performThreatModeling 15 * - JSON parse error paths in classifyIssue, analyzeCodeSecurity, 16 * generateSecureFix, performThreatModeling 17 * - resetDb 18 */ 19 20 import { test, describe, mock, before, beforeEach, afterEach } from 'node:test'; 21 import assert from 'node:assert/strict'; 22 23 // --- mock.module() must be called before the module under test is imported --- 24 25 // Shared callLLM mock state — tests replace this function per test. 
26 let _callLLMMock = async () => ({ 27 content: 'default mock response', 28 usage: { promptTokens: 10, completionTokens: 5 }, 29 }); 30 31 // Shared DB mock state 32 let _dbRows = []; 33 const _dbRunResult = { changes: 1 }; 34 let _preparedSql = ''; 35 36 const mockStmt = { 37 get: () => ({ total: 0 }), 38 all: (..._params) => _dbRows, 39 run: (..._args) => _dbRunResult, 40 }; 41 42 const mockDb = { 43 pragma: () => {}, 44 prepare: sql => { 45 _preparedSql = sql; 46 return mockStmt; 47 }, 48 close: () => {}, 49 }; 50 51 const _dbFactory = () => mockDb; 52 53 mock.module('better-sqlite3', { 54 defaultExport: function Database(_path) { 55 return _dbFactory(); 56 }, 57 }); 58 59 mock.module('../../../src/utils/llm-provider.js', { 60 namedExports: { 61 callLLM: async (...args) => _callLLMMock(...args), 62 }, 63 }); 64 65 // Now import the module under test (after mocks are registered) 66 const { 67 resetDb, 68 selectModel, 69 getTodaySpending, 70 getHourlySpending, 71 getUsageStats, 72 analyzeCode, 73 generateCode, 74 generateTests, 75 reviewArchitecture, 76 suggestRefactoring, 77 classifyIssue, 78 simpleLLMCall, 79 analyzeCodeSecurity, 80 generateSecureFix, 81 performThreatModeling, 82 } = await import('../../../src/agents/utils/agent-claude-api.js'); 83 84 // ────────────────────────────────────────────────────────────────────────────── 85 // Helpers 86 // ────────────────────────────────────────────────────────────────────────────── 87 88 function setApiKey(type = 'openrouter') { 89 delete process.env.ANTHROPIC_API_KEY; 90 delete process.env.OPENROUTER_API_KEY; 91 if (type === 'openrouter') process.env.OPENROUTER_API_KEY = 'test-or-key'; 92 else if (type === 'anthropic') process.env.ANTHROPIC_API_KEY = 'test-ant-key'; 93 } 94 95 function clearApiKeys() { 96 delete process.env.ANTHROPIC_API_KEY; 97 delete process.env.OPENROUTER_API_KEY; 98 } 99 100 function setLLMMock(text, usage = { promptTokens: 100, completionTokens: 50 }) { 101 _callLLMMock = async () => ({ 
content: text, usage }); 102 } 103 104 function setDbSpending(todayTotal, hourlyTotal = 0) { 105 let callCount = 0; 106 mockStmt.get = () => { 107 callCount++; 108 // First call = daily spending, second = hourly 109 if (callCount % 2 === 1) return { total: todayTotal }; 110 return { total: hourlyTotal }; 111 }; 112 } 113 114 // ────────────────────────────────────────────────────────────────────────────── 115 116 describe('selectModel', () => { 117 beforeEach(() => { 118 delete process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS; 119 }); 120 121 test('returns haiku for simple task type (classify_issue)', () => { 122 assert.ok(selectModel('classify_issue').includes('haiku')); 123 }); 124 125 test('returns haiku for scan_logs', () => { 126 assert.ok(selectModel('scan_logs').includes('haiku')); 127 }); 128 129 test('returns haiku for detect_anomaly', () => { 130 assert.ok(selectModel('detect_anomaly').includes('haiku')); 131 }); 132 133 test('returns haiku for scan_secrets', () => { 134 assert.ok(selectModel('scan_secrets').includes('haiku')); 135 }); 136 137 test('returns haiku for find_test_files', () => { 138 assert.ok(selectModel('find_test_files').includes('haiku')); 139 }); 140 141 test('returns haiku for scan_dependencies', () => { 142 assert.ok(selectModel('scan_dependencies').includes('haiku')); 143 }); 144 145 test('returns sonnet for complex task type (analyzeCode)', () => { 146 assert.ok(selectModel('analyzeCode').includes('sonnet')); 147 }); 148 149 test('returns sonnet for generateCode', () => { 150 assert.ok(selectModel('generateCode').includes('sonnet')); 151 }); 152 153 test('returns sonnet for reviewArchitecture', () => { 154 assert.ok(selectModel('reviewArchitecture').includes('sonnet')); 155 }); 156 157 test('returns sonnet for suggestRefactoring', () => { 158 assert.ok(selectModel('suggestRefactoring').includes('sonnet')); 159 }); 160 161 test('returns sonnet for threat_model', () => { 162 assert.ok(selectModel('threat_model').includes('sonnet')); 163 
}); 164 165 test('returns sonnet for generateTests', () => { 166 assert.ok(selectModel('generateTests').includes('sonnet')); 167 }); 168 169 test('returns sonnet for unknown task type', () => { 170 assert.ok(selectModel('completely_unknown').includes('sonnet')); 171 }); 172 173 test('explicit complexity=simple forces haiku', () => { 174 assert.ok(selectModel('analyzeCode', 'simple').includes('haiku')); 175 }); 176 177 test('explicit complexity=complex forces sonnet', () => { 178 assert.ok(selectModel('classify_issue', 'complex').includes('sonnet')); 179 }); 180 181 test('AGENT_USE_HAIKU_FOR_SIMPLE_TASKS=false always returns sonnet', () => { 182 process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS = 'false'; 183 assert.ok(selectModel('classify_issue').includes('sonnet')); 184 assert.ok(selectModel('classify_issue', 'simple').includes('sonnet')); 185 delete process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS; 186 }); 187 }); 188 189 // ────────────────────────────────────────────────────────────────────────────── 190 191 describe('getTodaySpending', () => { 192 beforeEach(() => { 193 resetDb(); 194 setApiKey(); 195 mockStmt.get = () => ({ total: 3.14 }); 196 }); 197 198 afterEach(() => { 199 resetDb(); 200 clearApiKeys(); 201 }); 202 203 test('returns numeric total from db', () => { 204 const result = getTodaySpending(); 205 assert.strictEqual(result, 3.14); 206 }); 207 }); 208 209 describe('getHourlySpending', () => { 210 beforeEach(() => { 211 resetDb(); 212 setApiKey(); 213 mockStmt.get = () => ({ total: 0.5 }); 214 }); 215 216 afterEach(() => { 217 resetDb(); 218 clearApiKeys(); 219 }); 220 221 test('returns numeric total from db', () => { 222 const result = getHourlySpending(); 223 assert.strictEqual(result, 0.5); 224 }); 225 }); 226 227 // ────────────────────────────────────────────────────────────────────────────── 228 229 describe('getUsageStats', () => { 230 beforeEach(() => { 231 resetDb(); 232 setApiKey(); 233 }); 234 235 afterEach(() => { 236 resetDb(); 237 
clearApiKeys(); 238 }); 239 240 test('returns aggregate stats with empty results', () => { 241 _dbRows = []; 242 mockStmt.all = () => []; 243 const stats = getUsageStats(); 244 assert.strictEqual(stats.totalCost, 0); 245 assert.strictEqual(stats.agents.length, 0); 246 assert.strictEqual(stats.modelBreakdown.haiku.calls, 0); 247 assert.strictEqual(stats.modelBreakdown.sonnet.calls, 0); 248 assert.strictEqual(stats.days, 7); 249 }); 250 251 test('returns stats with haiku and sonnet rows', () => { 252 mockStmt.all = () => [ 253 { 254 agent_name: 'developer', 255 model: 'anthropic/claude-3.5-haiku', 256 call_count: 10, 257 total_prompt_tokens: 1000, 258 total_completion_tokens: 500, 259 total_cost_usd: 0.01, 260 avg_cost_per_call: 0.001, 261 }, 262 { 263 agent_name: 'qa', 264 model: 'anthropic/claude-3.5-sonnet', 265 call_count: 5, 266 total_prompt_tokens: 2000, 267 total_completion_tokens: 1000, 268 total_cost_usd: 0.1, 269 avg_cost_per_call: 0.02, 270 }, 271 ]; 272 273 const stats = getUsageStats(); 274 assert.strictEqual(stats.totalCost, 0.11); 275 assert.strictEqual(stats.modelBreakdown.haiku.calls, 10); 276 assert.strictEqual(stats.modelBreakdown.sonnet.calls, 5); 277 assert.ok(parseFloat(stats.modelBreakdown.savings) > 0); 278 }); 279 280 test('filters by agentName when provided', () => { 281 let capturedParams; 282 mockStmt.all = (...params) => { 283 capturedParams = params; 284 return []; 285 }; 286 getUsageStats('developer', 14); 287 // Should have passed the agent name as a param 288 assert.ok(capturedParams.includes('developer')); 289 }); 290 291 test('avgCost is 0 when haiku calls is 0', () => { 292 mockStmt.all = () => [ 293 { 294 agent_name: 'qa', 295 model: 'anthropic/claude-3.5-sonnet', 296 call_count: 2, 297 total_prompt_tokens: 100, 298 total_completion_tokens: 50, 299 total_cost_usd: 0.05, 300 avg_cost_per_call: 0.025, 301 }, 302 ]; 303 const stats = getUsageStats(); 304 assert.strictEqual(stats.modelBreakdown.haiku.avgCost, 0); 305 // When 
sonnetCost > 0 and haikuCost = 0, savings = '0.0' (toFixed returns string) 306 assert.strictEqual(stats.modelBreakdown.savings, '0.0'); 307 }); 308 }); 309 310 // ────────────────────────────────────────────────────────────────────────────── 311 312 describe('budget enforcement', () => { 313 beforeEach(() => { 314 resetDb(); 315 setApiKey(); 316 }); 317 318 afterEach(() => { 319 resetDb(); 320 clearApiKeys(); 321 delete process.env.AGENT_DAILY_BUDGET; 322 mockStmt.get = () => ({ total: 0 }); 323 }); 324 325 test('throws when hourly spending exceeds $5 emergency threshold', async () => { 326 let callCount = 0; 327 mockStmt.get = () => { 328 callCount++; 329 // hourly check comes second in checkBudgetLimits 330 if (callCount % 2 === 1) return { total: 0 }; 331 return { total: 5.5 }; 332 }; 333 setLLMMock('some response'); 334 335 await assert.rejects(() => analyzeCode('developer', 1, 'test.js', 'analyze this'), /EMERGENCY/); 336 }); 337 338 test('throws when daily spending meets budget limit', async () => { 339 process.env.AGENT_DAILY_BUDGET = '5'; 340 let callCount = 0; 341 mockStmt.get = () => { 342 callCount++; 343 if (callCount % 2 === 1) return { total: 5.0 }; 344 return { total: 0 }; 345 }; 346 setLLMMock('some response'); 347 348 await assert.rejects( 349 () => analyzeCode('developer', 1, 'test.js', 'analyze this'), 350 /Daily budget exceeded/ 351 ); 352 }); 353 354 test('throws when no API key is configured', async () => { 355 clearApiKeys(); 356 mockStmt.get = () => ({ total: 0 }); 357 setLLMMock('irrelevant'); 358 359 await assert.rejects( 360 () => analyzeCode('developer', 1, 'test.js', 'analyze this'), 361 /No LLM API key/ 362 ); 363 }); 364 }); 365 366 // ────────────────────────────────────────────────────────────────────────────── 367 368 describe('analyzeCode', () => { 369 beforeEach(() => { 370 resetDb(); 371 setApiKey(); 372 mockStmt.get = () => ({ total: 0 }); 373 mockStmt.run = () => ({ changes: 1 }); 374 }); 375 376 afterEach(() => { 377 
resetDb(); 378 clearApiKeys(); 379 }); 380 381 test('returns text response for basic analysis', async () => { 382 setLLMMock('Found 2 issues: missing error handling on line 10'); 383 const result = await analyzeCode('developer', 1, 'src/foo.js', 'check for bugs'); 384 assert.strictEqual(result, 'Found 2 issues: missing error handling on line 10'); 385 }); 386 387 test('includes fileContent in prompt when provided', async () => { 388 let capturedArgs; 389 _callLLMMock = async args => { 390 capturedArgs = args; 391 return { content: 'ok', usage: { promptTokens: 10, completionTokens: 5 } }; 392 }; 393 394 await analyzeCode('developer', 1, 'src/foo.js', 'check bugs', { 395 fileContent: 'const x = 1;', 396 }); 397 398 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 399 assert.ok(userMsg.includes('const x = 1;')); 400 }); 401 402 test('includes relatedFiles when provided', async () => { 403 let capturedArgs; 404 _callLLMMock = async args => { 405 capturedArgs = args; 406 return { content: 'ok', usage: { promptTokens: 10, completionTokens: 5 } }; 407 }; 408 409 await analyzeCode('developer', 1, 'src/foo.js', 'check bugs', { 410 relatedFiles: 'utils.js: export function helper() {}', 411 }); 412 413 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 414 assert.ok(userMsg.includes('Related files')); 415 }); 416 417 test('includes errorMessage when provided', async () => { 418 let capturedArgs; 419 _callLLMMock = async args => { 420 capturedArgs = args; 421 return { content: 'ok', usage: { promptTokens: 10, completionTokens: 5 } }; 422 }; 423 424 await analyzeCode('developer', 1, 'src/foo.js', 'debug this', { 425 errorMessage: 'TypeError: Cannot read property', 426 }); 427 428 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 429 assert.ok(userMsg.includes('TypeError')); 430 }); 431 432 test('uses explicit model override when provided', async () => { 433 let capturedArgs; 434 _callLLMMock = async 
args => { 435 capturedArgs = args; 436 return { content: 'ok', usage: { promptTokens: 10, completionTokens: 5 } }; 437 }; 438 439 await analyzeCode('developer', 1, 'src/foo.js', 'analyze', { 440 model: 'anthropic/claude-3.5-haiku', 441 }); 442 443 assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-haiku'); 444 }); 445 446 test('propagates API errors', async () => { 447 _callLLMMock = async () => { 448 throw new Error('API rate limit exceeded'); 449 }; 450 451 await assert.rejects( 452 () => analyzeCode('developer', 1, 'src/foo.js', 'analyze'), 453 /API rate limit exceeded/ 454 ); 455 }); 456 457 test('logs usage for tracked agent (developer)', async () => { 458 let insertCalled = false; 459 mockStmt.run = () => { 460 insertCalled = true; 461 return { changes: 1 }; 462 }; 463 setLLMMock('analysis result'); 464 465 await analyzeCode('developer', 1, 'src/foo.js', 'analyze'); 466 assert.ok(insertCalled, 'should have logged usage for tracked agent'); 467 }); 468 469 test('does NOT log usage for untracked agent', async () => { 470 let insertCalled = false; 471 mockStmt.run = () => { 472 insertCalled = true; 473 return { changes: 1 }; 474 }; 475 setLLMMock('analysis result'); 476 477 await analyzeCode('task-manager', null, 'src/foo.js', 'analyze'); 478 assert.ok(!insertCalled, 'should not log usage for untracked agent'); 479 }); 480 }); 481 482 // ────────────────────────────────────────────────────────────────────────────── 483 484 describe('generateCode', () => { 485 beforeEach(() => { 486 resetDb(); 487 setApiKey(); 488 mockStmt.get = () => ({ total: 0 }); 489 mockStmt.run = () => ({ changes: 1 }); 490 }); 491 492 afterEach(() => { 493 resetDb(); 494 clearApiKeys(); 495 }); 496 497 test('returns generated code', async () => { 498 setLLMMock("import { foo } from './bar.js';\nexport function myFn() {}"); 499 const result = await generateCode('developer', 1, 'src/new.js', 'create a helper function'); 500 assert.ok(result.includes('myFn')); 501 }); 502 503 
test('includes existingCode in prompt when provided', async () => { 504 let capturedArgs; 505 _callLLMMock = async args => { 506 capturedArgs = args; 507 return { content: 'new code', usage: { promptTokens: 10, completionTokens: 5 } }; 508 }; 509 510 await generateCode('developer', 1, 'src/foo.js', 'add a method', 'class Foo {}'); 511 512 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 513 assert.ok(userMsg.includes('class Foo {}')); 514 }); 515 516 test('uses explicit model override from options', async () => { 517 let capturedArgs; 518 _callLLMMock = async args => { 519 capturedArgs = args; 520 return { content: 'code', usage: { promptTokens: 10, completionTokens: 5 } }; 521 }; 522 523 await generateCode('developer', 1, 'src/foo.js', 'requirements', null, { 524 model: 'anthropic/claude-3.5-haiku', 525 }); 526 527 assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-haiku'); 528 }); 529 530 test('requests 8192 max tokens', async () => { 531 let capturedArgs; 532 _callLLMMock = async args => { 533 capturedArgs = args; 534 return { content: 'code', usage: { promptTokens: 10, completionTokens: 5 } }; 535 }; 536 537 await generateCode('developer', 1, 'src/foo.js', 'requirements'); 538 assert.strictEqual(capturedArgs.max_tokens, 8192); 539 }); 540 }); 541 542 // ────────────────────────────────────────────────────────────────────────────── 543 544 describe('generateTests', () => { 545 beforeEach(() => { 546 resetDb(); 547 setApiKey(); 548 mockStmt.get = () => ({ total: 0 }); 549 mockStmt.run = () => ({ changes: 1 }); 550 }); 551 552 afterEach(() => { 553 resetDb(); 554 clearApiKeys(); 555 }); 556 557 test('returns test code', async () => { 558 setLLMMock("import { test } from 'node:test';\ntest('works', () => {});"); 559 const result = await generateTests('developer', 1, 'src/foo.js', 'export function add(a,b) {}'); 560 assert.ok(result.includes('import { test }')); 561 }); 562 563 test('includes coverageGaps in prompt when 
provided', async () => { 564 let capturedArgs; 565 _callLLMMock = async args => { 566 capturedArgs = args; 567 return { content: 'tests', usage: { promptTokens: 10, completionTokens: 5 } }; 568 }; 569 570 const gaps = { uncoveredLines: [10, 15, 20] }; 571 await generateTests('qa', 1, 'src/foo.js', 'const x = 1', gaps); 572 573 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 574 assert.ok(userMsg.includes('uncoveredLines')); 575 }); 576 577 test('uses explicit model from options', async () => { 578 let capturedArgs; 579 _callLLMMock = async args => { 580 capturedArgs = args; 581 return { content: 'tests', usage: { promptTokens: 10, completionTokens: 5 } }; 582 }; 583 584 await generateTests('qa', 1, 'src/foo.js', 'code', null, { 585 model: 'anthropic/claude-3.5-haiku', 586 }); 587 588 assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-haiku'); 589 }); 590 }); 591 592 // ────────────────────────────────────────────────────────────────────────────── 593 594 describe('reviewArchitecture', () => { 595 beforeEach(() => { 596 resetDb(); 597 setApiKey(); 598 mockStmt.get = () => ({ total: 0 }); 599 mockStmt.run = () => ({ changes: 1 }); 600 }); 601 602 afterEach(() => { 603 resetDb(); 604 clearApiKeys(); 605 }); 606 607 test('returns review feedback', async () => { 608 setLLMMock('Architecture looks good but consider adding indexes to the messages table.'); 609 const result = await reviewArchitecture('architect', 1, 'proposal: use SQLite with WAL mode'); 610 assert.ok(result.includes('Architecture')); 611 }); 612 613 test('includes relatedFiles in prompt when provided', async () => { 614 let capturedArgs; 615 _callLLMMock = async args => { 616 capturedArgs = args; 617 return { content: 'feedback', usage: { promptTokens: 10, completionTokens: 5 } }; 618 }; 619 620 await reviewArchitecture('architect', 1, 'proposal text', { 621 relatedFiles: 'schema.sql: CREATE TABLE ...', 622 }); 623 624 const userMsg = capturedArgs.messages.find(m => 
m.role === 'user').content; 625 assert.ok(userMsg.includes('Related files')); 626 }); 627 628 test('includes requirements in prompt when provided', async () => { 629 let capturedArgs; 630 _callLLMMock = async args => { 631 capturedArgs = args; 632 return { content: 'feedback', usage: { promptTokens: 10, completionTokens: 5 } }; 633 }; 634 635 await reviewArchitecture('architect', 1, 'proposal text', { 636 requirements: 'must support 10k rps', 637 }); 638 639 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 640 assert.ok(userMsg.includes('must support 10k rps')); 641 }); 642 643 test('uses model from context when provided', async () => { 644 let capturedArgs; 645 _callLLMMock = async args => { 646 capturedArgs = args; 647 return { content: 'feedback', usage: { promptTokens: 10, completionTokens: 5 } }; 648 }; 649 650 await reviewArchitecture('architect', 1, 'proposal text', { 651 model: 'anthropic/claude-3.5-haiku', 652 }); 653 654 assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-haiku'); 655 }); 656 }); 657 658 // ────────────────────────────────────────────────────────────────────────────── 659 660 describe('suggestRefactoring', () => { 661 beforeEach(() => { 662 resetDb(); 663 setApiKey(); 664 mockStmt.get = () => ({ total: 0 }); 665 mockStmt.run = () => ({ changes: 1 }); 666 }); 667 668 afterEach(() => { 669 resetDb(); 670 clearApiKeys(); 671 }); 672 673 test('returns refactoring suggestions', async () => { 674 setLLMMock('Extract the validation logic into a separate function.'); 675 const result = await suggestRefactoring('developer', 1, 'src/foo.js', 'function bigFn() {}'); 676 assert.ok(result.includes('Extract')); 677 }); 678 679 test('includes reason in prompt when provided', async () => { 680 let capturedArgs; 681 _callLLMMock = async args => { 682 capturedArgs = args; 683 return { content: 'suggestions', usage: { promptTokens: 10, completionTokens: 5 } }; 684 }; 685 686 await suggestRefactoring('developer', 1, 
'src/foo.js', 'code', 'too complex'); 687 688 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 689 assert.ok(userMsg.includes('too complex')); 690 }); 691 692 test('uses model from options when provided', async () => { 693 let capturedArgs; 694 _callLLMMock = async args => { 695 capturedArgs = args; 696 return { content: 'suggestions', usage: { promptTokens: 10, completionTokens: 5 } }; 697 }; 698 699 await suggestRefactoring('developer', 1, 'src/foo.js', 'code', null, { 700 model: 'anthropic/claude-3.5-haiku', 701 }); 702 703 assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-haiku'); 704 }); 705 }); 706 707 // ────────────────────────────────────────────────────────────────────────────── 708 709 describe('classifyIssue', () => { 710 beforeEach(() => { 711 resetDb(); 712 setApiKey(); 713 mockStmt.get = () => ({ total: 0 }); 714 mockStmt.run = () => ({ changes: 1 }); 715 }); 716 717 afterEach(() => { 718 resetDb(); 719 clearApiKeys(); 720 }); 721 722 test('returns parsed JSON classification', async () => { 723 const classification = { 724 category: 'DATABASE', 725 severity: 'high', 726 suggestedAction: 'add retry logic', 727 assignTo: 'developer', 728 }; 729 setLLMMock(JSON.stringify(classification)); 730 731 const result = await classifyIssue('triage', 1, 'SQLITE_BUSY: database is locked'); 732 assert.strictEqual(result.category, 'DATABASE'); 733 assert.strictEqual(result.severity, 'high'); 734 assert.strictEqual(result.assignTo, 'developer'); 735 }); 736 737 test('includes stackTrace in prompt when provided', async () => { 738 let capturedArgs; 739 _callLLMMock = async args => { 740 capturedArgs = args; 741 return { 742 content: JSON.stringify({ 743 category: 'CODE_BUG', 744 severity: 'low', 745 suggestedAction: 'fix', 746 assignTo: 'developer', 747 }), 748 usage: { promptTokens: 10, completionTokens: 5 }, 749 }; 750 }; 751 752 await classifyIssue('triage', 1, 'TypeError', { 753 stackTrace: ' at foo.js:10:5', 754 }); 755 756 
const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 757 assert.ok(userMsg.includes('at foo.js:10:5')); 758 }); 759 760 test('includes logContext in prompt when provided', async () => { 761 let capturedArgs; 762 _callLLMMock = async args => { 763 capturedArgs = args; 764 return { 765 content: JSON.stringify({ 766 category: 'NETWORK', 767 severity: 'medium', 768 suggestedAction: 'retry', 769 assignTo: 'developer', 770 }), 771 usage: { promptTokens: 10, completionTokens: 5 }, 772 }; 773 }; 774 775 await classifyIssue('triage', 1, 'ECONNREFUSED', { 776 logContext: 'last 5 lines of log', 777 }); 778 779 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 780 assert.ok(userMsg.includes('last 5 lines of log')); 781 }); 782 783 test('throws on invalid JSON response', async () => { 784 setLLMMock('not valid json at all'); 785 786 await assert.rejects( 787 () => classifyIssue('triage', 1, 'some error'), 788 /Invalid classification response/ 789 ); 790 }); 791 792 test('uses haiku model (simple task)', async () => { 793 let capturedArgs; 794 _callLLMMock = async args => { 795 capturedArgs = args; 796 return { 797 content: JSON.stringify({ 798 category: 'API', 799 severity: 'low', 800 suggestedAction: 'retry', 801 assignTo: 'triage', 802 }), 803 usage: { promptTokens: 10, completionTokens: 5 }, 804 }; 805 }; 806 807 await classifyIssue('triage', 1, 'API error'); 808 assert.ok(capturedArgs.model.includes('haiku'), 'classifyIssue should use haiku'); 809 }); 810 811 test('uses model from context when provided', async () => { 812 let capturedArgs; 813 _callLLMMock = async args => { 814 capturedArgs = args; 815 return { 816 content: JSON.stringify({ 817 category: 'API', 818 severity: 'low', 819 suggestedAction: 'retry', 820 assignTo: 'triage', 821 }), 822 usage: { promptTokens: 10, completionTokens: 5 }, 823 }; 824 }; 825 826 await classifyIssue('triage', 1, 'API error', { model: 'anthropic/claude-3.5-sonnet' }); 827 
assert.strictEqual(capturedArgs.model, 'anthropic/claude-3.5-sonnet'); 828 }); 829 }); 830 831 // ────────────────────────────────────────────────────────────────────────────── 832 833 describe('simpleLLMCall', () => { 834 beforeEach(() => { 835 resetDb(); 836 setApiKey(); 837 mockStmt.get = () => ({ total: 0 }); 838 mockStmt.run = () => ({ changes: 1 }); 839 }); 840 841 afterEach(() => { 842 resetDb(); 843 clearApiKeys(); 844 }); 845 846 test('returns text response', async () => { 847 setLLMMock('Here is your answer.'); 848 const result = await simpleLLMCall('developer', 1, { prompt: 'What is 2+2?' }); 849 assert.strictEqual(result, 'Here is your answer.'); 850 }); 851 852 test('uses default system prompt when none provided', async () => { 853 let capturedArgs; 854 _callLLMMock = async args => { 855 capturedArgs = args; 856 return { content: 'response', usage: { promptTokens: 10, completionTokens: 5 } }; 857 }; 858 859 await simpleLLMCall('developer', 1, { prompt: 'hello' }); 860 const sysMsg = capturedArgs.messages.find(m => m.role === 'system').content; 861 assert.ok(sysMsg.includes('helpful AI assistant')); 862 }); 863 864 test('injects taskHistory into system prompt when provided', async () => { 865 let capturedArgs; 866 _callLLMMock = async args => { 867 capturedArgs = args; 868 return { content: 'response', usage: { promptTokens: 10, completionTokens: 5 } }; 869 }; 870 871 await simpleLLMCall('developer', 1, { 872 prompt: 'continue', 873 systemPrompt: 'You are a developer.', 874 taskHistory: 'Previous task: fixed bug #42', 875 }); 876 877 const sysMsg = capturedArgs.messages.find(m => m.role === 'system').content; 878 assert.ok(sysMsg.includes('Previous task: fixed bug #42')); 879 }); 880 881 test('respects maxTokens option', async () => { 882 let capturedArgs; 883 _callLLMMock = async args => { 884 capturedArgs = args; 885 return { content: 'response', usage: { promptTokens: 10, completionTokens: 5 } }; 886 }; 887 888 await simpleLLMCall('developer', 1, { 
prompt: 'hello', maxTokens: 1024 }); 889 assert.strictEqual(capturedArgs.max_tokens, 1024); 890 }); 891 }); 892 893 // ────────────────────────────────────────────────────────────────────────────── 894 895 describe('analyzeCodeSecurity', () => { 896 beforeEach(() => { 897 resetDb(); 898 setApiKey(); 899 mockStmt.get = () => ({ total: 0 }); 900 mockStmt.run = () => ({ changes: 1 }); 901 }); 902 903 afterEach(() => { 904 resetDb(); 905 clearApiKeys(); 906 }); 907 908 test('returns parsed security findings', async () => { 909 const findings = { 910 findings: [ 911 { 912 type: 'sql_injection', 913 severity: 'critical', 914 line: 42, 915 description: 'Unparameterized query', 916 recommendation: 'Use prepared statements', 917 cwe_id: 'CWE-89', 918 }, 919 ], 920 summary: 'One critical SQL injection found', 921 }; 922 setLLMMock(JSON.stringify(findings)); 923 924 const result = await analyzeCodeSecurity( 925 'security', 926 1, 927 'const q = `SELECT * FROM users WHERE id = ${id}`' 928 ); 929 assert.strictEqual(result.findings.length, 1); 930 assert.strictEqual(result.findings[0].type, 'sql_injection'); 931 assert.strictEqual(result.summary, 'One critical SQL injection found'); 932 }); 933 934 test('handles markdown code block wrapping in response', async () => { 935 const findings = { findings: [], summary: 'No issues' }; 936 setLLMMock(`\`\`\`json\n${JSON.stringify(findings)}\n\`\`\``); 937 938 const result = await analyzeCodeSecurity('security', 1, 'safe code'); 939 assert.strictEqual(result.summary, 'No issues'); 940 }); 941 942 test('handles generic code block wrapping', async () => { 943 const findings = { findings: [], summary: 'Clean' }; 944 setLLMMock(`\`\`\`\n${JSON.stringify(findings)}\n\`\`\``); 945 946 const result = await analyzeCodeSecurity('security', 1, 'safe code'); 947 assert.strictEqual(result.summary, 'Clean'); 948 }); 949 950 test('includes focusArea in prompt when provided', async () => { 951 let capturedArgs; 952 _callLLMMock = async args => { 953 
capturedArgs = args; 954 return { 955 content: JSON.stringify({ findings: [], summary: 'ok' }), 956 usage: { promptTokens: 10, completionTokens: 5 }, 957 }; 958 }; 959 960 await analyzeCodeSecurity('security', 1, 'code', 'sql_injection'); 961 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 962 assert.ok(userMsg.includes('sql injection')); 963 }); 964 965 test('includes fileName in prompt when provided', async () => { 966 let capturedArgs; 967 _callLLMMock = async args => { 968 capturedArgs = args; 969 return { 970 content: JSON.stringify({ findings: [], summary: 'ok' }), 971 usage: { promptTokens: 10, completionTokens: 5 }, 972 }; 973 }; 974 975 await analyzeCodeSecurity('security', 1, 'code', null, 'src/auth.js'); 976 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 977 assert.ok(userMsg.includes('src/auth.js')); 978 }); 979 980 test('throws on invalid JSON response', async () => { 981 setLLMMock('not valid json'); 982 983 await assert.rejects( 984 () => analyzeCodeSecurity('security', 1, 'code'), 985 /Failed to parse security analysis/ 986 ); 987 }); 988 }); 989 990 // ────────────────────────────────────────────────────────────────────────────── 991 992 describe('generateSecureFix', () => { 993 beforeEach(() => { 994 resetDb(); 995 setApiKey(); 996 mockStmt.get = () => ({ total: 0 }); 997 mockStmt.run = () => ({ changes: 1 }); 998 }); 999 1000 afterEach(() => { 1001 resetDb(); 1002 clearApiKeys(); 1003 }); 1004 1005 const sampleFinding = { 1006 type: 'sql_injection', 1007 severity: 'critical', 1008 line: 10, 1009 description: 'Unparameterized query', 1010 recommendation: 'Use prepared statements', 1011 }; 1012 1013 test('returns fix with old_string and new_string', async () => { 1014 const fix = { 1015 old_string: 'db.query(`SELECT * FROM users WHERE id = ${id}`)', 1016 new_string: 'db.prepare("SELECT * FROM users WHERE id = ?").get(id)', 1017 explanation: 'Parameterized query prevents SQL injection', 1018 
testing_notes: 'Verify with malicious input', 1019 }; 1020 setLLMMock(JSON.stringify(fix)); 1021 1022 const result = await generateSecureFix('security', 1, { 1023 code: 'db.query(`SELECT * FROM users WHERE id = ${id}`)', 1024 finding: sampleFinding, 1025 }); 1026 1027 assert.ok(result.old_string.includes('SELECT')); 1028 assert.ok(result.new_string.includes('prepare')); 1029 }); 1030 1031 test('handles markdown code block in response', async () => { 1032 const fix = { 1033 old_string: 'bad code', 1034 new_string: 'good code', 1035 explanation: 'fixed', 1036 testing_notes: 'test it', 1037 }; 1038 setLLMMock(`\`\`\`json\n${JSON.stringify(fix)}\n\`\`\``); 1039 1040 const result = await generateSecureFix('security', 1, { 1041 code: 'bad code', 1042 finding: sampleFinding, 1043 }); 1044 assert.strictEqual(result.old_string, 'bad code'); 1045 }); 1046 1047 test('includes fileName in prompt when provided', async () => { 1048 let capturedArgs; 1049 _callLLMMock = async args => { 1050 capturedArgs = args; 1051 return { 1052 content: JSON.stringify({ 1053 old_string: 'a', 1054 new_string: 'b', 1055 explanation: 'e', 1056 testing_notes: 't', 1057 }), 1058 usage: { promptTokens: 10, completionTokens: 5 }, 1059 }; 1060 }; 1061 1062 await generateSecureFix('security', 1, { 1063 code: 'some code', 1064 finding: sampleFinding, 1065 fileName: 'src/auth.js', 1066 }); 1067 1068 const userMsg = capturedArgs.messages.find(m => m.role === 'user').content; 1069 assert.ok(userMsg.includes('src/auth.js')); 1070 }); 1071 1072 test('throws when response is invalid JSON', async () => { 1073 setLLMMock('not valid json'); 1074 1075 await assert.rejects( 1076 () => generateSecureFix('security', 1, { code: 'code', finding: sampleFinding }), 1077 /Failed to generate secure fix/ 1078 ); 1079 }); 1080 1081 test('throws when fix is missing old_string or new_string', async () => { 1082 setLLMMock(JSON.stringify({ explanation: 'oops', testing_notes: 'n/a' })); 1083 1084 await assert.rejects( 1085 () => 
generateSecureFix('security', 1, { code: 'code', finding: sampleFinding }),
      /Failed to generate secure fix/
    );
  });
});

// ──────────────────────────────────────────────────────────────────────────────

describe('performThreatModeling', () => {
  // Fresh DB + API key + permissive budget mocks before every test.
  beforeEach(() => {
    resetDb();
    setApiKey();
    mockStmt.get = () => ({ total: 0 });
    mockStmt.run = () => ({ changes: 1 });
  });

  // Tear down DB state and env vars so tests stay isolated.
  afterEach(() => {
    resetDb();
    clearApiKeys();
  });

  // Canonical STRIDE/DREAD-shaped payload the mocked LLM returns.
  const sampleThreatModel = {
    threats: [
      {
        stride_category: 'Spoofing',
        title: 'Fake identity',
        description: 'Attacker could spoof user identity',
        attack_scenario: 'Steal session token',
        dread: {
          damage: 8,
          reproducibility: 7,
          exploitability: 6,
          affected_users: 5,
          discoverability: 4,
          total: 30,
          average: 6,
        },
        risk_level: 'high',
        mitigation: 'Use secure session tokens',
        cwe_id: 'CWE-287',
      },
    ],
    summary: 'Moderate risk overall',
    priority_threats: ['Fake identity'],
  };

  test('returns parsed threat model', async () => {
    setLLMMock(JSON.stringify(sampleThreatModel));

    const result = await performThreatModeling('security', 1, {
      component: 'auth module',
    });

    assert.strictEqual(result.threats.length, 1);
    assert.strictEqual(result.threats[0].stride_category, 'Spoofing');
    assert.strictEqual(result.summary, 'Moderate risk overall');
  });

  // ```json fenced responses must still parse (markdown-stripping path).
  test('handles markdown code block in response', async () => {
    setLLMMock(`\`\`\`json\n${JSON.stringify(sampleThreatModel)}\n\`\`\``);

    const result = await performThreatModeling('security', 1, {
      component: 'auth module',
    });
    assert.ok(result.threats.length > 0);
  });

  // Capture callLLM args to assert on the prompt contents below.
  test('includes dataFlow in prompt when provided', async () => {
    let capturedArgs;
    _callLLMMock = async args => {
      capturedArgs = args;
return {
        content: JSON.stringify(sampleThreatModel),
        usage: { promptTokens: 10, completionTokens: 5 },
      };
    };

    await performThreatModeling('security', 1, {
      component: 'component',
      componentType: 'api',
      dataFlow: 'user -> api -> db',
    });

    // The user-role message should carry the supplied data-flow description.
    const userMsg = capturedArgs.messages.find(m => m.role === 'user').content;
    assert.ok(userMsg.includes('user -> api -> db'));
  });

  // Parse failure is wrapped in a "Failed to perform threat modeling" error.
  test('throws on invalid JSON response', async () => {
    setLLMMock('not valid json');

    await assert.rejects(
      () => performThreatModeling('security', 1, { component: 'something' }),
      /Failed to perform threat modeling/
    );
  });
});

// ──────────────────────────────────────────────────────────────────────────────

describe('resetDb', () => {
  test('can be called when db is null (no-op)', () => {
    resetDb(); // call twice to cover both null and non-null paths
    resetDb();
    // Should not throw
  });

  // After resetDb(), the next DB-touching call must lazily reopen the
  // (mocked) database rather than error on a closed handle.
  test('resets db so next call reinitializes it', () => {
    resetDb();
    setApiKey();
    mockStmt.get = () => ({ total: 0 });
    // getTodaySpending should reinitialize the db without error
    assert.doesNotThrow(() => getTodaySpending());
    resetDb();
    clearApiKeys();
  });
});