agent-claude-api.test.js
/**
 * Comprehensive Tests for Agent-Claude API Module
 *
 * Tests model selection, budget tracking, usage stats, and ALL API-calling functions.
 * Uses mock.method() on axios to avoid real API calls (callLLM routes through axios for OpenRouter).
 */

import { test, mock } from 'node:test';
import assert from 'node:assert/strict';
import Database from 'better-sqlite3';
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import axios from 'axios';
import {
  resetDb,
  selectModel,
  getTodaySpending,
  getHourlySpending,
  getUsageStats,
  analyzeCode,
  generateCode,
  generateTests,
  reviewArchitecture,
  suggestRefactoring,
  classifyIssue,
  simpleLLMCall,
  analyzeCodeSecurity,
  generateSecureFix,
  performThreatModeling,
} from '../../src/agents/utils/agent-claude-api.js';

// Model identifiers used as fixture data and as explicit overrides.
const SONNET = 'anthropic/claude-3.5-sonnet';
const HAIKU = 'anthropic/claude-3.5-haiku';

let testDir;
let dbPath;
let db;

/**
 * Creates a throwaway SQLite database in a fresh temp directory, points
 * DATABASE_PATH at it and creates the agent_tasks / agent_llm_usage tables.
 * The test-side handle is kept in `db` so tests can seed and inspect rows.
 */
function initTestDb() {
  testDir = mkdtempSync(join(tmpdir(), 'api-test-'));
  dbPath = join(testDir, 'test.db');
  process.env.DATABASE_PATH = dbPath;

  db = new Database(dbPath);
  db.pragma('foreign_keys = ON');

  db.exec(`
    CREATE TABLE agent_tasks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      task_type TEXT NOT NULL,
      assigned_to TEXT NOT NULL,
      status TEXT DEFAULT 'pending',
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE TABLE agent_llm_usage (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      agent_name TEXT NOT NULL,
      task_id INTEGER,
      model TEXT NOT NULL,
      prompt_tokens INTEGER NOT NULL,
      completion_tokens INTEGER NOT NULL,
      cost_usd DECIMAL(10, 6) NOT NULL,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_agent ON agent_llm_usage(agent_name);
    CREATE INDEX IF NOT EXISTS idx_agent_llm_usage_created ON agent_llm_usage(created_at);
  `);
}

/**
 * Tears down everything initTestDb() and the individual tests set up: the
 * module connection (via resetDb), the test-side handle, the temp directory
 * and every environment variable the suites touch. Close/remove failures are
 * deliberately ignored so teardown is best-effort and never masks a test result.
 */
function cleanupTestDb() {
  resetDb();
  if (db) {
    try {
      db.close();
    } catch (_e) {
      /* ignore */
    }
    db = null;
  }
  if (testDir) {
    try {
      rmSync(testDir, { recursive: true });
    } catch (_e) {
      /* ignore */
    }
    testDir = null;
  }
  delete process.env.DATABASE_PATH;
  delete process.env.OPENROUTER_API_KEY;
  delete process.env.ANTHROPIC_API_KEY;
  delete process.env.AGENT_DAILY_BUDGET;
  delete process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS;
}

// makeMockAxios returns an axios-style mock: response.data.choices[0].message.content
function makeMockAxios(responseText, usage = { prompt_tokens: 100, completion_tokens: 50 }) {
  return async () => ({
    data: {
      choices: [{ message: { content: responseText } }],
      usage,
    },
  });
}

/**
 * Replaces axios.post with a mock that answers with `responseText` and records
 * the arguments of the most recent call.
 *
 * @param {string} [responseText] - Content returned as the assistant message.
 * @returns {{url?: string, body?: object, config?: object}} Live object that is
 *   filled in once the code under test calls axios.post.
 */
function mockAxiosCapture(responseText = 'result') {
  const captured = {};
  const respond = makeMockAxios(responseText);
  mock.method(axios, 'post', async (url, body, config) => {
    captured.url = url;
    captured.body = body;
    captured.config = config;
    return respond();
  });
  return captured;
}

/** Returns the content of the user-role message in a captured request body. */
function userPrompt(body) {
  return body.messages.find(m => m.role === 'user').content;
}

/**
 * Seeds one agent_llm_usage row through the test-side handle.
 *
 * @param {string} agentName
 * @param {string} model
 * @param {number} promptTokens
 * @param {number} completionTokens
 * @param {number} costUsd
 * @param {string|null} [age] - SQLite datetime modifier (e.g. '-2 days') applied
 *   to 'now' for created_at; when null the column default is used.
 */
function insertUsage(agentName, model, promptTokens, completionTokens, costUsd, age = null) {
  if (age === null) {
    db.prepare(
      'INSERT INTO agent_llm_usage (agent_name, model, prompt_tokens, completion_tokens, cost_usd) VALUES (?, ?, ?, ?, ?)'
    ).run(agentName, model, promptTokens, completionTokens, costUsd);
    return;
  }
  db.prepare(
    "INSERT INTO agent_llm_usage (agent_name, model, prompt_tokens, completion_tokens, cost_usd, created_at) VALUES (?, ?, ?, ?, ?, datetime('now', ?))"
  ).run(agentName, model, promptTokens, completionTokens, costUsd, age);
}

/**
 * Returns the first logged usage row, failing with a readable assertion
 * (instead of a TypeError on `undefined`) when nothing was logged.
 */
function firstUsageRow() {
  const rows = db.prepare('SELECT * FROM agent_llm_usage').all();
  assert.ok(rows.length > 0, 'expected at least one agent_llm_usage row');
  return rows[0];
}

/** Asserts two floating-point amounts are equal within `tolerance`. */
function assertClose(actual, expected, tolerance = 0.0001) {
  assert.ok(
    Math.abs(actual - expected) < tolerance,
    `expected ${actual} to be within ${tolerance} of ${expected}`
  );
}

/** Registers per-subtest hooks that provide a fresh temp DB only. */
function useTestDb(t) {
  t.beforeEach(() => {
    initTestDb();
  });
  t.afterEach(() => {
    cleanupTestDb();
  });
}

/** Registers per-subtest hooks for suites that call the (mocked) LLM API. */
function useMockedApiEnv(t) {
  t.beforeEach(() => {
    initTestDb();
    process.env.OPENROUTER_API_KEY = 'test-key-123';
  });
  t.afterEach(() => {
    cleanupTestDb();
    mock.restoreAll();
  });
}

test('selectModel', async t => {
  t.beforeEach(() => {
    delete process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS;
  });

  // Each case: subtest name, arguments for selectModel, expected model family.
  const expectModel = (name, args, family) =>
    t.test(name, () => {
      assert.ok(selectModel(...args).includes(family));
    });

  await expectModel('returns haiku for simple tasks', ['classify_issue'], 'haiku');
  await expectModel('returns sonnet for complex tasks', ['analyzeCode'], 'sonnet');
  await expectModel('returns sonnet for unknown tasks', ['unknown_task'], 'sonnet');
  await expectModel('explicit simple override', ['analyzeCode', 'simple'], 'haiku');
  await expectModel('explicit complex override', ['classify_issue', 'complex'], 'sonnet');
  await t.test('disables haiku when env var false', () => {
    process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS = 'false';
    try {
      assert.ok(selectModel('classify_issue').includes('sonnet'));
    } finally {
      // Restore even on failure so later subtests see the default behavior.
      delete process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS;
    }
  });
  await expectModel('scan_logs is simple', ['scan_logs'], 'haiku');
  await expectModel('detect_anomaly is simple', ['detect_anomaly'], 'haiku');
  await expectModel('scan_secrets is simple', ['scan_secrets'], 'haiku');
  await expectModel('find_test_files is simple', ['find_test_files'], 'haiku');
  await expectModel('scan_dependencies is simple', ['scan_dependencies'], 'haiku');
  await expectModel('generateCode is complex', ['generateCode'], 'sonnet');
  await expectModel('reviewArchitecture is complex', ['reviewArchitecture'], 'sonnet');
  await expectModel('suggestRefactoring is complex', ['suggestRefactoring'], 'sonnet');
  await expectModel('threat_model is complex', ['threat_model'], 'sonnet');
  await expectModel('generateTests is complex', ['generateTests'], 'sonnet');
  await expectModel('standard complexity defaults to sonnet', ['some_task', 'standard'], 'sonnet');
});

test('getTodaySpending', async t => {
  useTestDb(t);

  await t.test('returns 0 when no usage', () => {
    assert.strictEqual(getTodaySpending(), 0);
  });
  await t.test('sums today spending', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.005);
    insertUsage('qa', HAIKU, 500, 100, 0.001);
    assertClose(getTodaySpending(), 0.006);
  });
  await t.test('excludes old spending', () => {
    insertUsage('dev', SONNET, 1000, 200, 10.0, '-2 days');
    assert.strictEqual(getTodaySpending(), 0);
  });
});

test('getHourlySpending', async t => {
  useTestDb(t);

  await t.test('returns 0 when no usage', () => {
    assert.strictEqual(getHourlySpending(), 0);
  });
  await t.test('sums recent spending', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.01);
    assertClose(getHourlySpending(), 0.01);
  });
  await t.test('excludes spending from 2 hours ago', () => {
    insertUsage('dev', SONNET, 1000, 200, 5.0, '-2 hours');
    assert.strictEqual(getHourlySpending(), 0);
  });
});

test('getUsageStats', async t => {
  useTestDb(t);

  await t.test('returns empty stats when no usage', () => {
    const stats = getUsageStats();
    assert.strictEqual(stats.agents.length, 0);
    assert.strictEqual(stats.totalCost, 0);
    assert.strictEqual(stats.modelBreakdown.haiku.calls, 0);
    assert.strictEqual(stats.modelBreakdown.sonnet.calls, 0);
  });
  await t.test('groups by agent and model', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    insertUsage('dev', SONNET, 500, 100, 0.003);
    const stats = getUsageStats();
    assert.strictEqual(stats.agents.length, 1);
    assert.strictEqual(stats.agents[0].call_count, 2);
    assertClose(stats.totalCost, 0.009);
  });
  await t.test('filters by agent name', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    insertUsage('qa', HAIKU, 500, 100, 0.001);
    const stats = getUsageStats('dev');
    assert.strictEqual(stats.agents.length, 1);
    assert.strictEqual(stats.agents[0].agent_name, 'dev');
  });
  await t.test('model breakdown with haiku and sonnet', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    insertUsage('qa', HAIKU, 500, 100, 0.001);
    const stats = getUsageStats();
    assert.strictEqual(stats.modelBreakdown.sonnet.calls, 1);
    assert.strictEqual(stats.modelBreakdown.haiku.calls, 1);
    assert.ok(stats.modelBreakdown.sonnet.cost > 0);
    assert.ok(stats.modelBreakdown.haiku.cost > 0);
  });
  await t.test('custom days parameter', () => {
    const stats = getUsageStats(null, 30);
    assert.strictEqual(stats.days, 30);
  });
  await t.test('haiku avgCost is 0 when no haiku calls', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    assert.strictEqual(getUsageStats().modelBreakdown.haiku.avgCost, 0);
  });
  await t.test('includes total tokens', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    const stats = getUsageStats('dev');
    assert.strictEqual(stats.agents[0].total_prompt_tokens, 1000);
    assert.strictEqual(stats.agents[0].total_completion_tokens, 200);
  });
  await t.test('savings percentage when both models used', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    insertUsage('qa', HAIKU, 500, 100, 0.001);
    const stats = getUsageStats();
    assert.ok(stats.modelBreakdown.savings !== undefined);
  });
  await t.test('savings is 0 when no sonnet calls', () => {
    insertUsage('qa', HAIKU, 500, 100, 0.001);
    assert.strictEqual(getUsageStats().modelBreakdown.savings, 0);
  });
  await t.test('sonnet avgCost is correct', () => {
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    insertUsage('dev', SONNET, 1000, 200, 0.006);
    const stats = getUsageStats('dev');
    assertClose(stats.modelBreakdown.sonnet.avgCost, 0.006);
  });
});

test('resetDb', async t => {
  await t.test('handles null db (no-op)', () => {
    assert.doesNotThrow(() => resetDb());
  });
  await t.test('closes connection and allows reconnect', () => {
    initTestDb();
    try {
      assert.strictEqual(getTodaySpending(), 0);
      resetDb();
      // A query after resetDb() must succeed again, i.e. the module reconnects.
      assert.strictEqual(getTodaySpending(), 0);
    } finally {
      // Always release the temp DB, even when an assertion above fails.
      cleanupTestDb();
    }
  });
  await t.test('silently ignores error when db.close() throws', () => {
    initTestDb();
    const originalClose = Database.prototype.close;
    try {
      // Open the module-internal db by triggering getTodaySpending
      getTodaySpending();

      // Patch Database.prototype.close to throw on the next call
      Database.prototype.close = function () {
        throw new Error('Simulated close error');
      };

      // Should not throw even though db.close() throws internally
      assert.doesNotThrow(() => resetDb());
    } finally {
      // Restore prototype method before cleanup
      Database.prototype.close = originalClose;
      cleanupTestDb();
    }
  });
});

test('analyzeCode (mocked fetch)', async t => {
  useMockedApiEnv(t);

  await t.test('returns analysis text', async () => {
    mock.method(axios, 'post', makeMockAxios('Code looks good.'));
    const result = await analyzeCode('developer', 1, 'src/test.js', 'Review');
    assert.strictEqual(result, 'Code looks good.');
  });
  await t.test('includes fileContent in prompt', async () => {
    const request = mockAxiosCapture('result');
    await analyzeCode('developer', 1, 'src/test.js', 'Review', { fileContent: 'const x = 1;' });
    assert.ok(userPrompt(request.body).includes('const x = 1;'));
  });
  await t.test('includes relatedFiles in prompt', async () => {
    const request = mockAxiosCapture('result');
    await analyzeCode('developer', 1, 'src/test.js', 'Review', { relatedFiles: 'related content' });
    assert.ok(userPrompt(request.body).includes('related content'));
  });
  await t.test('includes errorMessage in prompt', async () => {
    const request = mockAxiosCapture('result');
    await analyzeCode('developer', 1, 'src/test.js', 'Review', {
      errorMessage: 'TypeError: x is null',
    });
    assert.ok(userPrompt(request.body).includes('TypeError: x is null'));
  });
  await t.test('logs usage to DB', async () => {
    mock.method(axios, 'post', makeMockAxios('result'));
    await analyzeCode('developer', 1, 'src/test.js', 'Review');
    const usage = db.prepare('SELECT * FROM agent_llm_usage').all();
    assert.strictEqual(usage.length, 1);
    assert.strictEqual(usage[0].agent_name, 'developer');
    assert.strictEqual(usage[0].task_id, 1);
  });
  await t.test('throws when API key not set', async () => {
    delete process.env.OPENROUTER_API_KEY;
    delete process.env.ANTHROPIC_API_KEY;
    await assert.rejects(() => analyzeCode('developer', 1, 'src/test.js', 'Review'), {
      message: /No LLM API key configured/,
    });
  });
  await t.test('throws on API error (axios throws)', async () => {
    const axiosErr = new Error('Request failed with status code 500');
    axiosErr.response = {
      status: 500,
      statusText: 'Internal Server Error',
      data: { error: { message: 'Server Error' } },
    };
    mock.method(axios, 'post', async () => {
      throw axiosErr;
    });
    await assert.rejects(() => analyzeCode('developer', 1, 'src/test.js', 'Review'));
  });
  await t.test('throws when daily budget exceeded', async () => {
    process.env.AGENT_DAILY_BUDGET = '0.001';
    try {
      insertUsage('dev', SONNET, 1000, 200, 0.01);
      await assert.rejects(() => analyzeCode('developer', 1, 'src/test.js', 'Review'), {
        message: /Daily budget exceeded/,
      });
    } finally {
      delete process.env.AGENT_DAILY_BUDGET;
    }
  });
  await t.test('throws on emergency hourly spend', async () => {
    insertUsage('dev', SONNET, 1000, 200, 6.0);
    await assert.rejects(() => analyzeCode('developer', 1, 'src/test.js', 'Review'), {
      message: /EMERGENCY/,
    });
  });
  await t.test('throws when API returns no content', async () => {
    mock.method(axios, 'post', async () => ({
      data: { choices: [{ message: { content: null } }], usage: {} },
    }));
    await assert.rejects(() => analyzeCode('developer', 1, 'src/test.js', 'Review'));
  });
  await t.test('uses explicit model from context', async () => {
    const request = mockAxiosCapture('result');
    await analyzeCode('developer', 1, 'src/test.js', 'Review', {
      model: HAIKU,
    });
    assert.strictEqual(request.body.model, HAIKU);
  });
  await t.test('sets correct OpenRouter headers', async () => {
    const request = mockAxiosCapture('result');
    await analyzeCode('developer', 1, 'src/test.js', 'Review');
    // llm-provider.js captures the API key at module load time, so check format not exact value
    assert.ok(request.config.headers['Authorization'].startsWith('Bearer '));
    assert.ok(request.config.headers['HTTP-Referer']);
    assert.ok(request.config.headers['X-Title']);
  });
  await t.test('handles missing usage data gracefully', async () => {
    mock.method(axios, 'post', async () => ({
      data: { choices: [{ message: { content: 'result' } }] },
    }));
    const result = await analyzeCode('developer', 1, 'src/test.js', 'Review');
    assert.ok(typeof result === 'string');
    const row = firstUsageRow();
    assert.strictEqual(row.prompt_tokens, 0);
    assert.strictEqual(row.completion_tokens, 0);
  });
});

test('generateCode (mocked fetch)', async t => {
  useMockedApiEnv(t);

  await t.test('returns generated code', async () => {
    mock.method(axios, 'post', makeMockAxios('const hello = () => "world";'));
    const result = await generateCode('developer', 1, 'src/hello.js', 'Create hello function');
    assert.strictEqual(result, 'const hello = () => "world";');
  });
  await t.test('includes existing code in prompt', async () => {
    const request = mockAxiosCapture('code');
    await generateCode('developer', 1, 'src/test.js', 'Enhance', 'const x = 1;');
    assert.ok(userPrompt(request.body).includes('const x = 1;'));
  });
  await t.test('uses 8192 max tokens', async () => {
    const request = mockAxiosCapture('code');
    await generateCode('developer', 1, 'src/test.js', 'Generate module');
    assert.strictEqual(request.body.max_tokens, 8192);
  });
  await t.test('logs usage to DB', async () => {
    mock.method(axios, 'post', makeMockAxios('const x = 1;'));
    await generateCode('developer', 2, 'src/test.js', 'Create module');
    assert.strictEqual(firstUsageRow().task_id, 2);
  });
  await t.test('uses explicit model override', async () => {
    const request = mockAxiosCapture('result');
    await generateCode('developer', 1, 'src/test.js', 'Generate', null, {
      model: HAIKU,
    });
    assert.strictEqual(request.body.model, HAIKU);
  });
  await t.test('handles null taskId', async () => {
    mock.method(axios, 'post', makeMockAxios('const x = 1;'));
    await generateCode('developer', null, 'src/test.js', 'Create module');
    assert.strictEqual(firstUsageRow().task_id, null);
  });
  await t.test('omits existing code block when existingCode is null', async () => {
    const request = mockAxiosCapture('result');
    await generateCode('developer', 1, 'src/test.js', 'Create module', null);
    assert.ok(!userPrompt(request.body).includes('Existing code:'));
  });
});

test('generateTests (mocked fetch)', async t => {
  useMockedApiEnv(t);

  await t.test('returns generated test code', async () => {
    const testCode = 'import { test } from "node:test"; test("example", () => {});';
    mock.method(axios, 'post', makeMockAxios(testCode));
    const result = await generateTests('developer', 1, 'src/utils.js', 'const x = () => 1;');
    assert.strictEqual(result, testCode);
  });
  await t.test('includes coverage gaps when provided', async () => {
    const request = mockAxiosCapture('tests');
    await generateTests('developer', 1, 'src/utils.js', 'const x = 1;', { functions: ['myFunc'] });
    assert.ok(userPrompt(request.body).includes('myFunc'));
  });
  await t.test('uses 8192 max tokens', async () => {
    const request = mockAxiosCapture('tests');
    await generateTests('developer', 1, 'src/utils.js', 'const x = 1;');
    assert.strictEqual(request.body.max_tokens, 8192);
  });
  await t.test('uses explicit model override', async () => {
    const request = mockAxiosCapture('result');
    await generateTests('developer', 1, 'src/utils.js', 'const x = 1;', null, {
      model: SONNET,
    });
    assert.strictEqual(request.body.model, SONNET);
  });
  await t.test('handles null coverageGaps', async () => {
    const request = mockAxiosCapture('tests');
    await generateTests('developer', 1, 'src/utils.js', 'const x = 1;', null);
    assert.ok(!userPrompt(request.body).includes('Coverage gaps'));
  });
});

test('reviewArchitecture (mocked fetch)', async t => {
  useMockedApiEnv(t);

  await t.test('returns review text', async () => {
    mock.method(axios, 'post', makeMockAxios('Architecture solid.'));
    const result = await reviewArchitecture('architect', 1, 'Proposed microservice...');
    assert.strictEqual(result, 'Architecture solid.');
  });
  await t.test('includes relatedFiles in prompt', async () => {
    const request = mockAxiosCapture('review');
    await reviewArchitecture('architect', 1, 'Proposal', { relatedFiles: 'existing code here' });
    assert.ok(userPrompt(request.body).includes('existing code here'));
  });
  await t.test('includes requirements in prompt', async () => {
    const request = mockAxiosCapture('review');
    await reviewArchitecture('architect', 1, 'Proposal', {
      requirements: 'Must support 1000 users',
    });
    assert.ok(userPrompt(request.body).includes('Must support 1000 users'));
  });
  await t.test('uses explicit model override', async () => {
    const request = mockAxiosCapture('review');
    await reviewArchitecture('architect', 1, 'Proposal', { model: HAIKU });
    assert.strictEqual(request.body.model, HAIKU);
  });
  await t.test('logs usage to DB', async () => {
    mock.method(axios, 'post', makeMockAxios('review text'));
    await reviewArchitecture('architect', 4, 'Proposal');
    const row = firstUsageRow();
    assert.strictEqual(row.agent_name, 'architect');
    assert.strictEqual(row.task_id, 4);
  });
});

test('suggestRefactoring (mocked fetch)', async t => {
  useMockedApiEnv(t);

  await t.test('returns suggestions text', async () => {
    mock.method(axios, 'post', makeMockAxios('Extract function.'));
    const result = await suggestRefactoring('developer', 1, 'src/big.js', 'const x = 1;');
    assert.strictEqual(result, 'Extract function.');
  });
  await t.test('includes reason in prompt', async () => {
    const request = mockAxiosCapture('suggestions');
    await suggestRefactoring('developer', 1, 'src/big.js', 'const x = 1;', 'File is 200 lines');
    assert.ok(userPrompt(request.body).includes('File is 200 lines'));
  });
  await t.test('works without reason (null)', async () => {
    mock.method(axios, 'post', makeMockAxios('suggestions'));
    const result = await suggestRefactoring('developer', 1, 'src/test.js', 'const x = 1;', null);
    assert.ok(typeof result === 'string');
  });
  await t.test('uses explicit model override', async () => {
    const request = mockAxiosCapture('suggestions');
    await suggestRefactoring('developer', 1, 'src/test.js', 'const x = 1;', null, {
      model: HAIKU,
    });
    assert.strictEqual(request.body.model, HAIKU);
  });
  await t.test('logs usage to DB', async () => {
    mock.method(axios, 'post', makeMockAxios('suggestions'));
    await suggestRefactoring('developer', 7, 'src/big.js', 'const x = 1;');
    assert.strictEqual(firstUsageRow().task_id, 7);
  });
});

// classifyIssue suite: every subtest runs against a fresh temp database with a
// dummy OpenRouter key, and axios.post is replaced by a mock so no network
// request is made. Mocks are restored after each subtest.
test('classifyIssue (mocked fetch)', async t => {
  await t.beforeEach(() => {
    initTestDb();
    process.env.OPENROUTER_API_KEY = 'test-key-123';
  });
  await t.afterEach(() => {
    cleanupTestDb();
    mock.restoreAll();
  });

  // The mocked response content is a JSON string; the test expects the
  // resolved value to expose its fields as properties.
  await t.test('returns parsed classification', async () => {
    const cls = {
      category: 'NETWORK',
      severity: 'high',
      suggestedAction: 'Retry',
      assignTo: 'developer',
    };
    mock.method(axios, 'post', makeMockAxios(JSON.stringify(cls)));
    const result = await classifyIssue('triage', 1, 'Connection timeout');
    assert.strictEqual(result.category, 'NETWORK');
    assert.strictEqual(result.severity, 'high');
  });
  // Captures the request body (second axios.post argument) and checks that
  // the stack trace text reaches the user-role message.
  await t.test('includes stackTrace in prompt', async () => {
    let body;
    const cls = {
      category: 'CODE_BUG',
      severity: 'medium',
      suggestedAction: 'Fix',
      assignTo: 'developer',
    };
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: JSON.stringify(cls) } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await classifyIssue('triage', 1, 'TypeError', {
      stackTrace: 'at myFunction (src/utils.js:42)',
    });
    assert.ok(body.messages.find(m => m.role === 'user').content.includes('at myFunction'));
  });
  // Same capture pattern, for the logContext option.
  await t.test('includes logContext in prompt', async () => {
    let body;
    const cls = {
      category: 'DATABASE',
      severity: 'low',
      suggestedAction: 'Check query',
      assignTo: 'developer',
    };
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: JSON.stringify(cls) } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await classifyIssue('triage', 1, 'DB error', { logContext: 'Last 10 lines...' });
    assert.ok(body.messages.find(m => m.role === 'user').content.includes('Last 10 lines'));
  });
  // A non-JSON reply must reject with the "Invalid classification response" message.
  await t.test('throws on invalid JSON response', async () => {
    mock.method(axios, 'post', makeMockAxios('Not JSON'));
    await assert.rejects(() => classifyIssue('triage', 1, 'Error'), {
      message: /Invalid classification response/,
    });
  });
  // A model passed in the options object must appear unchanged in the request body.
  await t.test('uses explicit model override', async () => {
    let body;
    const cls = {
      category: 'API',
      severity: 'low',
      suggestedAction: 'Retry',
      assignTo: 'developer',
    };
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: JSON.stringify(cls) } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await classifyIssue('triage', 1, 'API timeout', { model: 'anthropic/claude-3.5-sonnet' });
    assert.strictEqual(body.model, 'anthropic/claude-3.5-sonnet');
  });
  // Pins the max_tokens value sent for classification requests.
  await t.test('uses 1024 max tokens', async () => {
    let body;
    const cls = {
      category: 'RATE_LIMIT',
      severity: 'medium',
      suggestedAction: 'Backoff',
      assignTo: 'developer',
    };
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: JSON.stringify(cls) } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await classifyIssue('triage', 1, 'Rate limit hit');
    assert.strictEqual(body.max_tokens, 1024);
  });
});

// simpleLLMCall suite: same per-subtest setup/teardown as the suite above.
test('simpleLLMCall (mocked fetch)', async t => {
  await t.beforeEach(() => {
    initTestDb();
    process.env.OPENROUTER_API_KEY = 'test-key-123';
  });
  await t.afterEach(() => {
    cleanupTestDb();
    mock.restoreAll();
  });

  await t.test('returns response text', async () => {
    mock.method(axios, 'post', makeMockAxios('Here is the answer.'));
    const result = await simpleLLMCall('monitor', null, { prompt: 'What is the status?' });
    assert.strictEqual(result, 'Here is the answer.');
  });
  // The caller-supplied systemPrompt must appear in the system-role message.
  await t.test('uses custom system prompt', async () => {
    let body;
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: 'response' } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await simpleLLMCall('monitor', null, {
      prompt: 'Hello',
      systemPrompt: 'You are a monitoring expert.',
    });
    assert.ok(
      body.messages.find(m => m.role === 'system').content.includes('You are a monitoring expert.')
    );
  });
  // Without a systemPrompt, the system message is expected to mention '333 Method'.
  await t.test('uses default system prompt when none provided', async () => {
    let body;
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: 'response' } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await simpleLLMCall('monitor', null, { prompt: 'Hello' });
    assert.ok(body.messages.find(m => m.role === 'system').content.includes('333 Method'));
  });
  // taskHistory text must be carried in the system-role message, not the user one.
  await t.test('injects taskHistory into system prompt', async () => {
    let body;
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: 'response' } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
        },
      };
    });
    await simpleLLMCall('monitor', null, {
      prompt: 'Continue',
      taskHistory: 'Previous: 100 sites analyzed',
    });
    assert.ok(
      body.messages.find(m => m.role === 'system').content.includes('Previous: 100 sites analyzed')
    );
  });
  await t.test('uses custom maxTokens', async () => {
    let body;
    mock.method(axios, 'post', async (url, capturedBody) => {
      body = capturedBody;
      return {
        data: {
          choices: [{ message: { content: 'response' } }],
usage: { prompt_tokens: 100, completion_tokens: 50 }, 982 }, 983 }; 984 }); 985 await simpleLLMCall('monitor', null, { prompt: 'Hello', maxTokens: 2000 }); 986 assert.strictEqual(body.max_tokens, 2000); 987 }); 988 await t.test('defaults to 4000 maxTokens', async () => { 989 let body; 990 mock.method(axios, 'post', async (url, capturedBody) => { 991 body = capturedBody; 992 return { 993 data: { 994 choices: [{ message: { content: 'response' } }], 995 usage: { prompt_tokens: 100, completion_tokens: 50 }, 996 }, 997 }; 998 }); 999 await simpleLLMCall('monitor', null, { prompt: 'Hello' }); 1000 assert.strictEqual(body.max_tokens, 4000); 1001 }); 1002 await t.test('logs usage to DB', async () => { 1003 mock.method(axios, 'post', makeMockAxios('response')); 1004 await simpleLLMCall('monitor', 5, { prompt: 'Analyze' }); 1005 const usage = db.prepare('SELECT * FROM agent_llm_usage').all(); 1006 assert.strictEqual(usage[0].agent_name, 'monitor'); 1007 assert.strictEqual(usage[0].task_id, 5); 1008 }); 1009 await t.test('taskHistory appended to custom systemPrompt', async () => { 1010 let body; 1011 mock.method(axios, 'post', async (url, capturedBody) => { 1012 body = capturedBody; 1013 return { 1014 data: { 1015 choices: [{ message: { content: 'response' } }], 1016 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1017 }, 1018 }; 1019 }); 1020 await simpleLLMCall('monitor', null, { 1021 prompt: 'Continue', 1022 systemPrompt: 'You are a monitor.', 1023 taskHistory: 'Task history here', 1024 }); 1025 const sysContent = body.messages.find(m => m.role === 'system').content; 1026 assert.ok(sysContent.includes('You are a monitor.')); 1027 assert.ok(sysContent.includes('Task history here')); 1028 }); 1029 }); 1030 1031 test('analyzeCodeSecurity (mocked fetch)', async t => { 1032 await t.beforeEach(() => { 1033 initTestDb(); 1034 process.env.OPENROUTER_API_KEY = 'test-key-123'; 1035 }); 1036 await t.afterEach(() => { 1037 cleanupTestDb(); 1038 mock.restoreAll(); 1039 }); 1040 
1041 await t.test('returns parsed security findings', async () => { 1042 const r = { 1043 findings: [ 1044 { 1045 type: 'sql_injection', 1046 severity: 'critical', 1047 line: 42, 1048 description: 'SQL injection', 1049 recommendation: 'Parameterize', 1050 cwe_id: 'CWE-89', 1051 }, 1052 ], 1053 summary: 'Critical SQL injection found', 1054 }; 1055 mock.method(axios, 'post', makeMockAxios(JSON.stringify(r))); 1056 const result = await analyzeCodeSecurity('security', 1, 'SELECT * WHERE id=${id}'); 1057 assert.strictEqual(result.findings.length, 1); 1058 assert.strictEqual(result.findings[0].type, 'sql_injection'); 1059 }); 1060 await t.test('handles focusArea parameter', async () => { 1061 let body; 1062 mock.method(axios, 'post', async (url, capturedBody) => { 1063 body = capturedBody; 1064 return { 1065 data: { 1066 choices: [{ message: { content: JSON.stringify({ findings: [], summary: 'Clean' }) } }], 1067 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1068 }, 1069 }; 1070 }); 1071 await analyzeCodeSecurity('security', 1, 'const x = 1;', 'sql_injection'); 1072 assert.ok(body.messages.find(m => m.role === 'user').content.includes('sql injection')); 1073 }); 1074 await t.test('handles fileName parameter', async () => { 1075 let body; 1076 mock.method(axios, 'post', async (url, capturedBody) => { 1077 body = capturedBody; 1078 return { 1079 data: { 1080 choices: [{ message: { content: JSON.stringify({ findings: [], summary: 'Clean' }) } }], 1081 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1082 }, 1083 }; 1084 }); 1085 await analyzeCodeSecurity('security', 1, 'const x = 1;', null, 'src/auth.js'); 1086 assert.ok(body.messages.find(m => m.role === 'user').content.includes('src/auth.js')); 1087 }); 1088 await t.test('handles JSON in markdown json code blocks', async () => { 1089 const r = { findings: [], summary: 'No issues' }; 1090 mock.method(axios, 'post', makeMockAxios(`\`\`\`json\n${JSON.stringify(r)}\n\`\`\``)); 1091 const result = await 
analyzeCodeSecurity('security', 1, 'const x = 1;'); 1092 assert.strictEqual(result.summary, 'No issues'); 1093 }); 1094 await t.test('handles JSON in plain code blocks', async () => { 1095 const r = { findings: [], summary: 'Clean code' }; 1096 mock.method(axios, 'post', makeMockAxios(`\`\`\`\n${JSON.stringify(r)}\n\`\`\``)); 1097 const result = await analyzeCodeSecurity('security', 1, 'const x = 1;'); 1098 assert.strictEqual(result.summary, 'Clean code'); 1099 }); 1100 await t.test('throws on invalid JSON response', async () => { 1101 mock.method(axios, 'post', makeMockAxios('Not valid JSON!')); 1102 await assert.rejects(() => analyzeCodeSecurity('security', 1, 'const x = 1;'), { 1103 message: /Failed to parse security analysis/, 1104 }); 1105 }); 1106 await t.test('defaults to check all security aspects when focusArea null', async () => { 1107 let body; 1108 mock.method(axios, 'post', async (url, capturedBody) => { 1109 body = capturedBody; 1110 return { 1111 data: { 1112 choices: [ 1113 { message: { content: JSON.stringify({ findings: [], summary: 'All clear' }) } }, 1114 ], 1115 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1116 }, 1117 }; 1118 }); 1119 await analyzeCodeSecurity('security', 1, 'const x = 1;'); 1120 assert.ok( 1121 body.messages.find(m => m.role === 'user').content.includes('Check all security aspects') 1122 ); 1123 }); 1124 await t.test('uses 3000 max tokens', async () => { 1125 let body; 1126 mock.method(axios, 'post', async (url, capturedBody) => { 1127 body = capturedBody; 1128 return { 1129 data: { 1130 choices: [{ message: { content: JSON.stringify({ findings: [], summary: 'Clean' }) } }], 1131 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1132 }, 1133 }; 1134 }); 1135 await analyzeCodeSecurity('security', 1, 'const x = 1;'); 1136 assert.strictEqual(body.max_tokens, 3000); 1137 }); 1138 await t.test('logs usage to DB', async () => { 1139 const r = { findings: [], summary: 'Clean' }; 1140 mock.method(axios, 'post', 
makeMockAxios(JSON.stringify(r))); 1141 await analyzeCodeSecurity('security', 8, 'const x = 1;'); 1142 const usage = db.prepare('SELECT * FROM agent_llm_usage').all(); 1143 assert.strictEqual(usage[0].agent_name, 'security'); 1144 assert.strictEqual(usage[0].task_id, 8); 1145 }); 1146 }); 1147 1148 test('generateSecureFix (mocked fetch)', async t => { 1149 await t.beforeEach(() => { 1150 initTestDb(); 1151 process.env.OPENROUTER_API_KEY = 'test-key-123'; 1152 }); 1153 await t.afterEach(() => { 1154 cleanupTestDb(); 1155 mock.restoreAll(); 1156 }); 1157 1158 await t.test('returns parsed fix object', async () => { 1159 const fix = { 1160 old_string: 'vulnerable', 1161 new_string: 'secure', 1162 explanation: 'Use params', 1163 testing_notes: 'Test it', 1164 }; 1165 mock.method(axios, 'post', makeMockAxios(JSON.stringify(fix))); 1166 const finding = { 1167 type: 'sql_injection', 1168 severity: 'critical', 1169 line: 42, 1170 description: 'SQL injection', 1171 recommendation: 'Parameterize', 1172 }; 1173 const result = await generateSecureFix('security', 1, { code: 'vulnerable', finding }); 1174 assert.ok(result.old_string); 1175 assert.ok(result.new_string); 1176 assert.ok(result.explanation); 1177 }); 1178 await t.test('includes fileName in prompt', async () => { 1179 let body; 1180 const fix = { 1181 old_string: 'old', 1182 new_string: 'new', 1183 explanation: 'fixed', 1184 testing_notes: 'test', 1185 }; 1186 mock.method(axios, 'post', async (url, capturedBody) => { 1187 body = capturedBody; 1188 return { 1189 data: { 1190 choices: [{ message: { content: JSON.stringify(fix) } }], 1191 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1192 }, 1193 }; 1194 }); 1195 const finding = { 1196 type: 'xss', 1197 severity: 'high', 1198 line: 10, 1199 description: 'XSS', 1200 recommendation: 'Sanitize', 1201 }; 1202 await generateSecureFix('security', 1, { 1203 code: 'const x = userInput;', 1204 finding, 1205 fileName: 'src/render.js', 1206 }); 1207 
assert.ok(body.messages.find(m => m.role === 'user').content.includes('src/render.js')); 1208 }); 1209 await t.test('handles JSON in markdown code blocks', async () => { 1210 const fix = { 1211 old_string: 'old_code', 1212 new_string: 'new_code', 1213 explanation: 'fix', 1214 testing_notes: 'test', 1215 }; 1216 mock.method(axios, 'post', makeMockAxios(`\`\`\`json\n${JSON.stringify(fix)}\n\`\`\``)); 1217 const finding = { 1218 type: 'xss', 1219 severity: 'high', 1220 line: 5, 1221 description: 'XSS', 1222 recommendation: 'Sanitize', 1223 }; 1224 const result = await generateSecureFix('security', 1, { code: 'old_code', finding }); 1225 assert.strictEqual(result.old_string, 'old_code'); 1226 }); 1227 await t.test('throws when fix missing old_string', async () => { 1228 mock.method( 1229 axios, 1230 'post', 1231 makeMockAxios(JSON.stringify({ new_string: 'secure', explanation: 'fix' })) 1232 ); 1233 const finding = { 1234 type: 'sql_injection', 1235 severity: 'critical', 1236 line: 42, 1237 description: 'SQL', 1238 recommendation: 'Parameterize', 1239 }; 1240 await assert.rejects(() => generateSecureFix('security', 1, { code: 'vulnerable', finding }), { 1241 message: /Failed to generate secure fix/, 1242 }); 1243 }); 1244 await t.test('throws when fix missing new_string', async () => { 1245 mock.method( 1246 axios, 1247 'post', 1248 makeMockAxios(JSON.stringify({ old_string: 'vulnerable', explanation: 'fix' })) 1249 ); 1250 const finding = { 1251 type: 'sql_injection', 1252 severity: 'critical', 1253 line: 42, 1254 description: 'SQL', 1255 recommendation: 'Parameterize', 1256 }; 1257 await assert.rejects(() => generateSecureFix('security', 1, { code: 'vulnerable', finding }), { 1258 message: /Failed to generate secure fix/, 1259 }); 1260 }); 1261 await t.test('throws on invalid JSON response', async () => { 1262 mock.method(axios, 'post', makeMockAxios('Not valid JSON')); 1263 const finding = { 1264 type: 'xss', 1265 severity: 'high', 1266 line: 1, 1267 description: 
'XSS', 1268 recommendation: 'Sanitize', 1269 }; 1270 await assert.rejects(() => generateSecureFix('security', 1, { code: 'bad code', finding }), { 1271 message: /Failed to generate secure fix/, 1272 }); 1273 }); 1274 await t.test('handles finding without line number', async () => { 1275 const fix = { 1276 old_string: 'old', 1277 new_string: 'new', 1278 explanation: 'fixed', 1279 testing_notes: 'test', 1280 }; 1281 let body; 1282 mock.method(axios, 'post', async (url, capturedBody) => { 1283 body = capturedBody; 1284 return { 1285 data: { 1286 choices: [{ message: { content: JSON.stringify(fix) } }], 1287 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1288 }, 1289 }; 1290 }); 1291 const finding = { 1292 type: 'secrets', 1293 severity: 'critical', 1294 description: 'Hardcoded key', 1295 recommendation: 'Use env vars', 1296 }; 1297 const result = await generateSecureFix('security', 1, { 1298 code: 'const KEY = "abc123";', 1299 finding, 1300 }); 1301 assert.ok(result.old_string); 1302 assert.ok(body.messages.find(m => m.role === 'user').content.includes('unknown')); 1303 }); 1304 await t.test('uses 2000 max tokens', async () => { 1305 let body; 1306 const fix = { old_string: 'old', new_string: 'new', explanation: 'fix', testing_notes: 'test' }; 1307 mock.method(axios, 'post', async (url, capturedBody) => { 1308 body = capturedBody; 1309 return { 1310 data: { 1311 choices: [{ message: { content: JSON.stringify(fix) } }], 1312 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1313 }, 1314 }; 1315 }); 1316 const finding = { 1317 type: 'xss', 1318 severity: 'high', 1319 line: 1, 1320 description: 'XSS', 1321 recommendation: 'Fix', 1322 }; 1323 await generateSecureFix('security', 1, { code: 'code', finding }); 1324 assert.strictEqual(body.max_tokens, 2000); 1325 }); 1326 }); 1327 1328 test('performThreatModeling (mocked fetch)', async t => { 1329 await t.beforeEach(() => { 1330 initTestDb(); 1331 process.env.OPENROUTER_API_KEY = 'test-key-123'; 1332 }); 1333 
await t.afterEach(() => { 1334 cleanupTestDb(); 1335 mock.restoreAll(); 1336 }); 1337 1338 await t.test('returns parsed threat model', async () => { 1339 const tm = { 1340 threats: [ 1341 { 1342 stride_category: 'Spoofing', 1343 title: 'Identity spoofing', 1344 description: 'Attacker fakes identity', 1345 attack_scenario: 'Brute force', 1346 dread: { 1347 damage: 9, 1348 reproducibility: 8, 1349 exploitability: 7, 1350 affected_users: 10, 1351 discoverability: 6, 1352 total: 40, 1353 average: 8.0, 1354 }, 1355 risk_level: 'high', 1356 mitigation: 'MFA', 1357 cwe_id: 'CWE-287', 1358 }, 1359 ], 1360 summary: 'High risk auth system', 1361 priority_threats: ['Identity spoofing'], 1362 }; 1363 mock.method(axios, 'post', makeMockAxios(JSON.stringify(tm))); 1364 const result = await performThreatModeling('security', 1, { 1365 component: 'Auth module code', 1366 componentType: 'auth', 1367 }); 1368 assert.strictEqual(result.threats.length, 1); 1369 assert.strictEqual(result.threats[0].stride_category, 'Spoofing'); 1370 assert.strictEqual(result.summary, 'High risk auth system'); 1371 assert.ok(Array.isArray(result.priority_threats)); 1372 }); 1373 await t.test('includes dataFlow in prompt', async () => { 1374 let body; 1375 const tm = { threats: [], summary: 'Clean', priority_threats: [] }; 1376 mock.method(axios, 'post', async (url, capturedBody) => { 1377 body = capturedBody; 1378 return { 1379 data: { 1380 choices: [{ message: { content: JSON.stringify(tm) } }], 1381 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1382 }, 1383 }; 1384 }); 1385 await performThreatModeling('security', 1, { 1386 component: 'API endpoint', 1387 componentType: 'api', 1388 dataFlow: 'User -> API -> DB -> Response', 1389 }); 1390 assert.ok( 1391 body.messages.find(m => m.role === 'user').content.includes('User -> API -> DB -> Response') 1392 ); 1393 }); 1394 await t.test('uses default componentType of general', async () => { 1395 let body; 1396 const tm = { threats: [], summary: 'OK', 
priority_threats: [] }; 1397 mock.method(axios, 'post', async (url, capturedBody) => { 1398 body = capturedBody; 1399 return { 1400 data: { 1401 choices: [{ message: { content: JSON.stringify(tm) } }], 1402 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1403 }, 1404 }; 1405 }); 1406 await performThreatModeling('security', 1, { component: 'some code' }); 1407 assert.ok(body.messages.find(m => m.role === 'user').content.includes('general')); 1408 }); 1409 await t.test('handles JSON in markdown code blocks', async () => { 1410 const tm = { threats: [], summary: 'No threats', priority_threats: [] }; 1411 mock.method(axios, 'post', makeMockAxios(`\`\`\`json\n${JSON.stringify(tm)}\n\`\`\``)); 1412 const result = await performThreatModeling('security', 1, { component: 'Component' }); 1413 assert.strictEqual(result.summary, 'No threats'); 1414 }); 1415 await t.test('throws on invalid JSON response', async () => { 1416 mock.method(axios, 'post', makeMockAxios('Not JSON at all')); 1417 await assert.rejects(() => performThreatModeling('security', 1, { component: 'code' }), { 1418 message: /Failed to perform threat modeling/, 1419 }); 1420 }); 1421 await t.test('STRIDE and DREAD in system prompt', async () => { 1422 let body; 1423 const tm = { threats: [], summary: 'OK', priority_threats: [] }; 1424 mock.method(axios, 'post', async (url, capturedBody) => { 1425 body = capturedBody; 1426 return { 1427 data: { 1428 choices: [{ message: { content: JSON.stringify(tm) } }], 1429 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1430 }, 1431 }; 1432 }); 1433 await performThreatModeling('security', 1, { component: 'API code' }); 1434 const sysContent = body.messages.find(m => m.role === 'system').content; 1435 assert.ok(sysContent.includes('STRIDE')); 1436 assert.ok(sysContent.includes('DREAD')); 1437 }); 1438 await t.test('uses 4000 max tokens', async () => { 1439 let body; 1440 const tm = { threats: [], summary: 'OK', priority_threats: [] }; 1441 mock.method(axios, 
'post', async (url, capturedBody) => { 1442 body = capturedBody; 1443 return { 1444 data: { 1445 choices: [{ message: { content: JSON.stringify(tm) } }], 1446 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1447 }, 1448 }; 1449 }); 1450 await performThreatModeling('security', 1, { component: 'code' }); 1451 assert.strictEqual(body.max_tokens, 4000); 1452 }); 1453 await t.test('logs usage to DB', async () => { 1454 const tm = { threats: [], summary: 'Clean', priority_threats: [] }; 1455 mock.method(axios, 'post', makeMockAxios(JSON.stringify(tm))); 1456 await performThreatModeling('security', 3, { component: 'code' }); 1457 const usage = db.prepare('SELECT * FROM agent_llm_usage').all(); 1458 assert.strictEqual(usage[0].agent_name, 'security'); 1459 assert.strictEqual(usage[0].task_id, 3); 1460 }); 1461 await t.test('omits dataFlow context when not provided', async () => { 1462 let body; 1463 const tm = { threats: [], summary: 'OK', priority_threats: [] }; 1464 mock.method(axios, 'post', async (url, capturedBody) => { 1465 body = capturedBody; 1466 return { 1467 data: { 1468 choices: [{ message: { content: JSON.stringify(tm) } }], 1469 usage: { prompt_tokens: 100, completion_tokens: 50 }, 1470 }, 1471 }; 1472 }); 1473 await performThreatModeling('security', 1, { component: 'code' }); 1474 assert.ok(!body.messages.find(m => m.role === 'user').content.includes('Data Flow:')); 1475 }); 1476 }); 1477 1478 test('cost calculation', async t => { 1479 await t.beforeEach(() => { 1480 initTestDb(); 1481 process.env.OPENROUTER_API_KEY = 'test-key-123'; 1482 }); 1483 await t.afterEach(() => { 1484 cleanupTestDb(); 1485 mock.restoreAll(); 1486 }); 1487 1488 await t.test('calculates sonnet cost for 1M tokens each ($18)', async () => { 1489 mock.method(axios, 'post', async () => ({ 1490 data: { 1491 choices: [{ message: { content: 'result' } }], 1492 usage: { prompt_tokens: 1000000, completion_tokens: 1000000 }, 1493 }, 1494 })); 1495 await analyzeCode('developer', 1, 
'src/test.js', 'Review'); 1496 const usage = db.prepare('SELECT * FROM agent_llm_usage').all(); 1497 assert.ok( 1498 Math.abs(usage[0].cost_usd - 18.0) < 0.01, 1499 `Expected ~$18, got $${usage[0].cost_usd}` 1500 ); 1501 }); 1502 await t.test('calculates zero cost for zero tokens', async () => { 1503 mock.method(axios, 'post', async () => ({ 1504 data: { 1505 choices: [{ message: { content: 'result' } }], 1506 usage: { prompt_tokens: 0, completion_tokens: 0 }, 1507 }, 1508 })); 1509 await analyzeCode('developer', 1, 'src/test.js', 'Review'); 1510 const usage = db.prepare('SELECT * FROM agent_llm_usage').all(); 1511 assert.strictEqual(usage[0].cost_usd, 0); 1512 }); 1513 });