agent-claude-api.js
/**
 * Agent-Claude API Module
 *
 * Centralized Claude API interface for all agents with budget tracking.
 * Provides 6 core functions:
 * - analyzeCode: Code review and recommendations
 * - generateCode: Generate new code from requirements
 * - generateTests: Generate test cases from source code
 * - reviewArchitecture: Architectural analysis
 * - suggestRefactoring: Refactoring recommendations
 * - classifyIssue: Categorize errors/issues
 *
 * Additional exports: selectModel, getTodaySpending, getHourlySpending,
 * getUsageStats, simpleLLMCall, analyzeCodeSecurity, generateSecureFix and
 * performThreatModeling.
 *
 * Budget enforcement:
 * - Daily budget limit ($10 default, configurable)
 * - Emergency shutdown if >$5/hour burn rate
 * - All calls logged to agent_llm_usage table
 * - Cost calculation based on Anthropic token pricing
 *
 * @module agents/utils/agent-claude-api
 */

import Logger from '../../utils/logger.js';
import { callLLM } from '../../utils/llm-provider.js';
import { run, getOne, getAll } from '../../utils/db.js';

const logger = new Logger('AgentClaude');

/**
 * Ensure an LLM API key is present in the environment.
 *
 * @throws {Error} - If OPENROUTER_API_KEY is not set
 */
function validateApiKey() {
  if (!process.env.OPENROUTER_API_KEY) {
    throw new Error(
      'No LLM API key configured: set OPENROUTER_API_KEY in environment'
    );
  }
}

// Pricing per million tokens (OpenRouter format)
// See: https://openrouter.ai/models/anthropic
const PRICING = {
  'anthropic/claude-sonnet-4-6': {
    input: 3.0, // $3.00 per million input tokens
    output: 15.0, // $15.00 per million output tokens
  },
  'anthropic/claude-haiku-4-5': {
    input: 0.8, // $0.80 per million input tokens (estimated, OpenRouter may vary)
    output: 4.0, // $4.00 per million output tokens (estimated)
  },
};

// Price used when neither the requested model nor DEFAULT_MODEL has a PRICING entry
// (e.g. CLAUDE_SONNET_MODEL points at a model this table does not know about).
const FALLBACK_PRICING = PRICING['anthropic/claude-sonnet-4-6'];

// Default models for agents (OpenRouter format)
const DEFAULT_MODEL = process.env.CLAUDE_SONNET_MODEL || 'anthropic/claude-sonnet-4-6';
const HAIKU_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';

// Daily budget (USD) applied when AGENT_DAILY_BUDGET is unset or not a number
const DEFAULT_DAILY_BUDGET_USD = 10;

// Task types that can use cheaper Haiku model (simple/pattern-based)
const SIMPLE_TASKS = new Set([
  'classify_issue', // Triage - Pattern matching
  'scan_logs', // Monitor - Log scanning
  'detect_anomaly', // Monitor - Anomaly detection
  'scan_secrets', // Security - Regex-based checks
  'find_test_files', // QA - File discovery
  'scan_dependencies', // Security - npm audit (simple)
]);

// Task types requiring Sonnet (complex reasoning/generation)
const COMPLEX_TASKS = new Set([
  'analyzeCode', // Developer/Security - Code analysis
  'generateCode', // Developer - Code generation
  'reviewArchitecture', // Architect - Design review
  'suggestRefactoring', // Developer - Refactoring
  'threat_model', // Security - Advanced threat modeling
  'generateTests', // QA - Test generation with coverage analysis
]);

/**
 * Select appropriate model based on task complexity
 *
 * Resolution order: the AGENT_USE_HAIKU_FOR_SIMPLE_TASKS=false kill switch,
 * then the explicit complexity override, then the task-type tables.
 *
 * @param {string} taskType - Task type or function name
 * @param {string} [complexity='standard'] - Explicit complexity override ('simple' or 'complex')
 * @returns {string} - Model name
 */
export function selectModel(taskType, complexity = 'standard') {
  // Check env var to allow disabling Haiku optimization
  const useHaikuForSimple = process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS !== 'false';

  if (!useHaikuForSimple) {
    return DEFAULT_MODEL;
  }

  // Explicit complexity override
  if (complexity === 'simple') {
    return HAIKU_MODEL;
  } else if (complexity === 'complex') {
    return DEFAULT_MODEL;
  }

  // Auto-detect based on task type
  if (SIMPLE_TASKS.has(taskType)) {
    return HAIKU_MODEL;
  }

  if (COMPLEX_TASKS.has(taskType)) {
    return DEFAULT_MODEL;
  }

  // Default to Sonnet for unknown tasks
  return DEFAULT_MODEL;
}

/**
 * Calculate cost in USD for a Claude API call
 *
 * Unknown models are priced like DEFAULT_MODEL; if that model is not in the
 * pricing table either, FALLBACK_PRICING is used so a successful call is never
 * turned into a failure by a missing price entry.
 *
 * @param {string} model - Model name
 * @param {number} promptTokens - Input tokens
 * @param {number} completionTokens - Output tokens
 * @returns {number} - Cost in USD
 */
function calculateCost(model, promptTokens, completionTokens) {
  const pricing = PRICING[model] ?? PRICING[DEFAULT_MODEL] ?? FALLBACK_PRICING;

  const inputCost = (promptTokens / 1_000_000) * pricing.input;
  const outputCost = (completionTokens / 1_000_000) * pricing.output;

  return inputCost + outputCost;
}

/**
 * Coerce a database aggregate value to a finite number.
 *
 * NOTE(review): the spending queries in this file parseFloat() their
 * aggregates, which suggests the driver returns them as strings — confirm
 * against the db helper.
 *
 * @param {*} value - Raw column value
 * @returns {number} - Numeric value, or 0 when it is null/non-numeric
 */
function toNumber(value) {
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : 0;
}

/**
 * Parse a JSON payload from an LLM response.
 *
 * Tries the raw text first, then falls back to the contents of the first
 * markdown code fence (```json ... ``` or ``` ... ```), because models often
 * wrap JSON in a fence even when told not to.
 *
 * @param {string} text - Raw response text
 * @returns {*} - Parsed JSON value
 * @throws {SyntaxError} - If no parseable JSON is found
 */
function parseJsonResponse(text) {
  try {
    return JSON.parse(text);
  } catch (directError) {
    if (typeof text !== 'string') {
      throw directError;
    }

    const fenced =
      text.match(/```json\s*([\s\S]*?)\s*```/) || text.match(/```\s*([\s\S]*?)\s*```/);

    if (!fenced) {
      throw directError;
    }

    return JSON.parse(fenced[1]);
  }
}

/**
 * Get total agent LLM spending for today
 *
 * @returns {Promise<number>} - Total USD spent today
 */
export async function getTodaySpending() {
  const result = await getOne(
    `SELECT COALESCE(SUM(cost_usd), 0) as total
     FROM tel.agent_llm_usage
     WHERE DATE(created_at) = CURRENT_DATE`
  );

  return result ? parseFloat(result.total) : 0;
}

/**
 * Get hourly spending rate (last hour)
 *
 * @returns {Promise<number>} - USD spent in last hour
 */
export async function getHourlySpending() {
  const result = await getOne(
    `SELECT COALESCE(SUM(cost_usd), 0) as total
     FROM tel.agent_llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'`
  );

  return result ? parseFloat(result.total) : 0;
}

/**
 * Check if budget limits are exceeded
 *
 * The hourly emergency threshold is checked before the daily budget.
 *
 * @returns {Promise<void>}
 * @throws {Error} - If budget exceeded
 */
async function checkBudgetLimits() {
  const configuredBudget = parseFloat(process.env.AGENT_DAILY_BUDGET || '10');
  // A non-numeric AGENT_DAILY_BUDGET parses to NaN, and every comparison with NaN
  // is false — which would silently disable the daily limit. Use the default instead.
  const dailyBudget = Number.isFinite(configuredBudget)
    ? configuredBudget
    : DEFAULT_DAILY_BUDGET_USD;
  const emergencyThreshold = 5.0; // $5/hour emergency shutdown

  // The two queries are independent, so run them concurrently
  const [todaySpending, hourlySpending] = await Promise.all([
    getTodaySpending(),
    getHourlySpending(),
  ]);

  if (hourlySpending >= emergencyThreshold) {
    throw new Error(
      `EMERGENCY: Agent LLM spending exceeded $${emergencyThreshold}/hour (current: $${hourlySpending.toFixed(2)})`
    );
  }

  if (todaySpending >= dailyBudget) {
    throw new Error(
      `Daily budget exceeded: $${todaySpending.toFixed(2)} / $${dailyBudget} (configure AGENT_DAILY_BUDGET to increase)`
    );
  }
}

// Agent names whose usage is recorded in tel.agent_llm_usage
const TRACKED_AGENTS = new Set(['developer', 'qa', 'security', 'architect', 'triage', 'monitor']);

/**
 * Log LLM usage to database
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {string} model - Model name
 * @param {number} promptTokens - Input tokens
 * @param {number} completionTokens - Output tokens
 * @param {number} costUsd - Cost in USD
 * @returns {Promise<void>}
 */
async function logUsage(agentName, taskId, model, promptTokens, completionTokens, costUsd) {
  // Only log usage for known agents — non-agent callers (e.g. 'task-manager') would
  // violate the agent_llm_usage CHECK constraint and cause the whole API call to fail.
  if (!TRACKED_AGENTS.has(agentName)) return;

  await run(
    `INSERT INTO tel.agent_llm_usage (agent_name, task_id, model, prompt_tokens, completion_tokens, cost_usd)
     VALUES ($1, $2, $3, $4, $5, $6)`,
    [agentName, taskId, model, promptTokens, completionTokens, costUsd]
  );

  logger.info('LLM API call logged', {
    agent: agentName,
    task_id: taskId,
    model,
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    cost_usd: costUsd.toFixed(6),
  });
}

/**
 * Call Claude API with budget tracking
 *
 * Checks budget limits and the API key, picks a model, performs the call,
 * records usage and returns the response text with usage details. Failures
 * (including a failure to record usage) are logged and rethrown.
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {string} systemPrompt - System prompt
 * @param {string} userPrompt - User prompt
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model to use (default: auto-selected based on complexity)
 * @param {number} [options.maxTokens] - Max output tokens (default: 4096)
 * @param {number} [options.temperature] - Sampling temperature (default: 0.3)
 * @param {string} [options.taskType] - Task type for model selection
 * @param {string} [options.complexity] - Explicit complexity: 'simple' or 'complex'
 * @returns {Promise<Object>} - {text: string, usage: {promptTokens, completionTokens, cost, model}}
 * @throws {Error} - If the budget is exceeded, the API key is missing, or the call fails
 */
async function callClaude(agentName, taskId, systemPrompt, userPrompt, options = {}) {
  // Check budget before making call
  await checkBudgetLimits();

  // Validate API key
  validateApiKey();

  // Smart model selection based on task complexity
  let model;
  if (options.model) {
    model = options.model; // Explicit override
  } else if (options.taskType || options.complexity) {
    model = selectModel(options.taskType, options.complexity);
  } else {
    model = DEFAULT_MODEL; // Fallback
  }

  const maxTokens = options.maxTokens || 4096;
  // `??` rather than `||`: a temperature of 0 is a valid explicit choice
  const temperature = options.temperature ?? 0.3;

  try {
    const llmResponse = await callLLM({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt },
      ],
      max_tokens: maxTokens,
      temperature,
      headers: {
        'HTTP-Referer':
          process.env.OPENROUTER_REFERER || 'https://github.com/jasonpaulneu/333Method',
        'X-Title': process.env.OPENROUTER_TITLE || '333 Method Agent System',
      },
    });

    const text = llmResponse.content;
    const promptTokens = llmResponse.usage?.promptTokens || 0;
    const completionTokens = llmResponse.usage?.completionTokens || 0;
    const cost = calculateCost(model, promptTokens, completionTokens);

    // Log usage
    await logUsage(agentName, taskId, model, promptTokens, completionTokens, cost);

    return {
      text,
      usage: {
        promptTokens,
        completionTokens,
        cost,
        model, // Include model in response for tracking
      },
    };
  } catch (error) {
    logger.error('Claude API call failed', {
      agent: agentName,
      task_id: taskId,
      model,
      error: error.message,
    });
    throw error;
  }
}

/**
 * Analyze code for issues, recommendations, and improvements
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - File path being analyzed
 * @param {string} prompt - Analysis prompt/question
 * @param {Object} [context] - Additional context
 * @param {string} [context.fileContent] - File content
 * @param {string} [context.relatedFiles] - Related files content
 * @param {string} [context.errorMessage] - Error message if analyzing a bug
 * @param {string} [context.model] - Model override
 * @param {string} [context.complexity] - Complexity: 'simple' or 'complex'
 * @returns {Promise<string>} - Analysis result
 */
export async function analyzeCode(agentName, taskId, filePath, prompt, context = {}) {
  const systemPrompt = `You are an expert code reviewer for the 333 Method automation project.
Analyze code for bugs, performance issues, security vulnerabilities, and maintainability.
Provide actionable recommendations with specific line numbers and code examples.

Project tech stack: Node.js 22 (ESM), SQLite, Playwright, better-sqlite3
Coding standards: Max 150 lines/file, complexity ≤15, depth ≤4 (ESLint enforced)
Testing: Node.js native test runner, 80%+ coverage target`;

  let userPrompt = `File: ${filePath}\n\n`;

  if (context.fileContent) {
    userPrompt += `\`\`\`javascript\n${context.fileContent}\n\`\`\`\n\n`;
  }

  if (context.relatedFiles) {
    userPrompt += `Related files:\n${context.relatedFiles}\n\n`;
  }

  if (context.errorMessage) {
    userPrompt += `Error:\n${context.errorMessage}\n\n`;
  }

  userPrompt += `Analysis request: ${prompt}`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    model: context.model,
    taskType: 'analyzeCode',
    complexity: context.complexity || 'complex', // Code analysis defaults to complex
  });
  return result.text;
}

/**
 * Generate code from requirements
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - Target file path
 * @param {string} requirements - Code requirements
 * @param {string|null} [existingCode] - Existing code to modify/extend
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override
 * @returns {Promise<string>} - Generated code
 */
export async function generateCode(
  agentName,
  taskId,
  filePath,
  requirements,
  existingCode = null,
  options = {}
) {
  const systemPrompt = `You are an expert JavaScript developer for the 333 Method automation project.
Generate production-ready code following project standards:

- Node.js 22 ESM (import/export, not require)
- Async/await for asynchronous operations
- better-sqlite3 for database access (synchronous API)
- ESLint rules: max-lines: 150, complexity: 15, max-depth: 4
- JSDoc comments for all functions
- Error handling with try/catch and retryWithBackoff
- Logging via Logger class from src/utils/logger.js

Return ONLY the code, no explanations. Include import statements.`;

  let userPrompt = `File: ${filePath}\n\n`;

  if (existingCode) {
    userPrompt += `Existing code:\n\`\`\`javascript\n${existingCode}\n\`\`\`\n\n`;
  }

  userPrompt += `Requirements:\n${requirements}\n\nGenerate the complete code:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 8192, // More tokens for code generation
    model: options.model,
    taskType: 'generateCode',
    complexity: 'complex', // Code generation requires Sonnet
  });

  return result.text;
}

/**
 * Generate test cases for source code
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} sourceFile - Source file path
 * @param {string} sourceCode - Source code content
 * @param {Object} [coverageGaps] - Coverage gaps to fill
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override (default: Haiku for simple tests)
 * @returns {Promise<string>} - Generated test code
 */
export async function generateTests(
  agentName,
  taskId,
  sourceFile,
  sourceCode,
  coverageGaps = null,
  options = {}
) {
  const systemPrompt = `You are an expert test engineer for the 333 Method automation project.
Generate comprehensive unit tests using Node.js native test runner:

- Use node:test and node:assert modules
- Test success cases, error cases, edge cases
- Mock external dependencies (database, API calls, file system)
- Follow AAA pattern: Arrange, Act, Assert
- Use descriptive test names
- Target 80%+ coverage
- One test file per source file (tests/filename.test.js)

Return ONLY the test code, no explanations.`;

  let userPrompt = `Source file: ${sourceFile}\n\n`;
  userPrompt += `\`\`\`javascript\n${sourceCode}\n\`\`\`\n\n`;

  if (coverageGaps) {
    userPrompt += `Coverage gaps to address:\n${JSON.stringify(coverageGaps, null, 2)}\n\n`;
  }

  userPrompt += `Generate comprehensive unit tests:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 8192, // More tokens for test generation
    model: options.model,
    taskType: 'generateTests',
    complexity: 'simple', // Simple test generation can use Haiku
  });

  return result.text;
}

/**
 * Review architecture and design
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} proposalText - Architecture proposal
 * @param {Object} [context] - Additional context
 * @param {string} [context.relatedFiles] - Related files content
 * @param {string} [context.requirements] - Requirements the proposal must satisfy
 * @param {string} [context.model] - Model override
 * @returns {Promise<string>} - Review feedback
 */
export async function reviewArchitecture(agentName, taskId, proposalText, context = {}) {
  const systemPrompt = `You are the system architect for the 333 Method automation project.
Review architecture proposals for:

- Alignment with project principles (simplicity, avoid over-engineering)
- Database schema design (SQLite best practices)
- API design and module boundaries
- Scalability and performance
- Security implications
- Testing strategy
- Migration path from current implementation

Provide constructive feedback with specific recommendations.`;

  let userPrompt = `Architecture proposal:\n\n${proposalText}\n\n`;

  if (context.relatedFiles) {
    userPrompt += `Related files:\n${context.relatedFiles}\n\n`;
  }

  if (context.requirements) {
    userPrompt += `Requirements:\n${context.requirements}\n\n`;
  }

  userPrompt += `Please review this proposal:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    model: context.model,
    taskType: 'reviewArchitecture',
    complexity: 'complex', // Architecture review requires deep reasoning
  });
  return result.text;
}

/**
 * Suggest refactoring improvements
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - File to refactor
 * @param {string} fileContent - File content
 * @param {string} [reason] - Reason for refactoring
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override
 * @returns {Promise<string>} - Refactoring suggestions
 */
export async function suggestRefactoring(
  agentName,
  taskId,
  filePath,
  fileContent,
  reason = null,
  options = {}
) {
  const systemPrompt = `You are an expert software engineer for the 333 Method automation project.
Suggest refactoring improvements for:

- Code complexity reduction (target: complexity ≤15, depth ≤4)
- File length reduction (target: ≤150 lines)
- DRY principle violations
- Naming improvements
- Function decomposition
- Error handling improvements

Prioritize simplicity. Avoid over-engineering.
Provide specific before/after code examples.`;

  let userPrompt = `File: ${filePath}\n\n`;
  userPrompt += `\`\`\`javascript\n${fileContent}\n\`\`\`\n\n`;

  if (reason) {
    userPrompt += `Reason for refactoring: ${reason}\n\n`;
  }

  userPrompt += `Suggest refactoring improvements:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    model: options.model,
    taskType: 'suggestRefactoring',
    complexity: 'complex', // Refactoring suggestions require Sonnet
  });
  return result.text;
}

/**
 * Classify error or issue
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} errorMessage - Error message
 * @param {Object} [context] - Additional context
 * @param {string} [context.stackTrace] - Stack trace
 * @param {string} [context.logContext] - Log context
 * @param {string} [context.model] - Model override
 * @returns {Promise<Object>} - {category, severity, suggestedAction, assignTo}
 * @throws {Error} - If the response cannot be parsed as JSON
 */
export async function classifyIssue(agentName, taskId, errorMessage, context = {}) {
  const systemPrompt = `You are an error triage specialist for the 333 Method automation project.
Classify errors into categories and assign to appropriate agent:

Categories: NETWORK, DATABASE, API, PARSING, VALIDATION, AUTHENTICATION, RATE_LIMIT, TIMEOUT, CODE_BUG
Severity: low, medium, high, critical
Agents: developer, qa, security, architect, triage

Return JSON only (no markdown):
{
  "category": "CATEGORY",
  "severity": "severity",
  "suggestedAction": "what to do",
  "assignTo": "agent_name"
}`;

  let userPrompt = `Error: ${errorMessage}\n\n`;

  if (context.stackTrace) {
    userPrompt += `Stack trace:\n${context.stackTrace}\n\n`;
  }

  if (context.logContext) {
    userPrompt += `Log context:\n${context.logContext}\n\n`;
  }

  userPrompt += `Classify this error:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 1024, // Shorter response for classification
    model: context.model,
    taskType: 'classify_issue',
    complexity: 'simple', // Classification is pattern-based, can use Haiku
  });

  // Parse JSON response (tolerates a markdown code fence, like the security helpers)
  try {
    return parseJsonResponse(result.text);
  } catch (e) {
    logger.error('Failed to parse classification response', {
      agent: agentName,
      task_id: taskId,
      response: result.text,
    });
    throw new Error(`Invalid classification response: ${e.message}`, { cause: e });
  }
}

/**
 * Get agent LLM usage statistics with model breakdown
 *
 * @param {string} [agentName] - Filter by agent (optional)
 * @param {number} [days] - Days to look back (default: 7)
 * @returns {Promise<Object>} - {agents, totalCost, days, modelBreakdown}. `agents` holds the
 *   raw per-agent/per-model rows. `modelBreakdown.savings` is Haiku's share of the combined
 *   Haiku + Sonnet cost as a percentage string with one decimal, or 0 when there is no
 *   Sonnet spend.
 */
export async function getUsageStats(agentName = null, days = 7) {
  let query = `
    SELECT
      agent_name,
      model,
      COUNT(*) as call_count,
      SUM(prompt_tokens) as total_prompt_tokens,
      SUM(completion_tokens) as total_completion_tokens,
      SUM(cost_usd) as total_cost_usd,
      AVG(cost_usd) as avg_cost_per_call
    FROM tel.agent_llm_usage
    WHERE created_at >= NOW() - ($1 || ' days')::interval
  `;

  const params = [days];

  if (agentName) {
    query += ` AND agent_name = $${params.length + 1}`;
    params.push(agentName);
  }

  query += ' GROUP BY agent_name, model ORDER BY total_cost_usd DESC';

  const results = await getAll(query, params);

  // Coerce before adding: if the driver hands aggregates back as strings,
  // `sum + value` would concatenate instead of add.
  const sumField = (rows, field) => rows.reduce((sum, row) => sum + toNumber(row[field]), 0);

  const totalCost = sumField(results, 'total_cost_usd');

  // Calculate Haiku vs Sonnet breakdown
  const haikuCalls = results.filter(r => r.model.includes('haiku'));
  const sonnetCalls = results.filter(r => r.model.includes('sonnet'));

  const haikuCost = sumField(haikuCalls, 'total_cost_usd');
  const sonnetCost = sumField(sonnetCalls, 'total_cost_usd');

  const haikuCount = sumField(haikuCalls, 'call_count');
  const sonnetCount = sumField(sonnetCalls, 'call_count');

  return {
    agents: results,
    totalCost,
    days,
    modelBreakdown: {
      haiku: {
        calls: haikuCount,
        cost: haikuCost,
        avgCost: haikuCount > 0 ? haikuCost / haikuCount : 0,
      },
      sonnet: {
        calls: sonnetCount,
        cost: sonnetCost,
        avgCost: sonnetCount > 0 ? sonnetCost / sonnetCount : 0,
      },
      savings: sonnetCost > 0 ? ((haikuCost / (haikuCost + sonnetCost)) * 100).toFixed(1) : 0,
    },
  };
}

/**
 * Simple wrapper for agents that just need a basic LLM call
 * Compatible with old claude-api.js signature
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Call options
 * @param {string} options.prompt - The prompt to send
 * @param {string} [options.systemPrompt] - Optional system prompt
 * @param {string} [options.taskHistory] - Task history context (from context-builder)
 * @param {number} [options.temperature] - Temperature (default: 0.3)
 * @param {number} [options.maxTokens] - Max tokens (default: 4000)
 * @returns {Promise<string>} - LLM response text
 */
export async function simpleLLMCall(agentName, taskId, options) {
  const {
    prompt,
    systemPrompt = '',
    taskHistory = null,
    temperature = 0.3,
    maxTokens = 4000,
  } = options;

  // Inject task history into system prompt if provided
  let enhancedSystemPrompt =
    systemPrompt || 'You are a helpful AI assistant for the 333 Method automation project.';

  if (taskHistory) {
    enhancedSystemPrompt += `\n\n${taskHistory}`;
  }

  const result = await callClaude(agentName, taskId, enhancedSystemPrompt, prompt, {
    maxTokens,
    // Forward the documented temperature option instead of silently dropping it
    temperature,
  });

  return result.text;
}

/**
 * Analyze code for security vulnerabilities
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} code - Code to analyze
 * @param {string} [focusArea] - Specific security focus (sql_injection, xss, command_injection, secrets)
 * @param {string} [fileName] - File name for context
 * @returns {Promise<Object>} - Analysis result with findings: {findings: Array, summary: string}
 * @throws {Error} - If the response cannot be parsed as JSON
 */
export async function analyzeCodeSecurity(
  agentName,
  taskId,
  code,
  focusArea = null,
  fileName = null
) {
  const focusInstruction = focusArea
    ? `Focus specifically on: ${focusArea.replace(/_/g, ' ')}`
    : 'Check all security aspects';

  const fileContext = fileName ? `File: ${fileName}\n\n` : '';

  const systemPrompt = `You are a security expert analyzing code for vulnerabilities.
Be thorough but practical. Focus on real exploitable issues, not theoretical concerns.
Classify severity accurately:
- critical: Direct exploitable vulnerability with high impact
- high: Exploitable with some conditions, or severe impact
- medium: Security weakness requiring specific conditions
- low: Minor issue or best practice violation

Provide analysis in JSON format (no markdown code blocks):
{
  "findings": [
    {
      "type": "vulnerability_type",
      "severity": "critical|high|medium|low",
      "line": line_number,
      "description": "detailed description",
      "recommendation": "how to fix",
      "cwe_id": "CWE-XXX (if applicable)"
    }
  ],
  "summary": "overall security assessment"
}`;

  const userPrompt = `Analyze this code for security vulnerabilities.

${focusInstruction}

${fileContext}Code:
\`\`\`
${code}
\`\`\`

Provide security analysis:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 3000,
  });

  try {
    // Extract JSON from response (handle markdown code blocks)
    return parseJsonResponse(result.text);
  } catch (error) {
    logger.error('Failed to parse Claude security analysis', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: result.text,
    });
    throw new Error(`Failed to parse security analysis: ${error.message}`, { cause: error });
  }
}

/**
 * Generate a secure fix for a vulnerability
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Fix generation options
 * @param {string} options.code - Original code with vulnerability
 * @param {Object} options.finding - Security finding object
 * @param {string} [options.fileName] - File name for context
 * @returns {Promise<Object>} - Fix result with old_string, new_string, explanation
 * @throws {Error} - If the response cannot be parsed or lacks old_string/new_string
 */
export async function generateSecureFix(agentName, taskId, { code, finding, fileName = null }) {
  const fileContext = fileName ? `File: ${fileName}\n\n` : '';

  const systemPrompt = `You are a security expert who fixes code vulnerabilities.
Provide practical, secure fixes that maintain functionality.
Ensure old_string matches the original code EXACTLY.
Follow secure coding best practices for the language.

Provide the fix in JSON format (no markdown code blocks):
{
  "old_string": "exact string to replace (must match code exactly)",
  "new_string": "secure replacement code",
  "explanation": "why this fix is secure",
  "testing_notes": "how to test the fix"
}

IMPORTANT:
- old_string must match the vulnerable code EXACTLY (including whitespace, indentation)
- new_string should maintain the same functionality but be secure
- Preserve code style and formatting
- Keep the fix minimal - only change what's necessary`;

  const userPrompt = `Fix this security vulnerability in the code.

Vulnerability:
- Type: ${finding.type}
- Severity: ${finding.severity}
- Line: ${finding.line || 'unknown'}
- Description: ${finding.description}
- Recommendation: ${finding.recommendation}

${fileContext}Original Code:
\`\`\`
${code}
\`\`\`

Provide the secure fix:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 2000,
  });

  try {
    // Extract JSON from response
    const fix = parseJsonResponse(result.text);

    // Validate fix has required fields. old_string must be non-empty so there is
    // something to replace; new_string only has to be a string, because an empty
    // replacement (deleting the vulnerable code) is a legitimate fix.
    if (!fix || !fix.old_string || typeof fix.new_string !== 'string') {
      throw new Error('Fix missing old_string or new_string');
    }

    return fix;
  } catch (error) {
    logger.error('Failed to parse Claude fix generation', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: result.text,
    });
    throw new Error(`Failed to generate secure fix: ${error.message}`, { cause: error });
  }
}

/**
 * Perform STRIDE threat modeling on a system component
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Threat modeling options
 * @param {string} options.component - Component description or code
 * @param {string} [options.componentType] - Type (api, database, auth, file_upload, etc)
 * @param {string} [options.dataFlow] - Description of data flow
 * @returns {Promise<Object>} - Threat model with STRIDE analysis and DREAD scores
 * @throws {Error} - If the response cannot be parsed as JSON
 */
export async function performThreatModeling(
  agentName,
  taskId,
  { component, componentType = 'general', dataFlow = null }
) {
  const dataFlowContext = dataFlow ? `\n\nData Flow:\n${dataFlow}` : '';

  const systemPrompt = `You are a security architect performing threat modeling.
Use STRIDE methodology systematically.
Calculate DREAD scores objectively:
- Damage: 10=complete compromise, 1=minimal impact
- Reproducibility: 10=always works, 1=difficult/unreliable
- Exploitability: 10=no skills needed, 1=expert only
- Affected users: 10=all users, 1=few users
- Discoverability: 10=obvious/documented, 1=obscure

Risk levels based on DREAD average:
- critical: 8.5-10
- high: 7.0-8.4
- medium: 4.0-6.9
- low: 1.0-3.9

Provide analysis in JSON format (no markdown code blocks):
{
  "threats": [
    {
      "stride_category": "Spoofing|Tampering|Repudiation|InformationDisclosure|DoS|ElevationOfPrivilege",
      "title": "short threat title",
      "description": "detailed threat description",
      "attack_scenario": "how attacker would exploit",
      "dread": {
        "damage": 1-10,
        "reproducibility": 1-10,
        "exploitability": 1-10,
        "affected_users": 1-10,
        "discoverability": 1-10,
        "total": sum,
        "average": average
      },
      "risk_level": "critical|high|medium|low",
      "mitigation": "how to mitigate the threat",
      "cwe_id": "CWE-XXX (if applicable)"
    }
  ],
  "summary": "overall threat assessment",
  "priority_threats": ["list of high/critical threats to fix immediately"]
}`;

  const userPrompt = `Perform STRIDE threat modeling on this component.

Component Type: ${componentType}
${dataFlowContext}

Component:
\`\`\`
${component}
\`\`\`

Analyze using STRIDE methodology:
- Spoofing: Can identity be faked?
- Tampering: Can data be modified?
- Repudiation: Can actions be denied?
- Information Disclosure: Can sensitive data leak?
- Denial of Service: Can availability be disrupted?
- Elevation of Privilege: Can permissions be bypassed?

For each threat found, calculate DREAD score and provide mitigation:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 4000,
  });

  try {
    // Extract JSON from response
    return parseJsonResponse(result.text);
  } catch (error) {
    logger.error('Failed to parse Claude threat model', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: result.text,
    });
    throw new Error(`Failed to perform threat modeling: ${error.message}`, { cause: error });
  }
}