Cradicle Explorer

/ src / agents / utils / agent-claude-api.js
agent-claude-api.js
  1  /**
  2   * Agent-Claude API Module
  3   *
  4   * Centralized Claude API interface for all agents with budget tracking.
  5   * Provides 6 core functions:
  6   * - analyzeCode: Code review and recommendations
  7   * - generateCode: Generate new code from requirements
  8   * - generateTests: Generate test cases from source code
  9   * - reviewArchitecture: Architectural analysis
 10   * - suggestRefactoring: Refactoring recommendations
 11   * - classifyIssue: Categorize errors/issues
 12   *
 13   * Budget enforcement:
 14   * - Daily budget limit ($10 default, configurable)
 15   * - Emergency shutdown if >$5/hour burn rate
 16   * - All calls logged to agent_llm_usage table
 17   * - Cost calculation based on Anthropic token pricing
 18   *
 19   * @module agents/utils/agent-claude-api
 20   */
 21  
 22  import Logger from '../../utils/logger.js';
 23  import { callLLM } from '../../utils/llm-provider.js';
 24  import { run, getOne, getAll } from '../../utils/db.js';
 25  
 26  const logger = new Logger('AgentClaude');
 27  
 28  // Validate that OPENROUTER_API_KEY is configured
 29  function validateApiKey() {
 30    if (!process.env.OPENROUTER_API_KEY) {
 31      throw new Error(
 32        'No LLM API key configured: set OPENROUTER_API_KEY in environment'
 33      );
 34    }
 35  }
 36  
 37  // Pricing per million tokens (OpenRouter format)
 38  // See: https://openrouter.ai/models/anthropic
 39  const PRICING = {
 40    'anthropic/claude-sonnet-4-6': {
 41      input: 3.0, // $3.00 per million input tokens
 42      output: 15.0, // $15.00 per million output tokens
 43    },
 44    'anthropic/claude-haiku-4-5': {
 45      input: 0.8, // $0.80 per million input tokens (estimated, OpenRouter may vary)
 46      output: 4.0, // $4.00 per million output tokens (estimated)
 47    },
 48  };
 49  
 50  // Default models for agents (OpenRouter format)
 51  const DEFAULT_MODEL = process.env.CLAUDE_SONNET_MODEL || 'anthropic/claude-sonnet-4-6';
 52  const HAIKU_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';
 53  
 54  // Task types that can use cheaper Haiku model (simple/pattern-based)
 55  const SIMPLE_TASKS = new Set([
 56    'classify_issue', // Triage - Pattern matching
 57    'scan_logs', // Monitor - Log scanning
 58    'detect_anomaly', // Monitor - Anomaly detection
 59    'scan_secrets', // Security - Regex-based checks
 60    'find_test_files', // QA - File discovery
 61    'scan_dependencies', // Security - npm audit (simple)
 62  ]);
 63  
 64  // Task types requiring Sonnet (complex reasoning/generation)
 65  const COMPLEX_TASKS = new Set([
 66    'analyzeCode', // Developer/Security - Code analysis
 67    'generateCode', // Developer - Code generation
 68    'reviewArchitecture', // Architect - Design review
 69    'suggestRefactoring', // Developer - Refactoring
 70    'threat_model', // Security - Advanced threat modeling
 71    'generateTests', // QA - Test generation with coverage analysis
 72  ]);
 73  
 74  /**
 75   * Select appropriate model based on task complexity
 76   *
 77   * @param {string} taskType - Task type or function name
 78   * @param {string} [complexity='standard'] - Explicit complexity override
 79   * @returns {string} - Model name
 80   */
 81  export function selectModel(taskType, complexity = 'standard') {
 82    // Check env var to allow disabling Haiku optimization
 83    const useHaikuForSimple = process.env.AGENT_USE_HAIKU_FOR_SIMPLE_TASKS !== 'false';
 84  
 85    if (!useHaikuForSimple) {
 86      return DEFAULT_MODEL;
 87    }
 88  
 89    // Explicit complexity override
 90    if (complexity === 'simple') {
 91      return HAIKU_MODEL;
 92    } else if (complexity === 'complex') {
 93      return DEFAULT_MODEL;
 94    }
 95  
 96    // Auto-detect based on task type
 97    if (SIMPLE_TASKS.has(taskType)) {
 98      return HAIKU_MODEL;
 99    }
100  
101    if (COMPLEX_TASKS.has(taskType)) {
102      return DEFAULT_MODEL;
103    }
104  
105    // Default to Sonnet for unknown tasks
106    return DEFAULT_MODEL;
107  }
108  
/**
 * Calculate cost in USD for a Claude API call
 *
 * Pricing is resolved in this order: the given model, then DEFAULT_MODEL, then a
 * built-in fallback rate. The last step matters because DEFAULT_MODEL can be
 * overridden through the environment with a model id that has no PRICING entry;
 * without it the lookup would yield undefined and this function would throw.
 *
 * @param {string} model - Model name
 * @param {number} promptTokens - Input tokens
 * @param {number} completionTokens - Output tokens
 * @returns {number} - Cost in USD
 */
function calculateCost(model, promptTokens, completionTokens) {
  // Last-resort per-million-token rates, used only when neither the requested
  // model nor DEFAULT_MODEL has a PRICING entry. Matches the Sonnet rates so
  // unknown models are not under-counted against the budget.
  const fallbackPricing = { input: 3.0, output: 15.0 };

  // Own-key lookup only: ids like 'toString' must not resolve to inherited members.
  const lookupPricing = id =>
    typeof id === 'string' && Object.hasOwn(PRICING, id) ? PRICING[id] : undefined;

  const pricing = lookupPricing(model) ?? lookupPricing(DEFAULT_MODEL) ?? fallbackPricing;

  const inputCost = (promptTokens / 1_000_000) * pricing.input;
  const outputCost = (completionTokens / 1_000_000) * pricing.output;

  return inputCost + outputCost;
}
125  
/**
 * Sum the cost of all agent LLM calls recorded for the current date.
 *
 * @returns {Promise<number>} - Total USD spent today (0 when the query returns no row)
 */
export async function getTodaySpending() {
  const sql = `SELECT COALESCE(SUM(cost_usd), 0) as total
     FROM tel.agent_llm_usage
     WHERE DATE(created_at) = CURRENT_DATE`;

  const row = await getOne(sql);
  if (!row) {
    return 0;
  }

  // The total is parsed explicitly rather than trusted to be a number
  return parseFloat(row.total);
}
140  
/**
 * Sum the cost of all agent LLM calls recorded during the last hour.
 *
 * @returns {Promise<number>} - USD spent in last hour (0 when the query returns no row)
 */
export async function getHourlySpending() {
  const sql = `SELECT COALESCE(SUM(cost_usd), 0) as total
     FROM tel.agent_llm_usage
     WHERE created_at >= NOW() - INTERVAL '1 hour'`;

  const row = await getOne(sql);
  if (!row) {
    return 0;
  }

  return parseFloat(row.total);
}
155  
/**
 * Check if budget limits are exceeded
 *
 * Two limits are enforced, in this order:
 * - Emergency limit: $5 or more spent in the last hour
 * - Daily limit: AGENT_DAILY_BUDGET (default $10) or more spent today
 *
 * A non-numeric AGENT_DAILY_BUDGET falls back to the default. Without that
 * fallback the parsed value would be NaN, every comparison against it would be
 * false, and the daily limit would be silently disabled.
 *
 * @returns {Promise<void>}
 * @throws {Error} - If budget exceeded
 */
async function checkBudgetLimits() {
  const defaultDailyBudget = 10;
  const emergencyThreshold = 5.0; // $5/hour emergency shutdown

  const configuredBudget = parseFloat(process.env.AGENT_DAILY_BUDGET || '10');
  const dailyBudget = Number.isFinite(configuredBudget) ? configuredBudget : defaultDailyBudget;

  const todaySpending = await getTodaySpending();
  const hourlySpending = await getHourlySpending();

  // The burn-rate check comes first so a runaway loop is reported as an emergency
  if (hourlySpending >= emergencyThreshold) {
    throw new Error(
      `EMERGENCY: Agent LLM spending exceeded $${emergencyThreshold}/hour (current: $${hourlySpending.toFixed(2)})`
    );
  }

  if (todaySpending >= dailyBudget) {
    throw new Error(
      `Daily budget exceeded: $${todaySpending.toFixed(2)} / $${dailyBudget} (configure AGENT_DAILY_BUDGET to increase)`
    );
  }
}
180  
// Agent names whose usage is recorded in tel.agent_llm_usage.
const TRACKED_AGENTS = new Set(['developer', 'qa', 'security', 'architect', 'triage', 'monitor']);

/**
 * Record one LLM call in the usage table and emit a log entry for it.
 *
 * Calls from names outside TRACKED_AGENTS are skipped entirely: non-agent callers
 * (e.g. 'task-manager') would violate the agent_llm_usage CHECK constraint and
 * cause the whole API call to fail.
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {string} model - Model name
 * @param {number} promptTokens - Input tokens
 * @param {number} completionTokens - Output tokens
 * @param {number} costUsd - Cost in USD
 * @returns {Promise<void>}
 */
async function logUsage(agentName, taskId, model, promptTokens, completionTokens, costUsd) {
  const isTracked = TRACKED_AGENTS.has(agentName);
  if (!isTracked) {
    return;
  }

  const insertSql = `INSERT INTO tel.agent_llm_usage (agent_name, task_id, model, prompt_tokens, completion_tokens, cost_usd)
     VALUES ($1, $2, $3, $4, $5, $6)`;
  const values = [agentName, taskId, model, promptTokens, completionTokens, costUsd];

  await run(insertSql, values);

  const logFields = {
    agent: agentName,
    task_id: taskId,
    model,
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    cost_usd: costUsd.toFixed(6),
  };
  logger.info('LLM API call logged', logFields);
}
213  
/**
 * Call Claude API with budget tracking
 *
 * Steps: enforce budget limits, validate the API key, resolve the model, send the
 * request through callLLM, compute the cost from the reported token usage, and log
 * the usage. Any failure inside the request/logging step is logged and rethrown.
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {string} systemPrompt - System prompt
 * @param {string} userPrompt - User prompt
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model to use (default: auto-selected based on complexity)
 * @param {number} [options.maxTokens] - Max output tokens (default: 4096)
 * @param {number} [options.temperature] - Sampling temperature (default: 0.3)
 * @param {string} [options.taskType] - Task type for model selection
 * @param {string} [options.complexity] - Explicit complexity: 'simple' or 'complex'
 * @returns {Promise<Object>} - {text: string, usage: {promptTokens, completionTokens, cost, model}}
 * @throws {Error} - If a budget limit is exceeded, the API key is missing, or the call fails
 */
async function callClaude(agentName, taskId, systemPrompt, userPrompt, options = {}) {
  // Check budget before making call
  await checkBudgetLimits();

  // Validate API key
  validateApiKey();

  // Smart model selection based on task complexity
  let model;
  if (options.model) {
    model = options.model; // Explicit override
  } else if (options.taskType || options.complexity) {
    model = selectModel(options.taskType, options.complexity);
  } else {
    model = DEFAULT_MODEL; // Fallback
  }

  const maxTokens = options.maxTokens || 4096;

  // Only a finite number overrides the default, so 0 is honoured while
  // undefined/null/NaN keep the previous fixed value of 0.3.
  const temperature = Number.isFinite(options.temperature) ? options.temperature : 0.3;

  try {
    const llmResponse = await callLLM({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt },
      ],
      max_tokens: maxTokens,
      temperature,
      headers: {
        'HTTP-Referer':
          process.env.OPENROUTER_REFERER || 'https://github.com/jasonpaulneu/333Method',
        'X-Title': process.env.OPENROUTER_TITLE || '333 Method Agent System',
      },
    });

    const text = llmResponse.content;
    // Missing usage data is counted as zero tokens (and therefore zero cost)
    const promptTokens = llmResponse.usage?.promptTokens || 0;
    const completionTokens = llmResponse.usage?.completionTokens || 0;
    const cost = calculateCost(model, promptTokens, completionTokens);

    // Log usage
    await logUsage(agentName, taskId, model, promptTokens, completionTokens, cost);

    return {
      text,
      usage: {
        promptTokens,
        completionTokens,
        cost,
        model, // Include model in response for tracking
      },
    };
  } catch (error) {
    logger.error('Claude API call failed', {
      agent: agentName,
      task_id: taskId,
      model,
      error: error.message,
    });
    throw error;
  }
}
290  
/**
 * Ask the model to review a file and answer an analysis request about it.
 *
 * The user prompt is assembled from the file path, then whichever optional
 * context sections are present (file content, related files, error message),
 * and finally the analysis request itself.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - File path being analyzed
 * @param {string} prompt - Analysis prompt/question
 * @param {Object} [context] - Additional context
 * @param {string} [context.fileContent] - File content
 * @param {string} [context.relatedFiles] - Related files content
 * @param {string} [context.errorMessage] - Error message if analyzing a bug
 * @param {string} [context.model] - Model override
 * @param {string} [context.complexity] - Complexity: 'simple' or 'complex' (default: 'complex')
 * @returns {Promise<string>} - Analysis result
 */
export async function analyzeCode(agentName, taskId, filePath, prompt, context = {}) {
  const systemPrompt = `You are an expert code reviewer for the 333 Method automation project.
Analyze code for bugs, performance issues, security vulnerabilities, and maintainability.
Provide actionable recommendations with specific line numbers and code examples.

Project tech stack: Node.js 22 (ESM), SQLite, Playwright, better-sqlite3
Coding standards: Max 150 lines/file, complexity ≤15, depth ≤4 (ESLint enforced)
Testing: Node.js native test runner, 80%+ coverage target`;

  // Collect prompt sections in order; empty/missing context sections are omitted
  const sections = [`File: ${filePath}\n\n`];

  if (context.fileContent) {
    sections.push(`\`\`\`javascript\n${context.fileContent}\n\`\`\`\n\n`);
  }
  if (context.relatedFiles) {
    sections.push(`Related files:\n${context.relatedFiles}\n\n`);
  }
  if (context.errorMessage) {
    sections.push(`Error:\n${context.errorMessage}\n\n`);
  }

  sections.push(`Analysis request: ${prompt}`);

  const callOptions = {
    model: context.model,
    taskType: 'analyzeCode',
    // Code analysis defaults to complex
    complexity: context.complexity || 'complex',
  };

  const response = await callClaude(agentName, taskId, systemPrompt, sections.join(''), callOptions);
  return response.text;
}
338  
/**
 * Ask the model to produce code for a target file from written requirements.
 *
 * When existing code is supplied it is included in the prompt ahead of the
 * requirements. The call always requests 'complex' complexity and allows up to
 * 8192 output tokens.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - Target file path
 * @param {string} requirements - Code requirements
 * @param {string|null} [existingCode] - Existing code to modify/extend
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override
 * @returns {Promise<string>} - Generated code
 */
export async function generateCode(
  agentName,
  taskId,
  filePath,
  requirements,
  existingCode = null,
  options = {}
) {
  const systemPrompt = `You are an expert JavaScript developer for the 333 Method automation project.
Generate production-ready code following project standards:

- Node.js 22 ESM (import/export, not require)
- Async/await for asynchronous operations
- better-sqlite3 for database access (synchronous API)
- ESLint rules: max-lines: 150, complexity: 15, max-depth: 4
- JSDoc comments for all functions
- Error handling with try/catch and retryWithBackoff
- Logging via Logger class from src/utils/logger.js

Return ONLY the code, no explanations. Include import statements.`;

  const sections = [`File: ${filePath}\n\n`];

  if (existingCode) {
    sections.push(`Existing code:\n\`\`\`javascript\n${existingCode}\n\`\`\`\n\n`);
  }

  sections.push(`Requirements:\n${requirements}\n\nGenerate the complete code:`);

  const callOptions = {
    maxTokens: 8192, // More tokens for code generation
    model: options.model,
    taskType: 'generateCode',
    complexity: 'complex', // Code generation requires Sonnet
  };

  const response = await callClaude(agentName, taskId, systemPrompt, sections.join(''), callOptions);
  return response.text;
}
389  
/**
 * Ask the model to write unit tests for a source file.
 *
 * The prompt contains the source path and code, plus the coverage gaps
 * (pretty-printed as JSON) when they are provided. The call requests 'simple'
 * complexity and allows up to 8192 output tokens.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} sourceFile - Source file path
 * @param {string} sourceCode - Source code content
 * @param {Object} [coverageGaps] - Coverage gaps to fill
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override (default: Haiku for simple tests)
 * @returns {Promise<string>} - Generated test code
 */
export async function generateTests(
  agentName,
  taskId,
  sourceFile,
  sourceCode,
  coverageGaps = null,
  options = {}
) {
  const systemPrompt = `You are an expert test engineer for the 333 Method automation project.
Generate comprehensive unit tests using Node.js native test runner:

- Use node:test and node:assert modules
- Test success cases, error cases, edge cases
- Mock external dependencies (database, API calls, file system)
- Follow AAA pattern: Arrange, Act, Assert
- Use descriptive test names
- Target 80%+ coverage
- One test file per source file (tests/filename.test.js)

Return ONLY the test code, no explanations.`;

  const sections = [
    `Source file: ${sourceFile}\n\n`,
    `\`\`\`javascript\n${sourceCode}\n\`\`\`\n\n`,
  ];

  if (coverageGaps) {
    const gapsJson = JSON.stringify(coverageGaps, null, 2);
    sections.push(`Coverage gaps to address:\n${gapsJson}\n\n`);
  }

  sections.push(`Generate comprehensive unit tests:`);

  const callOptions = {
    maxTokens: 8192, // More tokens for test generation
    model: options.model,
    taskType: 'generateTests',
    complexity: 'simple', // Simple test generation can use Haiku
  };

  const response = await callClaude(agentName, taskId, systemPrompt, sections.join(''), callOptions);
  return response.text;
}
441  
/**
 * Ask the model to review an architecture proposal.
 *
 * Optional related files and requirements are appended to the prompt after the
 * proposal text when present. The call always requests 'complex' complexity.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} proposalText - Architecture proposal
 * @param {Object} [context] - Additional context
 * @param {string} [context.relatedFiles] - Related files content
 * @param {string} [context.requirements] - Requirements the proposal should meet
 * @param {string} [context.model] - Model override
 * @returns {Promise<string>} - Review feedback
 */
export async function reviewArchitecture(agentName, taskId, proposalText, context = {}) {
  const systemPrompt = `You are the system architect for the 333 Method automation project.
Review architecture proposals for:

- Alignment with project principles (simplicity, avoid over-engineering)
- Database schema design (SQLite best practices)
- API design and module boundaries
- Scalability and performance
- Security implications
- Testing strategy
- Migration path from current implementation

Provide constructive feedback with specific recommendations.`;

  const sections = [`Architecture proposal:\n\n${proposalText}\n\n`];

  if (context.relatedFiles) {
    sections.push(`Related files:\n${context.relatedFiles}\n\n`);
  }
  if (context.requirements) {
    sections.push(`Requirements:\n${context.requirements}\n\n`);
  }

  sections.push(`Please review this proposal:`);

  const callOptions = {
    model: context.model,
    taskType: 'reviewArchitecture',
    complexity: 'complex', // Architecture review requires deep reasoning
  };

  const response = await callClaude(agentName, taskId, systemPrompt, sections.join(''), callOptions);
  return response.text;
}
485  
/**
 * Ask the model for refactoring suggestions on a file.
 *
 * The prompt contains the file path and content, plus the reason for
 * refactoring when one is given. The call always requests 'complex' complexity.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} filePath - File to refactor
 * @param {string} fileContent - File content
 * @param {string} [reason] - Reason for refactoring
 * @param {Object} [options] - Additional options
 * @param {string} [options.model] - Model override
 * @returns {Promise<string>} - Refactoring suggestions
 */
export async function suggestRefactoring(
  agentName,
  taskId,
  filePath,
  fileContent,
  reason = null,
  options = {}
) {
  const systemPrompt = `You are an expert software engineer for the 333 Method automation project.
Suggest refactoring improvements for:

- Code complexity reduction (target: complexity ≤15, depth ≤4)
- File length reduction (target: ≤150 lines)
- DRY principle violations
- Naming improvements
- Function decomposition
- Error handling improvements

Prioritize simplicity. Avoid over-engineering.
Provide specific before/after code examples.`;

  const sections = [`File: ${filePath}\n\n`, `\`\`\`javascript\n${fileContent}\n\`\`\`\n\n`];

  if (reason) {
    sections.push(`Reason for refactoring: ${reason}\n\n`);
  }

  sections.push(`Suggest refactoring improvements:`);

  const callOptions = {
    model: options.model,
    taskType: 'suggestRefactoring',
    complexity: 'complex', // Refactoring suggestions require Sonnet
  };

  const response = await callClaude(agentName, taskId, systemPrompt, sections.join(''), callOptions);
  return response.text;
}
535  
/**
 * Classify error or issue
 *
 * Sends the error (plus optional stack trace and log context) to the model and
 * parses its JSON reply. The reply is accepted either as bare JSON or wrapped in
 * a markdown code fence, matching how the other JSON-returning helpers in this
 * module read model output.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} errorMessage - Error message
 * @param {Object} [context] - Additional context
 * @param {string} [context.stackTrace] - Stack trace
 * @param {string} [context.logContext] - Log context
 * @param {string} [context.model] - Model override
 * @returns {Promise<Object>} - {category, severity, suggestedAction, assignTo}
 * @throws {Error} - If the model response cannot be parsed as JSON
 */
export async function classifyIssue(agentName, taskId, errorMessage, context = {}) {
  const systemPrompt = `You are an error triage specialist for the 333 Method automation project.
Classify errors into categories and assign to appropriate agent:

Categories: NETWORK, DATABASE, API, PARSING, VALIDATION, AUTHENTICATION, RATE_LIMIT, TIMEOUT, CODE_BUG
Severity: low, medium, high, critical
Agents: developer, qa, security, architect, triage

Return JSON only (no markdown):
{
  "category": "CATEGORY",
  "severity": "severity",
  "suggestedAction": "what to do",
  "assignTo": "agent_name"
}`;

  let userPrompt = `Error: ${errorMessage}\n\n`;

  if (context.stackTrace) {
    userPrompt += `Stack trace:\n${context.stackTrace}\n\n`;
  }

  if (context.logContext) {
    userPrompt += `Log context:\n${context.logContext}\n\n`;
  }

  userPrompt += `Classify this error:`;

  const result = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 1024, // Shorter response for classification
    model: context.model,
    taskType: 'classify_issue',
    complexity: 'simple', // Classification is pattern-based, can use Haiku
  });

  // Parse JSON response
  try {
    // The prompt asks for bare JSON, but the model may still wrap it in a
    // markdown fence; unwrap the fence if present before parsing.
    const jsonMatch =
      result.text.match(/```json\s*([\s\S]*?)\s*```/) ||
      result.text.match(/```\s*([\s\S]*?)\s*```/);
    const jsonStr = jsonMatch ? jsonMatch[1] : result.text;

    return JSON.parse(jsonStr);
  } catch (e) {
    logger.error('Failed to parse classification response', {
      agent: agentName,
      task_id: taskId,
      response: result.text,
    });
    throw new Error(`Invalid classification response: ${e.message}`);
  }
}
595  
/**
 * Get agent LLM usage statistics with model breakdown
 *
 * Rows are grouped by agent and model. Aggregate columns are coerced to numbers
 * before being summed, because a database driver may deliver COUNT/SUM results
 * as strings; adding those with `+` would concatenate instead of add. The raw
 * rows are returned unchanged in `agents`.
 *
 * @param {string} [agentName] - Filter by agent (optional)
 * @param {number} [days] - Days to look back (default: 7)
 * @returns {Promise<Object>} - Usage statistics with model breakdown:
 *   {agents, totalCost, days, modelBreakdown: {haiku, sonnet, savings}}
 */
export async function getUsageStats(agentName = null, days = 7) {
  let query = `
    SELECT
      agent_name,
      model,
      COUNT(*) as call_count,
      SUM(prompt_tokens) as total_prompt_tokens,
      SUM(completion_tokens) as total_completion_tokens,
      SUM(cost_usd) as total_cost_usd,
      AVG(cost_usd) as avg_cost_per_call
    FROM tel.agent_llm_usage
    WHERE created_at >= NOW() - ($1 || ' days')::interval
  `;

  const params = [days];

  if (agentName) {
    query += ` AND agent_name = $${params.length + 1}`;
    params.push(agentName);
  }

  query += ' GROUP BY agent_name, model ORDER BY total_cost_usd DESC';

  const results = await getAll(query, params);

  // Numeric view of an aggregate column; anything non-numeric counts as 0
  const toNumber = value => {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : 0;
  };
  const sumColumn = (rows, column) => rows.reduce((sum, row) => sum + toNumber(row[column]), 0);

  const totalCost = sumColumn(results, 'total_cost_usd');

  // Calculate Haiku vs Sonnet breakdown
  const haikuCalls = results.filter(r => r.model.includes('haiku'));
  const sonnetCalls = results.filter(r => r.model.includes('sonnet'));

  const haikuCost = sumColumn(haikuCalls, 'total_cost_usd');
  const sonnetCost = sumColumn(sonnetCalls, 'total_cost_usd');

  const haikuCount = sumColumn(haikuCalls, 'call_count');
  const sonnetCount = sumColumn(sonnetCalls, 'call_count');

  return {
    agents: results,
    totalCost,
    days,
    modelBreakdown: {
      haiku: {
        calls: haikuCount,
        cost: haikuCost,
        avgCost: haikuCount > 0 ? haikuCost / haikuCount : 0,
      },
      sonnet: {
        calls: sonnetCount,
        cost: sonnetCost,
        avgCost: sonnetCount > 0 ? sonnetCost / sonnetCount : 0,
      },
      // NOTE(review): despite the name, this is Haiku's share of the combined
      // Haiku + Sonnet cost as a one-decimal percentage string, or the number 0
      // when there is no Sonnet spend. Kept as-is for callers; confirm intent.
      savings: sonnetCost > 0 ? ((haikuCost / (haikuCost + sonnetCost)) * 100).toFixed(1) : 0,
    },
  };
}
659  
/**
 * Simple wrapper for agents that just need a basic LLM call
 * Compatible with old claude-api.js signature
 *
 * Uses a generic assistant system prompt when none is given, appends the task
 * history to the system prompt when provided, and returns only the response text.
 *
 * @param {string} agentName - Agent making the call
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Call options
 * @param {string} options.prompt - The prompt to send
 * @param {string} [options.systemPrompt] - Optional system prompt
 * @param {string} [options.taskHistory] - Task history context (from context-builder)
 * @param {number} [options.temperature] - Temperature, passed on to callClaude (default: 0.3)
 * @param {number} [options.maxTokens] - Max tokens (default: 4000)
 * @returns {Promise<string>} - LLM response text
 */
export async function simpleLLMCall(agentName, taskId, options) {
  const {
    prompt,
    systemPrompt = '',
    taskHistory = null,
    temperature = 0.3,
    maxTokens = 4000,
  } = options;

  // Inject task history into system prompt if provided
  let enhancedSystemPrompt =
    systemPrompt || 'You are a helpful AI assistant for the 333 Method automation project.';

  if (taskHistory) {
    enhancedSystemPrompt += `\n\n${taskHistory}`;
  }

  // Pass the caller's temperature along with the token limit instead of
  // discarding it after destructuring.
  const result = await callClaude(agentName, taskId, enhancedSystemPrompt, prompt, {
    maxTokens,
    temperature,
  });

  return result.text;
}
699  
/**
 * Ask the model for a security review of a piece of code and parse its JSON reply.
 *
 * The reply may be bare JSON or JSON inside a markdown code fence; a fence is
 * unwrapped before parsing. Parse failures are logged and rethrown with a
 * wrapping message.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {string} code - Code to analyze
 * @param {string} [focusArea] - Specific security focus (sql_injection, xss, command_injection, secrets)
 * @param {string} [fileName] - File name for context
 * @returns {Promise<Object>} - Analysis result with findings: {findings: Array, summary: string}
 * @throws {Error} - If the response cannot be parsed as JSON
 */
export async function analyzeCodeSecurity(
  agentName,
  taskId,
  code,
  focusArea = null,
  fileName = null
) {
  // Underscores in the focus area are shown as spaces in the prompt
  let focusInstruction = 'Check all security aspects';
  if (focusArea) {
    focusInstruction = `Focus specifically on: ${focusArea.replace(/_/g, ' ')}`;
  }

  let fileContext = '';
  if (fileName) {
    fileContext = `File: ${fileName}\n\n`;
  }

  const systemPrompt = `You are a security expert analyzing code for vulnerabilities.
Be thorough but practical. Focus on real exploitable issues, not theoretical concerns.
Classify severity accurately:
- critical: Direct exploitable vulnerability with high impact
- high: Exploitable with some conditions, or severe impact
- medium: Security weakness requiring specific conditions
- low: Minor issue or best practice violation

Provide analysis in JSON format (no markdown code blocks):
{
  "findings": [
    {
      "type": "vulnerability_type",
      "severity": "critical|high|medium|low",
      "line": line_number,
      "description": "detailed description",
      "recommendation": "how to fix",
      "cwe_id": "CWE-XXX (if applicable)"
    }
  ],
  "summary": "overall security assessment"
}`;

  const userPrompt = `Analyze this code for security vulnerabilities.

${focusInstruction}

${fileContext}Code:
\`\`\`
${code}
\`\`\`

Provide security analysis:`;

  const reply = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 3000,
  });

  try {
    // Prefer a ```json fence, then any fence, then the raw text
    const fenced =
      reply.text.match(/```json\s*([\s\S]*?)\s*```/) || reply.text.match(/```\s*([\s\S]*?)\s*```/);
    const payload = fenced ? fenced[1] : reply.text;

    return JSON.parse(payload);
  } catch (error) {
    logger.error('Failed to parse Claude security analysis', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: reply.text,
    });
    throw new Error(`Failed to parse security analysis: ${error.message}`);
  }
}
779  
/**
 * Ask the model for a replacement that fixes a reported vulnerability.
 *
 * The reply may be bare JSON or JSON inside a markdown code fence. The parsed
 * fix must have truthy `old_string` and `new_string` fields; otherwise, or when
 * parsing fails, the problem is logged and an error is thrown.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Fix generation options
 * @param {string} options.code - Original code with vulnerability
 * @param {Object} options.finding - Security finding object
 * @param {string} [options.fileName] - File name for context
 * @returns {Promise<Object>} - Fix result with old_string, new_string, explanation
 * @throws {Error} - If the response cannot be parsed or lacks the required fields
 */
export async function generateSecureFix(agentName, taskId, { code, finding, fileName = null }) {
  let fileContext = '';
  if (fileName) {
    fileContext = `File: ${fileName}\n\n`;
  }

  const systemPrompt = `You are a security expert who fixes code vulnerabilities.
Provide practical, secure fixes that maintain functionality.
Ensure old_string matches the original code EXACTLY.
Follow secure coding best practices for the language.

Provide the fix in JSON format (no markdown code blocks):
{
  "old_string": "exact string to replace (must match code exactly)",
  "new_string": "secure replacement code",
  "explanation": "why this fix is secure",
  "testing_notes": "how to test the fix"
}

IMPORTANT:
- old_string must match the vulnerable code EXACTLY (including whitespace, indentation)
- new_string should maintain the same functionality but be secure
- Preserve code style and formatting
- Keep the fix minimal - only change what's necessary`;

  const userPrompt = `Fix this security vulnerability in the code.

Vulnerability:
- Type: ${finding.type}
- Severity: ${finding.severity}
- Line: ${finding.line || 'unknown'}
- Description: ${finding.description}
- Recommendation: ${finding.recommendation}

${fileContext}Original Code:
\`\`\`
${code}
\`\`\`

Provide the secure fix:`;

  const reply = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 2000,
  });

  try {
    // Prefer a ```json fence, then any fence, then the raw text
    const fenced =
      reply.text.match(/```json\s*([\s\S]*?)\s*```/) || reply.text.match(/```\s*([\s\S]*?)\s*```/);
    const payload = fenced ? fenced[1] : reply.text;

    const fix = JSON.parse(payload);

    // Both sides of the replacement are required
    const hasBothStrings = Boolean(fix.old_string) && Boolean(fix.new_string);
    if (!hasBothStrings) {
      throw new Error('Fix missing old_string or new_string');
    }

    return fix;
  } catch (error) {
    // Covers both JSON parse failures and the missing-field error thrown above
    logger.error('Failed to parse Claude fix generation', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: reply.text,
    });
    throw new Error(`Failed to generate secure fix: ${error.message}`);
  }
}
858  
/**
 * Ask the model for a STRIDE threat model of a component and parse its JSON reply.
 *
 * The reply may be bare JSON or JSON inside a markdown code fence; a fence is
 * unwrapped before parsing. Parse failures are logged and rethrown with a
 * wrapping message.
 *
 * @param {string} agentName - Agent name
 * @param {number|null} taskId - Related task ID
 * @param {Object} options - Threat modeling options
 * @param {string} options.component - Component description or code
 * @param {string} [options.componentType] - Type (api, database, auth, file_upload, etc)
 * @param {string} [options.dataFlow] - Description of data flow
 * @returns {Promise<Object>} - Threat model with STRIDE analysis and DREAD scores
 * @throws {Error} - If the response cannot be parsed as JSON
 */
export async function performThreatModeling(
  agentName,
  taskId,
  { component, componentType = 'general', dataFlow = null }
) {
  let dataFlowContext = '';
  if (dataFlow) {
    dataFlowContext = `\n\nData Flow:\n${dataFlow}`;
  }

  const systemPrompt = `You are a security architect performing threat modeling.
Use STRIDE methodology systematically.
Calculate DREAD scores objectively:
- Damage: 10=complete compromise, 1=minimal impact
- Reproducibility: 10=always works, 1=difficult/unreliable
- Exploitability: 10=no skills needed, 1=expert only
- Affected users: 10=all users, 1=few users
- Discoverability: 10=obvious/documented, 1=obscure

Risk levels based on DREAD average:
- critical: 8.5-10
- high: 7.0-8.4
- medium: 4.0-6.9
- low: 1.0-3.9

Provide analysis in JSON format (no markdown code blocks):
{
  "threats": [
    {
      "stride_category": "Spoofing|Tampering|Repudiation|InformationDisclosure|DoS|ElevationOfPrivilege",
      "title": "short threat title",
      "description": "detailed threat description",
      "attack_scenario": "how attacker would exploit",
      "dread": {
        "damage": 1-10,
        "reproducibility": 1-10,
        "exploitability": 1-10,
        "affected_users": 1-10,
        "discoverability": 1-10,
        "total": sum,
        "average": average
      },
      "risk_level": "critical|high|medium|low",
      "mitigation": "how to mitigate the threat",
      "cwe_id": "CWE-XXX (if applicable)"
    }
  ],
  "summary": "overall threat assessment",
  "priority_threats": ["list of high/critical threats to fix immediately"]
}`;

  const userPrompt = `Perform STRIDE threat modeling on this component.

Component Type: ${componentType}
${dataFlowContext}

Component:
\`\`\`
${component}
\`\`\`

Analyze using STRIDE methodology:
- Spoofing: Can identity be faked?
- Tampering: Can data be modified?
- Repudiation: Can actions be denied?
- Information Disclosure: Can sensitive data leak?
- Denial of Service: Can availability be disrupted?
- Elevation of Privilege: Can permissions be bypassed?

For each threat found, calculate DREAD score and provide mitigation:`;

  const reply = await callClaude(agentName, taskId, systemPrompt, userPrompt, {
    maxTokens: 4000,
  });

  try {
    // Prefer a ```json fence, then any fence, then the raw text
    const fenced =
      reply.text.match(/```json\s*([\s\S]*?)\s*```/) || reply.text.match(/```\s*([\s\S]*?)\s*```/);
    const payload = fenced ? fenced[1] : reply.text;

    return JSON.parse(payload);
  } catch (error) {
    logger.error('Failed to parse Claude threat model', {
      agent: agentName,
      task_id: taskId,
      error: error.message,
      response: reply.text,
    });
    throw new Error(`Failed to perform threat modeling: ${error.message}`);
  }
}