Cradicle Explorer

/ __quarantined_tests__ / agents / e2e-agent-system.test.js
e2e-agent-system.test.js
   1  /**
   2   * End-to-End Agent System Tests
   3   *
   4   * Comprehensive test suite to verify the agent system is production-ready.
   5   *
   6   * Test Coverage:
   7   * 1. Task Lifecycle - Create task → Agent picks up → Processes → Completes
   8   * 2. Inter-agent Communication - Developer creates task → QA reviews
   9   * 3. Error Handling - Invalid context → Graceful failure with clear error
  10   * 4. Circuit Breaker - Multiple failures → Circuit opens → Auto-recovers
  11   * 5. Task Routing - Triage correctly routes to Developer/Security/etc
  12   * 6. Priority Handling - High priority tasks processed first
  13   * 7. Row-level Locking - Concurrent agents don't claim same task
  14   * 8. Known Error Database - Similar errors get suggested fixes
  15   * 9. Coverage Gates - Developer enforces 85% coverage before commits
  16   * 10. Workflow Dependencies - Features require approved designs
  17   */
  18  
  19  import { test, describe, beforeEach, afterEach, mock } from 'node:test';
  20  import assert from 'node:assert';
  21  import Database from 'better-sqlite3';
  22  import { TriageAgent } from '../../src/agents/triage.js';
  23  import { DeveloperAgent, _deps as devDeps } from '../../src/agents/developer.js';
  24  import { QAAgent } from '../../src/agents/qa.js';
  25  import { SecurityAgent } from '../../src/agents/security.js';
  26  import { ArchitectAgent } from '../../src/agents/architect.js';
  27  import { MonitorAgent } from '../../src/agents/monitor.js';
  28  import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js';
  29  import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js';
  30  import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js';
  31  import { resetDb as resetStructuredLoggerDb } from '../../src/agents/utils/structured-logger.js';
  32  import fs from 'fs/promises';
  33  import path from 'path';
  34  
  35  // Use temporary file database for tests
  36  let db;
  37  const agents = {};
  38  const TEST_DB_PATH = './tests/agents/test-e2e.db';
  39  
  40  // Save originals so afterEach can restore them
  41  const originalDevDeps = { ...devDeps };
  42  
  43  /**
  44   * Initialize test database with full schema
  45   */
  46  function createTestSchema(db) {
  47    db.exec(`
  48      -- Agent task queue
  49      CREATE TABLE agent_tasks (
  50        id INTEGER PRIMARY KEY AUTOINCREMENT,
  51        task_type TEXT NOT NULL,
  52        assigned_to TEXT NOT NULL CHECK(assigned_to IN ('developer', 'qa', 'security', 'architect', 'triage', 'monitor')),
  53        created_by TEXT,
  54        status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'running', 'completed', 'failed', 'blocked')),
  55        priority INTEGER DEFAULT 5 CHECK(priority >= 1 AND priority <= 10),
  56        context_json TEXT,
  57        result_json TEXT,
  58        parent_task_id INTEGER REFERENCES agent_tasks(id),
  59        error_message TEXT,
  60        reviewed_by TEXT,
  61        approval_json TEXT,
  62        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  63        started_at DATETIME,
  64        completed_at DATETIME,
  65        retry_count INTEGER DEFAULT 0
  66      );
  67  
  68      -- Inter-agent messaging
  69      CREATE TABLE agent_messages (
  70        id INTEGER PRIMARY KEY AUTOINCREMENT,
  71        task_id INTEGER REFERENCES agent_tasks(id),
  72        from_agent TEXT NOT NULL,
  73        to_agent TEXT NOT NULL,
  74        message_type TEXT CHECK(message_type IN ('question', 'answer', 'handoff', 'notification')),
  75        content TEXT NOT NULL,
  76        metadata_json TEXT,
  77        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  78        read_at DATETIME
  79      );
  80  
  81      -- Agent execution logs
  82      CREATE TABLE agent_logs (
  83        id INTEGER PRIMARY KEY AUTOINCREMENT,
  84        task_id INTEGER,
  85        agent_name TEXT NOT NULL,
  86        log_level TEXT CHECK(log_level IN ('debug', 'info', 'warn', 'error')),
  87        message TEXT NOT NULL,
  88        data_json TEXT,
  89        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
  90      );
  91  
  92      -- Agent status tracking
  93      CREATE TABLE agent_state (
  94        agent_name TEXT PRIMARY KEY CHECK(agent_name IN ('developer', 'qa', 'security', 'architect', 'triage', 'monitor')),
  95        last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
  96        current_task_id INTEGER,
  97        status TEXT DEFAULT 'idle' CHECK(status IN ('idle', 'working', 'blocked')),
  98        metrics_json TEXT
  99      );
 100  
 101      -- Agent outcomes for learning
 102      CREATE TABLE agent_outcomes (
 103        id INTEGER PRIMARY KEY AUTOINCREMENT,
 104        task_id INTEGER NOT NULL,
 105        agent_name TEXT NOT NULL,
 106        task_type TEXT NOT NULL,
 107        outcome TEXT NOT NULL CHECK(outcome IN ('success', 'failure')),
 108        context_json TEXT,
 109        result_json TEXT,
 110        duration_ms INTEGER,
 111        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
 112      );
 113  
 114      -- Circuit breaker state (for OpenRouter/external APIs)
 115      CREATE TABLE IF NOT EXISTS circuit_breaker_state (
 116        service_name TEXT PRIMARY KEY,
 117        state TEXT NOT NULL CHECK(state IN ('closed', 'open', 'half_open')),
 118        failure_count INTEGER DEFAULT 0,
 119        last_failure_at DATETIME,
 120        opened_at DATETIME,
 121        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
 122      );
 123  
 124      -- Performance indexes
 125      CREATE INDEX idx_agent_tasks_assigned_to ON agent_tasks(assigned_to, status);
 126      CREATE INDEX idx_agent_tasks_parent ON agent_tasks(parent_task_id);
 127      CREATE INDEX idx_agent_tasks_priority ON agent_tasks(priority DESC, created_at ASC);
 128      CREATE INDEX idx_agent_messages_task ON agent_messages(task_id);
 129      CREATE INDEX idx_agent_messages_to ON agent_messages(to_agent, read_at);
 130      CREATE INDEX idx_agent_logs_task ON agent_logs(task_id);
 131      CREATE INDEX idx_agent_logs_agent ON agent_logs(agent_name, created_at);
 132  
 133      -- Initialize agent state for all agents
 134      INSERT INTO agent_state (agent_name, status, metrics_json) VALUES
 135        ('triage', 'idle', '{}'),
 136        ('developer', 'idle', '{}'),
 137        ('qa', 'idle', '{}'),
 138        ('security', 'idle', '{}'),
 139        ('architect', 'idle', '{}'),
 140        ('monitor', 'idle', '{}');
 141    `);
 142  }
 143  
 144  beforeEach(async () => {
 145    // Disable immediate invocation to avoid infinite recursion in tests
 146    process.env.AGENT_IMMEDIATE_INVOCATION = 'false';
 147  
 148    // Remove existing test database
 149    try {
 150      await fs.unlink(TEST_DB_PATH);
 151    } catch (e) {
 152      // Ignore if file doesn't exist
 153    }
 154  
 155    // Create temporary test database
 156    db = new Database(TEST_DB_PATH);
 157    process.env.DATABASE_PATH = TEST_DB_PATH;
 158  
 159    // Create full schema
 160    createTestSchema(db);
 161  
 162    // Initialize all agents
 163    agents.triage = new TriageAgent();
 164    agents.developer = new DeveloperAgent();
 165    agents.qa = new QAAgent();
 166    agents.security = new SecurityAgent();
 167    agents.architect = new ArchitectAgent();
 168    agents.monitor = new MonitorAgent();
 169  
 170    // Initialize agents (loads context)
 171    for (const agent of Object.values(agents)) {
 172      await agent.initialize();
 173    }
 174  });
 175  
 176  afterEach(async () => {
 177    // Reset all database connections
 178    resetBaseDb();
 179    resetTaskDb();
 180    resetMessageDb();
 181    resetStructuredLoggerDb();
 182  
 183    if (db) {
 184      db.close();
 185    }
 186  
 187    // Clean up test database
 188    try {
 189      await fs.unlink(TEST_DB_PATH);
 190    } catch (e) {
 191      // Ignore if file doesn't exist
 192    }
 193  
 194    // Restore env vars
 195    delete process.env.AGENT_IMMEDIATE_INVOCATION;
 196  
 197    // Restore devDeps to originals (tests may have replaced them)
 198    Object.assign(devDeps, originalDevDeps);
 199  
 200    // Reset any node:test mocks
 201    mock.restoreAll();
 202  });
 203  
 204  describe('E2E Agent System Tests', () => {
 205    describe('1. Task Lifecycle', () => {
 206      test('complete task lifecycle: create → pick up → process → complete', async () => {
 207        // Create a simple classify_error task for Triage
 208        const taskId = db
 209          .prepare(
 210            `
 211          INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json)
 212          VALUES ('classify_error', 'triage', 'pending', 5, ?)
 213        `
 214          )
 215          .run(
 216            JSON.stringify({
 217              error_message: 'TypeError: Cannot read property "score" of null',
 218              stack_trace: 'at Object.<anonymous> (src/scoring.js:100:20)',
 219              stage: 'scoring',
 220              frequency: 1,
 221            })
 222          ).lastInsertRowid;
 223  
 224        // Verify task created
 225        const createdTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 226        assert.strictEqual(createdTask.status, 'pending');
 227        assert.strictEqual(createdTask.assigned_to, 'triage');
 228  
 229        // Agent picks up task
 230        const processedCount = await agents.triage.pollTasks(1);
 231        assert.strictEqual(processedCount, 1, 'Agent should process 1 task');
 232  
 233        // Verify task completed
 234        const completedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 235        assert.strictEqual(completedTask.status, 'completed');
 236        assert.ok(completedTask.started_at, 'Task should have started_at timestamp');
 237        assert.ok(completedTask.completed_at, 'Task should have completed_at timestamp');
 238        assert.ok(completedTask.result_json, 'Task should have result_json');
 239  
 240        const result = JSON.parse(completedTask.result_json);
 241        assert.strictEqual(result.classification, 'null_pointer');
 242        assert.strictEqual(result.routed_to, 'developer');
 243      });
 244  
 245      test('task status transitions: pending → running → completed', async () => {
 246        const taskId = db
 247          .prepare(
 248            `
 249          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 250          VALUES ('classify_error', 'triage', 'pending', ?)
 251        `
 252          )
 253          .run(
 254            JSON.stringify({
 255              error_message: 'ECONNREFUSED: Connection refused',
 256              stack_trace: '',
 257              stage: 'serps',
 258              frequency: 5,
 259            })
 260          ).lastInsertRowid;
 261  
 262        // Track status before processing
 263        const beforeTask = db.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(taskId);
 264        assert.strictEqual(beforeTask.status, 'pending');
 265  
 266        // Process task
 267        await agents.triage.pollTasks(1);
 268  
 269        // Verify completed
 270        const afterTask = db.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(taskId);
 271        assert.strictEqual(afterTask.status, 'completed');
 272  
 273        // Verify logs show status transitions
 274        const logs = db
 275          .prepare('SELECT * FROM agent_logs WHERE task_id = ? ORDER BY created_at ASC')
 276          .all(taskId);
 277  
 278        assert.ok(logs.length > 0, 'Should have logged activity');
 279        assert.ok(
 280          logs.some(log => log.message.includes('Task started')),
 281          'Should log task start'
 282        );
 283        assert.ok(
 284          logs.some(log => log.message.includes('Task completed')),
 285          'Should log task completion'
 286        );
 287      });
 288    });
 289  
 290    describe('2. Inter-agent Communication', () => {
 291      test('Developer creates task → QA reviews', async () => {
 292        // Create fix_bug task for Developer (simulate Triage completed)
 293        const devTaskId = db
 294          .prepare(
 295            `
 296          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by)
 297          VALUES ('fix_bug', 'developer', 'pending', ?, 'triage')
 298        `
 299          )
 300          .run(
 301            JSON.stringify({
 302              error_message: 'UNIQUE constraint failed: sites.domain',
 303              error_type: 'database',
 304              stage: 'serps',
 305              file_path: 'src/scrape.js',
 306              suggested_fix: 'Check for existing record before INSERT',
 307            })
 308          ).lastInsertRowid;
 309  
 310        // Mock file operations and test runner via _deps (avoids ESM namespace issues)
 311        devDeps.readFile = () =>
 312          Promise.resolve({
 313            content: 'const db = Database(); db.prepare("INSERT INTO sites...").run();',
 314            size: 100,
 315          });
 316        devDeps.getFileContext = () =>
 317          Promise.resolve({
 318            imports: ['better-sqlite3'],
 319            testFiles: ['tests/serps.test.js'],
 320          });
 321        devDeps.editFile = () =>
 322          Promise.resolve({
 323            backupPath: '/tmp/backup.js',
 324            diff: 'Added null check',
 325          });
 326        devDeps.cleanupBackups = () => Promise.resolve();
 327        devDeps.runTestsForFile = () =>
 328          Promise.resolve({
 329            success: true,
 330            stats: { pass: 10, fail: 0 },
 331            coverage: { lines: { pct: 90 } },
 332          });
 333        devDeps.simpleLLMCall = () =>
 334          Promise.resolve(
 335            JSON.stringify({
 336              old_string: 'db.prepare("INSERT INTO sites...").run();',
 337              new_string:
 338                'const existing = db.prepare("SELECT id FROM sites WHERE domain = ?").get(domain);\nif (!existing) db.prepare("INSERT INTO sites...").run();',
 339              explanation: 'Added check for existing record before INSERT',
 340              test_cases: ['Duplicate domain', 'New domain'],
 341            })
 342          );
 343  
 344        // Mock git commands and coverage read via devDeps
 345        devDeps.execSync = cmd => {
 346          if (cmd.includes('git commit')) return 'commit-hash-123\n';
 347          return '';
 348        };
 349        devDeps.readFileCoverage = () =>
 350          Promise.resolve(JSON.stringify({ 'src/scrape.js': { lines: { pct: 90 } } }));
 351  
 352        // Developer processes the bug fix
 353        await agents.developer.pollTasks(1);
 354  
 355        // Verify Developer completed task
 356        const devTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTaskId);
 357        assert.strictEqual(devTask.status, 'completed');
 358  
 359        // Verify QA task was created
 360        const qaTasks = db
 361          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
 362          .all('qa', devTaskId);
 363  
 364        assert.strictEqual(qaTasks.length, 1, 'QA task should be created');
 365        assert.strictEqual(qaTasks[0].task_type, 'verify_fix');
 366  
 367        // Verify handoff message exists
 368        const messages = db
 369          .prepare('SELECT * FROM agent_messages WHERE from_agent = ? AND to_agent = ?')
 370          .all('developer', 'qa');
 371  
 372        assert.strictEqual(messages.length, 1, 'Handoff message should exist');
 373        assert.strictEqual(messages[0].message_type, 'handoff');
 374        assert.ok(
 375          messages[0].content.includes('Bug fix complete'),
 376          'Handoff message should describe completion'
 377        );
 378      });
 379  
 380      test('agents can ask questions and receive answers', async () => {
 381        // Create task for Developer
 382        const taskId = db
 383          .prepare(
 384            `
 385          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 386          VALUES ('fix_bug', 'developer', 'pending', ?)
 387        `
 388          )
 389          .run(
 390            JSON.stringify({
 391              error_message: 'Unclear error with no stack trace',
 392              error_type: 'unknown',
 393              stage: 'scoring',
 394            })
 395          ).lastInsertRowid;
 396  
 397        // Developer asks Triage for clarification
 398        const questionId = await agents.developer.askQuestion(
 399          taskId,
 400          'triage',
 401          'Could not identify file from error. Please provide file path.'
 402        );
 403  
 404        // Verify question message created
 405        const question = db.prepare('SELECT * FROM agent_messages WHERE id = ?').get(questionId);
 406        assert.strictEqual(question.from_agent, 'developer');
 407        assert.strictEqual(question.to_agent, 'triage');
 408        assert.strictEqual(question.message_type, 'question');
 409        assert.ok(!question.read_at, 'Question should be unread initially');
 410  
 411        // Triage checks for unread messages
 412        const unreadMessages = agents.triage.getUnreadMessages(10);
 413        assert.strictEqual(unreadMessages.length, 1);
 414        assert.strictEqual(unreadMessages[0].id, questionId);
 415  
 416        // Triage sends answer
 417        const answerId = await agents.triage.sendAnswer(
 418          taskId,
 419          'developer',
 420          'Error is in src/scoring.js line 100',
 421          questionId
 422        );
 423  
 424        // Verify answer message created
 425        const answer = db.prepare('SELECT * FROM agent_messages WHERE id = ?').get(answerId);
 426        assert.strictEqual(answer.from_agent, 'triage');
 427        assert.strictEqual(answer.to_agent, 'developer');
 428        assert.strictEqual(answer.message_type, 'answer');
 429  
 430        // Verify metadata links question and answer
 431        const metadata = JSON.parse(answer.metadata_json);
 432        assert.strictEqual(metadata.in_reply_to, questionId);
 433      });
 434    });
 435  
 436    describe('3. Error Handling', () => {
 437      test('invalid context → graceful failure with clear error', async () => {
 438        // Create task with missing required field
 439        const taskId = db
 440          .prepare(
 441            `
 442          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 443          VALUES ('fix_bug', 'developer', 'pending', ?)
 444        `
 445          )
 446          .run(
 447            JSON.stringify({
 448              // Missing error_message field
 449              error_type: 'null_pointer',
 450              stage: 'scoring',
 451            })
 452          ).lastInsertRowid;
 453  
 454        // Process task
 455        await agents.developer.pollTasks(1);
 456  
 457        // Verify task failed gracefully
 458        const failedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 459        assert.strictEqual(failedTask.status, 'failed');
 460        assert.ok(
 461          failedTask.error_message.includes('Missing required field: error_message'),
 462          'Error message should be descriptive'
 463        );
 464  
 465        // Verify error was logged
 466        const errorLogs = db
 467          .prepare("SELECT * FROM agent_logs WHERE task_id = ? AND log_level = 'error'")
 468          .all(taskId);
 469  
 470        assert.ok(errorLogs.length > 0, 'Error should be logged');
 471      });
 472  
 473      test('task with malformed JSON → fails with parse error', async () => {
 474        // Create task with malformed JSON
 475        const taskId = db
 476          .prepare(
 477            `
 478          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 479          VALUES ('classify_error', 'triage', 'pending', ?)
 480        `
 481          )
 482          .run('{invalid json here').lastInsertRowid;
 483  
 484        // Process task
 485        await agents.triage.pollTasks(1);
 486  
 487        // Task should still complete (context_json is optional)
 488        // Agent handles parse errors gracefully
 489        const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 490        assert.ok(
 491          task.status === 'completed' || task.status === 'failed',
 492          'Task should handle malformed JSON gracefully'
 493        );
 494      });
 495  
 496      test('retry logic: task fails → marked as failed with error message', async () => {
 497        // Create task that will fail (missing file)
 498        const taskId = db
 499          .prepare(
 500            `
 501          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 502          VALUES ('fix_bug', 'developer', 'pending', ?)
 503        `
 504          )
 505          .run(
 506            JSON.stringify({
 507              error_message: 'TypeError: Cannot read property "x" of null',
 508              stack_trace: 'at Object.<anonymous> (src/nonexistent-dev-test-file.js:100:20)',
 509              error_type: 'null_pointer',
 510              stage: 'scoring',
 511            })
 512          ).lastInsertRowid;
 513  
 514        // Process task (will fail - developer handles errors internally via failTask)
 515        await agents.developer.pollTasks(1);
 516  
 517        // Developer agent handles errors internally without retry mechanism
 518        // It calls failTask() directly, marking the task failed with an error message
 519        const finalTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 520        assert.strictEqual(finalTask.status, 'failed');
 521        assert.ok(finalTask.error_message, 'Task should have an error message');
 522      });
 523    });
 524  
 525    describe('4. Circuit Breaker', () => {
 526      test('multiple failures → circuit opens', async () => {
 527        // Create circuit breaker state table entry
 528        db.prepare(
 529          `
 530          INSERT INTO circuit_breaker_state (service_name, state, failure_count)
 531          VALUES ('openrouter', 'closed', 0)
 532        `
 533        ).run();
 534  
 535        // Simulate 5 consecutive failures (threshold to open circuit)
 536        for (let i = 0; i < 5; i++) {
 537          db.prepare(
 538            `
 539            UPDATE circuit_breaker_state
 540            SET failure_count = failure_count + 1,
 541                last_failure_at = datetime('now'),
 542                state = CASE WHEN failure_count >= 4 THEN 'open' ELSE 'closed' END
 543            WHERE service_name = 'openrouter'
 544          `
 545          ).run();
 546        }
 547  
 548        // Verify circuit is open
 549        const cbState = db
 550          .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?')
 551          .get('openrouter');
 552  
 553        assert.strictEqual(cbState.state, 'open');
 554        assert.ok(cbState.failure_count >= 5);
 555      });
 556  
 557      test('circuit opens → auto-recovers after cooldown', async () => {
 558        // Insert open circuit breaker state with old timestamp (use SQLite datetime format for comparison)
 559        db.prepare(
 560          `
 561          INSERT INTO circuit_breaker_state (service_name, state, failure_count, opened_at)
 562          VALUES ('openrouter', 'open', 10, datetime('now', '-35 minutes'))
 563        `
 564        ).run();
 565  
 566        // Simulate circuit breaker check (normally done by circuit-breaker.js)
 567        // After 30 minutes, circuit should transition to half_open
 568        db.prepare(
 569          `
 570          UPDATE circuit_breaker_state
 571          SET state = 'half_open'
 572          WHERE service_name = 'openrouter'
 573            AND state = 'open'
 574            AND opened_at < datetime('now', '-30 minutes')
 575        `
 576        ).run();
 577  
 578        const cbState = db
 579          .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?')
 580          .get('openrouter');
 581  
 582        assert.strictEqual(cbState.state, 'half_open', 'Circuit should be half_open after cooldown');
 583  
 584        // Simulate successful request → circuit closes
 585        db.prepare(
 586          `
 587          UPDATE circuit_breaker_state
 588          SET state = 'closed', failure_count = 0
 589          WHERE service_name = 'openrouter' AND state = 'half_open'
 590        `
 591        ).run();
 592  
 593        const recoveredState = db
 594          .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?')
 595          .get('openrouter');
 596  
 597        assert.strictEqual(recoveredState.state, 'closed', 'Circuit should close after success');
 598        assert.strictEqual(recoveredState.failure_count, 0, 'Failure count should reset');
 599      });
 600    });
 601  
 602    describe('5. Task Routing', () => {
 603      test('triage routes security error to Security agent', async () => {
 604        const taskId = db
 605          .prepare(
 606            `
 607          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 608          VALUES ('classify_error', 'triage', 'pending', ?)
 609        `
 610          )
 611          .run(
 612            JSON.stringify({
 613              error_message: 'Unauthorized: Invalid API signature',
 614              stack_trace: '',
 615              stage: 'outreach',
 616              frequency: 1,
 617            })
 618          ).lastInsertRowid;
 619  
 620        await agents.triage.pollTasks(1);
 621  
 622        // Verify routed to Security
 623        const securityTasks = db
 624          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
 625          .all('security', taskId);
 626  
 627        assert.strictEqual(securityTasks.length, 1);
 628        assert.strictEqual(securityTasks[0].task_type, 'fix_bug');
 629  
 630        const context = JSON.parse(securityTasks[0].context_json);
 631        assert.strictEqual(context.error_type, 'security');
 632        assert.strictEqual(context.severity, 'critical');
 633      });
 634  
 635      test('triage routes database constraint to Developer agent', async () => {
 636        const taskId = db
 637          .prepare(
 638            `
 639          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 640          VALUES ('classify_error', 'triage', 'pending', ?)
 641        `
 642          )
 643          .run(
 644            JSON.stringify({
 645              error_message: 'UNIQUE constraint failed: sites.domain',
 646              stack_trace: 'at db.prepare(...).run() (src/serps.js:50:10)',
 647              stage: 'serps',
 648              frequency: 1,
 649            })
 650          ).lastInsertRowid;
 651  
 652        await agents.triage.pollTasks(1);
 653  
 654        // Verify routed to Developer
 655        const devTasks = db
 656          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
 657          .all('developer', taskId);
 658  
 659        assert.strictEqual(devTasks.length, 1);
 660  
 661        const context = JSON.parse(devTasks[0].context_json);
 662        assert.strictEqual(context.error_type, 'database');
 663        assert.ok(
 664          context.suggested_fix.includes('INSERT OR IGNORE'),
 665          'Should suggest fix for UNIQUE constraint'
 666        );
 667      });
 668  
 669      test('triage routes network error to Architect agent', async () => {
 670        const taskId = db
 671          .prepare(
 672            `
 673          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 674          VALUES ('classify_error', 'triage', 'pending', ?)
 675        `
 676          )
 677          .run(
 678            JSON.stringify({
 679              error_message: 'ETIMEDOUT: Connection timed out',
 680              stack_trace: '',
 681              stage: 'serps',
 682              frequency: 10,
 683            })
 684          ).lastInsertRowid;
 685  
 686        await agents.triage.pollTasks(1);
 687  
 688        // Verify routed to Architect (infrastructure issue)
 689        const architectTasks = db
 690          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
 691          .all('architect', taskId);
 692  
 693        assert.strictEqual(architectTasks.length, 1);
 694  
 695        const context = JSON.parse(architectTasks[0].context_json);
 696        assert.strictEqual(context.error_type, 'network');
 697      });
 698    });
 699  
 700    describe('6. Priority Handling', () => {
 701      test('high priority tasks processed first', async () => {
 702        // Create 3 tasks with different priorities
 703        const lowPriorityId = db
 704          .prepare(
 705            `
 706          INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json)
 707          VALUES ('classify_error', 'triage', 'pending', 3, ?)
 708        `
 709          )
 710          .run(
 711            JSON.stringify({ error_message: 'Low priority error', stage: 'proposals' })
 712          ).lastInsertRowid;
 713  
 714        const highPriorityId = db
 715          .prepare(
 716            `
 717          INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json)
 718          VALUES ('classify_error', 'triage', 'pending', 9, ?)
 719        `
 720          )
 721          .run(
 722            JSON.stringify({ error_message: 'High priority error', stage: 'scoring' })
 723          ).lastInsertRowid;
 724  
 725        const mediumPriorityId = db
 726          .prepare(
 727            `
 728          INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json)
 729          VALUES ('classify_error', 'triage', 'pending', 5, ?)
 730        `
 731          )
 732          .run(
 733            JSON.stringify({ error_message: 'Medium priority error', stage: 'enrichment' })
 734          ).lastInsertRowid;
 735  
 736        // Process one task at a time
 737        await agents.triage.pollTasks(1);
 738  
 739        // Verify high priority task was processed first
 740        const firstProcessed = db
 741          .prepare('SELECT * FROM agent_tasks WHERE status = ? ORDER BY completed_at ASC LIMIT 1')
 742          .get('completed');
 743  
 744        assert.strictEqual(firstProcessed.id, highPriorityId, 'High priority task should run first');
 745  
 746        // Process second task
 747        await agents.triage.pollTasks(1);
 748  
 749        const secondProcessed = db
 750          .prepare('SELECT * FROM agent_tasks WHERE status = ? ORDER BY completed_at ASC LIMIT 1, 1')
 751          .get('completed');
 752  
 753        assert.strictEqual(
 754          secondProcessed.id,
 755          mediumPriorityId,
 756          'Medium priority task should run second'
 757        );
 758      });
 759  
 760      test('priority calculation based on severity and stage', async () => {
 761        // Critical error in early stage (scoring) should get high priority
 762        const criticalEarlyId = db
 763          .prepare(
 764            `
 765          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 766          VALUES ('classify_error', 'triage', 'pending', ?)
 767        `
 768          )
 769          .run(
 770            JSON.stringify({
 771              error_message: 'Security breach detected',
 772              stage: 'scoring',
 773              frequency: 1,
 774            })
 775          ).lastInsertRowid;
 776  
 777        await agents.triage.pollTasks(1);
 778  
 779        // Check created task priority
 780        const routedTask = db
 781          .prepare('SELECT * FROM agent_tasks WHERE parent_task_id = ?')
 782          .get(criticalEarlyId);
 783  
 784        assert.ok(routedTask.priority >= 8, 'Critical security error should get priority >= 8');
 785      });
 786    });
 787  
 788    describe('7. Row-level Locking (Concurrent Agents)', () => {
 789      test('concurrent agents do not claim same task', async () => {
 790        // Enable row-level locking
 791        process.env.AGENT_ENABLE_ROW_LOCKING = 'true';
 792        process.env.AGENT_ALLOW_HORIZONTAL_SCALING = 'true';
 793  
 794        // Create a single task
 795        const taskId = db
 796          .prepare(
 797            `
 798          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 799          VALUES ('classify_error', 'triage', 'pending', ?)
 800        `
 801          )
 802          .run(
 803            JSON.stringify({
 804              error_message: 'Test error',
 805              stage: 'scoring',
 806            })
 807          ).lastInsertRowid;
 808  
 809        // Simulate two concurrent agent instances
 810        const agent1 = new TriageAgent();
 811        const agent2 = new TriageAgent();
 812  
 813        await agent1.initialize();
 814        await agent2.initialize();
 815  
 816        // Both agents try to claim the task simultaneously
 817        const [count1, count2] = await Promise.all([agent1.pollTasks(1), agent2.pollTasks(1)]);
 818  
 819        // Only one should have processed the task
 820        assert.strictEqual(count1 + count2, 1, 'Only one agent should claim and process the task');
 821  
 822        // Verify task is completed (not running twice)
 823        const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 824        assert.strictEqual(task.status, 'completed');
 825  
 826        // Clean up
 827        delete process.env.AGENT_ENABLE_ROW_LOCKING;
 828        delete process.env.AGENT_ALLOW_HORIZONTAL_SCALING;
 829      });
 830    });
 831  
 832    describe('8. Known Error Database', () => {
 833      test('similar errors get suggested fixes from past tasks', async () => {
 834        // Step 1: Create a completed fix_bug task (known fix)
 835        // Use old created_at to prevent deduplication from treating it as recent
 836        const completedTaskId = db
 837          .prepare(
 838            `
 839          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, result_json, completed_at, created_at)
 840          VALUES ('fix_bug', 'developer', 'completed', ?, ?, datetime('now', '-1 day'), datetime('now', '-1 day'))
 841        `
 842          )
 843          .run(
 844            JSON.stringify({
 845              error_message:
 846                'TypeError: Cannot read property "conversion_score" of null at scoring.js:100',
 847              error_type: 'null_pointer',
 848              stage: 'scoring',
 849            }),
 850            JSON.stringify({
 851              fix_description: 'Added null check: score?.conversion_score || 0',
 852              files_changed: ['src/scoring.js'],
 853            })
 854          ).lastInsertRowid;
 855  
 856        // Step 2: Create similar error (same type, different line number)
 857        const newTaskId = db
 858          .prepare(
 859            `
 860          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 861          VALUES ('classify_error', 'triage', 'pending', ?)
 862        `
 863          )
 864          .run(
 865            JSON.stringify({
 866              error_message:
 867                'TypeError: Cannot read property "conversion_score" of null at scoring.js:250',
 868              stack_trace: '',
 869              stage: 'scoring',
 870              frequency: 1,
 871            })
 872          ).lastInsertRowid;
 873  
 874        // Process new error
 875        await agents.triage.pollTasks(1);
 876  
 877        // Verify triage found known fix
 878        const newTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(newTaskId);
 879        const result = JSON.parse(newTask.result_json);
 880  
 881        assert.ok(result.known_fix, 'Should detect known fix');
 882        assert.ok(result.known_fix.task_id === completedTaskId, 'Should reference previous fix task');
 883        assert.ok(result.known_fix.similarity >= 0.7, 'Similarity should be >= 70%');
 884  
 885        // Verify routed task includes suggested fix
 886        const routedTask = db
 887          .prepare('SELECT * FROM agent_tasks WHERE parent_task_id = ?')
 888          .get(newTaskId);
 889  
 890        const context = JSON.parse(routedTask.context_json);
 891        assert.ok(context.suggested_fix.includes('null check'), 'Should suggest known fix approach');
 892      });
 893    });
 894  
 895    describe('9. Coverage Gates (Developer Agent)', () => {
 896      test('developer enforces 85% coverage before commit', async () => {
 897        // This test verifies the checkCoverageBeforeCommit gate
 898        // Mock scenario: Developer attempts commit with low coverage
 899  
 900        const taskId = db
 901          .prepare(
 902            `
 903          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 904          VALUES ('fix_bug', 'developer', 'pending', ?)
 905        `
 906          )
 907          .run(
 908            JSON.stringify({
 909              error_message: 'TypeError: x is null',
 910              error_type: 'null_pointer',
 911              stage: 'scoring',
 912              file_path: 'src/score.js',
 913            })
 914          ).lastInsertRowid;
 915  
 916        // Mock file operations via devDeps
 917        devDeps.readFile = () =>
 918          Promise.resolve({ content: 'const x = null; console.log(x.value);', size: 50 });
 919        devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] });
 920        devDeps.editFile = () =>
 921          Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'Added null check' });
 922        devDeps.cleanupBackups = () => Promise.resolve();
 923  
 924        // Mock test runner — tests pass but coverage is below 85% threshold
 925        devDeps.runTestsForFile = () =>
 926          Promise.resolve({
 927            success: true,
 928            stats: { pass: 5 },
 929            coverage: { lines: { pct: 70 } },
 930          });
 931  
 932        // Mock LLM
 933        devDeps.simpleLLMCall = () =>
 934          Promise.resolve(
 935            JSON.stringify({
 936              old_string: 'const x = null; console.log(x.value);',
 937              new_string: 'const x = null; if (x) console.log(x.value);',
 938              explanation: 'Added null check',
 939            })
 940          );
 941  
 942        // Mock coverage check — npm test succeeds but coverage file shows low coverage
 943        devDeps.execSync = () => '';
 944        devDeps.readFileCoverage = path => {
 945          if (path.includes('coverage-summary.json')) {
 946            return Promise.resolve(JSON.stringify({ 'src/score.js': { lines: { pct: 70 } } }));
 947          }
 948          if (path.includes('coverage-final.json')) {
 949            // Return valid coverage data so getDetailedCoverage succeeds
 950            return Promise.resolve(
 951              JSON.stringify({
 952                '/home/jason/code/333Method/src/score.js': {
 953                  lines: { pct: 70 },
 954                  statementMap: { 1: { start: { line: 1 }, end: { line: 1 } } },
 955                  s: { 1: 0 },
 956                },
 957              })
 958            );
 959          }
 960          // Source file read
 961          return Promise.resolve('const x = null; console.log(x.value);');
 962        };
 963  
 964        // Process task
 965        await agents.developer.pollTasks(1);
 966  
 967        // Verify task blocked due to coverage
 968        const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 969  
 970        assert.strictEqual(task.status, 'blocked', 'Task should be blocked due to coverage gate');
 971        assert.ok(
 972          task.error_message.includes('coverage') || task.error_message.includes('85%'),
 973          'Error should mention coverage threshold'
 974        );
 975      });
 976    });
 977  
 978    describe('10. Workflow Dependencies', () => {
 979      test('features require approved design proposal', async () => {
 980        // Try to implement feature without approved design
 981        const featureTaskId = db
 982          .prepare(
 983            `
 984          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
 985          VALUES ('implement_feature', 'developer', 'pending', ?)
 986        `
 987          )
 988          .run(
 989            JSON.stringify({
 990              feature_description: 'Add SMS rate limiting',
 991              requirements: ['Limit to 10 SMS per hour per number'],
 992              files_to_modify: ['src/outreach/sms.js'],
 993            })
 994          ).lastInsertRowid;
 995  
 996        // Process feature task (should block and create design_proposal task)
 997        await agents.developer.pollTasks(1);
 998  
 999        // Verify feature task is blocked
1000        const featureTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(featureTaskId);
1001        assert.strictEqual(
1002          featureTask.status,
1003          'blocked',
1004          'Feature implementation should be blocked without approved design'
1005        );
1006  
1007        // Verify design_proposal task created
1008        const designTasks = db
1009          .prepare('SELECT * FROM agent_tasks WHERE task_type = ? AND assigned_to = ?')
1010          .all('design_proposal', 'architect');
1011  
1012        assert.strictEqual(designTasks.length, 1, 'Design proposal task should be auto-created');
1013      });
1014  
1015      test('approved design enables feature implementation', async () => {
1016        // Step 1: Create approved design proposal
1017        const designTaskId = db
1018          .prepare(
1019            `
1020          INSERT INTO agent_tasks (task_type, assigned_to, status, result_json, approval_json, reviewed_by, completed_at)
1021          VALUES ('design_proposal', 'architect', 'completed', ?, ?, 'ProductOwner', datetime('now'))
1022        `
1023          )
1024          .run(
1025            JSON.stringify({
1026              design_proposal: {
1027                title: 'SMS Rate Limiting',
1028                files_affected: ['src/outreach/sms.js'],
1029                requires_migration: false,
1030              },
1031            }),
1032            JSON.stringify({
1033              decision: 'approved',
1034              reviewer: 'ProductOwner',
1035              timestamp: new Date().toISOString(),
1036            })
1037          ).lastInsertRowid;
1038  
1039        // Step 2: Create feature implementation task with approved parent
1040        const featureTaskId = db
1041          .prepare(
1042            `
1043          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, parent_task_id)
1044          VALUES ('implement_feature', 'developer', 'pending', ?, ?)
1045        `
1046          )
1047          .run(
1048            JSON.stringify({
1049              feature_description: 'Add SMS rate limiting',
1050              requirements: ['Limit to 10 SMS per hour'],
1051              files_to_modify: ['src/outreach/sms.js'],
1052            }),
1053            designTaskId
1054          ).lastInsertRowid;
1055  
1056        // Mock file operations and tests via devDeps
1057        devDeps.readFile = () =>
1058          Promise.resolve({ content: 'export function sendSMS() { /* ... */ }', size: 100 });
1059        devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] });
1060        devDeps.writeFile = () => Promise.resolve({ backupPath: '/tmp/backup.js' });
1061        devDeps.editFile = () =>
1062          Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'Added rate limiting' });
1063        devDeps.runTests = () =>
1064          Promise.resolve({ success: true, stats: { pass: 10 }, coverage: { lines: { pct: 90 } } });
1065        devDeps.simpleLLMCall = () =>
1066          Promise.resolve(
1067            JSON.stringify({
1068              old_string: 'export function sendSMS() {',
1069              new_string: 'const rateLimit = new Map();\nexport function sendSMS() {',
1070              explanation: 'Added rate limiting',
1071            })
1072          );
1073  
1074        // Mock execSync (git add + commit) and coverage file read
1075        devDeps.execSync = cmd => {
1076          if (cmd.includes('git commit')) return 'commit-hash-456\n';
1077          return '';
1078        };
1079        devDeps.readFileCoverage = () =>
1080          Promise.resolve(JSON.stringify({ 'src/outreach/sms.js': { lines: { pct: 90 } } }));
1081  
1082        // Process feature task
1083        await agents.developer.pollTasks(1);
1084  
1085        // Verify feature task completed (not blocked)
1086        const featureTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(featureTaskId);
1087        assert.strictEqual(
1088          featureTask.status,
1089          'completed',
1090          'Feature should implement successfully with approved design'
1091        );
1092      });
1093    });
1094  
1095    describe('11. Agent System Integration', () => {
1096      test('full bug fix workflow: Monitor → Triage → Developer → QA', async () => {
1097        // Step 1: Monitor detects error and creates classify_error task
1098        const monitorTaskId = db
1099          .prepare(
1100            `
1101          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by)
1102          VALUES ('scan_logs', 'monitor', 'pending', ?, 'system')
1103        `
1104          )
1105          .run(
1106            JSON.stringify({
1107              log_file: 'logs/pipeline-2024-01-01.log',
1108              error_patterns: ['TypeError', 'null'],
1109            })
1110          ).lastInsertRowid;
1111  
1112        // Mock log scanning (Monitor would create Triage task)
1113        const triageTaskId = db
1114          .prepare(
1115            `
1116          INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by)
1117          VALUES ('classify_error', 'triage', 'pending', ?, 'monitor')
1118        `
1119          )
1120          .run(
1121            JSON.stringify({
1122              error_message: 'TypeError: Cannot read property "score" of undefined',
1123              stack_trace: 'at Object.<anonymous> (src/score.js:150:10)',
1124              stage: 'scoring',
1125              frequency: 5,
1126            })
1127          ).lastInsertRowid;
1128  
1129        // Step 2: Triage classifies and routes
1130        await agents.triage.pollTasks(1);
1131  
1132        const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
1133        assert.strictEqual(triageTask.status, 'completed');
1134  
1135        // Step 3: Developer fixes bug
1136        const devTasks = db
1137          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
1138          .all('developer', triageTaskId);
1139  
1140        assert.strictEqual(devTasks.length, 1, 'Developer task should be created');
1141  
1142        // Mock Developer operations via devDeps
1143        devDeps.readFile = () => Promise.resolve({ content: 'const score = data.score;', size: 50 });
1144        devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] });
1145        devDeps.editFile = () => Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'fix' });
1146        devDeps.cleanupBackups = () => Promise.resolve();
1147        devDeps.runTestsForFile = () =>
1148          Promise.resolve({ success: true, stats: { pass: 8 }, coverage: { lines: { pct: 90 } } });
1149        devDeps.simpleLLMCall = () =>
1150          Promise.resolve(
1151            JSON.stringify({
1152              old_string: 'const score = data.score;',
1153              new_string: 'const score = data?.score || 0;',
1154              explanation: 'Added null check',
1155            })
1156          );
1157        devDeps.execSync = cmd => {
1158          if (cmd.includes('git commit')) return 'hash-789\n';
1159          return '';
1160        };
1161        devDeps.readFileCoverage = () =>
1162          Promise.resolve(JSON.stringify({ 'src/score.js': { lines: { pct: 90 } } }));
1163  
1164        await agents.developer.pollTasks(1);
1165  
1166        const devTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTasks[0].id);
1167        assert.strictEqual(devTask.status, 'completed', 'Developer should complete fix');
1168  
1169        // Step 4: Verify QA task created
1170        const qaTasks = db
1171          .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?')
1172          .all('qa', devTask.id);
1173  
1174        assert.strictEqual(qaTasks.length, 1, 'QA verification task should be created');
1175  
1176        // Verify complete workflow chain
1177        const allTasks = db
1178          .prepare('SELECT task_type, assigned_to, status FROM agent_tasks ORDER BY id ASC')
1179          .all();
1180  
1181        assert.ok(
1182          allTasks.some(t => t.assigned_to === 'monitor'),
1183          'Monitor task exists'
1184        );
1185        assert.ok(
1186          allTasks.some(t => t.assigned_to === 'triage'),
1187          'Triage task exists'
1188        );
1189        assert.ok(
1190          allTasks.some(t => t.assigned_to === 'developer'),
1191          'Developer task exists'
1192        );
1193        assert.ok(
1194          allTasks.some(t => t.assigned_to === 'qa'),
1195          'QA task exists'
1196        );
1197      });
1198    });
1199  });