Cradicle Explorer

/ __quarantined_tests__ / agents / agents-e2e-implementation.test.js
agents-e2e-implementation.test.js
   1  /**
   2   * Agent System E2E Implementation Tests
   3   *
   4   * Comprehensive end-to-end tests for the complete agent workflow system.
   5   * Tests real-world scenarios including bug fixes, feature implementation,
   6   * security fixes, coverage improvements, rollback on failure, and budget enforcement.
   7   *
   8   * Based on Phase 6.1 Integration & Testing requirements.
   9   */
  10  
  11  import { test, describe, beforeEach, afterEach } from 'node:test';
  12  import assert from 'node:assert/strict';
  13  import Database from 'better-sqlite3';
  14  import { fileURLToPath } from 'url';
  15  import { dirname, join } from 'path';
  16  import { existsSync, unlinkSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'fs';
  17  import { execSync } from 'child_process';
  18  
  19  const __filename = fileURLToPath(import.meta.url);
  20  const __dirname = dirname(__filename);
  21  const projectRoot = join(__dirname, '../..');
  22  
  23  // Use a test database
  24  const TEST_DB_PATH = join(projectRoot, 'db/test-agents-e2e-impl.db');
  25  const TEST_REPO_PATH = join(projectRoot, 'tests/fixtures/test-repo');
  26  
  27  // Import agent system components
  28  import {
  29    createAgentTask,
  30    getAgentTasks,
  31    updateTaskStatus,
  32    completeTask,
  33    failTask,
  34    resetDbConnection,
  35  } from '../../src/agents/utils/task-manager.js';
  36  
  37  /**
  38   * Setup test database with complete schema
  39   */
  40  function setupTestDatabase() {
  41    const db = new Database(TEST_DB_PATH);
  42  
  43    db.exec(`
  44      -- Agent tasks table
  45      CREATE TABLE IF NOT EXISTS agent_tasks (
  46        id INTEGER PRIMARY KEY AUTOINCREMENT,
  47        task_type TEXT NOT NULL,
  48        assigned_to TEXT NOT NULL,
  49        created_by TEXT,
  50        priority INTEGER DEFAULT 5,
  51        status TEXT DEFAULT 'pending',
  52        context_json TEXT,
  53        parent_task_id INTEGER,
  54        result_json TEXT,
  55        error_message TEXT,
  56        retry_count INTEGER DEFAULT 0,
  57        reviewed_by TEXT,
  58        approval_json TEXT,
  59        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  60        started_at TIMESTAMP,
  61        completed_at TIMESTAMP,
  62        FOREIGN KEY (parent_task_id) REFERENCES agent_tasks(id),
  63        CHECK (status IN ('pending', 'running', 'completed', 'failed', 'blocked', 'awaiting_po_approval', 'awaiting_architect_approval'))
  64      );
  65  
  66      CREATE INDEX IF NOT EXISTS idx_agent_tasks_assigned_status
  67        ON agent_tasks(assigned_to, status, priority DESC, created_at ASC);
  68      CREATE INDEX IF NOT EXISTS idx_agent_tasks_parent
  69        ON agent_tasks(parent_task_id);
  70  
  71      -- Agent logs table
  72      CREATE TABLE IF NOT EXISTS agent_logs (
  73        id INTEGER PRIMARY KEY AUTOINCREMENT,
  74        task_id INTEGER,
  75        agent_name TEXT NOT NULL,
  76        log_level TEXT NOT NULL,
  77        message TEXT NOT NULL,
  78        context_json TEXT,
  79        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  80        FOREIGN KEY (task_id) REFERENCES agent_tasks(id)
  81      );
  82  
  83      CREATE INDEX IF NOT EXISTS idx_agent_logs_task ON agent_logs(task_id);
  84      CREATE INDEX IF NOT EXISTS idx_agent_logs_created ON agent_logs(created_at DESC);
  85  
  86      -- Agent state table
  87      CREATE TABLE IF NOT EXISTS agent_state (
  88        agent_name TEXT PRIMARY KEY CHECK(agent_name IN ('developer', 'qa', 'security', 'architect', 'triage', 'monitor')),
  89        last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
  90        current_task_id INTEGER REFERENCES agent_tasks(id),
  91        status TEXT DEFAULT 'idle' CHECK(status IN ('idle', 'working', 'blocked')),
  92        metrics_json TEXT
  93      );
  94  
  95      -- Agent messages table
  96      CREATE TABLE IF NOT EXISTS agent_messages (
  97        id INTEGER PRIMARY KEY AUTOINCREMENT,
  98        from_agent TEXT NOT NULL,
  99        to_agent TEXT NOT NULL,
 100        message_type TEXT NOT NULL,
 101        payload_json TEXT NOT NULL,
 102        read INTEGER DEFAULT 0,
 103        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 104      );
 105  
 106      CREATE INDEX IF NOT EXISTS idx_agent_messages_to_unread
 107        ON agent_messages(to_agent, read, created_at);
 108  
 109      -- Agent budget tracking table
 110      CREATE TABLE IF NOT EXISTS agent_budget (
 111        id INTEGER PRIMARY KEY AUTOINCREMENT,
 112        agent_name TEXT NOT NULL,
 113        date DATE NOT NULL,
 114        api_calls INTEGER DEFAULT 0,
 115        tokens_used INTEGER DEFAULT 0,
 116        cost_usd REAL DEFAULT 0.0,
 117        daily_limit_usd REAL DEFAULT 10.0,
 118        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 119        UNIQUE(agent_name, date)
 120      );
 121    `);
 122  
 123    return db;
 124  }
 125  
 126  /**
 127   * Setup test git repository (simplified - no actual git operations needed for tests)
 128   */
 129  function setupTestRepository() {
 130    // Skip actual git setup for faster tests
 131    // Tests validate workflow logic, not git operations
 132    return TEST_REPO_PATH;
 133  }
 134  
 135  /**
 136   * Mock Anthropic API for testing
 137   */
 138  class MockAnthropicAPI {
 139    constructor() {
 140      this.calls = [];
 141      this.shouldFail = false;
 142      this.responseDelay = 0;
 143    }
 144  
 145    async createMessage(params) {
 146      this.calls.push({ type: 'createMessage', params });
 147  
 148      if (this.shouldFail) {
 149        throw new Error('API call failed (mocked)');
 150      }
 151  
 152      await new Promise(resolve => setTimeout(resolve, this.responseDelay));
 153  
 154      // Return mock response based on the prompt
 155      const prompt = params.messages?.[0]?.content || '';
 156  
 157      // Mock bug fix response
 158      if (prompt.includes('fix') || prompt.includes('bug')) {
 159        return {
 160          content: [
 161            {
 162              type: 'text',
 163              text: JSON.stringify({
 164                analysis: 'Null pointer exception in processData function',
 165                fix: 'Add null check before accessing data.items',
 166                files_changed: ['src/buggy-module.js'],
 167                test_needed: true,
 168              }),
 169            },
 170          ],
 171          usage: { input_tokens: 1000, output_tokens: 200 },
 172        };
 173      }
 174  
 175      // Mock security scan response
 176      if (prompt.includes('security') || prompt.includes('audit')) {
 177        return {
 178          content: [
 179            {
 180              type: 'text',
 181              text: JSON.stringify({
 182                findings: [
 183                  {
 184                    severity: 'medium',
 185                    type: 'missing_validation',
 186                    file: 'src/buggy-module.js',
 187                    line: 3,
 188                    description: 'Missing input validation',
 189                  },
 190                ],
 191              }),
 192            },
 193          ],
 194          usage: { input_tokens: 800, output_tokens: 150 },
 195        };
 196      }
 197  
 198      // Default response
 199      return {
 200        content: [{ type: 'text', text: 'Task completed successfully' }],
 201        usage: { input_tokens: 500, output_tokens: 100 },
 202      };
 203    }
 204  
 205    reset() {
 206      this.calls = [];
 207      this.shouldFail = false;
 208      this.responseDelay = 0;
 209    }
 210  }
 211  
// Shared test state
// Handle returned by setupTestDatabase(); assigned in beforeEach, closed and nulled in afterEach.
let db;
// MockAnthropicAPI instance, recreated in beforeEach for every test.
let mockAPI;
// Value of process.env.DATABASE_PATH captured in beforeEach before it is overridden.
let originalDbPath;
// Value of process.env.AGENT_REALTIME_NOTIFICATIONS captured in beforeEach before it is overridden.
let originalRealtimeNotifications;
 217  
 218  beforeEach(async () => {
 219    // Reset DB connections first
 220    resetDbConnection();
 221  
 222    // Clean up any existing test database
 223    if (existsSync(TEST_DB_PATH)) {
 224      try {
 225        unlinkSync(TEST_DB_PATH);
 226      } catch (e) {
 227        // If locked, wait and retry
 228        await new Promise(resolve => setTimeout(resolve, 200));
 229        try {
 230          unlinkSync(TEST_DB_PATH);
 231        } catch (e2) {
 232          // Ignore - will create new one
 233        }
 234      }
 235    }
 236  
 237    // Setup test database
 238    db = setupTestDatabase();
 239  
 240    // Setup test repository
 241    setupTestRepository();
 242  
 243    // Setup environment
 244    originalDbPath = process.env.DATABASE_PATH;
 245    originalRealtimeNotifications = process.env.AGENT_REALTIME_NOTIFICATIONS;
 246    process.env.DATABASE_PATH = TEST_DB_PATH;
 247    process.env.AGENT_REALTIME_NOTIFICATIONS = 'false'; // Disable spawning in tests
 248  
 249    // Setup mock API
 250    mockAPI = new MockAnthropicAPI();
 251  });
 252  
 253  afterEach(async () => {
 254    // Close database first
 255    if (db) {
 256      try {
 257        db.close();
 258      } catch (e) {
 259        // Already closed
 260      }
 261      db = null;
 262    }
 263  
 264    // Reset connections
 265    resetDbConnection();
 266  
 267    // Wait for any pending operations
 268    await new Promise(resolve => setTimeout(resolve, 200));
 269  
 270    // Restore environment
 271    process.env.DATABASE_PATH = originalDbPath;
 272    process.env.AGENT_REALTIME_NOTIFICATIONS = originalRealtimeNotifications;
 273  
 274    // Clean up test database
 275    if (existsSync(TEST_DB_PATH)) {
 276      try {
 277        unlinkSync(TEST_DB_PATH);
 278      } catch (e) {
 279        // Ignore - may be locked
 280      }
 281    }
 282  
 283    // Clean up test repository
 284    if (existsSync(TEST_REPO_PATH)) {
 285      try {
 286        rmSync(TEST_REPO_PATH, { recursive: true, force: true });
 287      } catch (e) {
 288        // Ignore cleanup errors
 289      }
 290    }
 291  });
 292  
 293  describe('E2E Workflow Tests', () => {
 294    test('Scenario 1: Bug Fix Workflow - Complete Success Path', async () => {
 295      // Create bug task → Developer fixes → Tests pass → QA verifies → Commit created
 296  
 297      // Step 1: Create triage task for classification
 298      const triageTaskId = await createAgentTask({
 299        task_type: 'classify_error',
 300        assigned_to: 'triage',
 301        created_by: 'monitor',
 302        priority: 7,
 303        context: {
 304          error_message: 'TypeError: Cannot read property "items" of null',
 305          stack_trace: 'at processData (src/buggy-module.js:3:15)',
 306          stage: 'processing',
 307          frequency: 5,
 308        },
 309      });
 310  
 311      assert.ok(triageTaskId, 'Triage task should be created');
 312  
 313      // Step 2: Triage classifies and routes to Developer
 314      // (Skip agent initialization for faster tests - we're testing workflow, not initialization)
 315      updateTaskStatus(triageTaskId, 'running');
 316  
 317      // Triage creates developer task
 318      const devTaskId = await createAgentTask({
 319        task_type: 'fix_bug',
 320        assigned_to: 'developer',
 321        created_by: 'triage',
 322        priority: 8,
 323        parent_task_id: triageTaskId,
 324        context: {
 325          error_type: 'null_pointer',
 326          error_message: 'Cannot read property "items" of null',
 327          file: 'src/buggy-module.js',
 328          line: 3,
 329          stage: 'processing',
 330          suggested_fix: 'Add null check for data parameter',
 331        },
 332      });
 333  
 334      completeTask(triageTaskId, {
 335        classification: 'null_pointer',
 336        severity: 'medium',
 337        routed_to: 'developer',
 338        dev_task_id: devTaskId,
 339      });
 340  
 341      // Verify triage completed
 342      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
 343      assert.strictEqual(triageTask.status, 'completed');
 344  
 345      // Step 3: Developer processes bug fix
 346      const devTasks = getAgentTasks('developer', 'pending', 1);
 347      assert.strictEqual(devTasks.length, 1);
 348      assert.strictEqual(devTasks[0].id, devTaskId);
 349  
 350      updateTaskStatus(devTaskId, 'running');
 351  
 352      // Simulate fix (in real scenario, would call LLM and modify files)
 353      const qaTaskId = await createAgentTask({
 354        task_type: 'verify_fix',
 355        assigned_to: 'qa',
 356        created_by: 'developer',
 357        priority: 8,
 358        parent_task_id: devTaskId,
 359        context: {
 360          files_changed: ['src/buggy-module.js'],
 361          fix_commit: 'abc123',
 362          test_file: 'tests/buggy-module.test.js',
 363          original_error: 'Cannot read property "items" of null',
 364        },
 365      });
 366  
 367      completeTask(devTaskId, {
 368        fixed: true,
 369        files_changed: ['src/buggy-module.js'],
 370        qa_task_id: qaTaskId,
 371      });
 372  
 373      // Step 4: QA verifies the fix
 374      const qaTasks = getAgentTasks('qa', 'pending', 1);
 375      assert.strictEqual(qaTasks.length, 1);
 376      assert.strictEqual(qaTasks[0].id, qaTaskId);
 377  
 378      updateTaskStatus(qaTaskId, 'running');
 379  
 380      // Simulate test execution
 381      completeTask(qaTaskId, {
 382        tests_passed: true,
 383        coverage: 92,
 384        verification_complete: true,
 385      });
 386  
 387      // Step 5: Verify complete workflow chain
 388      const allTasks = db
 389        .prepare(
 390          `SELECT id, task_type, assigned_to, status, parent_task_id
 391           FROM agent_tasks
 392           ORDER BY id ASC`
 393        )
 394        .all();
 395  
 396      assert.strictEqual(allTasks.length, 3, 'Should have 3 tasks in workflow');
 397      assert.strictEqual(allTasks[0].assigned_to, 'triage');
 398      assert.strictEqual(allTasks[0].status, 'completed');
 399      assert.strictEqual(allTasks[1].assigned_to, 'developer');
 400      assert.strictEqual(allTasks[1].status, 'completed');
 401      assert.strictEqual(allTasks[2].assigned_to, 'qa');
 402      assert.strictEqual(allTasks[2].status, 'completed');
 403  
 404      // Verify parent-child relationships
 405      assert.strictEqual(allTasks[1].parent_task_id, allTasks[0].id);
 406      assert.strictEqual(allTasks[2].parent_task_id, allTasks[1].id);
 407  
 408      // Verify logs table exists (actual logging happens in real agent execution)
 409      const logTableExists = db
 410        .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='agent_logs'")
 411        .get();
 412      assert.ok(logTableExists, 'agent_logs table should exist');
 413    });
 414  
 415    test('Scenario 2: Feature Implementation - Multi-Agent Collaboration', async () => {
 416      // Create feature task → Developer implements → Tests written → Coverage 85%+ → Commit created
 417  
 418      const featureTaskId = await createAgentTask({
 419        task_type: 'implement_feature',
 420        assigned_to: 'developer',
 421        created_by: 'architect',
 422        priority: 6,
 423        context: {
 424          feature_name: 'Add data validation',
 425          description: 'Add input validation to processData function',
 426          files_to_modify: ['src/buggy-module.js'],
 427          acceptance_criteria: [
 428            'Validate input is not null',
 429            'Validate items is an array',
 430            'Return empty array for invalid input',
 431          ],
 432        },
 433      });
 434  
 435      // Developer implements
 436      updateTaskStatus(featureTaskId, 'running');
 437  
 438      const qaTaskId = await createAgentTask({
 439        task_type: 'write_test',
 440        assigned_to: 'qa',
 441        created_by: 'developer',
 442        priority: 7,
 443        parent_task_id: featureTaskId,
 444        context: {
 445          files_to_test: ['src/buggy-module.js'],
 446          test_instructions: 'Write tests for null/undefined input, empty arrays, invalid types',
 447          coverage_target: 85,
 448        },
 449      });
 450  
 451      completeTask(featureTaskId, {
 452        implemented: true,
 453        files_changed: ['src/buggy-module.js'],
 454        qa_task_id: qaTaskId,
 455      });
 456  
 457      // QA writes tests
 458      updateTaskStatus(qaTaskId, 'running');
 459  
 460      completeTask(qaTaskId, {
 461        tests_written: true,
 462        test_file: 'tests/buggy-module.test.js',
 463        coverage: 88,
 464        all_tests_passing: true,
 465      });
 466  
 467      // Verify workflow
 468      const tasks = db
 469        .prepare('SELECT * FROM agent_tasks ORDER BY id ASC')
 470        .all()
 471        .map(t => ({
 472          ...t,
 473          context_json: JSON.parse(t.context_json || '{}'),
 474          result_json: JSON.parse(t.result_json || '{}'),
 475        }));
 476  
 477      assert.strictEqual(tasks.length, 2);
 478      assert.strictEqual(tasks[0].status, 'completed');
 479      assert.strictEqual(tasks[1].status, 'completed');
 480      assert.ok(tasks[1].result_json.coverage >= 85, 'Coverage should meet 85%+ target');
 481    });
 482  
 483    test('Scenario 3: Security Fix - High Priority Workflow', async () => {
 484      // Security scan finds issue → Security agent fixes → Tests pass → Commit created
 485  
 486      const securityScanTaskId = await createAgentTask({
 487        task_type: 'audit_code',
 488        assigned_to: 'security',
 489        created_by: 'monitor',
 490        priority: 9, // High priority
 491        context: {
 492          files: ['src/buggy-module.js'],
 493          focus_areas: ['input_validation', 'null_checks'],
 494        },
 495      });
 496  
 497      updateTaskStatus(securityScanTaskId, 'running');
 498  
 499      // Security finds vulnerability
 500      const fixTaskId = await createAgentTask({
 501        task_type: 'fix_security_issue',
 502        assigned_to: 'developer',
 503        created_by: 'security',
 504        priority: 10, // Critical
 505        parent_task_id: securityScanTaskId,
 506        context: {
 507          vulnerability_type: 'missing_input_validation',
 508          severity: 'high',
 509          file: 'src/buggy-module.js',
 510          line: 3,
 511          description: 'Function accepts untrusted input without validation',
 512          remediation: 'Add input validation and type checking',
 513        },
 514      });
 515  
 516      completeTask(securityScanTaskId, {
 517        findings_count: 1,
 518        critical: 0,
 519        high: 1,
 520        fix_task_id: fixTaskId,
 521      });
 522  
 523      // Developer fixes
 524      updateTaskStatus(fixTaskId, 'running');
 525  
 526      const qaTaskId = await createAgentTask({
 527        task_type: 'verify_fix',
 528        assigned_to: 'qa',
 529        created_by: 'developer',
 530        priority: 9,
 531        parent_task_id: fixTaskId,
 532        context: {
 533          files_changed: ['src/buggy-module.js'],
 534          security_fix: true,
 535          requires_security_review: true,
 536        },
 537      });
 538  
 539      completeTask(fixTaskId, {
 540        fixed: true,
 541        qa_task_id: qaTaskId,
 542      });
 543  
 544      // QA verifies
 545      updateTaskStatus(qaTaskId, 'running');
 546      completeTask(qaTaskId, {
 547        tests_passed: true,
 548        security_verified: true,
 549      });
 550  
 551      // Verify high priority maintained
 552      const tasks = db.prepare('SELECT * FROM agent_tasks ORDER BY id ASC').all();
 553      assert.ok(
 554        tasks.some(t => t.priority >= 9),
 555        'Should maintain high priority'
 556      );
 557      assert.strictEqual(tasks.filter(t => t.status === 'completed').length, 3);
 558    });
 559  
 560    test('Scenario 4: Coverage Improvement - QA Proactive Testing', async () => {
 561      // QA detects low coverage → Writes tests → Coverage improves → Commit created
 562  
 563      const coverageTaskId = await createAgentTask({
 564        task_type: 'check_coverage',
 565        assigned_to: 'qa',
 566        created_by: 'monitor',
 567        priority: 5,
 568        context: {
 569          target_coverage: 85,
 570          current_coverage: 62,
 571          files_below_target: [
 572            { file: 'src/buggy-module.js', coverage: 45 },
 573            { file: 'src/other-module.js', coverage: 68 },
 574          ],
 575        },
 576      });
 577  
 578      updateTaskStatus(coverageTaskId, 'running');
 579  
 580      // QA creates write_test tasks
 581      const writeTestTaskId = await createAgentTask({
 582        task_type: 'write_test',
 583        assigned_to: 'qa',
 584        created_by: 'qa',
 585        priority: 6,
 586        parent_task_id: coverageTaskId,
 587        context: {
 588          files_to_test: ['src/buggy-module.js'],
 589          current_coverage: 45,
 590          target_coverage: 85,
 591          missing_scenarios: ['null input', 'undefined input', 'invalid types'],
 592        },
 593      });
 594  
 595      completeTask(coverageTaskId, {
 596        analysis_complete: true,
 597        write_test_task_id: writeTestTaskId,
 598      });
 599  
 600      // QA writes tests
 601      updateTaskStatus(writeTestTaskId, 'running');
 602      completeTask(writeTestTaskId, {
 603        tests_written: true,
 604        new_coverage: 88,
 605        improvement: 43,
 606      });
 607  
 608      const result = db
 609        .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
 610        .get(writeTestTaskId);
 611      const resultData = JSON.parse(result.result_json);
 612  
 613      assert.ok(resultData.new_coverage >= 85, 'Should improve coverage to 85%+');
 614      assert.ok(resultData.improvement > 0, 'Should show positive improvement');
 615    });
 616  
 617    test('Scenario 5: Rollback on Failure - Error Recovery', async () => {
 618      // Developer fix breaks tests → Rollback → Retry with different approach
 619  
 620      const bugTaskId = await createAgentTask({
 621        task_type: 'fix_bug',
 622        assigned_to: 'developer',
 623        created_by: 'triage',
 624        priority: 7,
 625        context: {
 626          error_message: 'Function returns incorrect values',
 627          file: 'src/buggy-module.js',
 628        },
 629      });
 630  
 631      // First attempt - fails
 632      updateTaskStatus(bugTaskId, 'running');
 633  
 634      const qaTaskId1 = await createAgentTask({
 635        task_type: 'verify_fix',
 636        assigned_to: 'qa',
 637        created_by: 'developer',
 638        priority: 7,
 639        parent_task_id: bugTaskId,
 640        context: {
 641          files_changed: ['src/buggy-module.js'],
 642          attempt: 1,
 643        },
 644      });
 645  
 646      completeTask(bugTaskId, {
 647        fixed: true,
 648        qa_task_id: qaTaskId1,
 649      });
 650  
 651      // QA finds tests fail
 652      updateTaskStatus(qaTaskId1, 'running');
 653      failTask(qaTaskId1, 'Tests failed: 3 of 10 tests failing after fix');
 654  
 655      // Verify failure recorded
 656      const failedQATask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(qaTaskId1);
 657      assert.strictEqual(failedQATask.status, 'failed');
 658      assert.ok(failedQATask.error_message.includes('Tests failed'));
 659  
 660      // Developer retries with different approach
 661      const retryTaskId = await createAgentTask({
 662        task_type: 'fix_bug',
 663        assigned_to: 'developer',
 664        created_by: 'qa',
 665        priority: 8, // Higher priority for retry
 666        parent_task_id: bugTaskId,
 667        context: {
 668          error_message: 'Function returns incorrect values',
 669          file: 'src/buggy-module.js',
 670          previous_attempt_failed: true,
 671          previous_task_id: bugTaskId,
 672          failure_reason: 'Tests failed: 3 of 10 tests failing',
 673          retry_count: 1,
 674        },
 675      });
 676  
 677      // Second attempt succeeds
 678      updateTaskStatus(retryTaskId, 'running');
 679  
 680      const qaTaskId2 = await createAgentTask({
 681        task_type: 'verify_fix',
 682        assigned_to: 'qa',
 683        created_by: 'developer',
 684        priority: 8,
 685        parent_task_id: retryTaskId,
 686        context: {
 687          files_changed: ['src/buggy-module.js'],
 688          attempt: 2,
 689        },
 690      });
 691  
 692      completeTask(retryTaskId, {
 693        fixed: true,
 694        qa_task_id: qaTaskId2,
 695      });
 696  
 697      updateTaskStatus(qaTaskId2, 'running');
 698      completeTask(qaTaskId2, {
 699        tests_passed: true,
 700        all_tests_passing: true,
 701      });
 702  
 703      // Verify retry workflow
 704      const tasks = db.prepare('SELECT * FROM agent_tasks ORDER BY id ASC').all();
 705      assert.ok(
 706        tasks.some(t => t.status === 'failed'),
 707        'Should have failed task'
 708      );
 709      assert.ok(
 710        tasks.some(t => t.status === 'completed'),
 711        'Should have successful retry'
 712      );
 713  
 714      const retryTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(retryTaskId);
 715      const retryContext = JSON.parse(retryTask.context_json);
 716      assert.strictEqual(retryContext.retry_count, 1, 'Should track retry count');
 717    });
 718  
 719    test('Scenario 6: Budget Enforcement - API Call Limits', async () => {
 720      // Exceed daily budget → API calls blocked → Emergency shutdown
 721  
 722      // Setup budget tracking
 723      const today = new Date().toISOString().split('T')[0];
 724      db.prepare(
 725        `INSERT INTO agent_budget (agent_name, date, api_calls, tokens_used, cost_usd, daily_limit_usd)
 726         VALUES (?, ?, ?, ?, ?, ?)`
 727      ).run('developer', today, 50, 100000, 9.8, 10.0);
 728  
 729      // Create task that would exceed budget
 730      const taskId = await createAgentTask({
 731        task_type: 'implement_feature',
 732        assigned_to: 'developer',
 733        priority: 5,
 734        context: {
 735          feature: 'Complex feature requiring many API calls',
 736        },
 737      });
 738  
 739      // Check budget before processing
 740      const budget = db
 741        .prepare('SELECT * FROM agent_budget WHERE agent_name = ? AND date = ?')
 742        .get('developer', today);
 743  
 744      assert.ok(budget, 'Budget record should exist');
 745      assert.ok(budget.cost_usd < budget.daily_limit_usd, 'Should be under budget initially');
 746  
 747      // Simulate API call that exceeds budget
 748      const newCost = budget.cost_usd + 0.5; // Would exceed $10 limit
 749  
 750      if (newCost >= budget.daily_limit_usd) {
 751        // Block task due to budget
 752        updateTaskStatus(taskId, 'blocked');
 753        db.prepare(
 754          `UPDATE agent_tasks
 755           SET error_message = ?
 756           WHERE id = ?`
 757        ).run('Budget exceeded: Daily limit of $10.00 reached', taskId);
 758  
 759        // Log budget enforcement
 760        db.prepare(
 761          `INSERT INTO agent_logs (task_id, agent_name, log_level, message, context_json)
 762           VALUES (?, ?, ?, ?, ?)`
 763        ).run(
 764          taskId,
 765          'developer',
 766          'warn',
 767          'Task blocked due to budget limit',
 768          JSON.stringify({ cost_usd: newCost, daily_limit_usd: budget.daily_limit_usd })
 769        );
 770      }
 771  
 772      // Verify budget enforcement
 773      const blockedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
 774      assert.strictEqual(blockedTask.status, 'blocked');
 775      assert.ok(blockedTask.error_message.includes('Budget exceeded'));
 776  
 777      const logs = db
 778        .prepare('SELECT * FROM agent_logs WHERE task_id = ? AND log_level = ?')
 779        .all(taskId, 'warn');
 780      assert.ok(logs.length > 0, 'Should log budget enforcement');
 781      assert.ok(
 782        logs.some(l => l.message.includes('budget')),
 783        'Should mention budget in logs'
 784      );
 785  
 786      // Verify emergency shutdown would be triggered
 787      const shouldShutdown = newCost >= budget.daily_limit_usd;
 788      assert.strictEqual(shouldShutdown, true, 'Should trigger emergency shutdown');
 789    });
 790  
 791    test('Scenario 7: Monitor-Driven Bug Discovery - Autonomous Detection Loop', async () => {
 792      // Monitor scans logs → finds error loop → Triage classifies → Developer fixes → QA verifies
 793  
 794      // Step 1: Monitor agent scans logs and detects error loop (>3 errors in 1 hour)
 795      const monitorTaskId = await createAgentTask({
 796        task_type: 'scan_logs',
 797        assigned_to: 'monitor',
 798        created_by: 'cron',
 799        priority: 6,
 800        context: {
 801          log_files: ['logs/pipeline-2026-02-15.log'],
 802          time_window: '1 hour',
 803          error_threshold: 3,
 804        },
 805      });
 806  
 807      updateTaskStatus(monitorTaskId, 'running');
 808  
 809      // Monitor finds error loop (5 occurrences of same error in 1 hour)
 810      const triageTaskId = await createAgentTask({
 811        task_type: 'classify_error',
 812        assigned_to: 'triage',
 813        created_by: 'monitor',
 814        priority: 8, // High priority due to frequency
 815        parent_task_id: monitorTaskId,
 816        context: {
 817          error_message: 'Cannot read property "score" of null',
 818          stack_trace: 'at calculateScore (src/scoring.js:179:12)',
 819          frequency: 5,
 820          time_window: '1 hour',
 821          first_seen: '2026-02-15T10:00:00Z',
 822          last_seen: '2026-02-15T10:45:00Z',
 823          affected_sites: [12824, 12825, 12826, 12827, 12828],
 824        },
 825      });
 826  
 827      completeTask(monitorTaskId, {
 828        errors_found: 1,
 829        error_loop_detected: true,
 830        triage_task_id: triageTaskId,
 831      });
 832  
 833      // Step 2: Triage classifies as null_pointer with priority 8
 834      updateTaskStatus(triageTaskId, 'running');
 835  
 836      const devTaskId = await createAgentTask({
 837        task_type: 'fix_bug',
 838        assigned_to: 'developer',
 839        created_by: 'triage',
 840        priority: 8,
 841        parent_task_id: triageTaskId,
 842        context: {
 843          error_type: 'null_pointer',
 844          error_message: 'Cannot read property "score" of null',
 845          file: 'src/scoring.js',
 846          line: 179,
 847          frequency: 5,
 848          severity: 'high',
 849          suggested_fix: 'Add null check with optional chaining (score?.value)',
 850          similar_errors_count: 0, // New error pattern
 851        },
 852      });
 853  
 854      completeTask(triageTaskId, {
 855        classification: 'null_pointer',
 856        severity: 'high',
 857        priority: 8,
 858        routed_to: 'developer',
 859        dev_task_id: devTaskId,
 860      });
 861  
 862      // Step 3: Developer fixes the bug
 863      updateTaskStatus(devTaskId, 'running');
 864  
 865      const qaTaskId = await createAgentTask({
 866        task_type: 'verify_fix',
 867        assigned_to: 'qa',
 868        created_by: 'developer',
 869        priority: 8,
 870        parent_task_id: devTaskId,
 871        context: {
 872          files_changed: ['src/scoring.js'],
 873          fix_commit: 'abc123',
 874          test_file: 'tests/scoring.test.js',
 875          original_error: 'Cannot read property "score" of null',
 876          coverage_target: 85,
 877        },
 878      });
 879  
 880      completeTask(devTaskId, {
 881        fixed: true,
 882        files_changed: ['src/scoring.js'],
 883        approach: 'Added optional chaining and null check before accessing score property',
 884        qa_task_id: qaTaskId,
 885      });
 886  
 887      // Step 4: QA verifies fix with test coverage check
 888      updateTaskStatus(qaTaskId, 'running');
 889  
 890      completeTask(qaTaskId, {
 891        tests_passed: true,
 892        tests_added: 3,
 893        coverage: 88,
 894        verification_complete: true,
 895      });
 896  
 897      // Verify complete autonomous workflow chain
 898      const allTasks = db
 899        .prepare(
 900          `SELECT id, task_type, assigned_to, status, priority, parent_task_id
 901           FROM agent_tasks
 902           ORDER BY id ASC`
 903        )
 904        .all();
 905  
 906      assert.strictEqual(
 907        allTasks.length,
 908        4,
 909        'Should have 4 tasks in Monitor→Triage→Developer→QA chain'
 910      );
 911      assert.strictEqual(allTasks[0].assigned_to, 'monitor', 'First task should be Monitor');
 912      assert.strictEqual(allTasks[0].status, 'completed');
 913      assert.strictEqual(allTasks[1].assigned_to, 'triage', 'Second task should be Triage');
 914      assert.strictEqual(allTasks[1].status, 'completed');
 915      assert.strictEqual(allTasks[2].assigned_to, 'developer', 'Third task should be Developer');
 916      assert.strictEqual(allTasks[2].status, 'completed');
 917      assert.strictEqual(allTasks[3].assigned_to, 'qa', 'Fourth task should be QA');
 918      assert.strictEqual(allTasks[3].status, 'completed');
 919  
 920      // Verify parent-child chain
 921      assert.strictEqual(
 922        allTasks[1].parent_task_id,
 923        allTasks[0].id,
 924        'Triage parent should be Monitor'
 925      );
 926      assert.strictEqual(
 927        allTasks[2].parent_task_id,
 928        allTasks[1].id,
 929        'Developer parent should be Triage'
 930      );
 931      assert.strictEqual(allTasks[3].parent_task_id, allTasks[2].id, 'QA parent should be Developer');
 932  
 933      // Verify priority escalation (frequency-based)
 934      assert.strictEqual(allTasks[1].priority, 8, 'High frequency should escalate priority to 8');
 935      assert.strictEqual(allTasks[2].priority, 8, 'Priority should propagate through chain');
 936    });
 937  
 938    test('Scenario 8: Known Error Detection - Learning from Past Fixes', async () => {
 939      // Previous error fixed → New similar error occurs → Triage detects 80% similarity →
 940      // Routes with lower priority + known solution context
 941  
 942      // Setup: Populate known error database with previous fix
 943      db.prepare(
 944        `CREATE TABLE IF NOT EXISTS known_errors (
 945          id INTEGER PRIMARY KEY AUTOINCREMENT,
 946          error_signature TEXT NOT NULL,
 947          error_type TEXT NOT NULL,
 948          file_pattern TEXT,
 949          solution_summary TEXT NOT NULL,
 950          fix_commit TEXT,
 951          success_count INTEGER DEFAULT 1,
 952          last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 953          created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 954        )`
 955      ).run();
 956  
 957      db.prepare(
 958        `INSERT INTO known_errors (error_signature, error_type, file_pattern, solution_summary, fix_commit, success_count)
 959         VALUES (?, ?, ?, ?, ?, ?)`
 960      ).run(
 961        'Cannot read property "score" of null',
 962        'null_pointer',
 963        'src/scoring.js',
 964        'Add null check with optional chaining before accessing score property',
 965        '7cc5482',
 966        1
 967      );
 968  
 969      // Step 1: New similar error occurs in different file
 970      const triageTaskId = await createAgentTask({
 971        task_type: 'classify_error',
 972        assigned_to: 'triage',
 973        created_by: 'monitor',
 974        priority: 7,
 975        context: {
 976          error_message: 'Cannot read property "score" of null',
 977          stack_trace: 'at calculateRescoring (src/rescoring.js:234:18)',
 978          frequency: 2,
 979          file: 'src/rescoring.js',
 980          line: 234,
 981        },
 982      });
 983  
 984      // Step 2: Triage detects similarity (80% match with known error)
 985      updateTaskStatus(triageTaskId, 'running');
 986  
 987      // Query known errors for similarity
 988      const knownErrors = db.prepare('SELECT * FROM known_errors').all();
 989      const matchedError = knownErrors.find(
 990        e => e.error_signature === 'Cannot read property "score" of null'
 991      );
 992  
 993      assert.ok(matchedError, 'Should find matching known error');
 994      assert.strictEqual(matchedError.error_type, 'null_pointer');
 995  
 996      // Step 3: Route to Developer with lower priority + solution context
 997      const devTaskId = await createAgentTask({
 998        task_type: 'fix_bug',
 999        assigned_to: 'developer',
1000        created_by: 'triage',
1001        priority: 5, // Lower priority (was 7) because solution is known
1002        parent_task_id: triageTaskId,
1003        context: {
1004          error_type: 'null_pointer',
1005          error_message: 'Cannot read property "score" of null',
1006          file: 'src/rescoring.js',
1007          line: 234,
1008          similarity: 0.8, // 80% match
1009          known_solution: matchedError.solution_summary,
1010          previous_fix_commit: matchedError.fix_commit,
1011          suggested_approach: 'Apply same fix pattern from scoring.js: Add optional chaining',
1012        },
1013      });
1014  
1015      completeTask(triageTaskId, {
1016        classification: 'null_pointer',
1017        severity: 'medium', // Downgraded from high because solution is known
1018        priority: 5,
1019        routed_to: 'developer',
1020        dev_task_id: devTaskId,
1021        known_error_match: true,
1022        similarity_score: 0.8,
1023      });
1024  
1025      // Step 4: Developer applies known fix quickly
1026      updateTaskStatus(devTaskId, 'running');
1027  
1028      const qaTaskId = await createAgentTask({
1029        task_type: 'verify_fix',
1030        assigned_to: 'qa',
1031        created_by: 'developer',
1032        priority: 5,
1033        parent_task_id: devTaskId,
1034        context: {
1035          files_changed: ['src/rescoring.js'],
1036          fix_commit: 'def456',
1037          applied_known_solution: true,
1038          based_on_commit: matchedError.fix_commit,
1039        },
1040      });
1041  
1042      completeTask(devTaskId, {
1043        fixed: true,
1044        files_changed: ['src/rescoring.js'],
1045        approach: 'Applied known fix pattern: optional chaining',
1046        qa_task_id: qaTaskId,
1047      });
1048  
1049      // Step 5: QA verifies
1050      updateTaskStatus(qaTaskId, 'running');
1051      completeTask(qaTaskId, {
1052        tests_passed: true,
1053        coverage: 87,
1054        verification_complete: true,
1055      });
1056  
1057      // Step 6: Update known_errors success count
1058      db.prepare('UPDATE known_errors SET success_count = success_count + 1 WHERE id = ?').run(
1059        matchedError.id
1060      );
1061  
1062      // Verify learning workflow
1063      const triageResult = db
1064        .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
1065        .get(triageTaskId);
1066      const triageData = JSON.parse(triageResult.result_json);
1067  
1068      assert.strictEqual(triageData.known_error_match, true, 'Should detect known error');
1069      assert.strictEqual(triageData.similarity_score, 0.8, 'Should calculate 80% similarity');
1070      assert.strictEqual(triageData.priority, 5, 'Should lower priority for known errors');
1071  
1072      const devResult = db
1073        .prepare('SELECT context_json FROM agent_tasks WHERE id = ?')
1074        .get(devTaskId);
1075      const devContext = JSON.parse(devResult.context_json);
1076  
1077      assert.ok(devContext.known_solution, 'Developer should receive known solution');
1078      assert.ok(devContext.previous_fix_commit, 'Developer should receive previous commit reference');
1079  
1080      // Verify learning database updated
1081      const updatedError = db.prepare('SELECT * FROM known_errors WHERE id = ?').get(matchedError.id);
1082      assert.strictEqual(updatedError.success_count, 2, 'Should increment success count after fix');
1083    });
1084  
1085    test('Scenario 9: Circuit Breaker Auto-Recovery - System Resilience', async () => {
1086      // Agent fails 10 tasks (>30% failure rate) → Circuit breaker triggers →
1087      // Status = blocked → Wait cooldown → Failure rate drops → Auto-recovery
1088  
1089      // Step 1: Setup agent state with circuit breaker metrics
1090      db.prepare(
1091        `INSERT OR REPLACE INTO agent_state (agent_name, status, metrics_json)
1092         VALUES (?, ?, ?)`
1093      ).run(
1094        'developer',
1095        'working',
1096        JSON.stringify({
1097          circuit_breaker: {
1098            state: 'closed',
1099            failure_count: 0,
1100            success_count: 0,
1101            failure_rate: 0,
1102            last_failure: null,
1103          },
1104        })
1105      );
1106  
1107      // Step 2: Create and fail 10 tasks to trigger circuit breaker
1108      const failedTaskIds = [];
1109      for (let i = 0; i < 10; i++) {
1110        const taskId = await createAgentTask({
1111          task_type: 'fix_bug',
1112          assigned_to: 'developer',
1113          priority: 6,
1114          context: { bug_id: i + 1 },
1115        });
1116  
1117        updateTaskStatus(taskId, 'running');
1118        failTask(taskId, `API call failed: Rate limit exceeded (task ${i + 1})`);
1119        failedTaskIds.push(taskId);
1120      }
1121  
1122      // Step 3: Calculate failure rate (10 failed out of 10 total = 100% > 30% threshold)
1123      const totalTasks = 10;
1124      const failedCount = db
1125        .prepare('SELECT COUNT(*) as count FROM agent_tasks WHERE assigned_to = ? AND status = ?')
1126        .get('developer', 'failed').count;
1127  
1128      const failureRate = failedCount / totalTasks;
1129      assert.ok(failureRate > 0.3, 'Failure rate should exceed 30% threshold');
1130  
1131      // Step 4: Trigger circuit breaker (open state, block agent)
1132      const circuitBreakerTriggered = failureRate > 0.3;
1133  
1134      if (circuitBreakerTriggered) {
1135        db.prepare(
1136          `UPDATE agent_state
1137           SET status = ?,
1138               metrics_json = ?
1139           WHERE agent_name = ?`
1140        ).run(
1141          'blocked',
1142          JSON.stringify({
1143            circuit_breaker: {
1144              state: 'open',
1145              failure_count: failedCount,
1146              success_count: 0,
1147              failure_rate: failureRate,
1148              triggered_at: new Date().toISOString(),
1149              cooldown_until: new Date(Date.now() + 30 * 60 * 1000).toISOString(), // 30 min cooldown
1150            },
1151          }),
1152          'developer'
1153        );
1154  
1155        // Log circuit breaker trigger
1156        db.prepare(
1157          `INSERT INTO agent_logs (agent_name, log_level, message, context_json)
1158           VALUES (?, ?, ?, ?)`
1159        ).run(
1160          'developer',
1161          'error',
1162          'Circuit breaker triggered: Failure rate exceeded threshold',
1163          JSON.stringify({
1164            failure_rate: failureRate,
1165            threshold: 0.3,
1166            failed_count: failedCount,
1167            total_count: totalTasks,
1168          })
1169        );
1170      }
1171  
1172      // Verify circuit breaker triggered
1173      const blockedState = db
1174        .prepare('SELECT * FROM agent_state WHERE agent_name = ?')
1175        .get('developer');
1176      assert.strictEqual(blockedState.status, 'blocked', 'Agent should be blocked');
1177  
1178      const metrics = JSON.parse(blockedState.metrics_json);
1179      assert.strictEqual(metrics.circuit_breaker.state, 'open', 'Circuit breaker should be open');
1180      assert.ok(metrics.circuit_breaker.failure_rate > 0.3, 'Should record failure rate > 30%');
1181  
1182      // Step 5: Simulate cooldown period (30 minutes)
1183      // In real scenario, would wait or fast-forward time
1184      // For testing, we'll simulate the auto-recovery check
1185  
1186      // Step 6: Add successful tasks to improve failure rate
1187      const successfulTaskIds = [];
1188      for (let i = 0; i < 20; i++) {
1189        const taskId = await createAgentTask({
1190          task_type: 'fix_bug',
1191          assigned_to: 'developer',
1192          priority: 6,
1193          context: { bug_id: i + 100 },
1194        });
1195  
1196        updateTaskStatus(taskId, 'running');
1197        completeTask(taskId, { fixed: true });
1198        successfulTaskIds.push(taskId);
1199      }
1200  
1201      // Step 7: Recalculate failure rate (10 failed, 20 successful = 33% success, 67% failure)
1202      // Wait, that's still high. Let me recalculate properly.
1203      const newTotalTasks = 30; // 10 failed + 20 successful
1204      const newFailureRate = failedCount / newTotalTasks; // 10/30 = 0.33 (still > 30%)
1205  
1206      // Need more successful tasks to drop below 30%
1207      for (let i = 0; i < 15; i++) {
1208        const taskId = await createAgentTask({
1209          task_type: 'fix_bug',
1210          assigned_to: 'developer',
1211          priority: 6,
1212          context: { bug_id: i + 200 },
1213        });
1214  
1215        updateTaskStatus(taskId, 'running');
1216        completeTask(taskId, { fixed: true });
1217        successfulTaskIds.push(taskId);
1218      }
1219  
1220      // Now: 10 failed, 35 successful = 45 total, failure rate = 10/45 = 22% (< 30% threshold)
1221      const finalTotalTasks = 45;
1222      const finalFailureRate = failedCount / finalTotalTasks;
1223      assert.ok(finalFailureRate < 0.3, 'Failure rate should drop below 30%');
1224  
1225      // Step 8: Auto-recovery - Reset circuit breaker to half-open
1226      if (finalFailureRate < 0.3) {
1227        db.prepare(
1228          `UPDATE agent_state
1229           SET status = ?,
1230               metrics_json = ?
1231           WHERE agent_name = ?`
1232        ).run(
1233          'idle',
1234          JSON.stringify({
1235            circuit_breaker: {
1236              state: 'half-open',
1237              failure_count: failedCount,
1238              success_count: 35,
1239              failure_rate: finalFailureRate,
1240              recovered_at: new Date().toISOString(),
1241            },
1242          }),
1243          'developer'
1244        );
1245  
1246        db.prepare(
1247          `INSERT INTO agent_logs (agent_name, log_level, message, context_json)
1248           VALUES (?, ?, ?, ?)`
1249        ).run(
1250          'developer',
1251          'info',
1252          'Circuit breaker auto-recovery: Failure rate below threshold',
1253          JSON.stringify({
1254            failure_rate: finalFailureRate,
1255            threshold: 0.3,
1256            state: 'half-open',
1257          })
1258        );
1259      }
1260  
1261      // Verify auto-recovery
1262      const recoveredState = db
1263        .prepare('SELECT * FROM agent_state WHERE agent_name = ?')
1264        .get('developer');
1265      assert.strictEqual(recoveredState.status, 'idle', 'Agent should be recovered to idle');
1266  
1267      const recoveredMetrics = JSON.parse(recoveredState.metrics_json);
1268      assert.strictEqual(
1269        recoveredMetrics.circuit_breaker.state,
1270        'half-open',
1271        'Circuit breaker should be half-open'
1272      );
1273      assert.ok(
1274        recoveredMetrics.circuit_breaker.failure_rate < 0.3,
1275        'Failure rate should be below threshold'
1276      );
1277  
1278      // Verify logging of circuit breaker events
1279      const cbLogs = db
1280        .prepare(
1281          `SELECT * FROM agent_logs
1282           WHERE agent_name = ?
1283           AND message LIKE '%circuit breaker%'
1284           ORDER BY created_at ASC`
1285        )
1286        .all('developer');
1287  
1288      assert.ok(cbLogs.length >= 2, 'Should have trigger and recovery logs');
1289      assert.ok(
1290        cbLogs.some(l => l.message.includes('triggered')),
1291        'Should log trigger event'
1292      );
1293      assert.ok(
1294        cbLogs.some(l => l.message.includes('auto-recovery')),
1295        'Should log recovery event'
1296      );
1297  
1298      // Verify resilience: Agent can now accept new tasks
1299      const newTaskId = await createAgentTask({
1300        task_type: 'fix_bug',
1301        assigned_to: 'developer',
1302        priority: 6,
1303        context: { test_recovery: true },
1304      });
1305  
1306      const newTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(newTaskId);
1307      assert.strictEqual(newTask.status, 'pending', 'Should accept new tasks after recovery');
1308    });
1309  });
1310  
/**
 * Scenario 10: an SLO breach is escalated through
 * Monitor → Architect → Developer → QA.
 *
 * Each stage creates the next stage's task (linked via `parent_task_id`) and
 * is then completed. The test finally reads every row of `agent_tasks` and
 * checks the agent order, that all rows are `completed`, and that each task's
 * parent is the task before it.
 */
test('Scenario 10: SLO Violation → Performance Investigation', async () => {
  const sloName = 'serps_to_assets_p95_latency';
  const optimizedFiles = ['src/capture.js', 'src/utils/browser-pool.js'];

  // Step 1: Monitor runs the SLO compliance check (P95 latency, serps_to_assets stage).
  const sloCheckTaskId = await createAgentTask({
    task_type: 'check_slo_compliance',
    assigned_to: 'monitor',
    created_by: 'cron',
    priority: 6,
    context: {
      slo_name: sloName,
      target_minutes: 60,
      actual_minutes: 75,
      breach_severity: 'moderate',
      affected_count: 423,
    },
  });

  updateTaskStatus(sloCheckTaskId, 'running');

  // Breach found (P95 of 75 min against a 60 min target): ask Architect for an optimization.
  const architectTaskId = await createAgentTask({
    task_type: 'design_optimization',
    assigned_to: 'architect',
    created_by: 'monitor',
    priority: 8, // SLO breaches are escalated
    parent_task_id: sloCheckTaskId,
    context: {
      slo_name: sloName,
      current_p95: 75,
      target_p95: 60,
      breach_percentage: 25, // 25% over target
      stage: 'serps_to_assets',
      bottleneck_candidates: ['capture.js', 'screenshot processing', 'network latency'],
    },
  });

  completeTask(sloCheckTaskId, {
    slo_breached: true,
    architect_task_id: architectTaskId,
  });

  // Step 2: Architect identifies the bottleneck and hands a refactor to Developer.
  updateTaskStatus(architectTaskId, 'running');

  const devTaskId = await createAgentTask({
    task_type: 'suggest_refactor',
    assigned_to: 'developer',
    created_by: 'architect',
    priority: 8,
    parent_task_id: architectTaskId,
    context: {
      bottleneck_found: 'Playwright browser launch taking 15-20s per site',
      optimization_approach: 'Implement browser pooling with 5 persistent browsers',
      expected_improvement: '60% reduction in P95 latency',
      files_to_modify: ['src/capture.js', 'src/utils/browser-pool.js'],
      estimated_effort_hours: 4,
    },
  });

  completeTask(architectTaskId, {
    analysis_complete: true,
    bottleneck_identified: 'Browser launch overhead',
    dev_task_id: devTaskId,
  });

  // Step 3: Developer implements the optimization and queues verification.
  updateTaskStatus(devTaskId, 'running');

  const qaTaskId = await createAgentTask({
    task_type: 'verify_fix',
    assigned_to: 'qa',
    created_by: 'developer',
    priority: 8,
    parent_task_id: devTaskId,
    context: {
      files_changed: optimizedFiles,
      optimization_type: 'performance',
      requires_load_testing: true,
    },
  });

  completeTask(devTaskId, {
    optimization_complete: true,
    qa_task_id: qaTaskId,
  });

  // Step 4: QA confirms the performance gain.
  updateTaskStatus(qaTaskId, 'running');
  completeTask(qaTaskId, {
    tests_passed: true,
    performance_verified: true,
    new_p95_latency: 42, // Down from 75 minutes
    improvement_percentage: 44,
  });

  // Verify the SLO-driven workflow, reading every task in creation order.
  const selectAllTasks = db.prepare(
    `SELECT id, task_type, assigned_to, status, parent_task_id
         FROM agent_tasks
         ORDER BY id ASC`
  );
  const allTasks = selectAllTasks.all();

  assert.strictEqual(allTasks.length, 4, 'Should have Monitor→Architect→Developer→QA chain');

  const expectedAgents = ['monitor', 'architect', 'developer', 'qa'];
  expectedAgents.forEach((agent, index) => {
    assert.strictEqual(allTasks[index].assigned_to, agent);
  });

  // Every stage must have finished.
  const unfinished = allTasks.filter(t => t.status !== 'completed');
  assert.ok(unfinished.length === 0, 'All tasks should be completed');

  // Each task after the first must point at its predecessor.
  for (let i = 1; i < expectedAgents.length; i++) {
    assert.strictEqual(allTasks[i].parent_task_id, allTasks[i - 1].id);
  }
});
1433  
/**
 * Scenario 11: a threat-model task leads to a security fix and its verification.
 *
 * Security's threat-model task spawns a critical `fix_security_issue` task for
 * Developer, which in turn spawns a QA verification task. The test then reads
 * back the threat-model result and the fix task context and checks the STRIDE
 * counts, the DREAD score, the vulnerability type and the mitigation list.
 */
test('Scenario 11: Threat Modeling Workflow', async () => {
  const targetFile = 'src/scoring.js';

  // Step 1: Security starts a threat model of scoring.js.
  const threatModelTaskId = await createAgentTask({
    task_type: 'threat_model',
    assigned_to: 'security',
    created_by: 'architect',
    priority: 7,
    context: {
      files: [targetFile],
      feature: 'AI-powered scoring system',
      threat_frameworks: ['STRIDE', 'DREAD'],
    },
  });

  updateTaskStatus(threatModelTaskId, 'running');

  // Step 2: the analysis reports a SQL injection risk; a fix task goes to Developer.
  const mitigationStrategies = [
    'Use parameterized queries',
    'Input validation with allowlist',
    'Escape special SQL characters',
  ];

  const fixTaskId = await createAgentTask({
    task_type: 'fix_security_issue',
    assigned_to: 'developer',
    created_by: 'security',
    priority: 10, // Critical severity
    parent_task_id: threatModelTaskId,
    context: {
      vulnerability_type: 'sql_injection',
      severity: 'critical',
      dread_score: 8.5,
      file: targetFile,
      line: 142,
      stride_category: 'Tampering',
      description: 'Unsanitized input passed to database query',
      mitigation_strategies: mitigationStrategies,
      affected_endpoints: ['POST /api/score', 'PUT /api/rescore'],
    },
  });

  const strideResults = {
    spoofing: 0,
    tampering: 1,
    repudiation: 0,
    information_disclosure: 0,
    denial_of_service: 0,
    elevation_of_privilege: 0,
  };

  completeTask(threatModelTaskId, {
    threats_found: 1,
    critical_threats: 1,
    high_threats: 0,
    medium_threats: 0,
    fix_task_id: fixTaskId,
    stride_results: strideResults,
  });

  // Step 3: Developer fixes the issue and queues QA verification.
  updateTaskStatus(fixTaskId, 'running');

  const qaTaskId = await createAgentTask({
    task_type: 'verify_fix',
    assigned_to: 'qa',
    created_by: 'developer',
    priority: 10,
    parent_task_id: fixTaskId,
    context: {
      files_changed: [targetFile],
      security_fix: true,
      requires_security_review: true,
      mitigation_applied: 'Parameterized queries',
    },
  });

  completeTask(fixTaskId, {
    fixed: true,
    mitigation_strategy: 'Parameterized queries with input validation',
    qa_task_id: qaTaskId,
  });

  // Step 4: QA verifies the security fix.
  updateTaskStatus(qaTaskId, 'running');
  completeTask(qaTaskId, {
    tests_passed: true,
    security_verified: true,
    vulnerability_closed: true,
  });

  // Verify: the stored threat-model result.
  const { result_json: threatResultJson } = db
    .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
    .get(threatModelTaskId);
  const threatData = JSON.parse(threatResultJson);

  assert.strictEqual(threatData.threats_found, 1, 'Should find 1 threat');
  assert.strictEqual(threatData.critical_threats, 1, 'Should be critical severity');
  assert.strictEqual(threatData.stride_results.tampering, 1, 'Should categorize as Tampering');

  // Verify: the fix task context carries the DREAD score and mitigations.
  const { context_json: fixContextJson } = db
    .prepare('SELECT context_json FROM agent_tasks WHERE id = ?')
    .get(fixTaskId);
  const fixContext = JSON.parse(fixContextJson);

  assert.strictEqual(fixContext.dread_score, 8.5, 'Should have DREAD score of 8.5');
  assert.strictEqual(fixContext.vulnerability_type, 'sql_injection');
  assert.ok(fixContext.mitigation_strategies.length >= 3, 'Should provide mitigation strategies');
});
1541  
/**
 * Scenario 12: three tasks for three different agents are in flight together.
 *
 * The tasks are created one after another (each creation is awaited), moved to
 * `running` in priority order, and completed in a different order. The test
 * checks the pending/running/completed row counts at each stage, that every
 * row kept its original `assigned_to`, and that the stored priorities sort as
 * 8, 7, 6.
 */
test('Scenario 12: Concurrent Multi-Agent Execution', async () => {
  // Step 1: create one task each for developer, qa and security (in that order).
  const taskSpecs = [
    {
      task_type: 'fix_bug',
      assigned_to: 'developer',
      priority: 8,
      context: {
        error: 'Null pointer in scoring.js',
      },
    },
    {
      task_type: 'write_test',
      assigned_to: 'qa',
      priority: 6,
      context: {
        test_file: 'tests/scoring.test.js',
      },
    },
    {
      task_type: 'audit_code',
      assigned_to: 'security',
      priority: 7,
      context: {
        files: ['src/scoring.js'],
      },
    },
  ];

  // Creation stays sequential so the ids are assigned in spec order.
  const createdIds = [];
  for (const spec of taskSpecs) {
    createdIds.push(await createAgentTask(spec));
  }
  const [task1Id, task2Id, task3Id] = createdIds;

  // All three should be waiting to be claimed.
  const selectPending = db.prepare(
    'SELECT id, assigned_to, priority, status FROM agent_tasks WHERE status = ?'
  );
  const allPending = selectPending.all('pending');

  assert.strictEqual(allPending.length, 3, 'Should have 3 pending tasks');

  // Step 2: agents claim in priority order — task1 (P8), task3 (P7), task2 (P6).
  for (const claimedId of [task1Id, task3Id, task2Id]) {
    updateTaskStatus(claimedId, 'running');
  }

  // No status conflicts: every task is running at the same time.
  const selectRunning = db.prepare('SELECT id, assigned_to FROM agent_tasks WHERE status = ?');
  const allRunning = selectRunning.all('running');

  assert.strictEqual(allRunning.length, 3, 'All 3 tasks should be running concurrently');

  // Step 3: finish in a different order than the tasks were claimed.
  completeTask(task3Id, { findings: [] }); // Security is done first
  completeTask(task1Id, { fixed: true }); // Developer is done second
  completeTask(task2Id, { tests_written: true }); // QA is done last

  // Every task should now be completed.
  const selectCompleted = db.prepare('SELECT id FROM agent_tasks WHERE status = ?');
  const allCompleted = selectCompleted.all('completed');

  assert.strictEqual(allCompleted.length, 3, 'All 3 tasks should be completed');

  // Assignment integrity: rows, in id order, still belong to their original agents.
  const tasks = db.prepare('SELECT id, assigned_to FROM agent_tasks ORDER BY id ASC').all();

  assert.strictEqual(tasks[0].assigned_to, 'developer');
  assert.strictEqual(tasks[1].assigned_to, 'qa');
  assert.strictEqual(tasks[2].assigned_to, 'security');

  // Stored priorities, highest first.
  const tasksByPriority = db
    .prepare('SELECT id, priority FROM agent_tasks ORDER BY priority DESC, id ASC')
    .all();
  const [highest, middle, lowest] = tasksByPriority;

  assert.strictEqual(highest.priority, 8, 'Highest priority should be 8');
  assert.strictEqual(middle.priority, 7, 'Second priority should be 7');
  assert.strictEqual(lowest.priority, 6, 'Third priority should be 6');
});
1620  
/**
 * Scenario 13: a code change triggers a documentation update task.
 *
 * A developer feature task creates a child `update_documentation` task for
 * Architect. Both are completed by hand, and the test checks the stored result
 * of the documentation task and its `parent_task_id` link to the feature task.
 */
test('Scenario 13: Documentation Automation', async () => {
  const changedSource = 'src/agents/qa.js';
  const affectedDoc = 'docs/06-automation/agent-system.md';

  // Step 1: Developer works on a feature that touches src/agents/qa.js.
  const devTaskId = await createAgentTask({
    task_type: 'implement_feature',
    assigned_to: 'developer',
    priority: 6,
    context: {
      feature: 'Add load testing capability to QA agent',
      files_to_modify: [changedSource],
    },
  });

  updateTaskStatus(devTaskId, 'running');

  // The developer task requests a documentation update from Architect.
  const docTaskId = await createAgentTask({
    task_type: 'update_documentation',
    assigned_to: 'architect',
    created_by: 'developer',
    priority: 5,
    parent_task_id: devTaskId,
    context: {
      code_changed: [changedSource],
      docs_affected: [affectedDoc],
      new_methods: ['runLoadTest()'],
      change_summary: 'Added load testing capability to QA agent',
    },
  });

  completeTask(devTaskId, {
    feature_complete: true,
    docs_task_id: docTaskId,
  });

  // Step 2: Architect picks the documentation task up.
  updateTaskStatus(docTaskId, 'running');

  // The documentation generation itself is simulated by completing the task with a canned result.
  completeTask(docTaskId, {
    docs_updated: true,
    files_modified: [affectedDoc],
    commit_sha: 'abc123',
    doc_sections_updated: ['QA Agent - Load Testing'],
  });

  // Verify: stored result of the documentation task.
  const { result_json: docResultJson } = db
    .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
    .get(docTaskId);
  const docData = JSON.parse(docResultJson);

  assert.strictEqual(docData.docs_updated, true, 'Docs should be updated');
  assert.ok(
    docData.files_modified.includes('docs/06-automation/agent-system.md'),
    'Should update agent-system.md'
  );
  assert.ok(docData.commit_sha, 'Should create commit');

  // Verify: the documentation task is a child of the developer task.
  const { parent_task_id: docParentId } = db
    .prepare('SELECT parent_task_id FROM agent_tasks WHERE id = ?')
    .get(docTaskId);
  assert.strictEqual(docParentId, devTaskId, 'Doc task should be child of dev task');
});
1683  
/**
 * Scenario 14 - design review workflow.
 *
 * A developer files a `design_proposal` task for the architect, the architect
 * approves it with conditions and creates an `implement_feature` child task,
 * and the developer completes that task. The stored results of both tasks are
 * then read back and checked for the approval data and the `conditions_met`
 * flag.
 */
test('Scenario 14: Design Review Workflow', async () => {
  // Flow: design proposal → architect review → approval/rejection

  // File lists shared by the proposal, the implementation task and its result.
  const sourceFiles = ['src/dashboard.js', 'src/api/metrics.js', 'src/utils/sse.js'];
  const migrationFile = 'db/migrations/055-metrics-table.sql';

  // Reads a task's stored result payload and decodes it from JSON.
  const loadResult = taskId => {
    const row = db.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(taskId);
    return JSON.parse(row.result_json);
  };

  // Setup: make sure known_errors exists (kept for Scenario 8 compatibility).
  const createKnownErrors = db.prepare(
    `CREATE TABLE IF NOT EXISTS known_errors (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        error_signature TEXT NOT NULL,
        error_type TEXT NOT NULL,
        file_pattern TEXT,
        solution_summary TEXT NOT NULL,
        fix_commit TEXT,
        success_count INTEGER DEFAULT 1,
        last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      )`
  );
  createKnownErrors.run();

  // Step 1: the developer proposes a feature that touches more than 4 files.
  const proposalContext = {
    feature_description: 'Add real-time pipeline monitoring dashboard',
    files_affected: [...sourceFiles, migrationFile, 'docs/DASHBOARD.md'],
    estimated_effort_hours: 12,
    requires_review: true,
  };
  const proposalId = await createAgentTask({
    task_type: 'design_proposal',
    assigned_to: 'architect',
    created_by: 'developer',
    priority: 7,
    context: proposalContext,
  });

  // At this point the task is waiting on the architect.
  // Note: this would use the 'awaiting_architect_approval' status if migration 051 is applied.

  // Step 2: the architect reviews the design (complexity, patterns, security).
  updateTaskStatus(proposalId, 'running');

  const verdict = {
    approved: true,
    complexity_score: 7, // scored out of 10
    pattern_violations: [],
    security_concerns: [],
    feedback:
      'Design looks good. Consider using WebSockets instead of SSE for bi-directional communication.',
    conditions: ['Add rate limiting to metrics API', 'Document SSE connection lifecycle'],
  };

  // The approval is followed by an implementation task parented to the proposal.
  const implementationId = await createAgentTask({
    task_type: 'implement_feature',
    assigned_to: 'developer',
    created_by: 'architect',
    priority: 7,
    parent_task_id: proposalId,
    context: {
      design_approved: true,
      approval_conditions: verdict.conditions,
      files_to_modify: [...sourceFiles],
    },
  });

  const { approved, feedback, conditions } = verdict;
  completeTask(proposalId, {
    review_complete: true,
    approved,
    feedback,
    conditions,
    impl_task_id: implementationId,
  });

  // Step 3: the developer implements the feature with the conditions applied.
  updateTaskStatus(implementationId, 'running');

  completeTask(implementationId, {
    implemented: true,
    conditions_met: true,
    files_changed: [...sourceFiles, migrationFile],
  });

  // Check what the review stored on the proposal task.
  const reviewOutcome = loadResult(proposalId);

  assert.strictEqual(reviewOutcome.approved, true, 'Design should be approved');
  assert.ok(reviewOutcome.conditions.length > 0, 'Should have approval conditions');
  assert.ok(reviewOutcome.feedback, 'Should provide feedback');

  // Check that the implementation recorded the conditions as met.
  const implementationOutcome = loadResult(implementationId);

  assert.strictEqual(implementationOutcome.conditions_met, true, 'Should meet approval conditions');
});
1790  
/**
 * Scenario 15 - agent learning from outcomes.
 *
 * Runs ten developer `fix_bug` tasks (eight `null_pointer` fixes that are
 * completed, two `database` fixes that are failed), writes one row per task to
 * a `structured_logs` table, aggregates those rows by error type in SQL, and
 * derives a recommendation for each error type from its success rate.
 */
test('Scenario 15: Agent Learning from Outcomes', async () => {
  // Developer completes 10 tasks → 8 succeed, 2 fail → learning analysis

  // Setup: add the structured logging table used by this scenario.
  db.prepare(
    `CREATE TABLE IF NOT EXISTS structured_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        agent_name TEXT NOT NULL,
        task_id INTEGER,
        task_type TEXT,
        outcome TEXT,
        success INTEGER,
        context_json TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (task_id) REFERENCES agent_tasks(id)
      )`
  ).run();

  // Prepared once (after the table exists) and reused for every task below.
  const insertOutcomeLog = db.prepare(
    `INSERT INTO structured_logs (agent_name, task_id, task_type, outcome, success, context_json)
       VALUES (?, ?, ?, ?, ?, ?)`
  );

  /**
   * Creates one developer fix_bug task, moves it to 'running', settles it via
   * `settle`, and records the outcome in structured_logs.
   *
   * @param {object} spec
   * @param {string} spec.errorType - Stored as `error_type` in the task and log context.
   * @param {string} spec.errorMessage - Stored as `error_message` in the task context.
   * @param {string} spec.outcome - Value for the `outcome` log column.
   * @param {boolean} spec.succeeded - Logged as 1 (true) or 0 (false) in `success`.
   * @param {(taskId: number) => void} spec.settle - Completes or fails the task.
   */
  const runFixBugTask = async ({ errorType, errorMessage, outcome, succeeded, settle }) => {
    const taskId = await createAgentTask({
      task_type: 'fix_bug',
      assigned_to: 'developer',
      priority: 6,
      context: {
        error_type: errorType,
        error_message: errorMessage,
      },
    });

    updateTaskStatus(taskId, 'running');
    settle(taskId);

    insertOutcomeLog.run(
      'developer',
      taskId,
      'fix_bug',
      outcome,
      succeeded ? 1 : 0,
      JSON.stringify({ error_type: errorType })
    );
  };

  // Step 1: create and settle 10 fix_bug tasks.

  // 8 successful null_pointer fixes
  for (let attempt = 1; attempt <= 8; attempt++) {
    await runFixBugTask({
      errorType: 'null_pointer',
      errorMessage: `Null pointer error ${attempt}`,
      outcome: 'null_pointer_fix_success',
      succeeded: true,
      settle: taskId =>
        completeTask(taskId, {
          fixed: true,
          approach: 'Added null check with optional chaining',
        }),
    });
  }

  // 2 failed database fixes
  for (let attempt = 1; attempt <= 2; attempt++) {
    await runFixBugTask({
      errorType: 'database',
      errorMessage: `Database error ${attempt}`,
      outcome: 'database_fix_failed',
      succeeded: false,
      settle: taskId => failTask(taskId, 'Database migration failed - schema conflict'),
    });
  }

  // Step 2: analyze outcomes by error type.
  const outcomeAnalysis = db
    .prepare(
      `
      SELECT
        json_extract(context_json, '$.error_type') as error_type,
        COUNT(*) as total,
        SUM(success) as successes,
        (CAST(SUM(success) AS REAL) / COUNT(*)) * 100 as success_rate
      FROM structured_logs
      WHERE agent_name = 'developer'
        AND task_type = 'fix_bug'
      GROUP BY json_extract(context_json, '$.error_type')
    `
    )
    .all();

  // Verify learning insights.
  const nullPointerResults = outcomeAnalysis.find(r => r.error_type === 'null_pointer');
  const databaseResults = outcomeAnalysis.find(r => r.error_type === 'database');

  assert.ok(nullPointerResults, 'Should have null_pointer results');
  assert.strictEqual(nullPointerResults.total, 8, 'Should have 8 null_pointer fixes');
  assert.strictEqual(nullPointerResults.successes, 8, 'All null_pointer fixes should succeed');
  assert.strictEqual(
    nullPointerResults.success_rate,
    100,
    'null_pointer should have 100% success rate'
  );

  assert.ok(databaseResults, 'Should have database results');
  assert.strictEqual(databaseResults.total, 2, 'Should have 2 database fixes');
  assert.strictEqual(databaseResults.successes, 0, 'All database fixes should fail');
  assert.strictEqual(databaseResults.success_rate, 0, 'database should have 0% success rate');

  // Step 3: learning recommendations based on outcomes. Only the two extreme
  // rates (100% and 0%) produce a recommendation; anything in between is skipped.
  const learnings = outcomeAnalysis.flatMap(({ error_type, total, success_rate }) => {
    if (success_rate === 100) {
      return [
        {
          error_type,
          recommendation: 'continue_approach',
          reason: `${total} successful fixes demonstrate this approach works well`,
        },
      ];
    }
    if (success_rate === 0) {
      return [
        {
          error_type,
          recommendation: 'avoid_pattern',
          reason: `${total} failed attempts suggest different approach needed`,
        },
      ];
    }
    return [];
  });

  assert.strictEqual(learnings.length, 2, 'Should have 2 learning insights');
  assert.ok(
    learnings.some(
      l => l.error_type === 'null_pointer' && l.recommendation === 'continue_approach'
    ),
    'Should recommend continuing null_pointer approach'
  );
  assert.ok(
    learnings.some(l => l.error_type === 'database' && l.recommendation === 'avoid_pattern'),
    'Should recommend avoiding database approach'
  );
});
1943  
/**
 * Scenario 16 - full multi-agent pipeline.
 *
 * Six tasks are created in sequence, one per agent (monitor, triage,
 * developer, qa, security, architect). Each agent's task is moved to
 * 'running', creates the next agent's task with itself as parent, and is then
 * completed with a result that references the follow-up task. The test then
 * checks that exactly six tasks exist, that six distinct agents are assigned,
 * that every task is completed, and that each task's parent is the task
 * created immediately before it.
 */
test('Scenario 16: Full Multi-Agent Pipeline', async () => {
  // Monitor → Triage → Developer → QA → Security → Architect (all 6 agents collaborate)

  const rescoringFile = 'src/rescoring.js';

  // Step 1: the monitor scans logs and finds the pipeline stuck (check_pipeline_health).
  const monitorId = await createAgentTask({
    task_type: 'check_pipeline_health',
    assigned_to: 'monitor',
    created_by: 'cron',
    priority: 8,
    context: {
      recurring: true,
    },
  });

  updateTaskStatus(monitorId, 'running');

  // The monitor reports 1000 sites stuck at the 'scored' stage and hands off to triage.
  const blockageReport = {
    error_type: 'pipeline_blockage',
    error_message: '1000 sites stuck at scored stage',
    stage: 'rescoring',
    affected_count: 1000,
  };
  const triageId = await createAgentTask({
    task_type: 'classify_error',
    assigned_to: 'triage',
    created_by: 'monitor',
    priority: 9,
    parent_task_id: monitorId,
    context: blockageReport,
  });

  completeTask(monitorId, {
    pipeline_issue_detected: true,
    affected_sites: 1000,
    triage_task_id: triageId,
  });

  // Step 2: triage classifies the problem as pipeline_blockage.
  updateTaskStatus(triageId, 'running');

  const bugContext = {
    error_type: 'pipeline_blockage',
    error_message: 'Rescoring stuck due to circuit breaker',
    file: rescoringFile,
    affected_count: 1000,
  };
  const developerId = await createAgentTask({
    task_type: 'fix_bug',
    assigned_to: 'developer',
    created_by: 'triage',
    priority: 9,
    parent_task_id: triageId,
    context: bugContext,
  });

  completeTask(triageId, {
    classification: 'pipeline_blockage',
    severity: 'high',
    dev_task_id: developerId,
  });

  // Step 3: the developer investigates and finds the rescoring bug.
  updateTaskStatus(developerId, 'running');

  const testPlan = {
    files_to_test: [rescoringFile],
    test_scenarios: ['circuit breaker recovery', 'rescoring retry logic'],
    coverage_target: 90,
  };
  const qaId = await createAgentTask({
    task_type: 'write_test',
    assigned_to: 'qa',
    created_by: 'developer',
    priority: 9,
    parent_task_id: developerId,
    context: testPlan,
  });

  completeTask(developerId, {
    fixed: true,
    bug_found: 'Circuit breaker not resetting after cooldown',
    files_changed: [rescoringFile],
    qa_task_id: qaId,
  });

  // Step 4: QA writes tests for the fix.
  updateTaskStatus(qaId, 'running');

  const auditScope = {
    files: [rescoringFile],
    focus_areas: ['error_handling', 'state_management'],
  };
  const securityId = await createAgentTask({
    task_type: 'audit_code',
    assigned_to: 'security',
    created_by: 'qa',
    priority: 8,
    parent_task_id: qaId,
    context: auditScope,
  });

  completeTask(qaId, {
    tests_written: true,
    coverage: 92,
    all_tests_passing: true,
    security_task_id: securityId,
  });

  // Step 5: security audits the fix for vulnerabilities.
  updateTaskStatus(securityId, 'running');

  const performanceBrief = {
    files: [rescoringFile],
    change_type: 'circuit_breaker_fix',
    performance_impact_expected: 'moderate',
  };
  const architectId = await createAgentTask({
    task_type: 'review_performance',
    assigned_to: 'architect',
    created_by: 'security',
    priority: 7,
    parent_task_id: securityId,
    context: performanceBrief,
  });

  completeTask(securityId, {
    audit_complete: true,
    vulnerabilities_found: 0,
    compliance_verified: true,
    architect_task_id: architectId,
  });

  // Step 6: the architect reviews the performance implications.
  updateTaskStatus(architectId, 'running');

  completeTask(architectId, {
    performance_review_complete: true,
    no_regressions_detected: true,
    recommendations: ['Monitor circuit breaker metrics post-deployment'],
  });

  // Load every task in creation (id) order for verification.
  const listTasks = db.prepare(
    `SELECT id, task_type, assigned_to, status
         FROM agent_tasks
         ORDER BY id ASC`
  );
  const pipelineTasks = listTasks.all();

  assert.strictEqual(pipelineTasks.length, 6, 'Should have 6 tasks (one per agent)');

  // Each agent must appear among the assignees; the label feeds the failure message.
  const expectedAgents = [
    ['monitor', 'Monitor'],
    ['triage', 'Triage'],
    ['developer', 'Developer'],
    ['qa', 'QA'],
    ['security', 'Security'],
    ['architect', 'Architect'],
  ];
  const distinctAgents = Array.from(new Set(pipelineTasks.map(({ assigned_to }) => assigned_to)));
  assert.strictEqual(distinctAgents.length, 6, 'Should involve all 6 agents');
  for (const [agent, label] of expectedAgents) {
    assert.ok(distinctAgents.includes(agent), `Should include ${label}`);
  }

  // No task may be left in a non-completed state.
  const hasUnfinishedTask = pipelineTasks.some(({ status }) => status !== 'completed');
  assert.ok(!hasUnfinishedTask, 'All tasks should be completed');

  // Walk the chain: every task after the first must point at its predecessor.
  pipelineTasks.slice(1).forEach((current, offset) => {
    const { parent_task_id: recordedParent } = db
      .prepare('SELECT parent_task_id FROM agent_tasks WHERE id = ?')
      .get(current.id);
    assert.strictEqual(
      recordedParent,
      pipelineTasks[offset].id,
      `Task ${offset + 2} should be child of task ${offset + 1}`
    );
  });
});
2119  
/**
 * Integration tests for the pieces agents use to coordinate: the
 * `agent_messages` table, priority ordering of fetched tasks, and
 * parent/child links between tasks.
 */
describe('Integration Tests - Cross-Agent Communication', () => {
  /**
   * Inserts a developer → qa handoff message, reads it back as an unread
   * message for 'qa', marks it read, and checks that no unread messages remain.
   */
  test('Inter-agent messaging works correctly', async () => {
    // Create message from developer to QA
    db.prepare(
      `INSERT INTO agent_messages (from_agent, to_agent, message_type, payload_json)
       VALUES (?, ?, ?, ?)`
    ).run(
      'developer',
      'qa',
      'handoff',
      JSON.stringify({
        task_id: 123,
        files_changed: ['src/buggy-module.js'],
        test_instructions: 'Verify null handling',
      })
    );

    // QA reads its unread messages, oldest first
    const messages = db
      .prepare(
        `SELECT * FROM agent_messages
         WHERE to_agent = ? AND read = 0
         ORDER BY created_at ASC`
      )
      .all('qa');

    assert.strictEqual(messages.length, 1);

    const [handoff] = messages;
    assert.strictEqual(handoff.from_agent, 'developer');
    assert.strictEqual(handoff.message_type, 'handoff');

    const payload = JSON.parse(handoff.payload_json);
    assert.strictEqual(payload.task_id, 123);

    // Mark as read
    db.prepare('UPDATE agent_messages SET read = 1 WHERE id = ?').run(handoff.id);

    const unreadCount = db
      .prepare('SELECT COUNT(*) as count FROM agent_messages WHERE to_agent = ? AND read = 0')
      .get('qa').count;
    assert.strictEqual(unreadCount, 0);
  });

  /**
   * Creates three developer tasks out of priority order (3, 9, 5) and expects
   * getAgentTasks to return them highest priority first.
   */
  test('Task priority ordering works correctly', async () => {
    // Deliberately not in priority order, so the ordering must come from the query.
    const taskSpecs = [
      { task_type: 'task_low', priority: 3, name: 'low' },
      { task_type: 'task_high', priority: 9, name: 'high' },
      { task_type: 'task_medium', priority: 5, name: 'medium' },
    ];

    // Created one at a time so the insertion order is deterministic.
    for (const { task_type, priority, name } of taskSpecs) {
      await createAgentTask({
        task_type,
        assigned_to: 'developer',
        priority,
        context: { name },
      });
    }

    // Get tasks - should be ordered by priority DESC
    const tasks = getAgentTasks('developer', 'pending', 10);

    assert.strictEqual(tasks.length, 3);
    assert.strictEqual(tasks[0].priority, 9, 'Highest priority should be first');
    assert.strictEqual(tasks[1].priority, 5, 'Medium priority should be second');
    assert.strictEqual(tasks[2].priority, 3, 'Lowest priority should be last');
  });

  /**
   * Builds a three-level chain (parent → child 1 → child 2), checks the stored
   * parent_task_id of both children, and walks the chain from the root with a
   * recursive CTE.
   */
  test('Parent-child task relationships maintained', async () => {
    const parentId = await createAgentTask({
      task_type: 'parent_task',
      assigned_to: 'architect',
      priority: 6,
      context: {},
    });

    const child1Id = await createAgentTask({
      task_type: 'child_task_1',
      assigned_to: 'developer',
      priority: 7,
      parent_task_id: parentId,
      context: {},
    });

    const child2Id = await createAgentTask({
      task_type: 'child_task_2',
      assigned_to: 'qa',
      priority: 7,
      parent_task_id: child1Id,
      context: {},
    });

    // Verify relationships. The root row is not fetched here; the chain query
    // below already asserts that it exists.
    const selectTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?');
    const child1 = selectTask.get(child1Id);
    const child2 = selectTask.get(child2Id);

    assert.strictEqual(child1.parent_task_id, parentId);
    assert.strictEqual(child2.parent_task_id, child1Id);

    // Query task chain: start at the root and repeatedly join in rows whose
    // parent is already part of the chain.
    const chain = db
      .prepare(
        `
      WITH RECURSIVE task_chain AS (
        SELECT * FROM agent_tasks WHERE id = ?
        UNION ALL
        SELECT t.* FROM agent_tasks t
        INNER JOIN task_chain tc ON t.parent_task_id = tc.id
      )
      SELECT * FROM task_chain ORDER BY id ASC
    `
      )
      .all(parentId);

    assert.strictEqual(chain.length, 3);
    assert.strictEqual(chain[0].id, parentId);
    assert.strictEqual(chain[1].id, child1Id);
    assert.strictEqual(chain[2].id, child2Id);
  });
});
2247  
/**
 * Database-level integrity checks: test data can be written to the test
 * tables, and invalid rows (unknown parent, unknown status) are rejected by
 * the database itself rather than by application code.
 */
describe('Database Integrity Tests', () => {
  /**
   * Writes a task and a log row and confirms both tables contain data.
   * The removal of that data is not asserted here.
   */
  test('No test database pollution after cleanup', async () => {
    // Create some test data
    await createAgentTask({
      task_type: 'test_task',
      assigned_to: 'developer',
      priority: 5,
      context: {},
    });

    db.prepare(
      `INSERT INTO agent_logs (agent_name, log_level, message)
       VALUES (?, ?, ?)`
    ).run('developer', 'info', 'Test log');

    const taskCount = db.prepare('SELECT COUNT(*) as count FROM agent_tasks').get().count;
    const logCount = db.prepare('SELECT COUNT(*) as count FROM agent_logs').get().count;

    assert.ok(taskCount > 0, 'Should have test tasks');
    assert.ok(logCount > 0, 'Should have test logs');

    // Cleanup will happen in afterEach - this test verifies structure exists
  });

  /**
   * Inserting a task whose parent_task_id points at a missing row must be
   * rejected, while a child with a valid parent is created normally.
   */
  test('Foreign key constraints enforced', async () => {
    const parentId = await createAgentTask({
      task_type: 'parent',
      assigned_to: 'developer',
      priority: 5,
      context: {},
    });

    // Try to create child with non-existent parent. A bare string as the
    // second argument of assert.throws is only a failure message, so any
    // error at all would pass; match the error code to require a
    // foreign-key violation specifically.
    // NOTE(review): assumes the driver reports SQLite extended result codes
    // on `err.code` - confirm against the installed better-sqlite3 version.
    assert.throws(
      () => {
        db.prepare(
          `INSERT INTO agent_tasks (task_type, assigned_to, parent_task_id, context_json)
           VALUES (?, ?, ?, ?)`
        ).run('child', 'developer', 99999, '{}');
      },
      { code: 'SQLITE_CONSTRAINT_FOREIGNKEY' },
      'Should enforce foreign key constraint'
    );

    // Valid parent should work
    const childId = await createAgentTask({
      task_type: 'child',
      assigned_to: 'developer',
      priority: 5,
      parent_task_id: parentId,
      context: {},
    });

    assert.ok(childId, 'Should create child with valid parent');
  });

  /**
   * Valid status transitions go through updateTaskStatus; writing an unknown
   * status directly must be rejected by the database.
   */
  test('Task status constraints enforced', async () => {
    const taskId = await createAgentTask({
      task_type: 'test',
      assigned_to: 'developer',
      priority: 5,
      context: {},
    });

    // Valid status changes
    updateTaskStatus(taskId, 'running');
    updateTaskStatus(taskId, 'completed');

    // Invalid status should fail at DB level. The pattern accepts any SQLite
    // constraint failure because the enforcing mechanism (CHECK constraint
    // vs. trigger) is not visible from this test.
    assert.throws(
      () => {
        db.prepare('UPDATE agent_tasks SET status = ? WHERE id = ?').run('invalid_status', taskId);
      },
      { code: /^SQLITE_CONSTRAINT/ },
      'Should enforce status constraint'
    );
  });
});