e2e-agent-system.test.js
1 /** 2 * End-to-End Agent System Tests 3 * 4 * Comprehensive test suite to verify the agent system is production-ready. 5 * 6 * Test Coverage: 7 * 1. Task Lifecycle - Create task → Agent picks up → Processes → Completes 8 * 2. Inter-agent Communication - Developer creates task → QA reviews 9 * 3. Error Handling - Invalid context → Graceful failure with clear error 10 * 4. Circuit Breaker - Multiple failures → Circuit opens → Auto-recovers 11 * 5. Task Routing - Triage correctly routes to Developer/Security/etc 12 * 6. Priority Handling - High priority tasks processed first 13 * 7. Row-level Locking - Concurrent agents don't claim same task 14 * 8. Known Error Database - Similar errors get suggested fixes 15 * 9. Coverage Gates - Developer enforces 85% coverage before commits 16 * 10. Workflow Dependencies - Features require approved designs 17 */ 18 19 import { test, describe, beforeEach, afterEach, mock } from 'node:test'; 20 import assert from 'node:assert'; 21 import Database from 'better-sqlite3'; 22 import { TriageAgent } from '../../src/agents/triage.js'; 23 import { DeveloperAgent, _deps as devDeps } from '../../src/agents/developer.js'; 24 import { QAAgent } from '../../src/agents/qa.js'; 25 import { SecurityAgent } from '../../src/agents/security.js'; 26 import { ArchitectAgent } from '../../src/agents/architect.js'; 27 import { MonitorAgent } from '../../src/agents/monitor.js'; 28 import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js'; 29 import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js'; 30 import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js'; 31 import { resetDb as resetStructuredLoggerDb } from '../../src/agents/utils/structured-logger.js'; 32 import fs from 'fs/promises'; 33 import path from 'path'; 34 35 // Use temporary file database for tests 36 let db; 37 const agents = {}; 38 const TEST_DB_PATH = './tests/agents/test-e2e.db'; 39 40 // Save originals so afterEach can restore them 41 const originalDevDeps = { ...devDeps }; 42 43 /** 44 * Initialize test database with full schema 45 */ 46 function createTestSchema(db) { 47 db.exec(` 48 -- Agent task queue 49 CREATE TABLE agent_tasks ( 50 id INTEGER PRIMARY KEY AUTOINCREMENT, 51 task_type TEXT NOT NULL, 52 assigned_to TEXT NOT NULL CHECK(assigned_to IN ('developer', 'qa', 'security', 'architect', 'triage', 'monitor')), 53 created_by TEXT, 54 status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'running', 'completed', 'failed', 'blocked')), 55 priority INTEGER DEFAULT 5 CHECK(priority >= 1 AND priority <= 10), 56 context_json TEXT, 57 result_json TEXT, 58 parent_task_id INTEGER REFERENCES agent_tasks(id), 59 error_message TEXT, 60 reviewed_by TEXT, 61 approval_json TEXT, 62 created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 63 started_at DATETIME, 64 completed_at DATETIME, 65 retry_count INTEGER DEFAULT 0 66 ); 67 68 -- Inter-agent messaging 69 CREATE TABLE agent_messages ( 70 id INTEGER PRIMARY KEY AUTOINCREMENT, 71 task_id INTEGER REFERENCES agent_tasks(id), 72 from_agent TEXT NOT NULL, 73 to_agent TEXT NOT NULL, 74 message_type TEXT CHECK(message_type IN ('question', 'answer', 'handoff', 'notification')), 75 content TEXT NOT NULL, 76 metadata_json TEXT, 77 created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 78 read_at DATETIME 79 ); 80 81 -- Agent execution logs 82 CREATE TABLE agent_logs ( 83 id INTEGER PRIMARY KEY AUTOINCREMENT, 84 task_id INTEGER, 85 agent_name TEXT NOT NULL, 86 log_level TEXT CHECK(log_level IN ('debug', 'info', 'warn', 'error')), 87 message TEXT NOT NULL, 88 data_json TEXT, 89 created_at DATETIME DEFAULT CURRENT_TIMESTAMP 90 ); 91 92 -- Agent status tracking 93 CREATE TABLE agent_state ( 94 agent_name TEXT PRIMARY KEY CHECK(agent_name IN ('developer', 'qa', 'security', 'architect', 'triage', 'monitor')), 95 last_active DATETIME DEFAULT CURRENT_TIMESTAMP, 96 current_task_id INTEGER, 97 status TEXT DEFAULT 'idle' CHECK(status IN ('idle', 'working', 'blocked')), 98 metrics_json TEXT 99 ); 100 101 -- Agent outcomes for learning 102 CREATE TABLE agent_outcomes ( 103 id INTEGER PRIMARY KEY AUTOINCREMENT, 104 task_id INTEGER NOT NULL, 105 agent_name TEXT NOT NULL, 106 task_type TEXT NOT NULL, 107 outcome TEXT NOT NULL CHECK(outcome IN ('success', 'failure')), 108 context_json TEXT, 109 result_json TEXT, 110 duration_ms INTEGER, 111 created_at DATETIME DEFAULT CURRENT_TIMESTAMP 112 ); 113 114 -- Circuit breaker state (for OpenRouter/external APIs) 115 CREATE TABLE IF NOT EXISTS circuit_breaker_state ( 116 service_name TEXT PRIMARY KEY, 117 state TEXT NOT NULL CHECK(state IN ('closed', 'open', 'half_open')), 118 failure_count INTEGER DEFAULT 0, 119 last_failure_at DATETIME, 120 opened_at DATETIME, 121 updated_at DATETIME DEFAULT CURRENT_TIMESTAMP 122 ); 123 124 -- Performance indexes 125 CREATE INDEX idx_agent_tasks_assigned_to ON agent_tasks(assigned_to, status); 126 CREATE INDEX idx_agent_tasks_parent ON agent_tasks(parent_task_id); 127 CREATE INDEX idx_agent_tasks_priority ON agent_tasks(priority DESC, created_at ASC); 128 CREATE INDEX idx_agent_messages_task ON agent_messages(task_id); 129 CREATE INDEX idx_agent_messages_to ON agent_messages(to_agent, read_at); 130 CREATE INDEX idx_agent_logs_task ON agent_logs(task_id); 131 CREATE INDEX idx_agent_logs_agent ON agent_logs(agent_name, created_at); 132 133 -- Initialize agent state for all agents 134 INSERT INTO agent_state (agent_name, status, metrics_json) VALUES 135 ('triage', 'idle', '{}'), 136 ('developer', 'idle', '{}'), 137 ('qa', 'idle', '{}'), 138 ('security', 'idle', '{}'), 139 ('architect', 'idle', '{}'), 140 ('monitor', 'idle', '{}'); 141 `); 142 } 143 144 beforeEach(async () => { 145 // Disable immediate invocation to avoid infinite recursion in tests 146 process.env.AGENT_IMMEDIATE_INVOCATION = 'false'; 147 148 // Remove existing test database 149 try { 150 await fs.unlink(TEST_DB_PATH); 151 } catch (e) { 152 // Ignore if file doesn't exist 153 } 154 155 // Create temporary test database 156 db = new Database(TEST_DB_PATH); 157 process.env.DATABASE_PATH = TEST_DB_PATH; 158 159 // Create full schema 160 createTestSchema(db); 161 162 // Initialize all agents 163 agents.triage = new TriageAgent(); 164 agents.developer = new DeveloperAgent(); 165 agents.qa = new QAAgent(); 166 agents.security = new SecurityAgent(); 167 agents.architect = new ArchitectAgent(); 168 agents.monitor = new MonitorAgent(); 169 170 // Initialize agents (loads context) 171 for (const agent of Object.values(agents)) { 172 await agent.initialize(); 173 } 174 }); 175 176 afterEach(async () => { 177 // Reset all database connections 178 resetBaseDb(); 179 resetTaskDb(); 180 resetMessageDb(); 181 resetStructuredLoggerDb(); 182 183 if (db) { 184 db.close(); 185 } 186 187 // Clean up test database 188 try { 189 await fs.unlink(TEST_DB_PATH); 190 } catch (e) { 191 // Ignore if file doesn't exist 192 } 193 194 // Restore env vars 195 delete process.env.AGENT_IMMEDIATE_INVOCATION; 196 197 // Restore devDeps to originals (tests may have replaced them) 198 Object.assign(devDeps, originalDevDeps); 199 200 // Reset any node:test mocks 201 mock.restoreAll(); 202 }); 203 204 describe('E2E Agent System Tests', () => { 205 describe('1. Task Lifecycle', () => { 206 test('complete task lifecycle: create → pick up → process → complete', async () => { 207 // Create a simple classify_error task for Triage 208 const taskId = db 209 .prepare( 210 ` 211 INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json) 212 VALUES ('classify_error', 'triage', 'pending', 5, ?) 213 ` 214 ) 215 .run( 216 JSON.stringify({ 217 error_message: 'TypeError: Cannot read property "score" of null', 218 stack_trace: 'at Object.<anonymous> (src/scoring.js:100:20)', 219 stage: 'scoring', 220 frequency: 1, 221 }) 222 ).lastInsertRowid; 223 224 // Verify task created 225 const createdTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 226 assert.strictEqual(createdTask.status, 'pending'); 227 assert.strictEqual(createdTask.assigned_to, 'triage'); 228 229 // Agent picks up task 230 const processedCount = await agents.triage.pollTasks(1); 231 assert.strictEqual(processedCount, 1, 'Agent should process 1 task'); 232 233 // Verify task completed 234 const completedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 235 assert.strictEqual(completedTask.status, 'completed'); 236 assert.ok(completedTask.started_at, 'Task should have started_at timestamp'); 237 assert.ok(completedTask.completed_at, 'Task should have completed_at timestamp'); 238 assert.ok(completedTask.result_json, 'Task should have result_json'); 239 240 const result = JSON.parse(completedTask.result_json); 241 assert.strictEqual(result.classification, 'null_pointer'); 242 assert.strictEqual(result.routed_to, 'developer'); 243 }); 244 245 test('task status transitions: pending → running → completed', async () => { 246 const taskId = db 247 .prepare( 248 ` 249 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 250 VALUES ('classify_error', 'triage', 'pending', ?) 251 ` 252 ) 253 .run( 254 JSON.stringify({ 255 error_message: 'ECONNREFUSED: Connection refused', 256 stack_trace: '', 257 stage: 'serps', 258 frequency: 5, 259 }) 260 ).lastInsertRowid; 261 262 // Track status before processing 263 const beforeTask = db.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(taskId); 264 assert.strictEqual(beforeTask.status, 'pending'); 265 266 // Process task 267 await agents.triage.pollTasks(1); 268 269 // Verify completed 270 const afterTask = db.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(taskId); 271 assert.strictEqual(afterTask.status, 'completed'); 272 273 // Verify logs show status transitions 274 const logs = db 275 .prepare('SELECT * FROM agent_logs WHERE task_id = ? ORDER BY created_at ASC') 276 .all(taskId); 277 278 assert.ok(logs.length > 0, 'Should have logged activity'); 279 assert.ok( 280 logs.some(log => log.message.includes('Task started')), 281 'Should log task start' 282 ); 283 assert.ok( 284 logs.some(log => log.message.includes('Task completed')), 285 'Should log task completion' 286 ); 287 }); 288 }); 289 290 describe('2. Inter-agent Communication', () => { 291 test('Developer creates task → QA reviews', async () => { 292 // Create fix_bug task for Developer (simulate Triage completed) 293 const devTaskId = db 294 .prepare( 295 ` 296 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by) 297 VALUES ('fix_bug', 'developer', 'pending', ?, 'triage') 298 ` 299 ) 300 .run( 301 JSON.stringify({ 302 error_message: 'UNIQUE constraint failed: sites.domain', 303 error_type: 'database', 304 stage: 'serps', 305 file_path: 'src/scrape.js', 306 suggested_fix: 'Check for existing record before INSERT', 307 }) 308 ).lastInsertRowid; 309 310 // Mock file operations and test runner via _deps (avoids ESM namespace issues) 311 devDeps.readFile = () => 312 Promise.resolve({ 313 content: 'const db = Database(); db.prepare("INSERT INTO sites...").run();', 314 size: 100, 315 }); 316 devDeps.getFileContext = () => 317 Promise.resolve({ 318 imports: ['better-sqlite3'], 319 testFiles: ['tests/serps.test.js'], 320 }); 321 devDeps.editFile = () => 322 Promise.resolve({ 323 backupPath: '/tmp/backup.js', 324 diff: 'Added null check', 325 }); 326 devDeps.cleanupBackups = () => Promise.resolve(); 327 devDeps.runTestsForFile = () => 328 Promise.resolve({ 329 success: true, 330 stats: { pass: 10, fail: 0 }, 331 coverage: { lines: { pct: 90 } }, 332 }); 333 devDeps.simpleLLMCall = () => 334 Promise.resolve( 335 JSON.stringify({ 336 old_string: 'db.prepare("INSERT INTO sites...").run();', 337 new_string: 338 'const existing = db.prepare("SELECT id FROM sites WHERE domain = ?").get(domain);\nif (!existing) db.prepare("INSERT INTO sites...").run();', 339 explanation: 'Added check for existing record before INSERT', 340 test_cases: ['Duplicate domain', 'New domain'], 341 }) 342 ); 343 344 // Mock git commands and coverage read via devDeps 345 devDeps.execSync = cmd => { 346 if (cmd.includes('git commit')) return 'commit-hash-123\n'; 347 return ''; 348 }; 349 devDeps.readFileCoverage = () => 350 Promise.resolve(JSON.stringify({ 'src/scrape.js': { lines: { pct: 90 } } })); 351 352 // Developer processes the bug fix 353 await agents.developer.pollTasks(1); 354 355 // Verify Developer completed task 356 const devTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTaskId); 357 assert.strictEqual(devTask.status, 'completed'); 358 359 // Verify QA task was created 360 const qaTasks = db 361 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 362 .all('qa', devTaskId); 363 364 assert.strictEqual(qaTasks.length, 1, 'QA task should be created'); 365 assert.strictEqual(qaTasks[0].task_type, 'verify_fix'); 366 367 // Verify handoff message exists 368 const messages = db 369 .prepare('SELECT * FROM agent_messages WHERE from_agent = ? AND to_agent = ?') 370 .all('developer', 'qa'); 371 372 assert.strictEqual(messages.length, 1, 'Handoff message should exist'); 373 assert.strictEqual(messages[0].message_type, 'handoff'); 374 assert.ok( 375 messages[0].content.includes('Bug fix complete'), 376 'Handoff message should describe completion' 377 ); 378 }); 379 380 test('agents can ask questions and receive answers', async () => { 381 // Create task for Developer 382 const taskId = db 383 .prepare( 384 ` 385 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 386 VALUES ('fix_bug', 'developer', 'pending', ?) 387 ` 388 ) 389 .run( 390 JSON.stringify({ 391 error_message: 'Unclear error with no stack trace', 392 error_type: 'unknown', 393 stage: 'scoring', 394 }) 395 ).lastInsertRowid; 396 397 // Developer asks Triage for clarification 398 const questionId = await agents.developer.askQuestion( 399 taskId, 400 'triage', 401 'Could not identify file from error. Please provide file path.' 402 ); 403 404 // Verify question message created 405 const question = db.prepare('SELECT * FROM agent_messages WHERE id = ?').get(questionId); 406 assert.strictEqual(question.from_agent, 'developer'); 407 assert.strictEqual(question.to_agent, 'triage'); 408 assert.strictEqual(question.message_type, 'question'); 409 assert.ok(!question.read_at, 'Question should be unread initially'); 410 411 // Triage checks for unread messages 412 const unreadMessages = agents.triage.getUnreadMessages(10); 413 assert.strictEqual(unreadMessages.length, 1); 414 assert.strictEqual(unreadMessages[0].id, questionId); 415 416 // Triage sends answer 417 const answerId = await agents.triage.sendAnswer( 418 taskId, 419 'developer', 420 'Error is in src/scoring.js line 100', 421 questionId 422 ); 423 424 // Verify answer message created 425 const answer = db.prepare('SELECT * FROM agent_messages WHERE id = ?').get(answerId); 426 assert.strictEqual(answer.from_agent, 'triage'); 427 assert.strictEqual(answer.to_agent, 'developer'); 428 assert.strictEqual(answer.message_type, 'answer'); 429 430 // Verify metadata links question and answer 431 const metadata = JSON.parse(answer.metadata_json); 432 assert.strictEqual(metadata.in_reply_to, questionId); 433 }); 434 }); 435 436 describe('3. Error Handling', () => { 437 test('invalid context → graceful failure with clear error', async () => { 438 // Create task with missing required field 439 const taskId = db 440 .prepare( 441 ` 442 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 443 VALUES ('fix_bug', 'developer', 'pending', ?) 444 ` 445 ) 446 .run( 447 JSON.stringify({ 448 // Missing error_message field 449 error_type: 'null_pointer', 450 stage: 'scoring', 451 }) 452 ).lastInsertRowid; 453 454 // Process task 455 await agents.developer.pollTasks(1); 456 457 // Verify task failed gracefully 458 const failedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 459 assert.strictEqual(failedTask.status, 'failed'); 460 assert.ok( 461 failedTask.error_message.includes('Missing required field: error_message'), 462 'Error message should be descriptive' 463 ); 464 465 // Verify error was logged 466 const errorLogs = db 467 .prepare("SELECT * FROM agent_logs WHERE task_id = ? AND log_level = 'error'") 468 .all(taskId); 469 470 assert.ok(errorLogs.length > 0, 'Error should be logged'); 471 }); 472 473 test('task with malformed JSON → fails with parse error', async () => { 474 // Create task with malformed JSON 475 const taskId = db 476 .prepare( 477 ` 478 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 479 VALUES ('classify_error', 'triage', 'pending', ?) 480 ` 481 ) 482 .run('{invalid json here').lastInsertRowid; 483 484 // Process task 485 await agents.triage.pollTasks(1); 486 487 // Task should still complete (context_json is optional) 488 // Agent handles parse errors gracefully 489 const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 490 assert.ok( 491 task.status === 'completed' || task.status === 'failed', 492 'Task should handle malformed JSON gracefully' 493 ); 494 }); 495 496 test('retry logic: task fails → marked as failed with error message', async () => { 497 // Create task that will fail (missing file) 498 const taskId = db 499 .prepare( 500 ` 501 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 502 VALUES ('fix_bug', 'developer', 'pending', ?) 503 ` 504 ) 505 .run( 506 JSON.stringify({ 507 error_message: 'TypeError: Cannot read property "x" of null', 508 stack_trace: 'at Object.<anonymous> (src/nonexistent-dev-test-file.js:100:20)', 509 error_type: 'null_pointer', 510 stage: 'scoring', 511 }) 512 ).lastInsertRowid; 513 514 // Process task (will fail - developer handles errors internally via failTask) 515 await agents.developer.pollTasks(1); 516 517 // Developer agent handles errors internally without retry mechanism 518 // It calls failTask() directly, marking the task failed with an error message 519 const finalTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 520 assert.strictEqual(finalTask.status, 'failed'); 521 assert.ok(finalTask.error_message, 'Task should have an error message'); 522 }); 523 }); 524 525 describe('4. Circuit Breaker', () => { 526 test('multiple failures → circuit opens', async () => { 527 // Create circuit breaker state table entry 528 db.prepare( 529 ` 530 INSERT INTO circuit_breaker_state (service_name, state, failure_count) 531 VALUES ('openrouter', 'closed', 0) 532 ` 533 ).run(); 534 535 // Simulate 5 consecutive failures (threshold to open circuit) 536 for (let i = 0; i < 5; i++) { 537 db.prepare( 538 ` 539 UPDATE circuit_breaker_state 540 SET failure_count = failure_count + 1, 541 last_failure_at = datetime('now'), 542 state = CASE WHEN failure_count >= 4 THEN 'open' ELSE 'closed' END 543 WHERE service_name = 'openrouter' 544 ` 545 ).run(); 546 } 547 548 // Verify circuit is open 549 const cbState = db 550 .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?') 551 .get('openrouter'); 552 553 assert.strictEqual(cbState.state, 'open'); 554 assert.ok(cbState.failure_count >= 5); 555 }); 556 557 test('circuit opens → auto-recovers after cooldown', async () => { 558 // Insert open circuit breaker state with old timestamp (use SQLite datetime format for comparison) 559 db.prepare( 560 ` 561 INSERT INTO circuit_breaker_state (service_name, state, failure_count, opened_at) 562 VALUES ('openrouter', 'open', 10, datetime('now', '-35 minutes')) 563 ` 564 ).run(); 565 566 // Simulate circuit breaker check (normally done by circuit-breaker.js) 567 // After 30 minutes, circuit should transition to half_open 568 db.prepare( 569 ` 570 UPDATE circuit_breaker_state 571 SET state = 'half_open' 572 WHERE service_name = 'openrouter' 573 AND state = 'open' 574 AND opened_at < datetime('now', '-30 minutes') 575 ` 576 ).run(); 577 578 const cbState = db 579 .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?') 580 .get('openrouter'); 581 582 assert.strictEqual(cbState.state, 'half_open', 'Circuit should be half_open after cooldown'); 583 584 // Simulate successful request → circuit closes 585 db.prepare( 586 ` 587 UPDATE circuit_breaker_state 588 SET state = 'closed', failure_count = 0 589 WHERE service_name = 'openrouter' AND state = 'half_open' 590 ` 591 ).run(); 592 593 const recoveredState = db 594 .prepare('SELECT * FROM circuit_breaker_state WHERE service_name = ?') 595 .get('openrouter'); 596 597 assert.strictEqual(recoveredState.state, 'closed', 'Circuit should close after success'); 598 assert.strictEqual(recoveredState.failure_count, 0, 'Failure count should reset'); 599 }); 600 }); 601 602 describe('5. Task Routing', () => { 603 test('triage routes security error to Security agent', async () => { 604 const taskId = db 605 .prepare( 606 ` 607 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 608 VALUES ('classify_error', 'triage', 'pending', ?) 609 ` 610 ) 611 .run( 612 JSON.stringify({ 613 error_message: 'Unauthorized: Invalid API signature', 614 stack_trace: '', 615 stage: 'outreach', 616 frequency: 1, 617 }) 618 ).lastInsertRowid; 619 620 await agents.triage.pollTasks(1); 621 622 // Verify routed to Security 623 const securityTasks = db 624 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 625 .all('security', taskId); 626 627 assert.strictEqual(securityTasks.length, 1); 628 assert.strictEqual(securityTasks[0].task_type, 'fix_bug'); 629 630 const context = JSON.parse(securityTasks[0].context_json); 631 assert.strictEqual(context.error_type, 'security'); 632 assert.strictEqual(context.severity, 'critical'); 633 }); 634 635 test('triage routes database constraint to Developer agent', async () => { 636 const taskId = db 637 .prepare( 638 ` 639 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 640 VALUES ('classify_error', 'triage', 'pending', ?) 641 ` 642 ) 643 .run( 644 JSON.stringify({ 645 error_message: 'UNIQUE constraint failed: sites.domain', 646 stack_trace: 'at db.prepare(...).run() (src/serps.js:50:10)', 647 stage: 'serps', 648 frequency: 1, 649 }) 650 ).lastInsertRowid; 651 652 await agents.triage.pollTasks(1); 653 654 // Verify routed to Developer 655 const devTasks = db 656 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 657 .all('developer', taskId); 658 659 assert.strictEqual(devTasks.length, 1); 660 661 const context = JSON.parse(devTasks[0].context_json); 662 assert.strictEqual(context.error_type, 'database'); 663 assert.ok( 664 context.suggested_fix.includes('INSERT OR IGNORE'), 665 'Should suggest fix for UNIQUE constraint' 666 ); 667 }); 668 669 test('triage routes network error to Architect agent', async () => { 670 const taskId = db 671 .prepare( 672 ` 673 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 674 VALUES ('classify_error', 'triage', 'pending', ?) 675 ` 676 ) 677 .run( 678 JSON.stringify({ 679 error_message: 'ETIMEDOUT: Connection timed out', 680 stack_trace: '', 681 stage: 'serps', 682 frequency: 10, 683 }) 684 ).lastInsertRowid; 685 686 await agents.triage.pollTasks(1); 687 688 // Verify routed to Architect (infrastructure issue) 689 const architectTasks = db 690 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 691 .all('architect', taskId); 692 693 assert.strictEqual(architectTasks.length, 1); 694 695 const context = JSON.parse(architectTasks[0].context_json); 696 assert.strictEqual(context.error_type, 'network'); 697 }); 698 }); 699 700 describe('6. Priority Handling', () => { 701 test('high priority tasks processed first', async () => { 702 // Create 3 tasks with different priorities 703 const lowPriorityId = db 704 .prepare( 705 ` 706 INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json) 707 VALUES ('classify_error', 'triage', 'pending', 3, ?) 708 ` 709 ) 710 .run( 711 JSON.stringify({ error_message: 'Low priority error', stage: 'proposals' }) 712 ).lastInsertRowid; 713 714 const highPriorityId = db 715 .prepare( 716 ` 717 INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json) 718 VALUES ('classify_error', 'triage', 'pending', 9, ?) 719 ` 720 ) 721 .run( 722 JSON.stringify({ error_message: 'High priority error', stage: 'scoring' }) 723 ).lastInsertRowid; 724 725 const mediumPriorityId = db 726 .prepare( 727 ` 728 INSERT INTO agent_tasks (task_type, assigned_to, status, priority, context_json) 729 VALUES ('classify_error', 'triage', 'pending', 5, ?) 730 ` 731 ) 732 .run( 733 JSON.stringify({ error_message: 'Medium priority error', stage: 'enrichment' }) 734 ).lastInsertRowid; 735 736 // Process one task at a time 737 await agents.triage.pollTasks(1); 738 739 // Verify high priority task was processed first 740 const firstProcessed = db 741 .prepare('SELECT * FROM agent_tasks WHERE status = ? ORDER BY completed_at ASC LIMIT 1') 742 .get('completed'); 743 744 assert.strictEqual(firstProcessed.id, highPriorityId, 'High priority task should run first'); 745 746 // Process second task 747 await agents.triage.pollTasks(1); 748 749 const secondProcessed = db 750 .prepare('SELECT * FROM agent_tasks WHERE status = ? ORDER BY completed_at ASC LIMIT 1, 1') 751 .get('completed'); 752 753 assert.strictEqual( 754 secondProcessed.id, 755 mediumPriorityId, 756 'Medium priority task should run second' 757 ); 758 }); 759 760 test('priority calculation based on severity and stage', async () => { 761 // Critical error in early stage (scoring) should get high priority 762 const criticalEarlyId = db 763 .prepare( 764 ` 765 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 766 VALUES ('classify_error', 'triage', 'pending', ?) 767 ` 768 ) 769 .run( 770 JSON.stringify({ 771 error_message: 'Security breach detected', 772 stage: 'scoring', 773 frequency: 1, 774 }) 775 ).lastInsertRowid; 776 777 await agents.triage.pollTasks(1); 778 779 // Check created task priority 780 const routedTask = db 781 .prepare('SELECT * FROM agent_tasks WHERE parent_task_id = ?') 782 .get(criticalEarlyId); 783 784 assert.ok(routedTask.priority >= 8, 'Critical security error should get priority >= 8'); 785 }); 786 }); 787 788 describe('7. Row-level Locking (Concurrent Agents)', () => { 789 test('concurrent agents do not claim same task', async () => { 790 // Enable row-level locking 791 process.env.AGENT_ENABLE_ROW_LOCKING = 'true'; 792 process.env.AGENT_ALLOW_HORIZONTAL_SCALING = 'true'; 793 794 // Create a single task 795 const taskId = db 796 .prepare( 797 ` 798 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 799 VALUES ('classify_error', 'triage', 'pending', ?) 800 ` 801 ) 802 .run( 803 JSON.stringify({ 804 error_message: 'Test error', 805 stage: 'scoring', 806 }) 807 ).lastInsertRowid; 808 809 // Simulate two concurrent agent instances 810 const agent1 = new TriageAgent(); 811 const agent2 = new TriageAgent(); 812 813 await agent1.initialize(); 814 await agent2.initialize(); 815 816 // Both agents try to claim the task simultaneously 817 const [count1, count2] = await Promise.all([agent1.pollTasks(1), agent2.pollTasks(1)]); 818 819 // Only one should have processed the task 820 assert.strictEqual(count1 + count2, 1, 'Only one agent should claim and process the task'); 821 822 // Verify task is completed (not running twice) 823 const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 824 assert.strictEqual(task.status, 'completed'); 825 826 // Clean up 827 delete process.env.AGENT_ENABLE_ROW_LOCKING; 828 delete process.env.AGENT_ALLOW_HORIZONTAL_SCALING; 829 }); 830 }); 831 832 describe('8. Known Error Database', () => { 833 test('similar errors get suggested fixes from past tasks', async () => { 834 // Step 1: Create a completed fix_bug task (known fix) 835 // Use old created_at to prevent deduplication from treating it as recent 836 const completedTaskId = db 837 .prepare( 838 ` 839 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, result_json, completed_at, created_at) 840 VALUES ('fix_bug', 'developer', 'completed', ?, ?, datetime('now', '-1 day'), datetime('now', '-1 day')) 841 ` 842 ) 843 .run( 844 JSON.stringify({ 845 error_message: 846 'TypeError: Cannot read property "conversion_score" of null at scoring.js:100', 847 error_type: 'null_pointer', 848 stage: 'scoring', 849 }), 850 JSON.stringify({ 851 fix_description: 'Added null check: score?.conversion_score || 0', 852 files_changed: ['src/scoring.js'], 853 }) 854 ).lastInsertRowid; 855 856 // Step 2: Create similar error (same type, different line number) 857 const newTaskId = db 858 .prepare( 859 ` 860 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 861 VALUES ('classify_error', 'triage', 'pending', ?) 862 ` 863 ) 864 .run( 865 JSON.stringify({ 866 error_message: 867 'TypeError: Cannot read property "conversion_score" of null at scoring.js:250', 868 stack_trace: '', 869 stage: 'scoring', 870 frequency: 1, 871 }) 872 ).lastInsertRowid; 873 874 // Process new error 875 await agents.triage.pollTasks(1); 876 877 // Verify triage found known fix 878 const newTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(newTaskId); 879 const result = JSON.parse(newTask.result_json); 880 881 assert.ok(result.known_fix, 'Should detect known fix'); 882 assert.ok(result.known_fix.task_id === completedTaskId, 'Should reference previous fix task'); 883 assert.ok(result.known_fix.similarity >= 0.7, 'Similarity should be >= 70%'); 884 885 // Verify routed task includes suggested fix 886 const routedTask = db 887 .prepare('SELECT * FROM agent_tasks WHERE parent_task_id = ?') 888 .get(newTaskId); 889 890 const context = JSON.parse(routedTask.context_json); 891 assert.ok(context.suggested_fix.includes('null check'), 'Should suggest known fix approach'); 892 }); 893 }); 894 895 describe('9. Coverage Gates (Developer Agent)', () => { 896 test('developer enforces 85% coverage before commit', async () => { 897 // This test verifies the checkCoverageBeforeCommit gate 898 // Mock scenario: Developer attempts commit with low coverage 899 900 const taskId = db 901 .prepare( 902 ` 903 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 904 VALUES ('fix_bug', 'developer', 'pending', ?) 905 ` 906 ) 907 .run( 908 JSON.stringify({ 909 error_message: 'TypeError: x is null', 910 error_type: 'null_pointer', 911 stage: 'scoring', 912 file_path: 'src/score.js', 913 }) 914 ).lastInsertRowid; 915 916 // Mock file operations via devDeps 917 devDeps.readFile = () => 918 Promise.resolve({ content: 'const x = null; console.log(x.value);', size: 50 }); 919 devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] }); 920 devDeps.editFile = () => 921 Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'Added null check' }); 922 devDeps.cleanupBackups = () => Promise.resolve(); 923 924 // Mock test runner — tests pass but coverage is below 85% threshold 925 devDeps.runTestsForFile = () => 926 Promise.resolve({ 927 success: true, 928 stats: { pass: 5 }, 929 coverage: { lines: { pct: 70 } }, 930 }); 931 932 // Mock LLM 933 devDeps.simpleLLMCall = () => 934 Promise.resolve( 935 JSON.stringify({ 936 old_string: 'const x = null; console.log(x.value);', 937 new_string: 'const x = null; if (x) console.log(x.value);', 938 explanation: 'Added null check', 939 }) 940 ); 941 942 // Mock coverage check — npm test succeeds but coverage file shows low coverage 943 devDeps.execSync = () => ''; 944 devDeps.readFileCoverage = path => { 945 if (path.includes('coverage-summary.json')) { 946 return Promise.resolve(JSON.stringify({ 'src/score.js': { lines: { pct: 70 } } })); 947 } 948 if (path.includes('coverage-final.json')) { 949 // Return valid coverage data so getDetailedCoverage succeeds 950 return Promise.resolve( 951 JSON.stringify({ 952 '/home/jason/code/333Method/src/score.js': { 953 lines: { pct: 70 }, 954 statementMap: { 1: { start: { line: 1 }, end: { line: 1 } } }, 955 s: { 1: 0 }, 956 }, 957 }) 958 ); 959 } 960 // Source file read 961 return Promise.resolve('const x = null; console.log(x.value);'); 962 }; 963 964 // Process task 965 await agents.developer.pollTasks(1); 966 967 // Verify task blocked due to coverage 968 const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId); 969 970 assert.strictEqual(task.status, 'blocked', 'Task should be blocked due to coverage gate'); 971 assert.ok( 972 task.error_message.includes('coverage') || task.error_message.includes('85%'), 973 'Error should mention coverage threshold' 974 ); 975 }); 976 }); 977 978 describe('10. Workflow Dependencies', () => { 979 test('features require approved design proposal', async () => { 980 // Try to implement feature without approved design 981 const featureTaskId = db 982 .prepare( 983 ` 984 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 985 VALUES ('implement_feature', 'developer', 'pending', ?) 986 ` 987 ) 988 .run( 989 JSON.stringify({ 990 feature_description: 'Add SMS rate limiting', 991 requirements: ['Limit to 10 SMS per hour per number'], 992 files_to_modify: ['src/outreach/sms.js'], 993 }) 994 ).lastInsertRowid; 995 996 // Process feature task (should block and create design_proposal task) 997 await agents.developer.pollTasks(1); 998 999 // Verify feature task is blocked 1000 const featureTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(featureTaskId); 1001 assert.strictEqual( 1002 featureTask.status, 1003 'blocked', 1004 'Feature implementation should be blocked without approved design' 1005 ); 1006 1007 // Verify design_proposal task created 1008 const designTasks = db 1009 .prepare('SELECT * FROM agent_tasks WHERE task_type = ? AND assigned_to = ?') 1010 .all('design_proposal', 'architect'); 1011 1012 assert.strictEqual(designTasks.length, 1, 'Design proposal task should be auto-created'); 1013 }); 1014 1015 test('approved design enables feature implementation', async () => { 1016 // Step 1: Create approved design proposal 1017 const designTaskId = db 1018 .prepare( 1019 ` 1020 INSERT INTO agent_tasks (task_type, assigned_to, status, result_json, approval_json, reviewed_by, completed_at) 1021 VALUES ('design_proposal', 'architect', 'completed', ?, ?, 'ProductOwner', datetime('now')) 1022 ` 1023 ) 1024 .run( 1025 JSON.stringify({ 1026 design_proposal: { 1027 title: 'SMS Rate Limiting', 1028 files_affected: ['src/outreach/sms.js'], 1029 requires_migration: false, 1030 }, 1031 }), 1032 JSON.stringify({ 1033 decision: 'approved', 1034 reviewer: 'ProductOwner', 1035 timestamp: new Date().toISOString(), 1036 }) 1037 ).lastInsertRowid; 1038 1039 // Step 2: Create feature implementation task with approved parent 1040 const featureTaskId = db 1041 .prepare( 1042 ` 1043 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, parent_task_id) 1044 VALUES ('implement_feature', 'developer', 'pending', ?, ?) 1045 ` 1046 ) 1047 .run( 1048 JSON.stringify({ 1049 feature_description: 'Add SMS rate limiting', 1050 requirements: ['Limit to 10 SMS per hour'], 1051 files_to_modify: ['src/outreach/sms.js'], 1052 }), 1053 designTaskId 1054 ).lastInsertRowid; 1055 1056 // Mock file operations and tests via devDeps 1057 devDeps.readFile = () => 1058 Promise.resolve({ content: 'export function sendSMS() { /* ... */ }', size: 100 }); 1059 devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] }); 1060 devDeps.writeFile = () => Promise.resolve({ backupPath: '/tmp/backup.js' }); 1061 devDeps.editFile = () => 1062 Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'Added rate limiting' }); 1063 devDeps.runTests = () => 1064 Promise.resolve({ success: true, stats: { pass: 10 }, coverage: { lines: { pct: 90 } } }); 1065 devDeps.simpleLLMCall = () => 1066 Promise.resolve( 1067 JSON.stringify({ 1068 old_string: 'export function sendSMS() {', 1069 new_string: 'const rateLimit = new Map();\nexport function sendSMS() {', 1070 explanation: 'Added rate limiting', 1071 }) 1072 ); 1073 1074 // Mock execSync (git add + commit) and coverage file read 1075 devDeps.execSync = cmd => { 1076 if (cmd.includes('git commit')) return 'commit-hash-456\n'; 1077 return ''; 1078 }; 1079 devDeps.readFileCoverage = () => 1080 Promise.resolve(JSON.stringify({ 'src/outreach/sms.js': { lines: { pct: 90 } } })); 1081 1082 // Process feature task 1083 await agents.developer.pollTasks(1); 1084 1085 // Verify feature task completed (not blocked) 1086 const featureTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(featureTaskId); 1087 assert.strictEqual( 1088 featureTask.status, 1089 'completed', 1090 'Feature should implement successfully with approved design' 1091 ); 1092 }); 1093 }); 1094 1095 describe('11. Agent System Integration', () => { 1096 test('full bug fix workflow: Monitor → Triage → Developer → QA', async () => { 1097 // Step 1: Monitor detects error and creates classify_error task 1098 const monitorTaskId = db 1099 .prepare( 1100 ` 1101 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by) 1102 VALUES ('scan_logs', 'monitor', 'pending', ?, 'system') 1103 ` 1104 ) 1105 .run( 1106 JSON.stringify({ 1107 log_file: 'logs/pipeline-2024-01-01.log', 1108 error_patterns: ['TypeError', 'null'], 1109 }) 1110 ).lastInsertRowid; 1111 1112 // Mock log scanning (Monitor would create Triage task) 1113 const triageTaskId = db 1114 .prepare( 1115 ` 1116 INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_by) 1117 VALUES ('classify_error', 'triage', 'pending', ?, 'monitor') 1118 ` 1119 ) 1120 .run( 1121 JSON.stringify({ 1122 error_message: 'TypeError: Cannot read property "score" of undefined', 1123 stack_trace: 'at Object.<anonymous> (src/score.js:150:10)', 1124 stage: 'scoring', 1125 frequency: 5, 1126 }) 1127 ).lastInsertRowid; 1128 1129 // Step 2: Triage classifies and routes 1130 await agents.triage.pollTasks(1); 1131 1132 const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId); 1133 assert.strictEqual(triageTask.status, 'completed'); 1134 1135 // Step 3: Developer fixes bug 1136 const devTasks = db 1137 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 1138 .all('developer', triageTaskId); 1139 1140 assert.strictEqual(devTasks.length, 1, 'Developer task should be created'); 1141 1142 // Mock Developer operations via devDeps 1143 devDeps.readFile = () => Promise.resolve({ content: 'const score = data.score;', size: 50 }); 1144 devDeps.getFileContext = () => Promise.resolve({ imports: [], testFiles: [] }); 1145 devDeps.editFile = () => Promise.resolve({ backupPath: '/tmp/backup.js', diff: 'fix' }); 1146 devDeps.cleanupBackups = () => Promise.resolve(); 1147 devDeps.runTestsForFile = () => 1148 Promise.resolve({ success: true, stats: { pass: 8 }, coverage: { lines: { pct: 90 } } }); 1149 devDeps.simpleLLMCall = () => 1150 Promise.resolve( 1151 JSON.stringify({ 1152 old_string: 'const score = data.score;', 1153 new_string: 'const score = data?.score || 0;', 1154 explanation: 'Added null check', 1155 }) 1156 ); 1157 devDeps.execSync = cmd => { 1158 if (cmd.includes('git commit')) return 'hash-789\n'; 1159 return ''; 1160 }; 1161 devDeps.readFileCoverage = () => 1162 Promise.resolve(JSON.stringify({ 'src/score.js': { lines: { pct: 90 } } })); 1163 1164 await agents.developer.pollTasks(1); 1165 1166 const devTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTasks[0].id); 1167 assert.strictEqual(devTask.status, 'completed', 'Developer should complete fix'); 1168 1169 // Step 4: Verify QA task created 1170 const qaTasks = db 1171 .prepare('SELECT * FROM agent_tasks WHERE assigned_to = ? AND parent_task_id = ?') 1172 .all('qa', devTask.id); 1173 1174 assert.strictEqual(qaTasks.length, 1, 'QA verification task should be created'); 1175 1176 // Verify complete workflow chain 1177 const allTasks = db 1178 .prepare('SELECT task_type, assigned_to, status FROM agent_tasks ORDER BY id ASC') 1179 .all(); 1180 1181 assert.ok( 1182 allTasks.some(t => t.assigned_to === 'monitor'), 1183 'Monitor task exists' 1184 ); 1185 assert.ok( 1186 allTasks.some(t => t.assigned_to === 'triage'), 1187 'Triage task exists' 1188 ); 1189 assert.ok( 1190 allTasks.some(t => t.assigned_to === 'developer'), 1191 'Developer task exists' 1192 ); 1193 assert.ok( 1194 allTasks.some(t => t.assigned_to === 'qa'), 1195 'QA task exists' 1196 ); 1197 }); 1198 }); 1199 });