// workflow.integration.test.js
/**
 * Agent Workflow Integration Tests
 *
 * Tests end-to-end workflows: Triage → Developer → QA
 *
 * Every test runs against a fresh on-disk SQLite database at TEST_DB_PATH.
 * The database file, the module-level DB singletons of the agent modules and
 * the environment variables touched here are reset around each test.
 */

import { test, describe, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert';
import Database from 'better-sqlite3';
import { TriageAgent } from '../../src/agents/triage.js';
import { DeveloperAgent } from '../../src/agents/developer.js';
import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js';
import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js';
import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js';
import fs from 'fs/promises';

// Use temporary file database for tests
let db;
let triageAgent;
let developerAgent;
const TEST_DB_PATH = './tests/agents/test-workflow.db';

// Environment variables every test runs with.
const TEST_ENV = Object.freeze({
  DATABASE_PATH: TEST_DB_PATH,
  // Point TEL_DB_PATH at the same test DB so that ATTACH DATABASE ... AS tel resolves
  // to the test DB (SQLite allows self-attach), making tel.agent_tasks queries work.
  TEL_DB_PATH: TEST_DB_PATH,
  AGENT_REALTIME_NOTIFICATIONS: 'false', // Prevent subprocess spawning in tests
  AGENT_IMMEDIATE_INVOCATION: 'false', // Prevent in-process agent chaining in tests
});

// Values the TEST_ENV keys had before the current test (undefined = was unset).
const savedEnv = new Map();

// Schema the agents are exercised against in these tests.
const SCHEMA_SQL = `
  CREATE TABLE agent_tasks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_type TEXT NOT NULL,
    assigned_to TEXT NOT NULL,
    created_by TEXT,
    status TEXT DEFAULT 'pending',
    priority INTEGER DEFAULT 5,
    context_json TEXT,
    result_json TEXT,
    parent_task_id INTEGER,
    error_message TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at DATETIME,
    completed_at DATETIME,
    retry_count INTEGER DEFAULT 0
  );

  CREATE TABLE agent_messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_id INTEGER,
    from_agent TEXT NOT NULL,
    to_agent TEXT NOT NULL,
    message_type TEXT,
    content TEXT NOT NULL,
    metadata_json TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    read_at DATETIME
  );

  CREATE TABLE agent_logs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_id INTEGER,
    agent_name TEXT NOT NULL,
    log_level TEXT,
    message TEXT,
    data_json TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
  );

  CREATE TABLE agent_state (
    agent_name TEXT PRIMARY KEY,
    last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
    current_task_id INTEGER,
    status TEXT DEFAULT 'idle',
    metrics_json TEXT
  );

  CREATE TABLE IF NOT EXISTS cron_locks (
    lock_key TEXT PRIMARY KEY,
    description TEXT,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
  );

  CREATE TABLE IF NOT EXISTS agent_llm_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent_name TEXT NOT NULL,
    task_id INTEGER,
    model TEXT NOT NULL,
    prompt_tokens INTEGER NOT NULL,
    completion_tokens INTEGER NOT NULL,
    cost_usd REAL NOT NULL,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
  );

  CREATE TABLE IF NOT EXISTS agent_outcomes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_id INTEGER NOT NULL REFERENCES agent_tasks(id) ON DELETE CASCADE,
    agent_name TEXT NOT NULL,
    task_type TEXT NOT NULL,
    outcome TEXT NOT NULL,
    context_json TEXT,
    result_json TEXT,
    duration_ms INTEGER,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
  );

  CREATE INDEX idx_agent_tasks_assigned_to ON agent_tasks(assigned_to, status);
  CREATE INDEX idx_agent_tasks_parent ON agent_tasks(parent_task_id);
  CREATE INDEX idx_agent_tasks_priority ON agent_tasks(priority DESC, created_at ASC);
`;

/** Resets the module-level DB singletons of all agent modules imported above. */
function resetAgentDbSingletons() {
  resetBaseDb();
  resetTaskDb();
  resetMessageDb();
}

/**
 * Deletes the test database file.
 * `force: true` makes a missing file a no-op while any other failure
 * (permissions, busy file, ...) is still reported instead of being swallowed.
 */
async function removeTestDb() {
  await fs.rm(TEST_DB_PATH, { force: true });
}

/** Remembers the current value of every TEST_ENV key, then applies the test value. */
function applyTestEnv() {
  for (const [key, value] of Object.entries(TEST_ENV)) {
    // Keep the first snapshot so a repeated call cannot overwrite the real original.
    if (!savedEnv.has(key)) {
      savedEnv.set(key, process.env[key]);
    }
    process.env[key] = value;
  }
}

/** Puts every key touched by applyTestEnv() back to its pre-test state. */
function restoreEnv() {
  for (const [key, previous] of savedEnv) {
    if (previous === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = previous;
    }
  }
  savedEnv.clear();
}

/**
 * Inserts a pending `classify_error` task assigned to triage.
 * @param {object} errorContext - Stored (JSON-encoded) in `context_json`.
 * @returns {number|bigint} Row id of the inserted task.
 */
function insertTriageTask(errorContext) {
  const { lastInsertRowid } = db
    .prepare(
      `
      INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
      VALUES ('classify_error', 'triage', 'pending', ?)
    `
    )
    .run(JSON.stringify(errorContext));
  return lastInsertRowid;
}

/**
 * Fetches one `agent_tasks` row by id.
 * @param {number|bigint} taskId
 * @returns {object|undefined} The raw row (JSON columns still encoded).
 */
function getTaskRow(taskId) {
  return db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
}

/**
 * Decodes `context_json` on a task row in place, which is the shape the agent
 * methods are called with throughout this file.
 * @param {object} row - Row returned from `agent_tasks`.
 * @returns {object} The same row object.
 */
function parseTaskContext(row) {
  row.context_json = JSON.parse(row.context_json);
  return row;
}

/**
 * Returns the decoded `result_json` of a task.
 * @param {number|bigint} taskId
 * @returns {object}
 */
function getTaskResult(taskId) {
  return JSON.parse(getTaskRow(taskId).result_json);
}

/**
 * Lists the tasks created as children of `parentTaskId` for a given agent.
 * @param {number|bigint} parentTaskId
 * @param {string} assignedTo - Agent name stored in `assigned_to`.
 * @returns {object[]} Raw rows (JSON columns still encoded).
 */
function findChildTasks(parentTaskId, assignedTo) {
  return db
    .prepare('SELECT * FROM agent_tasks WHERE parent_task_id = ? AND assigned_to = ?')
    .all(parentTaskId, assignedTo);
}

/**
 * Inserts a triage task for `errorContext` and lets the triage agent classify it.
 * @param {object} errorContext
 * @returns {Promise<number|bigint>} Id of the triage task.
 */
async function classifyNewError(errorContext) {
  const triageTaskId = insertTriageTask(errorContext);
  await triageAgent.classifyErrorTask(parseTaskContext(getTaskRow(triageTaskId)));
  return triageTaskId;
}

/**
 * Runs `developerAgent.fixBug(task)` with fixBug temporarily replaced by a stub,
 * to avoid real LLM calls in the test environment. The stub creates a
 * `verify_fix` task for QA, sends the handoff message and completes the task.
 * The original method is reinstated afterwards, even if the stub throws.
 * @param {object} task - Developer task row with decoded `context_json`.
 * @param {{ file: string, fixDescription: string }} fix - File reported as
 *   changed/analyzed and the description placed in the QA task context.
 */
async function runStubbedFixBug(task, { file, fixDescription }) {
  const hadOwnFixBug = Object.hasOwn(developerAgent, 'fixBug');
  const originalFixBug = developerAgent.fixBug;

  developerAgent.fixBug = async stubbedTask => {
    const qaTaskId = await developerAgent.createTask({
      task_type: 'verify_fix',
      assigned_to: 'qa',
      parent_task_id: stubbedTask.id,
      context: { files_changed: [file], fix_description: fixDescription },
    });
    await developerAgent.handoff(stubbedTask.id, 'qa', 'Fix applied — ready for verification', {
      qa_task_id: qaTaskId,
      files_changed: [file],
    });
    await developerAgent.completeTask(stubbedTask.id, {
      qa_task_id: qaTaskId,
      files_analyzed: [file],
      files_changed: [file],
    });
  };

  try {
    await developerAgent.fixBug(task);
  } finally {
    // Reinstate exactly what was there: an own property is put back, an
    // inherited method is uncovered again by removing the stub.
    if (hadOwnFixBug) {
      developerAgent.fixBug = originalFixBug;
    } else {
      delete developerAgent.fixBug;
    }
  }
}

beforeEach(async () => {
  // Reset all module-level DB singletons so they re-open with fresh env vars below.
  resetAgentDbSingletons();

  // Remove existing test database if it exists
  await removeTestDb();

  // Create temporary test database
  db = new Database(TEST_DB_PATH);
  applyTestEnv();

  // Create tables
  db.exec(SCHEMA_SQL);

  // Initialize agents
  triageAgent = new TriageAgent();
  developerAgent = new DeveloperAgent();

  await triageAgent.initialize();
  await developerAgent.initialize();
});

afterEach(async () => {
  // Restore env vars (including DATABASE_PATH) to their pre-test values
  restoreEnv();

  // Reset all database connections first
  resetAgentDbSingletons();

  if (db) {
    db.close();
    db = undefined;
  }

  // Clean up test database
  await removeTestDb();
});

describe('Workflow: Triage → Developer → QA', () => {
  test('complete bug fix workflow', async () => {
    // Step 1: Create initial error classification task for Triage
    const triageTaskId = insertTriageTask({
      error_message: 'TypeError: Cannot read property "conversion_score" of null',
      stack_trace: 'at Object.<anonymous> (src/score.js:179:45)',
      stage: 'scoring',
      frequency: 3,
    });

    // Step 2: Triage agent processes the error
    const triageTasks = db
      .prepare("SELECT * FROM agent_tasks WHERE assigned_to = 'triage' AND status = 'pending'")
      .all();

    assert.strictEqual(triageTasks.length, 1);

    await triageAgent.classifyErrorTask(parseTaskContext(triageTasks[0]));

    // Step 3: Verify Triage completed and created Developer task
    const completedTriageTask = getTaskRow(triageTaskId);
    assert.strictEqual(completedTriageTask.status, 'completed');

    const triageResult = JSON.parse(completedTriageTask.result_json);
    assert.strictEqual(triageResult.classification, 'null_pointer');
    assert.strictEqual(triageResult.routed_to, 'developer');

    // Step 4: Verify Developer task was created
    const developerTasks = db
      .prepare("SELECT * FROM agent_tasks WHERE assigned_to = 'developer' AND status = 'pending'")
      .all();

    assert.strictEqual(developerTasks.length, 1);
    assert.strictEqual(developerTasks[0].task_type, 'fix_bug');
    assert.strictEqual(developerTasks[0].parent_task_id, triageTaskId);

    const developerTask = parseTaskContext(developerTasks[0]);
    const devContext = developerTask.context_json;
    assert.strictEqual(devContext.error_type, 'null_pointer');
    assert.strictEqual(devContext.stage, 'scoring');
    assert.ok(devContext.suggested_fix);

    // Step 5: Developer processes the bug fix (stubbed, no real LLM calls)
    await runStubbedFixBug(developerTask, {
      file: 'src/score.js',
      fixDescription: 'Added null check',
    });

    // Step 6: Verify Developer completed and created QA task
    const completedDevTask = getTaskRow(developerTask.id);
    assert.strictEqual(completedDevTask.status, 'completed');

    const devResult = JSON.parse(completedDevTask.result_json);
    assert.ok(devResult.qa_task_id);
    assert.ok(devResult.files_analyzed);

    // Step 7: Verify QA task was created
    const qaTasks = db
      .prepare("SELECT * FROM agent_tasks WHERE assigned_to = 'qa' AND status = 'pending'")
      .all();

    assert.strictEqual(qaTasks.length, 1);
    assert.strictEqual(qaTasks[0].task_type, 'verify_fix');
    assert.strictEqual(qaTasks[0].parent_task_id, developerTask.id);

    const qaContext = JSON.parse(qaTasks[0].context_json);
    assert.ok(qaContext.files_changed);
    assert.strictEqual(qaContext.files_changed[0], 'src/score.js');

    // Step 8: Verify handoff message was sent
    const handoffMessages = db
      .prepare(
        `
        SELECT * FROM agent_messages
        WHERE from_agent = 'developer'
          AND to_agent = 'qa'
          AND message_type = 'handoff'
      `
      )
      .all();

    assert.strictEqual(handoffMessages.length, 1);
    assert.match(handoffMessages[0].content, /ready for verification/i);

    const metadata = JSON.parse(handoffMessages[0].metadata_json);
    assert.strictEqual(metadata.qa_task_id, qaTasks[0].id);
  });

  test('workflow with high priority security error', async () => {
    // Create security error and let Triage process it
    const triageTaskId = await classifyNewError({
      error_message: 'Unauthorized: invalid API signature detected',
      stack_trace: 'at validateSignature (src/security/auth.js:42:10)',
      stage: 'outreach',
      frequency: 1,
    });

    // Verify routed to security (not developer)
    const securityTasks = findChildTasks(triageTaskId, 'security');
    assert.strictEqual(securityTasks.length, 1);

    // Verify high priority
    assert.ok(securityTasks[0].priority >= 9);

    assert.strictEqual(getTaskResult(triageTaskId).severity, 'critical');
  });

  test('workflow with database constraint error routed to architect', async () => {
    // Create database error requiring schema change and let Triage process it
    const triageTaskId = await classifyNewError({
      error_message: 'UNIQUE constraint failed: messages.site_id, messages.contact_method',
      stack_trace: 'at Database.prepare.run (src/outreach/email.js:88:12)',
      stage: 'outreach',
      frequency: 1,
    });

    // Verify classified as database error
    assert.strictEqual(getTaskResult(triageTaskId).classification, 'database');

    // Note: Would route to architect if schema_change_needed detected
    // For this test, routes to developer (schema change detection would be in actual implementation)
  });

  test('developer blocks when file path cannot be extracted', async () => {
    // Create vague error and let Triage process it
    const triageTaskId = await classifyNewError({
      error_message: 'Something went wrong',
      stack_trace: '',
      stage: 'unknown',
      frequency: 1,
    });

    // Get developer task
    const [devTask] = findChildTasks(triageTaskId, 'developer');
    assert.ok(devTask, 'triage should have created a developer task');
    parseTaskContext(devTask);

    // Developer processes (real fixBug: the blocking path is what is under test)
    await developerAgent.fixBug(devTask);

    // Verify developer blocked the task
    assert.strictEqual(getTaskRow(devTask.id).status, 'blocked');

    // Verify question sent to triage
    const questions = db
      .prepare(
        `
        SELECT * FROM agent_messages
        WHERE from_agent = 'developer'
          AND to_agent = 'triage'
          AND message_type = 'question'
      `
      )
      .all();

    assert.strictEqual(questions.length, 1);
    assert.match(questions[0].content, /file path/i);
  });

  test('tracks parent-child task relationships', async () => {
    // Use TypeError (not network/rate-limit/auth) so triage routes to developer, not architect.
    const triageTaskId = await classifyNewError({
      error_message: 'TypeError: Cannot read property "url" of undefined in src/scrape.js',
      stack_trace: 'at parseSite (src/scrape.js:45:12)',
      stage: 'serps',
      frequency: 5,
    });

    // Get developer task
    const [devTask] = findChildTasks(triageTaskId, 'developer');
    assert.ok(devTask);
    assert.strictEqual(devTask.parent_task_id, triageTaskId);
    parseTaskContext(devTask);

    // Stubbed fixBug to avoid real LLM calls in test environment
    await runStubbedFixBug(devTask, {
      file: 'src/scrape.js',
      fixDescription: 'Added null check for url property',
    });

    // Get QA task
    const [qaTask] = findChildTasks(devTask.id, 'qa');
    assert.ok(qaTask);
    assert.strictEqual(qaTask.parent_task_id, devTask.id);

    // Verify task chain: Triage → Developer → QA
    const allTasks = db
      .prepare(
        `
        SELECT id, task_type, assigned_to, parent_task_id
        FROM agent_tasks
        ORDER BY id ASC
      `
      )
      .all();

    assert.strictEqual(allTasks.length, 3);
    assert.strictEqual(allTasks[0].assigned_to, 'triage');
    assert.strictEqual(allTasks[1].assigned_to, 'developer');
    assert.strictEqual(allTasks[2].assigned_to, 'qa');
    assert.strictEqual(allTasks[1].parent_task_id, allTasks[0].id);
    assert.strictEqual(allTasks[2].parent_task_id, allTasks[1].id);
  });
});

describe('Workflow: Logging and Audit Trail', () => {
  test('creates complete audit trail across workflow', async () => {
    // Create and process full workflow
    const triageTaskId = await classifyNewError({
      error_message: 'API rate limit exceeded',
      stack_trace: 'at makeRequest (src/scrape.js:120:8)',
      stage: 'serps',
      frequency: 8,
    });

    // NOTE(review): the comment in 'tracks parent-child task relationships' says
    // rate-limit errors may be routed to architect instead of developer — confirm
    // this error really yields a developer task. The assertion below makes that
    // case fail with a readable message rather than a TypeError.
    const [devTask] = findChildTasks(triageTaskId, 'developer');
    assert.ok(devTask, 'triage should have created a developer task');
    parseTaskContext(devTask);
    await developerAgent.fixBug(devTask);

    // Check logs from both agents
    const logs = db
      .prepare(
        `
        SELECT agent_name, log_level, message
        FROM agent_logs
        ORDER BY id ASC
      `
      )
      .all();

    assert.ok(logs.length > 0);

    // Should have logs from both triage and developer
    const triageLogs = logs.filter(log => log.agent_name === 'triage');
    const devLogs = logs.filter(log => log.agent_name === 'developer');

    assert.ok(triageLogs.length > 0);
    assert.ok(devLogs.length > 0);

    // Check for key log messages (agent_logs.message is nullable, hence `?.`)
    const classifiedLog = triageLogs.find(log => log.message?.includes('classified'));
    const routedLog = triageLogs.find(log => log.message?.includes('routed'));
    const bugFixLog = devLogs.find(log => log.message?.includes('bug fix'));

    assert.ok(classifiedLog);
    assert.ok(routedLog);
    assert.ok(bugFixLog);
  });
});