agents-e2e.test.js
/**
 * Agent System E2E Tests
 *
 * Tests the complete agent workflow using the actual schema and APIs:
 * - Task creation with correct schema (assigned_to, context_json)
 * - Workflow chains (Triage → Developer → QA)
 * - Inter-agent messaging
 * - Task status transitions
 * - Parent-child task relationships
 */

import { test } from 'node:test';
import assert from 'node:assert/strict';
import Database from 'better-sqlite3';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { existsSync, unlinkSync } from 'fs';

// Import agent system components
import {
  createAgentTask,
  getAgentTasks,
  updateTaskStatus,
  completeTask,
  failTask,
  resetDbConnection,
} from '../../src/agents/utils/task-manager.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const projectRoot = join(__dirname, '../..');

// Use a test database
const TEST_DB_PATH = join(projectRoot, 'db/test-agents-e2e.db');

// Suffixes of the files SQLite may create next to the main database file.
// The empty suffix is the database file itself.
const TEST_DB_FILE_SUFFIXES = ['', '-wal', '-shm', '-journal'];

// Milliseconds to wait before deleting the database file during teardown.
const CLEANUP_DELAY_MS = 100;

/**
 * Delete the test database file and any SQLite sidecar files.
 *
 * Best-effort: a file that cannot be removed (for example because another
 * process still has it locked) is skipped so cleanup never fails a test.
 */
function removeTestDatabaseFiles() {
  for (const suffix of TEST_DB_FILE_SUFFIXES) {
    const filePath = `${TEST_DB_PATH}${suffix}`;
    try {
      if (existsSync(filePath)) {
        unlinkSync(filePath);
      }
    } catch {
      // Ignore cleanup errors - may be locked by spawned process
    }
  }
}

/**
 * Setup test database with correct schema from migration 047
 *
 * Any database file left behind by an earlier (possibly crashed) run is
 * removed first, so every suite starts from empty tables instead of
 * inheriting stale rows through `CREATE TABLE IF NOT EXISTS`.
 *
 * @returns {Database} Open better-sqlite3 connection to the test database.
 *   The caller is responsible for closing it.
 */
function setupTestDatabase() {
  removeTestDatabaseFiles();

  const db = new Database(TEST_DB_PATH);

  // Create agent_tasks table (from migration 047)
  db.exec(`
    CREATE TABLE IF NOT EXISTS agent_tasks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      task_type TEXT NOT NULL,
      assigned_to TEXT NOT NULL,
      created_by TEXT,
      priority INTEGER DEFAULT 5,
      status TEXT DEFAULT 'pending',
      context_json TEXT,
      parent_task_id INTEGER,
      result_json TEXT,
      error_message TEXT,
      retry_count INTEGER DEFAULT 0,
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
      started_at TIMESTAMP,
      completed_at TIMESTAMP,
      FOREIGN KEY (parent_task_id) REFERENCES agent_tasks(id)
    );

    CREATE INDEX IF NOT EXISTS idx_agent_tasks_assigned_status
      ON agent_tasks(assigned_to, status, priority DESC, created_at ASC);

    CREATE INDEX IF NOT EXISTS idx_agent_tasks_parent
      ON agent_tasks(parent_task_id);

    CREATE TABLE IF NOT EXISTS agent_logs (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      task_id INTEGER,
      agent_name TEXT NOT NULL,
      log_level TEXT NOT NULL,
      message TEXT NOT NULL,
      context_json TEXT,
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
      FOREIGN KEY (task_id) REFERENCES agent_tasks(id)
    );

    CREATE INDEX IF NOT EXISTS idx_agent_logs_task ON agent_logs(task_id);
    CREATE INDEX IF NOT EXISTS idx_agent_logs_created ON agent_logs(created_at DESC);

    CREATE TABLE IF NOT EXISTS agent_state (
      agent_name TEXT PRIMARY KEY,
      status TEXT NOT NULL,
      last_active TIMESTAMP,
      last_poll_at TIMESTAMP,
      last_task_id INTEGER,
      total_tasks INTEGER DEFAULT 0,
      success_count INTEGER DEFAULT 0,
      failure_count INTEGER DEFAULT 0,
      updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );

    CREATE TABLE IF NOT EXISTS agent_messages (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      from_agent TEXT NOT NULL,
      to_agent TEXT NOT NULL,
      message_type TEXT NOT NULL,
      payload_json TEXT NOT NULL,
      read INTEGER DEFAULT 0,
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX IF NOT EXISTS idx_agent_messages_to_unread
      ON agent_messages(to_agent, read, created_at);

    CREATE TABLE IF NOT EXISTS cron_locks (
      lock_key TEXT PRIMARY KEY,
      acquired_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
      updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
      description TEXT
    );
  `);

  return db;
}

/**
 * Point the task manager at the test database via DATABASE_PATH.
 *
 * @returns {() => void} Function that restores the previous environment.
 *   It deletes DATABASE_PATH when the variable was originally unset, because
 *   assigning `undefined` to a `process.env` key stores the string
 *   "undefined". It also calls `resetDbConnection()` again so the task
 *   manager is not left holding the test database while it is deleted.
 */
function useTestDatabaseForTaskManager() {
  const originalDbPath = process.env.DATABASE_PATH;
  resetDbConnection(); // Reset singleton connection
  process.env.DATABASE_PATH = TEST_DB_PATH;

  return function restoreTaskManagerDatabase() {
    if (originalDbPath === undefined) {
      delete process.env.DATABASE_PATH;
    } else {
      process.env.DATABASE_PATH = originalDbPath;
    }
    // NOTE(review): presumably drops the cached connection so the next use
    // re-reads DATABASE_PATH - confirm against task-manager.js.
    resetDbConnection();
  };
}

/**
 * Close the test connection and remove the database files.
 *
 * @param {Database} db - Connection returned by `setupTestDatabase()`.
 * @returns {Promise<void>} Resolves once cleanup has been attempted.
 */
async function teardownTestDatabase(db) {
  db.close();
  // Small delay to let any spawned processes finish
  await new Promise(resolve => setTimeout(resolve, CLEANUP_DELAY_MS));
  removeTestDatabaseFiles();
}

test('Agent System E2E - Task Creation with Correct Schema', async t => {
  const db = setupTestDatabase();

  // Override DATABASE_PATH for task manager
  const restoreTaskManagerDatabase = useTestDatabaseForTaskManager();

  try {
    await t.test('Create task with correct schema', async () => {
      const taskId = await createAgentTask({
        task_type: 'fix_bug',
        assigned_to: 'developer',
        created_by: 'triage',
        priority: 7,
        context: {
          error_type: 'null_pointer',
          file: 'src/scoring.js',
          line: 42,
        },
      });

      assert.ok(taskId, 'Task ID should be returned');
      assert.strictEqual(typeof taskId, 'number', 'Task ID should be a number');

      // Verify in database
      const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
      assert.ok(task, 'Task should exist in database');
      assert.strictEqual(task.task_type, 'fix_bug', 'task_type should match');
      assert.strictEqual(task.assigned_to, 'developer', 'assigned_to should match');
      assert.strictEqual(task.created_by, 'triage', 'created_by should match');
      assert.strictEqual(task.priority, 7, 'priority should match');
      assert.strictEqual(task.status, 'pending', 'status should be pending');

      const context = JSON.parse(task.context_json);
      assert.strictEqual(context.error_type, 'null_pointer', 'context should be preserved');
      assert.strictEqual(context.file, 'src/scoring.js', 'context.file should match');
    });

    await t.test('Validate required fields', async () => {
      await assert.rejects(
        () => createAgentTask({ task_type: 'test' }), // Missing assigned_to
        /assigned_to are required/,
        'Should reject when assigned_to is missing'
      );

      await assert.rejects(
        () => createAgentTask({ assigned_to: 'developer' }), // Missing task_type
        /task_type and assigned_to are required/,
        'Should reject when task_type is missing'
      );
    });

    await t.test('Validate agent names', async () => {
      await assert.rejects(
        () =>
          createAgentTask({
            task_type: 'test',
            assigned_to: 'Monitor', // Should be lowercase 'monitor'
          }),
        /Invalid assigned_to/,
        'Should reject for invalid agent name (uppercase)'
      );

      await assert.rejects(
        () =>
          createAgentTask({
            task_type: 'test',
            assigned_to: 'unknown_agent',
          }),
        /Invalid assigned_to/,
        'Should reject for unknown agent name'
      );
    });

    await t.test('Validate priority range', async () => {
      await assert.rejects(
        () =>
          createAgentTask({
            task_type: 'test',
            assigned_to: 'developer',
            priority: 0, // Too low
          }),
        /priority must be between 1 and 10/,
        'Should reject for priority < 1'
      );

      await assert.rejects(
        () =>
          createAgentTask({
            task_type: 'test',
            assigned_to: 'developer',
            priority: 11, // Too high
          }),
        /priority must be between 1 and 10/,
        'Should reject for priority > 10'
      );
    });
  } finally {
    restoreTaskManagerDatabase();
    await teardownTestDatabase(db);
  }
});

test('Agent System E2E - Task Retrieval and Polling', async t => {
  const db = setupTestDatabase();
  const restoreTaskManagerDatabase = useTestDatabaseForTaskManager();

  try {
    await t.test('Get tasks by agent and status', async () => {
      // Create multiple tasks
      const task1 = await createAgentTask({
        task_type: 'fix_bug',
        assigned_to: 'developer',
        priority: 5,
        context: { bug: 1 },
      });

      const task2 = await createAgentTask({
        task_type: 'write_test',
        assigned_to: 'qa',
        priority: 3,
        context: { test: 1 },
      });

      const task3 = await createAgentTask({
        task_type: 'refactor',
        assigned_to: 'developer',
        priority: 8, // Higher priority
        context: { refactor: 1 },
      });

      // Get developer tasks (should be ordered by priority DESC)
      const devTasks = getAgentTasks('developer', 'pending', 10);
      assert.strictEqual(devTasks.length, 2, 'Should have 2 developer tasks');
      assert.strictEqual(devTasks[0].id, task3, 'Higher priority task should be first');
      assert.strictEqual(devTasks[1].id, task1, 'Lower priority task should be second');

      // Get QA tasks
      const qaTasks = getAgentTasks('qa', 'pending', 10);
      assert.strictEqual(qaTasks.length, 1, 'Should have 1 QA task');
      assert.strictEqual(qaTasks[0].id, task2, 'QA task ID should match');
    });

    await t.test('Respect task limit', async () => {
      // Create 5 tasks
      for (let i = 0; i < 5; i++) {
        await createAgentTask({
          task_type: `task_${i}`,
          assigned_to: 'developer',
          context: { index: i },
        });
      }

      // Get only 3
      const tasks = getAgentTasks('developer', 'pending', 3);
      assert.strictEqual(tasks.length, 3, 'Should respect limit');
    });
  } finally {
    restoreTaskManagerDatabase();
    await teardownTestDatabase(db);
  }
});

test('Agent System E2E - Task Status Transitions', async t => {
  const db = setupTestDatabase();
  const restoreTaskManagerDatabase = useTestDatabaseForTaskManager();

  try {
    await t.test('Pending → Running → Completed', async () => {
      const taskId = await createAgentTask({
        task_type: 'test_task',
        assigned_to: 'developer',
        context: { test: true },
      });

      // Initial state
      let task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
      assert.strictEqual(task.status, 'pending', 'Initial status should be pending');
      assert.strictEqual(task.started_at, null, 'started_at should be null');

      // Start task
      updateTaskStatus(taskId, 'running');
      task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
      assert.strictEqual(task.status, 'running', 'Status should be running');
      assert.ok(task.started_at, 'started_at should be set');

      // Complete task
      completeTask(taskId, { success: true, changes: 5 });
      task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
      assert.strictEqual(task.status, 'completed', 'Status should be completed');
      assert.ok(task.completed_at, 'completed_at should be set');

      const result = JSON.parse(task.result_json);
      assert.strictEqual(result.success, true, 'Result should be preserved');
      assert.strictEqual(result.changes, 5, 'Result.changes should match');
    });

    await t.test('Pending → Running → Failed', async () => {
      const taskId = await createAgentTask({
        task_type: 'failing_task',
        assigned_to: 'developer',
        context: { test: true },
      });

      updateTaskStatus(taskId, 'running');
      failTask(taskId, 'Test error message');

      const task = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(taskId);
      assert.strictEqual(task.status, 'failed', 'Status should be failed');
      assert.ok(task.error_message.includes('Test error message'), 'Error message should be set');
      assert.ok(task.completed_at, 'completed_at should be set even on failure');
    });
  } finally {
    restoreTaskManagerDatabase();
    await teardownTestDatabase(db);
  }
});

test('Agent System E2E - Workflow Chains (Parent-Child Tasks)', async t => {
  const db = setupTestDatabase();
  const restoreTaskManagerDatabase = useTestDatabaseForTaskManager();

  try {
    await t.test('Create workflow: Triage → Developer → QA', async () => {
      // Step 1: Triage creates initial task
      const triageTaskId = await createAgentTask({
        task_type: 'classify_error',
        assigned_to: 'triage',
        created_by: 'monitor',
        priority: 6,
        context: {
          error: 'NullPointerException',
          log_file: 'pipeline.log',
        },
      });

      // Step 2: Triage completes and delegates to Developer
      completeTask(triageTaskId, {
        classification: 'bug',
        severity: 'high',
        affected_file: 'src/scoring.js',
      });

      const devTaskId = await createAgentTask({
        task_type: 'fix_bug',
        assigned_to: 'developer',
        created_by: 'triage',
        priority: 7,
        parent_task_id: triageTaskId, // Link to parent
        context: {
          error_type: 'NullPointerException',
          file: 'src/scoring.js',
          severity: 'high',
        },
      });

      // Step 3: Developer completes and delegates to QA
      completeTask(devTaskId, {
        fixed: true,
        commit: 'abc123',
        changes: ['Added null check', 'Updated tests'],
      });

      const qaTaskId = await createAgentTask({
        task_type: 'verify_fix',
        assigned_to: 'qa',
        created_by: 'developer',
        priority: 8,
        parent_task_id: devTaskId, // Link to parent
        context: {
          commit: 'abc123',
          test_file: 'tests/scoring.test.js',
        },
      });

      // Verify workflow chain
      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
      const devTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTaskId);
      const qaTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(qaTaskId);

      assert.strictEqual(triageTask.status, 'completed', 'Triage task should be completed');
      assert.strictEqual(devTask.status, 'completed', 'Dev task should be completed');
      assert.strictEqual(devTask.parent_task_id, triageTaskId, 'Dev task parent should be triage');
      assert.strictEqual(qaTask.parent_task_id, devTaskId, 'QA task parent should be dev');
      assert.strictEqual(qaTask.status, 'pending', 'QA task should be pending');

      // Verify context flow
      const qaContext = JSON.parse(qaTask.context_json);
      assert.strictEqual(qaContext.commit, 'abc123', 'Commit should flow to QA');
    });
  } finally {
    restoreTaskManagerDatabase();
    await teardownTestDatabase(db);
  }
});

test('Agent System E2E - Inter-Agent Messaging', async t => {
  // This suite talks to SQLite directly and never goes through the task
  // manager, so DATABASE_PATH is left untouched.
  const db = setupTestDatabase();

  try {
    await t.test('Send and read messages', () => {
      // Triage sends message to Developer
      const messageId = db
        .prepare(
          `INSERT INTO agent_messages (from_agent, to_agent, message_type, payload_json)
           VALUES (?, ?, ?, ?)`
        )
        .run(
          'triage',
          'developer',
          'urgent_bug',
          JSON.stringify({ severity: 'critical', file: 'auth.js' })
        ).lastInsertRowid;

      // Developer reads unread messages
      const messages = db
        .prepare(
          `SELECT * FROM agent_messages
           WHERE to_agent = ? AND read = 0
           ORDER BY created_at ASC`
        )
        .all('developer');

      assert.strictEqual(messages.length, 1, 'Should have 1 unread message');
      assert.strictEqual(messages[0].from_agent, 'triage', 'from_agent should be triage');
      assert.strictEqual(messages[0].message_type, 'urgent_bug', 'message_type should match');

      const payload = JSON.parse(messages[0].payload_json);
      assert.strictEqual(payload.severity, 'critical', 'Payload should be preserved');

      // Mark as read
      db.prepare('UPDATE agent_messages SET read = 1 WHERE id = ?').run(messageId);

      const unreadCount = db
        .prepare('SELECT COUNT(*) as count FROM agent_messages WHERE to_agent = ? AND read = 0')
        .get('developer').count;
      assert.strictEqual(unreadCount, 0, 'Should have 0 unread messages');
    });
  } finally {
    await teardownTestDatabase(db);
  }
});