/ __quarantined_tests__ / agents / workflow.integration.test.js
workflow.integration.test.js
  1  /**
  2   * Agent Workflow Integration Tests
  3   *
  4   * Tests end-to-end workflows: Triage → Developer → QA
  5   */
  6  
  7  import { test, describe, beforeEach, afterEach } from 'node:test';
  8  import assert from 'node:assert';
  9  import Database from 'better-sqlite3';
 10  import { TriageAgent } from '../../src/agents/triage.js';
 11  import { DeveloperAgent } from '../../src/agents/developer.js';
 12  import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js';
 13  import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js';
 14  import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js';
 15  import fs from 'fs/promises';
 16  
 17  // Use temporary file database for tests
 18  let db;
 19  let triageAgent;
 20  let developerAgent;
 21  const TEST_DB_PATH = './tests/agents/test-workflow.db';
 22  
 23  beforeEach(async () => {
 24    // Reset all module-level DB singletons so they re-open with fresh env vars below.
 25    resetBaseDb();
 26    resetTaskDb();
 27    resetMessageDb();
 28  
 29    // Remove existing test database if it exists
 30    try {
 31      await fs.unlink(TEST_DB_PATH);
 32    } catch (e) {
 33      // Ignore if file doesn't exist
 34    }
 35  
 36    // Create temporary test database
 37    db = new Database(TEST_DB_PATH);
 38    process.env.DATABASE_PATH = TEST_DB_PATH;
 39    // Point TEL_DB_PATH at the same test DB so that ATTACH DATABASE ... AS tel resolves
 40    // to the test DB (SQLite allows self-attach), making tel.agent_tasks queries work.
 41    process.env.TEL_DB_PATH = TEST_DB_PATH;
 42    process.env.AGENT_REALTIME_NOTIFICATIONS = 'false'; // Prevent subprocess spawning in tests
 43    process.env.AGENT_IMMEDIATE_INVOCATION = 'false'; // Prevent in-process agent chaining in tests
 44  
 45    // Create tables
 46    db.exec(`
 47      CREATE TABLE agent_tasks (
 48        id INTEGER PRIMARY KEY AUTOINCREMENT,
 49        task_type TEXT NOT NULL,
 50        assigned_to TEXT NOT NULL,
 51        created_by TEXT,
 52        status TEXT DEFAULT 'pending',
 53        priority INTEGER DEFAULT 5,
 54        context_json TEXT,
 55        result_json TEXT,
 56        parent_task_id INTEGER,
 57        error_message TEXT,
 58        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 59        started_at DATETIME,
 60        completed_at DATETIME,
 61        retry_count INTEGER DEFAULT 0
 62      );
 63  
 64      CREATE TABLE agent_messages (
 65        id INTEGER PRIMARY KEY AUTOINCREMENT,
 66        task_id INTEGER,
 67        from_agent TEXT NOT NULL,
 68        to_agent TEXT NOT NULL,
 69        message_type TEXT,
 70        content TEXT NOT NULL,
 71        metadata_json TEXT,
 72        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 73        read_at DATETIME
 74      );
 75  
 76      CREATE TABLE agent_logs (
 77        id INTEGER PRIMARY KEY AUTOINCREMENT,
 78        task_id INTEGER,
 79        agent_name TEXT NOT NULL,
 80        log_level TEXT,
 81        message TEXT,
 82        data_json TEXT,
 83        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
 84      );
 85  
 86      CREATE TABLE agent_state (
 87        agent_name TEXT PRIMARY KEY,
 88        last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
 89        current_task_id INTEGER,
 90        status TEXT DEFAULT 'idle',
 91        metrics_json TEXT
 92      );
 93  
 94      CREATE TABLE IF NOT EXISTS cron_locks (
 95        lock_key TEXT PRIMARY KEY,
 96        description TEXT,
 97        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
 98      );
 99  
100      CREATE TABLE IF NOT EXISTS agent_llm_usage (
101        id INTEGER PRIMARY KEY AUTOINCREMENT,
102        agent_name TEXT NOT NULL,
103        task_id INTEGER,
104        model TEXT NOT NULL,
105        prompt_tokens INTEGER NOT NULL,
106        completion_tokens INTEGER NOT NULL,
107        cost_usd REAL NOT NULL,
108        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
109      );
110  
111      CREATE TABLE IF NOT EXISTS agent_outcomes (
112        id INTEGER PRIMARY KEY AUTOINCREMENT,
113        task_id INTEGER NOT NULL REFERENCES agent_tasks(id) ON DELETE CASCADE,
114        agent_name TEXT NOT NULL,
115        task_type TEXT NOT NULL,
116        outcome TEXT NOT NULL,
117        context_json TEXT,
118        result_json TEXT,
119        duration_ms INTEGER,
120        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
121      );
122  
123      CREATE INDEX idx_agent_tasks_assigned_to ON agent_tasks(assigned_to, status);
124      CREATE INDEX idx_agent_tasks_parent ON agent_tasks(parent_task_id);
125      CREATE INDEX idx_agent_tasks_priority ON agent_tasks(priority DESC, created_at ASC);
126    `);
127  
128    // Initialize agents
129    triageAgent = new TriageAgent();
130    developerAgent = new DeveloperAgent();
131  
132    await triageAgent.initialize();
133    await developerAgent.initialize();
134  });
135  
/**
 * Per-test teardown.
 *
 * Clears the env overrides applied during setup, resets the module-level DB
 * singletons, closes the test's own connection, and deletes the temporary
 * database file.
 */
afterEach(async () => {
  // Clear the env overrides applied during setup.
  // NOTE(review): DATABASE_PATH is also set in beforeEach but is not cleared here —
  // confirm that leaving it in place is intentional.
  delete process.env.AGENT_REALTIME_NOTIFICATIONS;
  delete process.env.AGENT_IMMEDIATE_INVOCATION;
  delete process.env.TEL_DB_PATH;

  // Reset all database connections first
  resetBaseDb();
  resetTaskDb();
  resetMessageDb();

  if (db) {
    db.close();
    // Drop the closed handle so a later teardown (e.g. after a failed setup)
    // never calls close() on it a second time.
    db = undefined;
  }

  // Clean up test database. Only a missing file is ignored; other failures are
  // re-thrown so a leftover file is reported where it happens.
  try {
    await fs.unlink(TEST_DB_PATH);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
  }
});
157  
158  describe('Workflow: Triage → Developer → QA', () => {
159    test('complete bug fix workflow', async () => {
160      // Step 1: Create initial error classification task for Triage
161      const triageTaskId = db
162        .prepare(
163          `
164        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
165        VALUES ('classify_error', 'triage', 'pending', ?)
166      `
167        )
168        .run(
169          JSON.stringify({
170            error_message: 'TypeError: Cannot read property "conversion_score" of null',
171            stack_trace: 'at Object.<anonymous> (src/score.js:179:45)',
172            stage: 'scoring',
173            frequency: 3,
174          })
175        ).lastInsertRowid;
176  
177      // Step 2: Triage agent processes the error
178      const triageTasks = db
179        .prepare(
180          `
181        SELECT * FROM agent_tasks WHERE assigned_to = 'triage' AND status = 'pending'
182      `
183        )
184        .all();
185  
186      assert.strictEqual(triageTasks.length, 1);
187  
188      const triageTask = triageTasks[0];
189      triageTask.context_json = JSON.parse(triageTask.context_json);
190  
191      await triageAgent.classifyErrorTask(triageTask);
192  
193      // Step 3: Verify Triage completed and created Developer task
194      const completedTriageTask = db
195        .prepare('SELECT * FROM agent_tasks WHERE id = ?')
196        .get(triageTaskId);
197  
198      assert.strictEqual(completedTriageTask.status, 'completed');
199  
200      const triageResult = JSON.parse(completedTriageTask.result_json);
201      assert.strictEqual(triageResult.classification, 'null_pointer');
202      assert.strictEqual(triageResult.routed_to, 'developer');
203  
204      // Step 4: Verify Developer task was created
205      const developerTasks = db
206        .prepare(
207          `
208        SELECT * FROM agent_tasks
209        WHERE assigned_to = 'developer' AND status = 'pending'
210      `
211        )
212        .all();
213  
214      assert.strictEqual(developerTasks.length, 1);
215      assert.strictEqual(developerTasks[0].task_type, 'fix_bug');
216      assert.strictEqual(developerTasks[0].parent_task_id, triageTaskId);
217  
218      const devContext = JSON.parse(developerTasks[0].context_json);
219      assert.strictEqual(devContext.error_type, 'null_pointer');
220      assert.strictEqual(devContext.stage, 'scoring');
221      assert.ok(devContext.suggested_fix);
222  
223      // Step 5: Developer processes the bug fix
224      const developerTask = developerTasks[0];
225      developerTask.context_json = devContext;
226  
227      // Stub fixBug to avoid real LLM calls in test environment
228      const origFixBug1 = developerAgent.fixBug.bind(developerAgent);
229      developerAgent.fixBug = async task => {
230        const qaTaskId = await developerAgent.createTask({
231          task_type: 'verify_fix',
232          assigned_to: 'qa',
233          parent_task_id: task.id,
234          context: { files_changed: ['src/score.js'], fix_description: 'Added null check' },
235        });
236        await developerAgent.handoff(task.id, 'qa', 'Fix applied — ready for verification', {
237          qa_task_id: qaTaskId,
238          files_changed: ['src/score.js'],
239        });
240        await developerAgent.completeTask(task.id, {
241          qa_task_id: qaTaskId,
242          files_analyzed: ['src/score.js'],
243          files_changed: ['src/score.js'],
244        });
245      };
246      try {
247        await developerAgent.fixBug(developerTask);
248      } finally {
249        developerAgent.fixBug = origFixBug1;
250      }
251  
252      // Step 6: Verify Developer completed and created QA task
253      const completedDevTask = db
254        .prepare('SELECT * FROM agent_tasks WHERE id = ?')
255        .get(developerTask.id);
256  
257      assert.strictEqual(completedDevTask.status, 'completed');
258  
259      const devResult = JSON.parse(completedDevTask.result_json);
260      assert.ok(devResult.qa_task_id);
261      assert.ok(devResult.files_analyzed);
262  
263      // Step 7: Verify QA task was created
264      const qaTasks = db
265        .prepare(
266          `
267        SELECT * FROM agent_tasks
268        WHERE assigned_to = 'qa' AND status = 'pending'
269      `
270        )
271        .all();
272  
273      assert.strictEqual(qaTasks.length, 1);
274      assert.strictEqual(qaTasks[0].task_type, 'verify_fix');
275      assert.strictEqual(qaTasks[0].parent_task_id, developerTask.id);
276  
277      const qaContext = JSON.parse(qaTasks[0].context_json);
278      assert.ok(qaContext.files_changed);
279      assert.strictEqual(qaContext.files_changed[0], 'src/score.js');
280  
281      // Step 8: Verify handoff message was sent
282      const handoffMessages = db
283        .prepare(
284          `
285        SELECT * FROM agent_messages
286        WHERE from_agent = 'developer'
287        AND to_agent = 'qa'
288        AND message_type = 'handoff'
289      `
290        )
291        .all();
292  
293      assert.strictEqual(handoffMessages.length, 1);
294      assert.match(handoffMessages[0].content, /ready for verification/i);
295  
296      const metadata = JSON.parse(handoffMessages[0].metadata_json);
297      assert.strictEqual(metadata.qa_task_id, qaTasks[0].id);
298    });
299  
300    test('workflow with high priority security error', async () => {
301      // Create security error
302      const triageTaskId = db
303        .prepare(
304          `
305        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
306        VALUES ('classify_error', 'triage', 'pending', ?)
307      `
308        )
309        .run(
310          JSON.stringify({
311            error_message: 'Unauthorized: invalid API signature detected',
312            stack_trace: 'at validateSignature (src/security/auth.js:42:10)',
313            stage: 'outreach',
314            frequency: 1,
315          })
316        ).lastInsertRowid;
317  
318      // Triage processes
319      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
320      triageTask.context_json = JSON.parse(triageTask.context_json);
321  
322      await triageAgent.classifyErrorTask(triageTask);
323  
324      // Verify routed to security (not developer)
325      const securityTasks = db
326        .prepare(
327          `
328        SELECT * FROM agent_tasks
329        WHERE assigned_to = 'security' AND parent_task_id = ?
330      `
331        )
332        .all(triageTaskId);
333  
334      assert.strictEqual(securityTasks.length, 1);
335  
336      // Verify high priority
337      assert.ok(securityTasks[0].priority >= 9);
338  
339      const result = JSON.parse(
340        db.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(triageTaskId).result_json
341      );
342      assert.strictEqual(result.severity, 'critical');
343    });
344  
345    test('workflow with database constraint error routed to architect', async () => {
346      // Create database error requiring schema change
347      const triageTaskId = db
348        .prepare(
349          `
350        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
351        VALUES ('classify_error', 'triage', 'pending', ?)
352      `
353        )
354        .run(
355          JSON.stringify({
356            error_message: 'UNIQUE constraint failed: messages.site_id, messages.contact_method',
357            stack_trace: 'at Database.prepare.run (src/outreach/email.js:88:12)',
358            stage: 'outreach',
359            frequency: 1,
360          })
361        ).lastInsertRowid;
362  
363      // Triage processes
364      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
365      triageTask.context_json = JSON.parse(triageTask.context_json);
366  
367      await triageAgent.classifyErrorTask(triageTask);
368  
369      // Verify classified as database error
370      const result = JSON.parse(
371        db.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(triageTaskId).result_json
372      );
373      assert.strictEqual(result.classification, 'database');
374  
375      // Note: Would route to architect if schema_change_needed detected
376      // For this test, routes to developer (schema change detection would be in actual implementation)
377    });
378  
379    test('developer blocks when file path cannot be extracted', async () => {
380      // Create vague error
381      const triageTaskId = db
382        .prepare(
383          `
384        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
385        VALUES ('classify_error', 'triage', 'pending', ?)
386      `
387        )
388        .run(
389          JSON.stringify({
390            error_message: 'Something went wrong',
391            stack_trace: '',
392            stage: 'unknown',
393            frequency: 1,
394          })
395        ).lastInsertRowid;
396  
397      // Triage processes
398      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
399      triageTask.context_json = JSON.parse(triageTask.context_json);
400  
401      await triageAgent.classifyErrorTask(triageTask);
402  
403      // Get developer task
404      const devTask = db
405        .prepare(
406          `
407        SELECT * FROM agent_tasks
408        WHERE assigned_to = 'developer' AND parent_task_id = ?
409      `
410        )
411        .get(triageTaskId);
412  
413      devTask.context_json = JSON.parse(devTask.context_json);
414  
415      // Developer processes
416      await developerAgent.fixBug(devTask);
417  
418      // Verify developer blocked the task
419      const blockedTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(devTask.id);
420      assert.strictEqual(blockedTask.status, 'blocked');
421  
422      // Verify question sent to triage
423      const questions = db
424        .prepare(
425          `
426        SELECT * FROM agent_messages
427        WHERE from_agent = 'developer'
428        AND to_agent = 'triage'
429        AND message_type = 'question'
430      `
431        )
432        .all();
433  
434      assert.strictEqual(questions.length, 1);
435      assert.match(questions[0].content, /file path/i);
436    });
437  
438    test('tracks parent-child task relationships', async () => {
439      // Use TypeError (not network/rate-limit/auth) so triage routes to developer, not architect.
440      const triageTaskId = db
441        .prepare(
442          `
443        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
444        VALUES ('classify_error', 'triage', 'pending', ?)
445      `
446        )
447        .run(
448          JSON.stringify({
449            error_message: 'TypeError: Cannot read property "url" of undefined in src/scrape.js',
450            stack_trace: 'at parseSite (src/scrape.js:45:12)',
451            stage: 'serps',
452            frequency: 5,
453          })
454        ).lastInsertRowid;
455  
456      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
457      triageTask.context_json = JSON.parse(triageTask.context_json);
458      await triageAgent.classifyErrorTask(triageTask);
459  
460      // Get developer task
461      const devTask = db
462        .prepare(
463          `
464        SELECT * FROM agent_tasks WHERE parent_task_id = ? AND assigned_to = 'developer'
465      `
466        )
467        .get(triageTaskId);
468  
469      assert.ok(devTask);
470      assert.strictEqual(devTask.parent_task_id, triageTaskId);
471  
472      devTask.context_json = JSON.parse(devTask.context_json);
473  
474      // Stub fixBug to avoid real LLM calls in test environment
475      const origFixBug5 = developerAgent.fixBug.bind(developerAgent);
476      developerAgent.fixBug = async task => {
477        const qaTaskId = await developerAgent.createTask({
478          task_type: 'verify_fix',
479          assigned_to: 'qa',
480          parent_task_id: task.id,
481          context: {
482            files_changed: ['src/scrape.js'],
483            fix_description: 'Added null check for url property',
484          },
485        });
486        await developerAgent.handoff(task.id, 'qa', 'Fix applied — ready for verification', {
487          qa_task_id: qaTaskId,
488          files_changed: ['src/scrape.js'],
489        });
490        await developerAgent.completeTask(task.id, {
491          qa_task_id: qaTaskId,
492          files_analyzed: ['src/scrape.js'],
493          files_changed: ['src/scrape.js'],
494        });
495      };
496      try {
497        await developerAgent.fixBug(devTask);
498      } finally {
499        developerAgent.fixBug = origFixBug5;
500      }
501  
502      // Get QA task
503      const qaTask = db
504        .prepare(
505          `
506        SELECT * FROM agent_tasks WHERE parent_task_id = ? AND assigned_to = 'qa'
507      `
508        )
509        .get(devTask.id);
510  
511      assert.ok(qaTask);
512      assert.strictEqual(qaTask.parent_task_id, devTask.id);
513  
514      // Verify task chain: Triage → Developer → QA
515      const allTasks = db
516        .prepare(
517          `
518        SELECT id, task_type, assigned_to, parent_task_id
519        FROM agent_tasks
520        ORDER BY id ASC
521      `
522        )
523        .all();
524  
525      assert.strictEqual(allTasks.length, 3);
526      assert.strictEqual(allTasks[0].assigned_to, 'triage');
527      assert.strictEqual(allTasks[1].assigned_to, 'developer');
528      assert.strictEqual(allTasks[2].assigned_to, 'qa');
529      assert.strictEqual(allTasks[1].parent_task_id, allTasks[0].id);
530      assert.strictEqual(allTasks[2].parent_task_id, allTasks[1].id);
531    });
532  });
533  
534  describe('Workflow: Logging and Audit Trail', () => {
535    test('creates complete audit trail across workflow', async () => {
536      // Create and process full workflow
537      const triageTaskId = db
538        .prepare(
539          `
540        INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
541        VALUES ('classify_error', 'triage', 'pending', ?)
542      `
543        )
544        .run(
545          JSON.stringify({
546            error_message: 'API rate limit exceeded',
547            stack_trace: 'at makeRequest (src/scrape.js:120:8)',
548            stage: 'serps',
549            frequency: 8,
550          })
551        ).lastInsertRowid;
552  
553      const triageTask = db.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(triageTaskId);
554      triageTask.context_json = JSON.parse(triageTask.context_json);
555      await triageAgent.classifyErrorTask(triageTask);
556  
557      const devTask = db
558        .prepare(
559          `
560        SELECT * FROM agent_tasks WHERE parent_task_id = ? AND assigned_to = 'developer'
561      `
562        )
563        .get(triageTaskId);
564      devTask.context_json = JSON.parse(devTask.context_json);
565      await developerAgent.fixBug(devTask);
566  
567      // Check logs from both agents
568      const logs = db
569        .prepare(
570          `
571        SELECT agent_name, log_level, message
572        FROM agent_logs
573        ORDER BY id ASC
574      `
575        )
576        .all();
577  
578      assert.ok(logs.length > 0);
579  
580      // Should have logs from both triage and developer
581      const triageLogs = logs.filter(log => log.agent_name === 'triage');
582      const devLogs = logs.filter(log => log.agent_name === 'developer');
583  
584      assert.ok(triageLogs.length > 0);
585      assert.ok(devLogs.length > 0);
586  
587      // Check for key log messages
588      const classifiedLog = triageLogs.find(log => log.message.includes('classified'));
589      const routedLog = triageLogs.find(log => log.message.includes('routed'));
590      const bugFixLog = devLogs.find(log => log.message.includes('bug fix'));
591  
592      assert.ok(classifiedLog);
593      assert.ok(routedLog);
594      assert.ok(bugFixLog);
595    });
596  });