/ __quarantined_tests__ / agents / monitor-extended.test.js
monitor-extended.test.js
  1  /**
  2   * Monitor Agent Extended Unit Tests
  3   *
  4   * Extends coverage for MonitorAgent focusing on:
  5   * - checkProcessCompliance: invalid status, stuck sites
  6   * - detectAnomaly: returns results with anomalies array
  7   * - checkPipelineHealth: stage stalled, slow, no metrics, error rates, skip stages
  8   * - checkSLOCompliance: all SLOs compliant branch
  9   * - checkLoops: log-domain loop detection via pipeline log file
 10   * - groupByMessage and withinOneHour edge cases
 11   * - checkAgentHealth: elevated rate warning, overloaded agents
 12   * - processTask: unknown/delegated task types
 13   * - ensureRecurringTasks: timing behavior
 14   */
 15  
 16  import { test, describe, before, after, beforeEach, afterEach } from 'node:test';
 17  import assert from 'node:assert/strict';
 18  import Database from 'better-sqlite3';
 19  import { existsSync, unlinkSync, writeFileSync, mkdirSync, rmSync, readFileSync } from 'fs';
 20  import { join, dirname } from 'path';
 21  import { fileURLToPath } from 'url';
 22  
 23  const __filename = fileURLToPath(import.meta.url);
 24  const __dirname = dirname(__filename);
 25  const projectRoot = join(__dirname, '../..');
 26  
 27  const TEST_DB_PATH = join('/tmp', `test-monitor-ext-${Date.now()}.db`);
 28  const TEST_LOG_DIR = join(projectRoot, 'tests/fixtures/monitor-ext-logs');
 29  
 30  process.env.DATABASE_PATH = TEST_DB_PATH;
 31  process.env.AGENT_IMMEDIATE_INVOCATION = 'false';
 32  
 33  for (const ext of ['', '-wal', '-shm']) {
 34    try {
 35      unlinkSync(TEST_DB_PATH + ext);
 36    } catch {
 37      /* ignore */
 38    }
 39  }
// Single better-sqlite3 connection shared by every test in this file.
// The exec() below creates the tables in the main schema (IF NOT EXISTS) and
// seeds one 'idle' agent_state row for each of: monitor, triage, developer,
// qa, security, architect.
// NOTE(review): agent_outcomes here has fewer columns than tel.agent_outcomes
// created further down (no task_type, result_json, duration_ms) — confirm the
// difference is intentional.
const sharedDb = new Database(TEST_DB_PATH);
sharedDb.exec(`
  CREATE TABLE IF NOT EXISTS agent_tasks (id INTEGER PRIMARY KEY AUTOINCREMENT, task_type TEXT NOT NULL, assigned_to TEXT NOT NULL, created_by TEXT, status TEXT DEFAULT 'pending', priority INTEGER DEFAULT 5, context_json TEXT, result_json TEXT, parent_task_id INTEGER, error_message TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, started_at DATETIME, completed_at DATETIME, retry_count INTEGER DEFAULT 0);
  CREATE TABLE IF NOT EXISTS agent_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, log_level TEXT, message TEXT NOT NULL, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS agent_state (agent_name TEXT PRIMARY KEY, last_active DATETIME DEFAULT CURRENT_TIMESTAMP, current_task_id INTEGER, status TEXT DEFAULT 'idle', metrics_json TEXT);
  CREATE TABLE IF NOT EXISTS agent_messages (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, from_agent TEXT NOT NULL, to_agent TEXT NOT NULL, message_type TEXT, content TEXT NOT NULL, metadata_json TEXT, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, read_at DATETIME);
  CREATE TABLE IF NOT EXISTS human_review_queue (id INTEGER PRIMARY KEY AUTOINCREMENT, file TEXT NOT NULL, reason TEXT NOT NULL, type TEXT NOT NULL, priority TEXT NOT NULL, metadata TEXT, status TEXT DEFAULT 'pending', created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS settings (key TEXT PRIMARY KEY, value TEXT NOT NULL, description TEXT, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS sites (id INTEGER PRIMARY KEY AUTOINCREMENT, domain TEXT, landing_page_url TEXT, status TEXT DEFAULT 'found', error_message TEXT, score REAL, grade TEXT, recapture_count INTEGER DEFAULT 0, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS pipeline_metrics (id INTEGER PRIMARY KEY AUTOINCREMENT, stage_name TEXT NOT NULL, sites_processed INTEGER DEFAULT 0, sites_succeeded INTEGER DEFAULT 0, sites_failed INTEGER DEFAULT 0, duration_ms INTEGER NOT NULL, started_at DATETIME NOT NULL, finished_at DATETIME NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS agent_outcomes (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, outcome TEXT NOT NULL, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS structured_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, agent_name TEXT, task_id INTEGER, level TEXT, message TEXT, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS site_status (id INTEGER PRIMARY KEY AUTOINCREMENT, site_id INTEGER, status TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS cron_locks (lock_key TEXT PRIMARY KEY, acquired_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, description TEXT);
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor', 'idle');
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('triage', 'idle');
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('developer', 'idle');
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('qa', 'idle');
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('security', 'idle');
  INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('architect', 'idle');
`);
 61  
// ATTACH two in-memory databases as `ops` and `tel` so schema-qualified queries
// such as ops.settings or tel.agent_tasks resolve on this connection.
// `ops` gets only a settings table; `tel` gets the agent_* tables,
// pipeline_metrics and structured_logs, plus the same six 'idle' agent_state rows.
// These attached databases are separate stores from the main file.
// NOTE(review): the tests and clearTables() in this file use unqualified table
// names, which SQLite resolves to the main schema first, so rows written to
// tel.* are never seeded or cleared here — confirm that is intended.
sharedDb.exec(`
  ATTACH ':memory:' AS ops;
  ATTACH ':memory:' AS tel;
  CREATE TABLE IF NOT EXISTS ops.settings (key TEXT PRIMARY KEY, value TEXT NOT NULL, description TEXT, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS tel.agent_tasks (id INTEGER PRIMARY KEY AUTOINCREMENT, task_type TEXT NOT NULL, assigned_to TEXT NOT NULL, created_by TEXT, status TEXT DEFAULT 'pending', priority INTEGER DEFAULT 5, context_json TEXT, result_json TEXT, parent_task_id INTEGER, error_message TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, started_at DATETIME, completed_at DATETIME, retry_count INTEGER DEFAULT 0);
  CREATE TABLE IF NOT EXISTS tel.agent_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, log_level TEXT, message TEXT NOT NULL, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS tel.agent_state (agent_name TEXT PRIMARY KEY, last_active DATETIME DEFAULT CURRENT_TIMESTAMP, current_task_id INTEGER, status TEXT DEFAULT 'idle', metrics_json TEXT);
  CREATE TABLE IF NOT EXISTS tel.agent_messages (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, from_agent TEXT NOT NULL, to_agent TEXT NOT NULL, message_type TEXT, content TEXT NOT NULL, metadata_json TEXT, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, read_at DATETIME);
  CREATE TABLE IF NOT EXISTS tel.agent_outcomes (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, task_type TEXT NOT NULL, outcome TEXT NOT NULL, context_json TEXT, result_json TEXT, duration_ms INTEGER, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS tel.pipeline_metrics (id INTEGER PRIMARY KEY AUTOINCREMENT, stage_name TEXT NOT NULL, sites_processed INTEGER DEFAULT 0, sites_succeeded INTEGER DEFAULT 0, sites_failed INTEGER DEFAULT 0, duration_ms INTEGER NOT NULL, started_at DATETIME NOT NULL, finished_at DATETIME NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  CREATE TABLE IF NOT EXISTS tel.structured_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, agent_name TEXT, task_id INTEGER, level TEXT, message TEXT, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('monitor', 'idle');
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('triage', 'idle');
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('developer', 'idle');
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('qa', 'idle');
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('security', 'idle');
  INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('architect', 'idle');
`);
 81  
 82  import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js';
 83  import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js';
 84  import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js';
 85  import { resetDb as resetSLODb } from '../../src/agents/utils/slo-tracker.js';
 86  import { MonitorAgent, resetDb as resetMonitorDb } from '../../src/agents/monitor.js';
 87  
 88  let agent;
 89  
 90  before(async () => {
 91    mkdirSync(TEST_LOG_DIR, { recursive: true });
 92    process.env.LOGS_DIR = `${TEST_LOG_DIR}/`;
 93    resetMonitorDb(sharedDb);
 94    agent = new MonitorAgent();
 95    await agent.initialize();
 96  });
 97  
 98  after(() => {
 99    delete process.env.LOGS_DIR;
100    resetMonitorDb(null);
101    resetBaseDb();
102    resetTaskDb();
103    resetMessageDb();
104    resetSLODb();
105    try {
106      sharedDb.close();
107    } catch {
108      /* ignore */
109    }
110    for (const ext of ['', '-wal', '-shm']) {
111      try {
112        unlinkSync(TEST_DB_PATH + ext);
113      } catch {
114        /* ignore */
115      }
116    }
117    try {
118      rmSync(TEST_LOG_DIR, { recursive: true, force: true });
119    } catch {
120      /* ignore */
121    }
122  });
123  
/**
 * Resets per-test state on the shared connection: empties the task, log, site,
 * metrics, review-queue and settings tables, then puts the non-monitor agents
 * back to 'idle' with no metrics so health checks start clean.
 */
function clearTables() {
  const wipeRowsSql = `
    DELETE FROM agent_tasks;
    DELETE FROM agent_logs;
    DELETE FROM sites;
    DELETE FROM pipeline_metrics;
    DELETE FROM human_review_queue;
    DELETE FROM settings;
  `;
  const idleAgentsSql = `
    UPDATE agent_state SET status = 'idle', metrics_json = NULL WHERE agent_name IN ('developer', 'qa', 'triage', 'security', 'architect');
  `;

  // Order matters only for readability: wipe rows first, then reset agent states.
  for (const sql of [wipeRowsSql, idleAgentsSql]) {
    sharedDb.exec(sql);
  }
}
138  
/**
 * Inserts a task assigned to 'monitor' (priority 5, status 'running') and
 * returns the freshly stored row.
 *
 * @param {string} taskType - Value for the task_type column.
 * @param {object} [context={}] - Serialized with JSON.stringify into context_json.
 * @returns {object} The agent_tasks row selected back by its new id.
 */
function getTask(taskType, context = {}) {
  const insertRunningTask = sharedDb.prepare(
    `INSERT INTO agent_tasks (task_type, assigned_to, priority, context_json, status)
     VALUES (?, 'monitor', 5, ?, 'running')`
  );
  const { lastInsertRowid } = insertRunningTask.run(taskType, JSON.stringify(context));

  const selectById = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?');
  return selectById.get(lastInsertRowid);
}
148  
/**
 * check_process_compliance: verifies the counters reported for sites with an
 * invalid status and for sites whose updated_at is old, and that an invalid
 * status produces a classify_error task for triage.
 */
describe('MonitorAgent - checkProcessCompliance', () => {
  // Seeds one site whose updated_at is `age` in the past (an SQLite datetime modifier).
  const seedSite = (domain, status, age) =>
    sharedDb
      .prepare(
        `INSERT INTO sites (domain, status, updated_at) VALUES (?, ?, datetime('now', '${age}'))`
      )
      .run(domain, status);

  // Runs a fresh compliance task through the agent and returns its parsed result_json.
  const runComplianceCheck = async () => {
    const complianceTask = getTask('check_process_compliance');
    await agent.processTask(complianceTask);
    const stored = sharedDb
      .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
      .get(complianceTask.id);
    return JSON.parse(stored.result_json);
  };

  test('completes successfully with empty DB', async () => {
    clearTables();
    const complianceTask = getTask('check_process_compliance');
    await agent.processTask(complianceTask);

    const row = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(complianceTask.id);
    assert.equal(row.status, 'completed');

    const summary = JSON.parse(row.result_json);
    assert.ok(typeof summary.invalid_statuses === 'number');
    assert.ok(typeof summary.stuck_sites === 'number');
  });

  test('detects sites with invalid status updated recently', async () => {
    clearTables();
    seedSite('badstatus.com', 'invalid_status_xyz', '-30 minutes');

    const summary = await runComplianceCheck();
    assert.ok(summary.invalid_statuses >= 1, 'Should detect invalid status');
  });

  test('detects stuck sites (no progress >24 hours)', async () => {
    clearTables();
    seedSite('stuck.com', 'found', '-48 hours');

    const summary = await runComplianceCheck();
    assert.ok(summary.stuck_sites >= 1, 'Should detect stuck sites');
  });

  test('does not count excluded statuses as stuck', async () => {
    clearTables();
    // Each of these statuses is expected to be exempt from the stuck check.
    ['outreach_sent', 'ignored', 'high_score', 'failing'].forEach(status => {
      seedSite(`${status}.com`, status, '-48 hours');
    });

    const summary = await runComplianceCheck();
    assert.equal(summary.stuck_sites, 0, 'Should not count excluded statuses');
  });

  test('creates triage task for invalid status sites', async () => {
    clearTables();
    seedSite('broken.com', 'totally_invalid_status', '-10 minutes');

    const complianceTask = getTask('check_process_compliance');
    await agent.processTask(complianceTask);

    const findTriageTasks = sharedDb.prepare(
      `SELECT * FROM agent_tasks WHERE task_type = 'classify_error' AND assigned_to = 'triage'`
    );
    const triageTasks = findTriageTasks.all();
    assert.ok(triageTasks.length >= 1, 'Should create triage task for invalid status');
  });
});
230  
/**
 * detect_anomaly: with an empty database the task should complete and report
 * a numeric anomaly_count alongside an anomalies array.
 */
describe('MonitorAgent - detectAnomaly', () => {
  test('completes successfully and returns anomalies array', async () => {
    clearTables();

    const anomalyTask = getTask('detect_anomaly');
    await agent.processTask(anomalyTask);

    const row = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(anomalyTask.id);
    assert.equal(row.status, 'completed');

    const { anomaly_count: anomalyCount, anomalies } = JSON.parse(row.result_json);
    assert.ok(typeof anomalyCount === 'number');
    assert.ok(Array.isArray(anomalies), 'anomalies should be an array');
    assert.ok(anomalyCount >= 0);
  });
});
244  
/**
 * check_pipeline_health: stalled / slow stages, queues without metrics,
 * error-rate checks, and the SKIP_STAGES and empty-queue exclusions.
 */
describe('MonitorAgent - checkPipelineHealth (detailed)', () => {
  // Rate-limits.json and SKIP_STAGES can exclude stages. Other test files in the same
  // process may load .env which sets SKIP_STAGES. Neutralize both for every test here
  // and put them back afterwards.
  const rateLimitsPath = join(projectRoot, 'logs/rate-limits.json');
  let savedRateLimits = null;
  let savedSkipStages;

  beforeEach(() => {
    try {
      savedRateLimits = readFileSync(rateLimitsPath, 'utf8');
      writeFileSync(rateLimitsPath, '{}');
    } catch {
      // File missing or unwritable: nothing was replaced, so nothing to restore.
      savedRateLimits = null;
    }
    savedSkipStages = process.env.SKIP_STAGES;
    delete process.env.SKIP_STAGES;
  });

  afterEach(() => {
    if (savedRateLimits !== null) {
      try {
        writeFileSync(rateLimitsPath, savedRateLimits);
      } catch {
        /* ignore */
      }
      savedRateLimits = null;
    }
    // Restore SKIP_STAGES exactly. When it was originally unset it must be deleted,
    // otherwise a value set by a test in this suite would leak into later suites.
    if (savedSkipStages === undefined) {
      delete process.env.SKIP_STAGES;
    } else {
      process.env.SKIP_STAGES = savedSkipStages;
    }
  });

  // Inserts `count` sites named `${prefix}${i}.com` with the given status.
  const seedSites = (count, prefix, status) => {
    const insertSite = sharedDb.prepare('INSERT INTO sites (domain, status) VALUES (?, ?)');
    for (let i = 0; i < count; i++) {
      insertSite.run(`${prefix}${i}.com`, status);
    }
  };

  // Inserts `count` sites in status 'failing', last updated one hour ago.
  const seedFailingSites = (count, prefix, errorMessage) => {
    const insertFailing = sharedDb.prepare(
      `INSERT INTO sites (domain, status, error_message, updated_at)
       VALUES (?, 'failing', ?, datetime('now', '-1 hour'))`
    );
    for (let i = 0; i < count; i++) {
      insertFailing.run(`${prefix}${i}.com`, errorMessage);
    }
  };

  // Runs a check_pipeline_health task with the given context and returns its parsed result_json.
  const runHealthCheck = async (context = {}) => {
    const task = getTask('check_pipeline_health', context);
    await agent.processTask(task);
    const row = sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id);
    return JSON.parse(row.result_json);
  };

  test('detects stalled stage (>30 minutes since last run)', async () => {
    clearTables();
    seedSites(5, 'stalled', 'found');
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('assets', 10, 5000, datetime('now', '-65 minutes'), datetime('now', '-63 minutes'))`
      )
      .run();

    const result = await runHealthCheck();
    assert.ok(result.stalled >= 1, 'Should detect stalled stage');
    assert.ok(
      result.issues.some(i => i.type === 'stage_stalled'),
      'Issue type should be stage_stalled'
    );
  });

  test('detects slow stage (15-30 minutes since last run)', async () => {
    clearTables();
    seedSites(3, 'slow', 'assets_captured');
    // Last run finished 25 minutes ago, clearly inside the 15-30 minute range.
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('scoring', 5, 3000, datetime('now', '-27 minutes'), datetime('now', '-25 minutes'))`
      )
      .run();

    const result = await runHealthCheck();
    // Check for slow or stalled (timing may vary slightly in test env)
    const hasIssue = result.issues.some(
      i => (i.type === 'stage_slow' || i.type === 'stage_stalled') && i.stage === 'scoring'
    );
    assert.ok(
      hasIssue || result.slow >= 1 || result.stalled >= 1,
      'Should detect slow or stalled stage'
    );
  });

  test('detects stage with large queue but no metrics', async () => {
    clearTables();
    seedSites(110, 'nometrics', 'found');

    const result = await runHealthCheck();
    assert.ok(
      result.issues.some(i => i.type === 'stage_no_metrics'),
      'Should detect no metrics issue'
    );
  });

  test('detects high error rates in failing sites', async () => {
    clearTables();
    seedFailingSites(10, 'fail', 'Network timeout: ETIMEDOUT');

    const result = await runHealthCheck({ check_error_rates: true });
    assert.ok(result.error_rates >= 1, 'Should detect high error rate');
  });

  test('skips stages listed in SKIP_STAGES env var', async () => {
    clearTables();
    // No manual restore needed: afterEach puts SKIP_STAGES back even if an assertion fails.
    process.env.SKIP_STAGES = 'assets,scoring,rescoring,enrich,proposals,outreach';
    seedSites(200, 'skip', 'found');

    const result = await runHealthCheck();
    const assetIssues = result.issues.filter(i => i.stage === 'assets');
    assert.equal(assetIssues.length, 0, 'assets stage should be skipped');
  });

  test('skips stage when queue is empty', async () => {
    clearTables();
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('assets', 10, 5000, datetime('now', '-2 hours'), datetime('now', '-2 hours'))`
      )
      .run();

    const result = await runHealthCheck();
    const assetStalls = result.issues.filter(
      i => i.type === 'stage_stalled' && i.stage === 'assets'
    );
    assert.equal(assetStalls.length, 0, 'Empty queue should not trigger stall');
  });

  test('does not check error rates when check_error_rates is false', async () => {
    clearTables();
    seedFailingSites(10, 'err', 'Some error');

    const result = await runHealthCheck({ check_error_rates: false });
    assert.equal(result.error_rates, 0, 'Should not check error rates when disabled');
  });
});
430  
/**
 * check_slo_compliance: after resetting the SLO tracker's DB handle, the task
 * should complete and report numeric `violations` and `total_slos`.
 */
describe('MonitorAgent - checkSLOCompliance (all compliant)', () => {
  test('completes and logs SLO compliance info when no violations', async () => {
    clearTables();
    resetSLODb();

    const sloTask = getTask('check_slo_compliance');
    await agent.processTask(sloTask);

    const row = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(sloTask.id);
    assert.equal(row.status, 'completed');

    const { violations, total_slos: totalSlos } = JSON.parse(row.result_json);
    assert.ok(typeof violations === 'number');
    assert.ok(typeof totalSlos === 'number');
  });
});
445  
/**
 * check_loops: detection of a domain repeated in today's pipeline log, and
 * graceful completion when that log file does not exist.
 */
describe('MonitorAgent - checkLoops (log domain detection)', () => {
  // Path of the pipeline log named after today's UTC date (YYYY-MM-DD).
  // NOTE(review): assumes the agent derives the file name the same way — confirm.
  const todaysLogPath = () =>
    join(TEST_LOG_DIR, `pipeline-${new Date().toISOString().slice(0, 10)}.log`);

  // Deletes a file, ignoring "not found" and any other unlink failure.
  const removeQuietly = filePath => {
    try {
      unlinkSync(filePath);
    } catch {
      /* ignore */
    }
  };

  test('detects domain loop from pipeline log file', async () => {
    clearTables();

    mkdirSync(TEST_LOG_DIR, { recursive: true });
    const logPath = todaysLogPath();

    // Fifteen log lines that all mention the same domain.
    const lines = Array.from(
      { length: 15 },
      (_, i) => `[2025-01-01T10:0${i % 10}:00Z] [INFO] Processing domain=loopydomain.com status=found`
    );
    writeFileSync(logPath, `${lines.join('\n')}\n`);

    try {
      const task = getTask('check_loops');
      await agent.processTask(task);
      const result = JSON.parse(
        sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
      );
      assert.ok(result.log_domain_loops >= 1, 'Should detect log domain loop');
      assert.ok(
        result.loops.some(l => l.type === 'log_domain_loop'),
        'Should include log_domain_loop type'
      );
    } finally {
      // Always remove the fixture so a failed assertion cannot leak it into later tests.
      removeQuietly(logPath);
    }
  });

  test('handles missing pipeline log file gracefully', async () => {
    clearTables();

    // Make sure no log from an earlier test is lying around.
    removeQuietly(todaysLogPath());

    const task = getTask('check_loops');
    await assert.doesNotReject(async () => {
      await agent.processTask(task);
    });
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
  });
});
500  
/**
 * scan_logs: five identical ERROR lines stamped "now" should be counted,
 * flagged as a loop, and produce a classify_error task for triage.
 */
describe('MonitorAgent - scanLogs (loop detection)', () => {
  test('creates triage task when same error appears >3x in an hour', async () => {
    clearTables();

    // One clock reading for both the file name and the line timestamps, so the
    // two cannot disagree if the test runs across midnight (UTC).
    const now = new Date().toISOString();
    const today = now.slice(0, 10);

    mkdirSync(TEST_LOG_DIR, { recursive: true });
    const logPath = join(TEST_LOG_DIR, `pipeline-${today}.log`);

    const lines = Array.from({ length: 5 }, () => `[${now}] [ERROR] Network timeout: ETIMEDOUT`);
    writeFileSync(logPath, `${lines.join('\n')}\n`);

    try {
      const task = getTask('scan_logs', { days: 1 });
      await agent.processTask(task);

      const result = JSON.parse(
        sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
      );
      assert.ok(result.total_errors >= 5, 'Should count all errors');
      assert.ok(result.loops_detected >= 1, 'Should detect error loop');

      const triageTasks = sharedDb
        .prepare(
          `SELECT * FROM agent_tasks WHERE task_type = 'classify_error' AND assigned_to = 'triage'`
        )
        .all();
      assert.ok(triageTasks.length >= 1, 'Should create triage task for looping error');
    } finally {
      // Always remove the fixture so a failed assertion cannot leak it into later tests.
      try {
        unlinkSync(logPath);
      } catch {
        /* ignore */
      }
    }
  });
});
540  
/**
 * groupByMessage: log lines are grouped under the text that follows the
 * [ERROR] tag; lines without that tag contribute nothing.
 */
describe('MonitorAgent - groupByMessage', () => {
  // Each call uses a brand-new, uninitialized agent, as the method is called directly.
  const groupLines = logLines => new MonitorAgent().groupByMessage(logLines);

  test('groups multiple errors with same message', () => {
    const grouped = groupLines([
      '[2025-01-01T10:00:00Z] [ERROR] Network timeout',
      '[2025-01-01T10:01:00Z] [ERROR] Network timeout',
      '[2025-01-01T10:02:00Z] [INFO] Some info',
      '[2025-01-01T10:03:00Z] [ERROR] Network timeout',
    ]);
    assert.ok(grouped['Network timeout']);
    assert.equal(grouped['Network timeout'].length, 3);
  });

  test('returns empty object for lines with no ERROR pattern', () => {
    assert.deepEqual(groupLines(['[INFO] info', '[WARN] warn']), {});
  });

  test('handles empty array', () => {
    assert.deepEqual(groupLines([]), {});
  });

  test('groups different error messages separately', () => {
    const grouped = groupLines([
      '[2025-01-01] [ERROR] Error type A',
      '[2025-01-01] [ERROR] Error type B',
      '[2025-01-01] [ERROR] Error type A',
    ]);
    assert.equal(grouped['Error type A'].length, 2);
    assert.equal(grouped['Error type B'].length, 1);
  });
});
579  
/**
 * withinOneHour: table of inputs and the boolean each is expected to yield.
 * Every case runs against a brand-new, uninitialized agent.
 */
describe('MonitorAgent - withinOneHour', () => {
  // [test title, log lines passed in, expected return value]
  const cases = [
    ['returns false for single occurrence', ['[2025-01-01T10:00:00Z] [ERROR] Error'], false],
    [
      'returns true when all errors are within 1 hour',
      [
        '[2025-01-01T10:00:00Z] [ERROR] Error',
        '[2025-01-01T10:30:00Z] [ERROR] Error',
        '[2025-01-01T10:59:00Z] [ERROR] Error',
      ],
      true,
    ],
    [
      'returns false when errors span more than 1 hour',
      ['[2025-01-01T10:00:00Z] [ERROR] Error', '[2025-01-01T11:01:00Z] [ERROR] Error'],
      false,
    ],
    ['returns false when no valid timestamps', ['no timestamp', 'also no timestamp'], false],
    ['returns false for empty array', [], false],
  ];

  for (const [title, logLines, expected] of cases) {
    test(title, () => {
      const monitor = new MonitorAgent();
      assert.equal(monitor.withinOneHour(logLines), expected);
    });
  }
});
619  
/**
 * check_agent_health: a 20% developer failure rate should only produce an
 * "elevated" warning log, and a large pending backlog should be reported as
 * overloaded with severity depending on its size.
 */
describe('MonitorAgent - checkAgentHealth (elevated rate and overload)', () => {
  // Executes the same parameterless INSERT `times` times.
  const repeatInsert = (sql, times) => {
    const statement = sharedDb.prepare(sql);
    for (let n = 0; n < times; n += 1) {
      statement.run();
    }
  };

  // INSERT for a developer fix_bug task created one hour ago with the given final status.
  const finishedTaskSql = outcome =>
    `INSERT INTO agent_tasks (task_type, assigned_to, status, created_at) VALUES ('fix_bug', 'developer', '${outcome}', datetime('now', '-1 hours'))`;

  const PENDING_TASK_SQL = `INSERT INTO agent_tasks (task_type, assigned_to, status) VALUES ('fix_bug', 'developer', 'pending')`;

  // Seeds `pendingCount` pending developer tasks, runs the health check and
  // returns the developer's entry from overloaded_agents (undefined if absent).
  const developerOverloadFor = async pendingCount => {
    clearTables();
    repeatInsert(PENDING_TASK_SQL, pendingCount);

    const healthTask = getTask('check_agent_health');
    await agent.processTask(healthTask);

    const stored = sharedDb
      .prepare('SELECT result_json FROM agent_tasks WHERE id = ?')
      .get(healthTask.id);
    const report = JSON.parse(stored.result_json);
    return report.overloaded_agents.find(entry => entry.agent === 'developer');
  };

  test('logs warning when agent failure rate is 15-30%', async () => {
    clearTables();

    // 8 completed + 2 failed = 20% failure rate.
    repeatInsert(finishedTaskSql('completed'), 8);
    repeatInsert(finishedTaskSql('failed'), 2);

    const healthTask = getTask('check_agent_health');
    await agent.processTask(healthTask);

    const developerState = sharedDb
      .prepare(`SELECT * FROM agent_state WHERE agent_name = 'developer'`)
      .get();
    assert.notEqual(developerState.status, 'blocked', 'Should not block at 20% failure rate');

    const elevatedWarnings = sharedDb
      .prepare(
        `SELECT * FROM agent_logs WHERE agent_name = 'monitor' AND log_level = 'warn' AND message LIKE '%elevated%'`
      )
      .all();
    assert.ok(elevatedWarnings.length >= 1, 'Should log elevated failure rate warning');
  });

  test('detects overloaded agents with >50 pending tasks (critical)', async () => {
    const overloaded = await developerOverloadFor(55);
    assert.ok(overloaded, 'Should detect developer as overloaded');
    assert.equal(overloaded.severity, 'critical', 'Should be critical with >50 tasks');
  });

  test('detects overloaded agents with 20-50 pending tasks (warning)', async () => {
    const overloaded = await developerOverloadFor(25);
    assert.ok(overloaded, 'Should detect developer as overloaded');
    assert.equal(overloaded.severity, 'warning', 'Should be warning with 20-50 tasks');
  });
});
697  
/**
 * processTask with task types the monitor has no dedicated handler for: each
 * should still end up 'completed', and fix_bug should be flagged as delegated.
 */
describe('MonitorAgent - processTask (unknown/delegated task types)', () => {
  // Clears state, runs one task of the given type, and returns its stored row.
  const processAndReload = async (taskType, context) => {
    clearTables();
    const queued = getTask(taskType, context);
    await agent.processTask(queued);
    return sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(queued.id);
  };

  test('delegates fix_bug task type (not handled by monitor)', async () => {
    const row = await processAndReload('fix_bug', { error_message: 'Some bug' });
    assert.equal(row.status, 'completed');

    // A missing result_json is treated as an empty object.
    const outcome = JSON.parse(row.result_json || '{}');
    assert.equal(outcome.delegated, true, 'fix_bug should be delegated');
  });

  test('delegates bootstrap_monitor task type', async () => {
    const row = await processAndReload('bootstrap_monitor', {});
    assert.equal(row.status, 'completed');
  });

  test('handles completely unknown task type via delegation', async () => {
    const row = await processAndReload('totally_unknown_task_xyz', {});
    assert.equal(row.status, 'completed');
  });
});
725  
/**
 * ensureRecurringTasks: must not re-queue a task type that completed a minute
 * ago, and must queue something when the task table is empty.
 */
describe('MonitorAgent - ensureRecurringTasks (timing)', () => {
  // All monitor-owned tasks matching the extra SQL filter.
  const monitorTasksWhere = filterSql =>
    sharedDb.prepare(`SELECT * FROM agent_tasks WHERE assigned_to = 'monitor' AND ${filterSql}`).all();

  test('does not recreate task if completed recently (within interval)', async () => {
    clearTables();

    // A scan_logs task that completed 1 minute ago (the test expects a 5-minute interval).
    const insertRecentScan = sharedDb.prepare(
      `INSERT INTO agent_tasks (task_type, assigned_to, status, completed_at)
       VALUES ('scan_logs', 'monitor', 'completed', datetime('now', '-1 minutes'))`
    );
    insertRecentScan.run();

    await agent.ensureRecurringTasks();

    const pendingScans = monitorTasksWhere(`task_type = 'scan_logs' AND status = 'pending'`);
    assert.equal(
      pendingScans.length,
      0,
      'Should not create scan_logs task within 5-minute interval'
    );
  });

  test('creates all recurring tasks when none exist at all', async () => {
    clearTables();

    // Empty task table: the agent is expected to queue its recurring task types.
    await agent.ensureRecurringTasks();

    const openTasks = monitorTasksWhere(`status IN ('pending', 'running')`);
    // Only checks that at least one recurring task was created, not the exact count.
    assert.ok(openTasks.length > 0, 'Should create recurring tasks when none exist');
  });
});