monitor-extended.test.js
1 /** 2 * Monitor Agent Extended Unit Tests 3 * 4 * Extends coverage for MonitorAgent focusing on: 5 * - checkProcessCompliance: invalid status, stuck sites 6 * - detectAnomaly: returns results with anomalies array 7 * - checkPipelineHealth: stage stalled, slow, no metrics, error rates, skip stages 8 * - checkSLOCompliance: all SLOs compliant branch 9 * - checkLoops: log-domain loop detection via pipeline log file 10 * - groupByMessage and withinOneHour edge cases 11 * - checkAgentHealth: elevated rate warning, overloaded agents 12 * - processTask: unknown/delegated task types 13 * - ensureRecurringTasks: timing behavior 14 */ 15 16 import { test, describe, before, after, beforeEach, afterEach } from 'node:test'; 17 import assert from 'node:assert/strict'; 18 import Database from 'better-sqlite3'; 19 import { existsSync, unlinkSync, writeFileSync, mkdirSync, rmSync, readFileSync } from 'fs'; 20 import { join, dirname } from 'path'; 21 import { fileURLToPath } from 'url'; 22 23 const __filename = fileURLToPath(import.meta.url); 24 const __dirname = dirname(__filename); 25 const projectRoot = join(__dirname, '../..'); 26 27 const TEST_DB_PATH = join('/tmp', `test-monitor-ext-${Date.now()}.db`); 28 const TEST_LOG_DIR = join(projectRoot, 'tests/fixtures/monitor-ext-logs'); 29 30 process.env.DATABASE_PATH = TEST_DB_PATH; 31 process.env.AGENT_IMMEDIATE_INVOCATION = 'false'; 32 33 for (const ext of ['', '-wal', '-shm']) { 34 try { 35 unlinkSync(TEST_DB_PATH + ext); 36 } catch { 37 /* ignore */ 38 } 39 } 40 const sharedDb = new Database(TEST_DB_PATH); 41 sharedDb.exec(` 42 CREATE TABLE IF NOT EXISTS agent_tasks (id INTEGER PRIMARY KEY AUTOINCREMENT, task_type TEXT NOT NULL, assigned_to TEXT NOT NULL, created_by TEXT, status TEXT DEFAULT 'pending', priority INTEGER DEFAULT 5, context_json TEXT, result_json TEXT, parent_task_id INTEGER, error_message TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, started_at DATETIME, completed_at DATETIME, retry_count INTEGER DEFAULT 
0); 43 CREATE TABLE IF NOT EXISTS agent_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, log_level TEXT, message TEXT NOT NULL, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 44 CREATE TABLE IF NOT EXISTS agent_state (agent_name TEXT PRIMARY KEY, last_active DATETIME DEFAULT CURRENT_TIMESTAMP, current_task_id INTEGER, status TEXT DEFAULT 'idle', metrics_json TEXT); 45 CREATE TABLE IF NOT EXISTS agent_messages (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, from_agent TEXT NOT NULL, to_agent TEXT NOT NULL, message_type TEXT, content TEXT NOT NULL, metadata_json TEXT, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, read_at DATETIME); 46 CREATE TABLE IF NOT EXISTS human_review_queue (id INTEGER PRIMARY KEY AUTOINCREMENT, file TEXT NOT NULL, reason TEXT NOT NULL, type TEXT NOT NULL, priority TEXT NOT NULL, metadata TEXT, status TEXT DEFAULT 'pending', created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 47 CREATE TABLE IF NOT EXISTS settings (key TEXT PRIMARY KEY, value TEXT NOT NULL, description TEXT, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP); 48 CREATE TABLE IF NOT EXISTS sites (id INTEGER PRIMARY KEY AUTOINCREMENT, domain TEXT, landing_page_url TEXT, status TEXT DEFAULT 'found', error_message TEXT, score REAL, grade TEXT, recapture_count INTEGER DEFAULT 0, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 49 CREATE TABLE IF NOT EXISTS pipeline_metrics (id INTEGER PRIMARY KEY AUTOINCREMENT, stage_name TEXT NOT NULL, sites_processed INTEGER DEFAULT 0, sites_succeeded INTEGER DEFAULT 0, sites_failed INTEGER DEFAULT 0, duration_ms INTEGER NOT NULL, started_at DATETIME NOT NULL, finished_at DATETIME NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 50 CREATE TABLE IF NOT EXISTS agent_outcomes (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, outcome TEXT NOT NULL, context_json TEXT, created_at DATETIME 
DEFAULT CURRENT_TIMESTAMP); 51 CREATE TABLE IF NOT EXISTS structured_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, agent_name TEXT, task_id INTEGER, level TEXT, message TEXT, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 52 CREATE TABLE IF NOT EXISTS site_status (id INTEGER PRIMARY KEY AUTOINCREMENT, site_id INTEGER, status TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 53 CREATE TABLE IF NOT EXISTS cron_locks (lock_key TEXT PRIMARY KEY, acquired_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, description TEXT); 54 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor', 'idle'); 55 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('triage', 'idle'); 56 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('developer', 'idle'); 57 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('qa', 'idle'); 58 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('security', 'idle'); 59 INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('architect', 'idle'); 60 `); 61 62 // ATTACH in-memory databases as ops and tel so queries like ops.settings, tel.agent_tasks resolve 63 sharedDb.exec(` 64 ATTACH ':memory:' AS ops; 65 ATTACH ':memory:' AS tel; 66 CREATE TABLE IF NOT EXISTS ops.settings (key TEXT PRIMARY KEY, value TEXT NOT NULL, description TEXT, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP); 67 CREATE TABLE IF NOT EXISTS tel.agent_tasks (id INTEGER PRIMARY KEY AUTOINCREMENT, task_type TEXT NOT NULL, assigned_to TEXT NOT NULL, created_by TEXT, status TEXT DEFAULT 'pending', priority INTEGER DEFAULT 5, context_json TEXT, result_json TEXT, parent_task_id INTEGER, error_message TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, started_at DATETIME, completed_at DATETIME, retry_count INTEGER DEFAULT 0); 68 CREATE TABLE IF NOT EXISTS tel.agent_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, 
log_level TEXT, message TEXT NOT NULL, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 69 CREATE TABLE IF NOT EXISTS tel.agent_state (agent_name TEXT PRIMARY KEY, last_active DATETIME DEFAULT CURRENT_TIMESTAMP, current_task_id INTEGER, status TEXT DEFAULT 'idle', metrics_json TEXT); 70 CREATE TABLE IF NOT EXISTS tel.agent_messages (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, from_agent TEXT NOT NULL, to_agent TEXT NOT NULL, message_type TEXT, content TEXT NOT NULL, metadata_json TEXT, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, read_at DATETIME); 71 CREATE TABLE IF NOT EXISTS tel.agent_outcomes (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, task_type TEXT NOT NULL, outcome TEXT NOT NULL, context_json TEXT, result_json TEXT, duration_ms INTEGER, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 72 CREATE TABLE IF NOT EXISTS tel.pipeline_metrics (id INTEGER PRIMARY KEY AUTOINCREMENT, stage_name TEXT NOT NULL, sites_processed INTEGER DEFAULT 0, sites_succeeded INTEGER DEFAULT 0, sites_failed INTEGER DEFAULT 0, duration_ms INTEGER NOT NULL, started_at DATETIME NOT NULL, finished_at DATETIME NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 73 CREATE TABLE IF NOT EXISTS tel.structured_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, agent_name TEXT, task_id INTEGER, level TEXT, message TEXT, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP); 74 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('monitor', 'idle'); 75 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('triage', 'idle'); 76 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('developer', 'idle'); 77 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('qa', 'idle'); 78 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('security', 'idle'); 79 INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('architect', 'idle'); 80 
`); 81 82 import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js'; 83 import { resetDb as resetTaskDb } from '../../src/agents/utils/task-manager.js'; 84 import { resetDb as resetMessageDb } from '../../src/agents/utils/message-manager.js'; 85 import { resetDb as resetSLODb } from '../../src/agents/utils/slo-tracker.js'; 86 import { MonitorAgent, resetDb as resetMonitorDb } from '../../src/agents/monitor.js'; 87 88 let agent; 89 90 before(async () => { 91 mkdirSync(TEST_LOG_DIR, { recursive: true }); 92 process.env.LOGS_DIR = `${TEST_LOG_DIR}/`; 93 resetMonitorDb(sharedDb); 94 agent = new MonitorAgent(); 95 await agent.initialize(); 96 }); 97 98 after(() => { 99 delete process.env.LOGS_DIR; 100 resetMonitorDb(null); 101 resetBaseDb(); 102 resetTaskDb(); 103 resetMessageDb(); 104 resetSLODb(); 105 try { 106 sharedDb.close(); 107 } catch { 108 /* ignore */ 109 } 110 for (const ext of ['', '-wal', '-shm']) { 111 try { 112 unlinkSync(TEST_DB_PATH + ext); 113 } catch { 114 /* ignore */ 115 } 116 } 117 try { 118 rmSync(TEST_LOG_DIR, { recursive: true, force: true }); 119 } catch { 120 /* ignore */ 121 } 122 }); 123 124 function clearTables() { 125 sharedDb.exec(` 126 DELETE FROM agent_tasks; 127 DELETE FROM agent_logs; 128 DELETE FROM sites; 129 DELETE FROM pipeline_metrics; 130 DELETE FROM human_review_queue; 131 DELETE FROM settings; 132 `); 133 // Reset agent states to 'idle' so health checks start clean 134 sharedDb.exec(` 135 UPDATE agent_state SET status = 'idle', metrics_json = NULL WHERE agent_name IN ('developer', 'qa', 'triage', 'security', 'architect'); 136 `); 137 } 138 139 function getTask(taskType, context = {}) { 140 const r = sharedDb 141 .prepare( 142 `INSERT INTO agent_tasks (task_type, assigned_to, priority, context_json, status) 143 VALUES (?, 'monitor', 5, ?, 'running')` 144 ) 145 .run(taskType, JSON.stringify(context)); 146 return sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(r.lastInsertRowid); 147 } 148 149 
describe('MonitorAgent - checkProcessCompliance', () => {
  test('completes successfully with empty DB', async () => {
    clearTables();
    const task = getTask('check_process_compliance');
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
    const result = JSON.parse(updated.result_json);
    assert.ok(typeof result.invalid_statuses === 'number');
    assert.ok(typeof result.stuck_sites === 'number');
  });

  test('detects sites with invalid status updated recently', async () => {
    clearTables();
    sharedDb
      .prepare(
        `INSERT INTO sites (domain, status, updated_at) VALUES (?, ?, datetime('now', '-30 minutes'))`
      )
      .run('badstatus.com', 'invalid_status_xyz');

    const task = getTask('check_process_compliance');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.invalid_statuses >= 1, 'Should detect invalid status');
  });

  test('detects stuck sites (no progress >24 hours)', async () => {
    clearTables();
    sharedDb
      .prepare(
        `INSERT INTO sites (domain, status, updated_at) VALUES (?, ?, datetime('now', '-48 hours'))`
      )
      .run('stuck.com', 'found');

    const task = getTask('check_process_compliance');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.stuck_sites >= 1, 'Should detect stuck sites');
  });

  test('does not count excluded statuses as stuck', async () => {
    clearTables();
    // Terminal/inactive statuses should be ignored by the stuck-site check.
    for (const status of ['outreach_sent', 'ignored', 'high_score', 'failing']) {
      sharedDb
        .prepare(
          `INSERT INTO sites (domain, status, updated_at) VALUES (?, ?, datetime('now', '-48 hours'))`
        )
        .run(`${status}.com`, status);
    }

    const task = getTask('check_process_compliance');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.equal(result.stuck_sites, 0, 'Should not count excluded statuses');
  });

  test('creates triage task for invalid status sites', async () => {
    clearTables();
    sharedDb
      .prepare(
        `INSERT INTO sites (domain, status, updated_at) VALUES (?, ?, datetime('now', '-10 minutes'))`
      )
      .run('broken.com', 'totally_invalid_status');

    const task = getTask('check_process_compliance');
    await agent.processTask(task);

    const triageTasks = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks WHERE task_type = 'classify_error' AND assigned_to = 'triage'`
      )
      .all();
    assert.ok(triageTasks.length >= 1, 'Should create triage task for invalid status');
  });
});

describe('MonitorAgent - detectAnomaly', () => {
  test('completes successfully and returns anomalies array', async () => {
    clearTables();
    const task = getTask('detect_anomaly');
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
    const result = JSON.parse(updated.result_json);
    assert.ok(typeof result.anomaly_count === 'number');
    assert.ok(Array.isArray(result.anomalies), 'anomalies should be an array');
    assert.ok(result.anomaly_count >= 0);
  });
});

describe('MonitorAgent - checkPipelineHealth (detailed)', () => {
  // Rate-limits.json and SKIP_STAGES can exclude stages. Other test files in the same
  // process may load .env which sets SKIP_STAGES. Clear both for this suite.
  const rateLimitsPath = join(projectRoot, 'logs/rate-limits.json');
  let savedRateLimits = null;
  let savedSkipStages;
  beforeEach(() => {
    try {
      savedRateLimits = readFileSync(rateLimitsPath, 'utf8');
      writeFileSync(rateLimitsPath, '{}');
    } catch {
      savedRateLimits = null;
    }
    savedSkipStages = process.env.SKIP_STAGES;
    delete process.env.SKIP_STAGES;
  });
  afterEach(() => {
    if (savedRateLimits !== null) {
      try {
        writeFileSync(rateLimitsPath, savedRateLimits);
      } catch {
        /* ignore */
      }
    }
    if (savedSkipStages !== undefined) {
      process.env.SKIP_STAGES = savedSkipStages;
    }
  });

  test('detects stalled stage (>30 minutes since last run)', async () => {
    clearTables();
    for (let i = 0; i < 5; i++) {
      sharedDb
        .prepare('INSERT INTO sites (domain, status) VALUES (?, ?)')
        .run(`stalled${i}.com`, 'found');
    }
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('assets', 10, 5000, datetime('now', '-65 minutes'), datetime('now', '-63 minutes'))`
      )
      .run();

    const task = getTask('check_pipeline_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.stalled >= 1, 'Should detect stalled stage');
    assert.ok(
      result.issues.some(i => i.type === 'stage_stalled'),
      'Issue type should be stage_stalled'
    );
  });

  test('detects slow stage (15-30 minutes since last run)', async () => {
    clearTables();
    for (let i = 0; i < 3; i++) {
      sharedDb
        .prepare('INSERT INTO sites (domain, status) VALUES (?, ?)')
        .run(`slow${i}.com`, 'assets_captured');
    }
    // Use 25 minutes to ensure it's clearly in the 15-30 range
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('scoring', 5, 3000, datetime('now', '-27 minutes'), datetime('now', '-25 minutes'))`
      )
      .run();

    const task = getTask('check_pipeline_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    // Check for slow or stalled (timing may vary slightly in test env)
    const hasIssue = result.issues.some(
      i => (i.type === 'stage_slow' || i.type === 'stage_stalled') && i.stage === 'scoring'
    );
    assert.ok(
      hasIssue || result.slow >= 1 || result.stalled >= 1,
      'Should detect slow or stalled stage'
    );
  });

  test('detects stage with large queue but no metrics', async () => {
    clearTables();
    for (let i = 0; i < 110; i++) {
      sharedDb
        .prepare('INSERT INTO sites (domain, status) VALUES (?, ?)')
        .run(`nometrics${i}.com`, 'found');
    }

    const task = getTask('check_pipeline_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(
      result.issues.some(i => i.type === 'stage_no_metrics'),
      'Should detect no metrics issue'
    );
  });

  test('detects high error rates in failing sites', async () => {
    clearTables();
    for (let i = 0; i < 10; i++) {
      sharedDb
        .prepare(
          `INSERT INTO sites (domain, status, error_message, updated_at)
           VALUES (?, 'failing', 'Network timeout: ETIMEDOUT', datetime('now', '-1 hour'))`
        )
        .run(`fail${i}.com`);
    }

    const task = getTask('check_pipeline_health', { check_error_rates: true });
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.error_rates >= 1, 'Should detect high error rate');
  });

  test('skips stages listed in SKIP_STAGES env var', async () => {
    clearTables();
    const original = process.env.SKIP_STAGES;
    process.env.SKIP_STAGES = 'assets,scoring,rescoring,enrich,proposals,outreach';

    for (let i = 0; i < 200; i++) {
      sharedDb
        .prepare('INSERT INTO sites (domain, status) VALUES (?, ?)')
        .run(`skip${i}.com`, 'found');
    }

    const task = getTask('check_pipeline_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    const stalledAssets = result.issues.filter(i => i.stage === 'assets');
    assert.equal(stalledAssets.length, 0, 'assets stage should be skipped');

    if (original !== undefined) process.env.SKIP_STAGES = original;
    else delete process.env.SKIP_STAGES;
  });

  test('skips stage when queue is empty', async () => {
    clearTables();
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, duration_ms, started_at, finished_at)
         VALUES ('assets', 10, 5000, datetime('now', '-2 hours'), datetime('now', '-2 hours'))`
      )
      .run();

    const task = getTask('check_pipeline_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    const assetStalls = result.issues.filter(
      i => i.type === 'stage_stalled' && i.stage === 'assets'
    );
    assert.equal(assetStalls.length, 0, 'Empty queue should not trigger stall');
  });

  test('does not check error rates when check_error_rates is false', async () => {
    clearTables();
    for (let i = 0; i < 10; i++) {
      sharedDb
        .prepare(
          `INSERT INTO sites (domain, status, error_message, updated_at)
           VALUES (?, 'failing', 'Some error', datetime('now', '-1 hour'))`
        )
        .run(`err${i}.com`);
    }

    const task = getTask('check_pipeline_health', { check_error_rates: false });
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.equal(result.error_rates, 0, 'Should not check error rates when disabled');
  });
});

describe('MonitorAgent - checkSLOCompliance (all compliant)', () => {
  test('completes and logs SLO compliance info when no violations', async () => {
    clearTables();
    resetSLODb();

    const task = getTask('check_slo_compliance');
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
    const result = JSON.parse(updated.result_json);
    assert.ok(typeof result.violations === 'number');
    assert.ok(typeof result.total_slos === 'number');
  });
});

describe('MonitorAgent - checkLoops (log domain detection)', () => {
  test('detects domain loop from pipeline log file', async () => {
    clearTables();

    const today = new Date().toISOString().slice(0, 10);
    const logDir = TEST_LOG_DIR;
    mkdirSync(logDir, { recursive: true });
    const logPath = join(logDir, `pipeline-${today}.log`);

    // 15 repeats of the same domain should exceed the loop threshold.
    const lines = [];
    for (let i = 0; i < 15; i++) {
      lines.push(
        `[2025-01-01T10:0${i % 10}:00Z] [INFO] Processing domain=loopydomain.com status=found`
      );
    }
    writeFileSync(logPath, `${lines.join('\n')}\n`);

    const task = getTask('check_loops');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.log_domain_loops >= 1, 'Should detect log domain loop');
    assert.ok(
      result.loops.some(l => l.type === 'log_domain_loop'),
      'Should include log_domain_loop type'
    );

    try {
      unlinkSync(logPath);
    } catch {
      /* ignore */
    }
  });

  test('handles missing pipeline log file gracefully', async () => {
    clearTables();

    const today = new Date().toISOString().slice(0, 10);
    const logPath = join(TEST_LOG_DIR, `pipeline-${today}.log`);
    try {
      unlinkSync(logPath);
    } catch {
      /* ignore */
    }

    const task = getTask('check_loops');
    await assert.doesNotReject(async () => {
      await agent.processTask(task);
    });
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
  });
});

describe('MonitorAgent - scanLogs (loop detection)', () => {
  test('creates triage task when same error appears >3x in an hour', async () => {
    clearTables();

    const today = new Date().toISOString().slice(0, 10);
    const logDir = TEST_LOG_DIR;
    mkdirSync(logDir, { recursive: true });
    const logPath = join(logDir, `pipeline-${today}.log`);

    const now = new Date().toISOString();
    const lines = [];
    for (let i = 0; i < 5; i++) {
      lines.push(`[${now}] [ERROR] Network timeout: ETIMEDOUT`);
    }
    writeFileSync(logPath, `${lines.join('\n')}\n`);

    const task = getTask('scan_logs', { days: 1 });
    await agent.processTask(task);

    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    assert.ok(result.total_errors >= 5, 'Should count all errors');
    assert.ok(result.loops_detected >= 1, 'Should detect error loop');

    const triageTasks = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks WHERE task_type = 'classify_error' AND assigned_to = 'triage'`
      )
      .all();
    assert.ok(triageTasks.length >= 1, 'Should create triage task for looping error');

    try {
      unlinkSync(logPath);
    } catch {
      /* ignore */
    }
  });
});

describe('MonitorAgent - groupByMessage', () => {
  test('groups multiple errors with same message', () => {
    const a = new MonitorAgent();
    const errors = [
      '[2025-01-01T10:00:00Z] [ERROR] Network timeout',
      '[2025-01-01T10:01:00Z] [ERROR] Network timeout',
      '[2025-01-01T10:02:00Z] [INFO] Some info',
      '[2025-01-01T10:03:00Z] [ERROR] Network timeout',
    ];
    const groups = a.groupByMessage(errors);
    assert.ok(groups['Network timeout']);
    assert.equal(groups['Network timeout'].length, 3);
  });

  test('returns empty object for lines with no ERROR pattern', () => {
    const a = new MonitorAgent();
    const groups = a.groupByMessage(['[INFO] info', '[WARN] warn']);
    assert.deepEqual(groups, {});
  });

  test('handles empty array', () => {
    const a = new MonitorAgent();
    const groups = a.groupByMessage([]);
    assert.deepEqual(groups, {});
  });

  test('groups different error messages separately', () => {
    const a = new MonitorAgent();
    const errors = [
      '[2025-01-01] [ERROR] Error type A',
      '[2025-01-01] [ERROR] Error type B',
      '[2025-01-01] [ERROR] Error type A',
    ];
    const groups = a.groupByMessage(errors);
    assert.equal(groups['Error type A'].length, 2);
    assert.equal(groups['Error type B'].length, 1);
  });
});

describe('MonitorAgent - withinOneHour', () => {
  test('returns false for single occurrence', () => {
    const a = new MonitorAgent();
    assert.equal(a.withinOneHour(['[2025-01-01T10:00:00Z] [ERROR] Error']), false);
  });

  test('returns true when all errors are within 1 hour', () => {
    const a = new MonitorAgent();
    assert.equal(
      a.withinOneHour([
        '[2025-01-01T10:00:00Z] [ERROR] Error',
        '[2025-01-01T10:30:00Z] [ERROR] Error',
        '[2025-01-01T10:59:00Z] [ERROR] Error',
      ]),
      true
    );
  });

  test('returns false when errors span more than 1 hour', () => {
    const a = new MonitorAgent();
    assert.equal(
      a.withinOneHour([
        '[2025-01-01T10:00:00Z] [ERROR] Error',
        '[2025-01-01T11:01:00Z] [ERROR] Error',
      ]),
      false
    );
  });

  test('returns false when no valid timestamps', () => {
    const a = new MonitorAgent();
    assert.equal(a.withinOneHour(['no timestamp', 'also no timestamp']), false);
  });

  test('returns false for empty array', () => {
    const a = new MonitorAgent();
    assert.equal(a.withinOneHour([]), false);
  });
});

describe('MonitorAgent - checkAgentHealth (elevated rate and overload)', () => {
  test('logs warning when agent failure rate is 15-30%', async () => {
    clearTables();

    // 2 failed out of 10 total = 20% failure rate: warn, but do not block.
    for (let i = 0; i < 8; i++) {
      sharedDb
        .prepare(
          `INSERT INTO agent_tasks (task_type, assigned_to, status, created_at) VALUES ('fix_bug', 'developer', 'completed', datetime('now', '-1 hours'))`
        )
        .run();
    }
    for (let i = 0; i < 2; i++) {
      sharedDb
        .prepare(
          `INSERT INTO agent_tasks (task_type, assigned_to, status, created_at) VALUES ('fix_bug', 'developer', 'failed', datetime('now', '-1 hours'))`
        )
        .run();
    }

    const task = getTask('check_agent_health');
    await agent.processTask(task);

    const devState = sharedDb
      .prepare(`SELECT * FROM agent_state WHERE agent_name = 'developer'`)
      .get();
    assert.notEqual(devState.status, 'blocked', 'Should not block at 20% failure rate');

    const warnLogs = sharedDb
      .prepare(
        `SELECT * FROM agent_logs WHERE agent_name = 'monitor' AND log_level = 'warn' AND message LIKE '%elevated%'`
      )
      .all();
    assert.ok(warnLogs.length >= 1, 'Should log elevated failure rate warning');
  });

  test('detects overloaded agents with >50 pending tasks (critical)', async () => {
    clearTables();

    for (let i = 0; i < 55; i++) {
      sharedDb
        .prepare(
          `INSERT INTO agent_tasks (task_type, assigned_to, status) VALUES ('fix_bug', 'developer', 'pending')`
        )
        .run();
    }

    const task = getTask('check_agent_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    const overloaded = result.overloaded_agents.find(a => a.agent === 'developer');
    assert.ok(overloaded, 'Should detect developer as overloaded');
    assert.equal(overloaded.severity, 'critical', 'Should be critical with >50 tasks');
  });

  test('detects overloaded agents with 20-50 pending tasks (warning)', async () => {
    clearTables();

    for (let i = 0; i < 25; i++) {
      sharedDb
        .prepare(
          `INSERT INTO agent_tasks (task_type, assigned_to, status) VALUES ('fix_bug', 'developer', 'pending')`
        )
        .run();
    }

    const task = getTask('check_agent_health');
    await agent.processTask(task);
    const result = JSON.parse(
      sharedDb.prepare('SELECT result_json FROM agent_tasks WHERE id = ?').get(task.id).result_json
    );
    const overloaded = result.overloaded_agents.find(a => a.agent === 'developer');
    assert.ok(overloaded, 'Should detect developer as overloaded');
    assert.equal(overloaded.severity, 'warning', 'Should be warning with 20-50 tasks');
  });
});

describe('MonitorAgent - processTask (unknown/delegated task types)', () => {
  test('delegates fix_bug task type (not handled by monitor)', async () => {
    clearTables();
    const task = getTask('fix_bug', { error_message: 'Some bug' });
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
    const result = JSON.parse(updated.result_json || '{}');
    assert.equal(result.delegated, true, 'fix_bug should be delegated');
  });

  test('delegates bootstrap_monitor task type', async () => {
    clearTables();
    const task = getTask('bootstrap_monitor', {});
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
  });

  test('handles completely unknown task type via delegation', async () => {
    clearTables();
    const task = getTask('totally_unknown_task_xyz', {});
    await agent.processTask(task);
    const updated = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(updated.status, 'completed');
  });
});

describe('MonitorAgent - ensureRecurringTasks (timing)', () => {
  test('does not recreate task if completed recently (within interval)', async () => {
    clearTables();

    // Insert a recently-completed scan_logs task (1 minute ago, interval is 5 min)
    sharedDb
      .prepare(
        `INSERT INTO agent_tasks (task_type, assigned_to, status, completed_at)
         VALUES ('scan_logs', 'monitor', 'completed', datetime('now', '-1 minutes'))`
      )
      .run();

    await agent.ensureRecurringTasks();

    const tasks = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks WHERE assigned_to = 'monitor' AND task_type = 'scan_logs' AND status = 'pending'`
      )
      .all();
    assert.equal(tasks.length, 0, 'Should not create scan_logs task within 5-minute interval');
  });

  test('creates all recurring tasks when none exist at all', async () => {
    clearTables();

    // No tasks exist at all - should create all 6 recurring task types
    await agent.ensureRecurringTasks();

    const tasks = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks WHERE assigned_to = 'monitor' AND status IN ('pending', 'running')`
      )
      .all();
    // Should have created at least some recurring tasks
    assert.ok(tasks.length > 0, 'Should create recurring tasks when none exist');
  });
});