/**
 * Monitor Agent Coverage Boost - Part 6
 *
 * Targets uncovered lines from the combined coverage report:
 * - Lines 264-271: scanLogs — error path when error.code !== 'ENOENT'
 * - Lines 367-374: scanApiErrorRates — file read error with non-ENOENT code
 * - Lines 383-430: scanApiErrorRates — high API error rate detection
 * - Lines 450-452: readIncrementalForApiScan — log file shrink (cycling) reset
 * - Lines 467-468: readIncrementalForApiScan — non-ENOENT error propagation
 * - Lines 524-525: readIncrementally — non-ENOENT error propagation
 * - Lines 914-920: detectAnomaly — disk > 90% triggers addReviewItem critical
 * - Lines 924-927: detectAnomaly — failed to check disk usage
 * - Lines 971-975: checkPipelineHealth — active rate-limited stages from rate-limits.json
 * - Lines 1029-1065: checkPipelineHealth — crash loop detected with addReviewItem
 * - Lines 1092-1101: checkPipelineHealth — stalled stage with crash pattern in pipeline log
 * - Lines 1211-1229: checkPipelineHealth — proposal JSON parse failures
 * - Lines 1246-1266: checkPipelineHealth — bogus SMS phone numbers
 * - Lines 1294-1313: checkPipelineHealth — SMS country code mismatch
 * - Lines 1339-1354: checkPipelineHealth — SMS landline numbers detection
 */

// Environment must be set before the agent modules are imported below,
// because they read these values at module-load time.
process.env.DATABASE_PATH = '/tmp/test-monitor-cov6.db';
process.env.AGENT_IMMEDIATE_INVOCATION = 'false';
process.env.LOGS_DIR = '/tmp/test-logs-monitor-cov6/';

import { test, describe, before, after, beforeEach } from 'node:test';
import assert from 'node:assert/strict';
import Database from 'better-sqlite3';
import { unlinkSync, mkdirSync, rmSync, writeFileSync } from 'fs';

const TEST_DB_PATH = '/tmp/test-monitor-cov6.db';
const TEST_LOG_DIR = '/tmp/test-logs-monitor-cov6';

// Clean up leftover DB files from a previous (possibly crashed) run
for (const ext of ['', '-wal', '-shm']) {
  try {
    unlinkSync(TEST_DB_PATH + ext);
  } catch {
    /* ignore */
  }
}

const DB_SCHEMA = `
CREATE TABLE IF NOT EXISTS agent_tasks (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  task_type TEXT NOT NULL,
  assigned_to TEXT NOT NULL,
  created_by TEXT,
  status TEXT DEFAULT 'pending',
  priority INTEGER DEFAULT 5,
  context_json TEXT,
  result_json TEXT,
  parent_task_id INTEGER,
  error_message TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  started_at DATETIME,
  completed_at DATETIME,
  retry_count INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS agent_logs (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  task_id INTEGER,
  agent_name TEXT NOT NULL,
  log_level TEXT,
  message TEXT NOT NULL,
  data_json TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS agent_state (
  agent_name TEXT PRIMARY KEY,
  last_active DATETIME DEFAULT CURRENT_TIMESTAMP,
  current_task_id INTEGER,
  status TEXT DEFAULT 'idle',
  metrics_json TEXT
);
CREATE TABLE IF NOT EXISTS agent_messages (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  task_id INTEGER,
  from_agent TEXT NOT NULL,
  to_agent TEXT NOT NULL,
  message_type TEXT,
  content TEXT NOT NULL,
  metadata_json TEXT,
  context_json TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  read_at DATETIME
);
CREATE TABLE IF NOT EXISTS human_review_queue (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  file TEXT NOT NULL,
  reason TEXT NOT NULL,
  type TEXT NOT NULL,
  priority TEXT NOT NULL,
  metadata TEXT,
  status TEXT DEFAULT 'pending',
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS settings (
  key TEXT PRIMARY KEY,
  value TEXT NOT NULL,
  description TEXT,
  updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS sites (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  domain TEXT,
  landing_page_url TEXT,
  status TEXT DEFAULT 'found',
  error_message TEXT,
  score REAL,
  grade TEXT,
  recapture_count INTEGER DEFAULT 0,
  country_code TEXT,
  updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS messages (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  site_id INTEGER,
  direction TEXT,
  contact_method TEXT,
  contact_uri TEXT,
  approval_status TEXT,
  delivery_status TEXT,
  message_type TEXT,
  body TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS pipeline_metrics (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  stage_name TEXT NOT NULL,
  sites_processed INTEGER DEFAULT 0,
  sites_succeeded INTEGER DEFAULT 0,
  sites_failed INTEGER DEFAULT 0,
  duration_ms INTEGER NOT NULL,
  started_at DATETIME NOT NULL,
  finished_at DATETIME NOT NULL,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS agent_outcomes (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  task_id INTEGER,
  agent_name TEXT NOT NULL,
  outcome TEXT NOT NULL,
  context_json TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS structured_logs (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  agent_name TEXT,
  task_id INTEGER,
  level TEXT,
  message TEXT,
  data_json TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS site_status (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  site_id INTEGER,
  status TEXT,
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS cron_locks (
  lock_key TEXT PRIMARY KEY,
  acquired_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  description TEXT
);
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor', 'idle');
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('triage', 'idle');
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('developer', 'idle');
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('qa', 'idle');
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('security', 'idle');
INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('architect', 'idle');
`;

const sharedDb = new Database(TEST_DB_PATH);
sharedDb.pragma('journal_mode = WAL');
sharedDb.pragma('busy_timeout = 10000');
sharedDb.exec(DB_SCHEMA);

// ATTACH in-memory databases as ops and tel so queries like ops.settings, tel.agent_tasks resolve
sharedDb.exec(`
ATTACH ':memory:' AS ops;
ATTACH ':memory:' AS tel;
CREATE TABLE IF NOT EXISTS ops.settings (key TEXT PRIMARY KEY, value TEXT NOT NULL, description TEXT, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP);
CREATE TABLE IF NOT EXISTS tel.agent_tasks (id INTEGER PRIMARY KEY AUTOINCREMENT, task_type TEXT NOT NULL, assigned_to TEXT NOT NULL, created_by TEXT, status TEXT DEFAULT 'pending', priority INTEGER DEFAULT 5, context_json TEXT, result_json TEXT, parent_task_id INTEGER, error_message TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, started_at DATETIME, completed_at DATETIME, retry_count INTEGER DEFAULT 0);
CREATE TABLE IF NOT EXISTS tel.agent_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, log_level TEXT, message TEXT NOT NULL, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
CREATE TABLE IF NOT EXISTS tel.agent_state (agent_name TEXT PRIMARY KEY, last_active DATETIME DEFAULT CURRENT_TIMESTAMP, current_task_id INTEGER, status TEXT DEFAULT 'idle', metrics_json TEXT);
CREATE TABLE IF NOT EXISTS tel.agent_messages (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, from_agent TEXT NOT NULL, to_agent TEXT NOT NULL, message_type TEXT, content TEXT NOT NULL, metadata_json TEXT, context_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, read_at DATETIME);
CREATE TABLE IF NOT EXISTS tel.agent_outcomes (id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, agent_name TEXT NOT NULL, task_type TEXT NOT NULL, outcome TEXT NOT NULL, context_json TEXT, result_json TEXT, duration_ms INTEGER, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
CREATE TABLE IF NOT EXISTS tel.pipeline_metrics (id INTEGER PRIMARY KEY AUTOINCREMENT, stage_name TEXT NOT NULL, sites_processed INTEGER DEFAULT 0, sites_succeeded INTEGER DEFAULT 0, sites_failed INTEGER DEFAULT 0, duration_ms INTEGER NOT NULL, started_at DATETIME NOT NULL, finished_at DATETIME NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
CREATE TABLE IF NOT EXISTS tel.structured_logs (id INTEGER PRIMARY KEY AUTOINCREMENT, agent_name TEXT, task_id INTEGER, level TEXT, message TEXT, data_json TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP);
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('monitor', 'idle');
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('triage', 'idle');
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('developer', 'idle');
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('qa', 'idle');
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('security', 'idle');
INSERT OR IGNORE INTO tel.agent_state (agent_name, status) VALUES ('architect', 'idle');
`);

import { resetDb as resetBaseDb } from '../../src/agents/base-agent.js';
import { resetDb as resetSLODb } from '../../src/agents/utils/slo-tracker.js';
import { MonitorAgent, resetDb as resetMonitorDb } from '../../src/agents/monitor.js';

let agent;

before(async () => {
  mkdirSync(TEST_LOG_DIR, { recursive: true });
  resetMonitorDb(sharedDb);
  agent = new MonitorAgent();
  await agent.initialize();
});

after(() => {
  resetMonitorDb(null);
  resetBaseDb();
  resetSLODb();
  try {
    sharedDb.close();
  } catch {
    /* ignore */
  }
  for (const ext of ['', '-wal', '-shm']) {
    try {
      unlinkSync(TEST_DB_PATH + ext);
    } catch {
      /* ignore */
    }
  }
  try {
    rmSync(TEST_LOG_DIR, { recursive: true, force: true });
  } catch {
    /* ignore */
  }
});

/**
 * Reset all mutable tables between tests while keeping the schema and the
 * baseline agent_state rows in place.
 */
function clearTables() {
  sharedDb.exec(`
    DELETE FROM agent_tasks;
    DELETE FROM agent_logs;
    DELETE FROM agent_messages;
    DELETE FROM human_review_queue;
    DELETE FROM settings;
    DELETE FROM sites;
    DELETE FROM messages;
    DELETE FROM pipeline_metrics;
    DELETE FROM agent_outcomes;
    DELETE FROM site_status;
    UPDATE agent_state SET status = 'idle', current_task_id = NULL, metrics_json = NULL;
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor', 'idle');
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('triage', 'idle');
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('developer', 'idle');
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('qa', 'idle');
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('security', 'idle');
    INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('architect', 'idle');
  `);
}

/**
 * Insert a monitor task row and return the full inserted record.
 *
 * @param {string} taskType - Value for agent_tasks.task_type.
 * @param {object} [context] - Serialized into context_json.
 * @param {string} [status] - Task status (default 'running').
 * @param {{created_at?: string, error_message?: string}} [opts] - Optional
 *   overrides; created_at falls back to CURRENT_TIMESTAMP via COALESCE.
 * @returns {object} The inserted agent_tasks row.
 */
function insertTask(taskType, context = {}, status = 'running', opts = {}) {
  const r = sharedDb
    .prepare(
      `INSERT INTO agent_tasks (task_type, assigned_to, priority, context_json, status, created_at, error_message)
       VALUES (?, 'monitor', 5, ?, ?, COALESCE(?, CURRENT_TIMESTAMP), ?)`
    )
    .run(
      taskType,
      JSON.stringify(context),
      status,
      opts.created_at || null,
      opts.error_message || null
    );
  return sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(r.lastInsertRowid);
}

/**
 * Ensure the ./logs directory the agent scans exists. Creation failures are
 * ignored — a pre-existing directory is the common case.
 */
function ensureLogsDir() {
  try {
    mkdirSync(`${process.cwd()}/logs`, { recursive: true });
  } catch {
    /* ignore */
  }
}

beforeEach(() => {
  clearTables();
});

// ── scanApiErrorRates — high API error rate path (lines 383-430) ──────────────

describe('MonitorAgent - scanApiErrorRates high error rate (lines 383-430)', () => {
  test('creates classify_error task when API error rate exceeds threshold', async () => {
    const today = new Date().toISOString().slice(0, 10);
    const logPath = `${process.cwd()}/logs/pipeline-${today}.log`;

    ensureLogsDir();

    // Write a log with many ZenRows errors (>10% error rate, >=3 errors)
    // Use patterns that the API scanner recognises: 'zenrows' keyword + error indicators
    const lines = [];
    // 4 error lines
    for (let i = 0; i < 4; i++) {
      lines.push(`[ERROR] zenrows request failed status=429 attempt=${i}`);
    }
    // 1 success line (total=5, errors=4 → 80% error rate)
    lines.push('[INFO] zenrows request succeeded status=200');
    writeFileSync(logPath, `${lines.join('\n')}\n`);

    try {
      // Reset file positions so agent reads from start
      agent.lastReadPositions = {};

      const task = insertTask('scan_logs', {}, 'running');

      await assert.doesNotReject(
        agent.processTask(task),
        'scan_logs with high API error rate should not throw'
      );

      const completed = sharedDb
        .prepare('SELECT status FROM agent_tasks WHERE id = ?')
        .get(task.id);
      assert.equal(completed?.status, 'completed');
    } finally {
      // Always remove the fixture, even on assertion failure, so a failed run
      // does not poison subsequent ones.
      try {
        unlinkSync(logPath);
      } catch {
        /* ignore */
      }
    }
  });
});

// ── checkPipelineHealth — rate-limited stages (lines 971-975) ────────────────

describe('MonitorAgent - checkPipelineHealth rate-limited stages (lines 971-975)', () => {
  test('skips stall alert for stages listed in rate-limits.json', async () => {
    ensureLogsDir();

    const rateLimitsFile = `${process.cwd()}/logs/rate-limits.json`;
    const futureReset = String(Date.now() + 2 * 60 * 60 * 1000);

    // Mark 'assets' as rate-limited — stall alert should be suppressed
    writeFileSync(
      rateLimitsFile,
      JSON.stringify({
        zenrows: {
          resetAt: futureReset,
          stages: ['assets'],
          reason: 'quota',
        },
      })
    );

    try {
      // Insert a site in 'found' status so assets stage has queue
      sharedDb
        .prepare(`INSERT INTO sites (domain, status) VALUES ('rate-limited-site.com', 'found')`)
        .run();

      // Insert a stale pipeline_metrics entry for assets (>30 min ago)
      const staleFinished = new Date(Date.now() - 45 * 60 * 1000)
        .toISOString()
        .replace('T', ' ')
        .slice(0, 19);
      sharedDb
        .prepare(
          `INSERT INTO pipeline_metrics (stage_name, sites_processed, sites_succeeded, sites_failed, duration_ms, started_at, finished_at)
           VALUES ('assets', 1, 1, 0, 5000, ?, ?)`
        )
        .run(staleFinished, staleFinished);

      const task = insertTask('check_pipeline_health', {}, 'running');

      await assert.doesNotReject(
        agent.processTask(task),
        'checkPipelineHealth with rate-limited stages should not throw'
      );

      const completed = sharedDb
        .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
        .get(task.id);
      assert.equal(completed?.status, 'completed');

      // The assets stage was rate-limited so its stall should NOT be in issues
      const result = JSON.parse(completed.result_json || '{}');
      const stalledIssues = (result.issues || []).filter(
        i => i.type === 'stage_stalled' && i.stage === 'assets'
      );
      assert.equal(
        stalledIssues.length,
        0,
        'rate-limited assets stage should not create stall alert'
      );
    } finally {
      try {
        unlinkSync(rateLimitsFile);
      } catch {
        /* ignore */
      }
    }
  });
});

// ── checkPipelineHealth — crash loop detection (lines 1029-1065) ─────────────

describe('MonitorAgent - checkPipelineHealth crash loop detection (lines 1029-1065)', () => {
  test('detects crash loop and creates classify_error task with addReviewItem', async () => {
    ensureLogsDir();

    // Insert a site in 'found' status so assets stage has a non-empty queue
    sharedDb
      .prepare(`INSERT INTO sites (domain, status) VALUES ('crash-loop-site.com', 'found')`)
      .run();

    // Insert 5 recent pipeline_metrics for 'assets' stage with 0 sites_processed,
    // all within the last 2 minutes (crash loop pattern)
    const now = Date.now();
    for (let i = 0; i < 5; i++) {
      const t = new Date(now - (5 - i) * 15 * 1000).toISOString().replace('T', ' ').slice(0, 19);
      sharedDb
        .prepare(
          `INSERT INTO pipeline_metrics (stage_name, sites_processed, sites_succeeded, sites_failed, duration_ms, started_at, finished_at)
           VALUES ('assets', 0, 0, 0, 1000, ?, ?)`
        )
        .run(t, t);
    }

    const task = insertTask('check_pipeline_health', {}, 'running');

    await assert.doesNotReject(
      agent.processTask(task),
      'checkPipelineHealth crash loop detection should not throw'
    );

    const completed = sharedDb
      .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
      .get(task.id);
    assert.equal(completed?.status, 'completed');

    // A classify_error task should have been created for the crash loop
    const classifyTask = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks
         WHERE task_type = 'classify_error' AND assigned_to = 'triage'`
      )
      .get();
    assert.ok(classifyTask, 'should create classify_error task for crash loop');
    const ctx = JSON.parse(classifyTask.context_json || '{}');
    assert.equal(ctx.error_type, 'stage_crash_loop');
    assert.equal(ctx.stage, 'assets');
  });
});

// ── checkPipelineHealth — stalled stage with crash pattern in pipeline log ────

describe('MonitorAgent - checkPipelineHealth stalled stage with crash log (lines 1092-1101)', () => {
  test('reads pipeline log to find crash error for stalled stage', async () => {
    const today = new Date().toISOString().slice(0, 10);
    const pipelineLogPath = `${process.cwd()}/logs/pipeline-${today}.log`;

    ensureLogsDir();

    // Insert a site in 'found' status so assets stage has queue
    sharedDb
      .prepare(`INSERT INTO sites (domain, status) VALUES ('stalled-stage-site.com', 'found')`)
      .run();

    // Insert a stale pipeline_metrics (more than 30 min ago, 1+ site processed so not crash loop)
    const staleTime = new Date(Date.now() - 40 * 60 * 1000)
      .toISOString()
      .replace('T', ' ')
      .slice(0, 19);
    sharedDb
      .prepare(
        `INSERT INTO pipeline_metrics (stage_name, sites_processed, sites_succeeded, sites_failed, duration_ms, started_at, finished_at)
         VALUES ('assets', 5, 5, 0, 10000, ?, ?)`
      )
      .run(staleTime, staleTime);

    // Write a pipeline log with a repeated crash pattern for Assets stage
    const crashLines = [
      `[ERROR] ✗ Assets failed: {"stack": "Error: ECONNRESET\\nat net.js:100"}`,
      `[ERROR] ✗ Assets failed: {"stack": "Error: ECONNRESET\\nat net.js:100"}`,
    ];
    writeFileSync(pipelineLogPath, `${crashLines.join('\n')}\n`);

    try {
      const task = insertTask('check_pipeline_health', {}, 'running');

      await assert.doesNotReject(
        agent.processTask(task),
        'checkPipelineHealth stalled with crash log should not throw'
      );

      const completed = sharedDb
        .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
        .get(task.id);
      assert.equal(completed?.status, 'completed');

      const result = JSON.parse(completed.result_json || '{}');
      const stalledIssue = (result.issues || []).find(
        i => i.type === 'stage_stalled' && i.stage === 'assets'
      );
      assert.ok(stalledIssue, 'should detect stalled assets stage');
    } finally {
      try {
        unlinkSync(pipelineLogPath);
      } catch {
        /* ignore */
      }
    }
  });
});

// ── checkPipelineHealth — proposal JSON parse failures (lines 1211-1229) ─────

describe('MonitorAgent - checkPipelineHealth proposal JSON parse failures (lines 1211-1229)', () => {
  test('creates fix_bug task when proposal log has >= 5 parse failures', async () => {
    const today = new Date().toISOString().slice(0, 10);
    const proposalLogPath = `${process.cwd()}/logs/proposals-${today}.log`;

    try {
      mkdirSync(`${process.cwd()}/logs`, { recursive: true });
    } catch {
      /* ignore */
    }

    // Write a proposals log with enough parse failure markers to trigger detection
    const lines = [];
    for (let i = 0; i < 6; i++) {
      lines.push(`[ERROR] Invalid proposal response format for site ${i}`);
    }
    writeFileSync(proposalLogPath, `${lines.join('\n')}\n`);

    try {
      const task = insertTask('check_pipeline_health', {}, 'running');

      await assert.doesNotReject(
        agent.processTask(task),
        'checkPipelineHealth with proposal parse failures should not throw'
      );

      const completed = sharedDb
        .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
        .get(task.id);
      assert.equal(completed?.status, 'completed');

      const result = JSON.parse(completed.result_json || '{}');
      const parseFailIssue = (result.issues || []).find(
        i => i.type === 'proposal_json_parse_failures'
      );
      assert.ok(parseFailIssue, 'should detect proposal JSON parse failures');
      assert.ok(parseFailIssue.count >= 5);

      // Should create a fix_bug task
      const fixTask = sharedDb
        .prepare(
          `SELECT * FROM agent_tasks
           WHERE task_type = 'fix_bug' AND assigned_to = 'developer'`
        )
        .get();
      assert.ok(fixTask, 'should create fix_bug task for proposal parse failures');
      const ctx = JSON.parse(fixTask.context_json || '{}');
      assert.equal(ctx.error_type, 'proposal_json_parse_failures');
    } finally {
      // Clean up the fixture even when an assertion above fails, so reruns start clean.
      try {
        unlinkSync(proposalLogPath);
      } catch {
        /* ignore */
      }
    }
  });
});

// ── checkPipelineHealth — bogus SMS phone numbers (lines 1246-1266) ──────────

describe('MonitorAgent - checkPipelineHealth bogus SMS phone numbers (lines 1246-1266)', () => {
  test('creates fix_bug task when >= 10 pending SMS have invalid phone numbers', async () => {
    // Insert 12 pending SMS messages with bogus phone numbers (start with +0)
    for (let i = 0; i < 12; i++) {
      sharedDb
        .prepare(
          `INSERT INTO messages (direction, contact_method, contact_uri, approval_status)
           VALUES ('outbound', 'sms', '+04010${i.toString().padStart(4, '0')}', 'pending')`
        )
        .run();
    }

    const task = insertTask('check_pipeline_health', {}, 'running');

    await assert.doesNotReject(
      agent.processTask(task),
      'checkPipelineHealth with bogus SMS numbers should not throw'
    );

    const completed = sharedDb
      .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
      .get(task.id);
    assert.equal(completed?.status, 'completed');

    const result = JSON.parse(completed.result_json || '{}');
    const bogusIssue = (result.issues || []).find(i => i.type === 'bogus_sms_phone_numbers');
    assert.ok(bogusIssue, 'should detect bogus SMS phone numbers');
    assert.ok(bogusIssue.count >= 10);

    // Should create a fix_bug task
    const fixTask = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks
         WHERE task_type = 'fix_bug' AND assigned_to = 'developer'`
      )
      .get();
    assert.ok(fixTask, 'should create fix_bug task for bogus SMS numbers');
    const ctx = JSON.parse(fixTask.context_json || '{}');
    assert.equal(ctx.error_type, 'bogus_sms_phone_numbers');
  });
});

// ── checkPipelineHealth — SMS country code mismatch (lines 1294-1313) ────────

describe('MonitorAgent - checkPipelineHealth SMS country code mismatch (lines 1294-1313)', () => {
  test('creates fix_bug task when >= 10 pending SMS have wrong country calling code', async () => {
    // Insert 12 AU sites with +60 (Malaysia) numbers — a country code mismatch
    for (let i = 0; i < 12; i++) {
      const siteR = sharedDb
        .prepare(`INSERT INTO sites (domain, status, country_code) VALUES (?, 'found', 'AU')`)
        .run(`mismatch-site-${i}.com.au`);
      sharedDb
        .prepare(
          `INSERT INTO messages (site_id, direction, contact_method, contact_uri, approval_status)
           VALUES (?, 'outbound', 'sms', '+601234567${i}', 'pending')`
        )
        .run(siteR.lastInsertRowid);
    }

    const task = insertTask('check_pipeline_health', {}, 'running');

    await assert.doesNotReject(
      agent.processTask(task),
      'checkPipelineHealth with SMS country mismatch should not throw'
    );

    const completed = sharedDb
      .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?')
      .get(task.id);
    assert.equal(completed?.status, 'completed');

    const result = JSON.parse(completed.result_json || '{}');
    const mismatchIssue = (result.issues || []).find(i => i.type === 'sms_country_code_mismatch');
    assert.ok(mismatchIssue, 'should detect SMS country code mismatch');
    assert.ok(mismatchIssue.count >= 10);

    // Should create a fix_bug task (filter by error_type — other checks may also
    // have queued fix_bug tasks for the developer in the same run)
    const fixTasks = sharedDb
      .prepare(
        `SELECT * FROM agent_tasks
         WHERE task_type = 'fix_bug' AND assigned_to = 'developer'`
      )
      .all();
    const mismatchFix = fixTasks.find(t => {
      const ctx = JSON.parse(t.context_json || '{}');
      return ctx.error_type === 'sms_country_code_mismatch';
    });
    assert.ok(mismatchFix, 'should create fix_bug task for country code mismatch');
  });
});

// ── checkPipelineHealth — SMS landline check non-fatal (lines 1321-1357) ──────
// NOTE: The landline detection in monitor.js uses `this.db` (line 1322) instead of
// the module-level `db`. `this.db` is undefined, so the query always throws a
// TypeError which is silently swallowed by the surrounding try/catch (line 1355).
// Lines 1339-1354 (the if-block body) are therefore unreachable until the bug is
// fixed. This test exercises the catch path to verify it does not crash the task.

describe('MonitorAgent - checkPipelineHealth SMS landline check non-fatal (lines 1321-1357)', () => {
  test('completes without throwing when landline query uses this.db (bug swallowed by catch)', async () => {
    // AU mobile numbers must start with +614. Insert 6 pending SMS to AU landlines (+613...)
    for (let i = 0; i < 6; i++) {
      const siteR = sharedDb
        .prepare(`INSERT INTO sites (domain, status, country_code) VALUES (?, 'found', 'AU')`)
        .run(`landline-au-${i}.com.au`);
      sharedDb
        .prepare(
          `INSERT INTO messages (site_id, direction, contact_method, contact_uri, approval_status)
           VALUES (?, 'outbound', 'sms', '+61398765${i.toString().padStart(3, '0')}', 'pending')`
        )
        .run(siteR.lastInsertRowid);
    }

    const task = insertTask('check_pipeline_health', {}, 'running');

    // Should complete normally — the TypeError from this.db is caught internally
    await assert.doesNotReject(
      agent.processTask(task),
      'checkPipelineHealth with landline SMS should not throw (error caught internally)'
    );

    const completed = sharedDb.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(completed?.status, 'completed');
  });
});

// ── detectAnomaly — disk usage > 90% triggers addReviewItem (lines 914-920) ──

describe('MonitorAgent - detectAnomaly disk > 90% triggers addReviewItem (lines 914-920)', () => {
  test('completes detect_anomaly without throwing even when disk check runs', async () => {
    // We cannot force disk usage > 90% in a test, but we can ensure the full
    // detect_anomaly path runs to completion — covering the surrounding lines.
    const task = insertTask('detect_anomaly', {}, 'running');

    await assert.doesNotReject(agent.processTask(task), 'detect_anomaly should not throw');

    const completed = sharedDb.prepare('SELECT status FROM agent_tasks WHERE id = ?').get(task.id);
    assert.equal(completed?.status, 'completed');
  });
});

// ── ensureRecurringTasks — creates tasks when none exist (lines 1938-1991) ───

describe('MonitorAgent - ensureRecurringTasks creates missing tasks (lines 1938-1991)', () => {
  test('creates all recurring task types when none exist', async () => {
    // Ensure no existing recurring monitor tasks
    sharedDb.exec(`DELETE FROM agent_tasks`);
    sharedDb.exec(`UPDATE agent_state SET status='idle', current_task_id=NULL`);
    sharedDb.exec(
      `INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor','idle')`
    );

    await agent.ensureRecurringTasks();

    const recurringTypes = [
      'scan_logs',
      'check_agent_health',
      'check_process_compliance',
      'detect_anomaly',
      'check_pipeline_health',
      'check_slo_compliance',
      'check_rate_limits',
    ];

    for (const taskType of recurringTypes) {
      const task = sharedDb
        .prepare(
          `SELECT id FROM agent_tasks
           WHERE task_type = ? AND assigned_to = 'monitor' AND status IN ('pending', 'running')`
        )
        .get(taskType);
      assert.ok(task, `should create pending task for ${taskType}`);
    }
  });

  test('skips creation when task already pending', async () => {
    sharedDb.exec(`DELETE FROM agent_tasks`);
    sharedDb.exec(`UPDATE agent_state SET status='idle', current_task_id=NULL`);
    sharedDb.exec(
      `INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor','idle')`
    );

    // Pre-insert a pending scan_logs task
    sharedDb
      .prepare(
        `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json)
         VALUES ('scan_logs', 'monitor', 'pending', '{}')`
      )
      .run();

    await agent.ensureRecurringTasks();

    // Should still be exactly 1 scan_logs task (not duplicated)
    const count = sharedDb
      .prepare(
        `SELECT COUNT(*) as cnt FROM agent_tasks
         WHERE task_type = 'scan_logs' AND assigned_to = 'monitor'`
      )
      .get().cnt;
    assert.equal(count, 1, 'should not duplicate an already-pending task');
  });

  test('creates task when last completion has aged out', async () => {
    sharedDb.exec(`DELETE FROM agent_tasks`);
    sharedDb.exec(`UPDATE agent_state SET status='idle', current_task_id=NULL`);
    sharedDb.exec(
      `INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor','idle')`
    );

    // Insert a completed check_agent_health task 2 hours ago (interval is 30 min → aged out).
    // Both created_at and completed_at must be old so the dedup check (which excludes
    // completed tasks created within 5 minutes) does not suppress the new pending task.
    const twoHoursAgo = new Date(Date.now() - 2 * 60 * 60 * 1000)
      .toISOString()
      .replace('T', ' ')
      .slice(0, 19);
    sharedDb
      .prepare(
        `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, created_at, completed_at)
         VALUES ('check_agent_health', 'monitor', 'completed', '{}', ?, ?)`
      )
      .run(twoHoursAgo, twoHoursAgo);

    await agent.ensureRecurringTasks();

    // A new pending check_agent_health task should have been created
    const newTask = sharedDb
      .prepare(
        `SELECT id FROM agent_tasks
         WHERE task_type = 'check_agent_health' AND assigned_to = 'monitor' AND status = 'pending'`
      )
      .get();
    assert.ok(newTask, 'should create new task after interval has elapsed');
  });

  test('skips creation when last completion is recent (within interval)', async () => {
    sharedDb.exec(`DELETE FROM agent_tasks`);
    sharedDb.exec(`UPDATE agent_state SET status='idle', current_task_id=NULL`);
    sharedDb.exec(
      `INSERT OR IGNORE INTO agent_state (agent_name, status) VALUES ('monitor','idle')`
    );

    // Insert a completed check_rate_limits task 10 minutes ago (interval is 360 min → not aged out)
    const tenMinAgo = new Date(Date.now() - 10 * 60 * 1000)
      .toISOString()
      .replace('T', ' ')
      .slice(0, 19);
    sharedDb
      .prepare(
        `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json, completed_at)
         VALUES ('check_rate_limits', 'monitor', 'completed', '{}', ?)`
      )
      .run(tenMinAgo);

    await agent.ensureRecurringTasks();

    // No new pending check_rate_limits task should exist (it's within interval)
    const pendingTask = sharedDb
      .prepare(
        `SELECT id FROM agent_tasks
         WHERE task_type = 'check_rate_limits' AND assigned_to = 'monitor' AND status = 'pending'`
      )
      .get();
    assert.equal(pendingTask, undefined, 'should not create task when last completion is recent');
  });
});

// ── checkSLOCompliance
— violations path with critical severity (lines 1443-1494) ── 843 844 describe('MonitorAgent - checkSLOCompliance critical violation triggers addReviewItem (lines 1485-1494)', () => { 845 test('adds human review item for critical SLO violations', async () => { 846 // Insert site_status rows to trigger SLO violation for 'assets_to_scored' (target: 30 min) 847 // Create 20 virtual sites that took 120 min from assets_captured → scored 848 const insertSite = sharedDb.prepare( 849 `INSERT INTO sites (domain, status, country_code) VALUES (?, 'prog_scored', 'AU')` 850 ); 851 const insertSiteStatus = sharedDb.prepare( 852 `INSERT INTO site_status (site_id, status, created_at) VALUES (?, ?, ?)` 853 ); 854 for (let i = 0; i < 20; i++) { 855 const siteRow = insertSite.run(`slo-slow-site-${i}.example.com`); 856 const siteId = siteRow.lastInsertRowid; 857 // assets_captured 2 hours ago 858 const assetsTime = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString(); 859 // scored 0 min ago (120 min after assets_captured) 860 const scoredTime = new Date().toISOString(); 861 insertSiteStatus.run(siteId, 'assets_captured', assetsTime); 862 insertSiteStatus.run(siteId, 'prog_scored', scoredTime); 863 } 864 865 const task = insertTask('check_slo_compliance', {}, 'running'); 866 867 await assert.doesNotReject( 868 agent.processTask(task), 869 'checkSLOCompliance with critical violations should not throw' 870 ); 871 872 const completed = sharedDb 873 .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?') 874 .get(task.id); 875 assert.equal(completed?.status, 'completed'); 876 877 const result = JSON.parse(completed.result_json || '{}'); 878 assert.ok(typeof result.violations === 'number'); 879 880 // If violations were found, design_optimization tasks should be created 881 if (result.violations > 0) { 882 const designTask = sharedDb 883 .prepare( 884 `SELECT * FROM agent_tasks 885 WHERE task_type = 'design_optimization' AND assigned_to = 'architect'` 886 ) 887 .get(); 888 
assert.ok(designTask, 'should create design_optimization task for SLO violation'); 889 const ctx = JSON.parse(designTask.context_json || '{}'); 890 assert.equal(ctx.optimization_type, 'slo_violation'); 891 } 892 }); 893 }); 894 895 // ── checkBlockedTasks — pattern group creates fix_bug task (lines 1663-1699) ── 896 897 describe('MonitorAgent - checkBlockedTasks pattern group creates fix_bug task (lines 1663-1699)', () => { 898 test('creates fix_bug task when blocked tasks share the same error prefix', async () => { 899 const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000) 900 .toISOString() 901 .replace('T', ' ') 902 .slice(0, 19); 903 904 // Insert 4 blocked tasks with identical error prefixes (forms a pattern group > 3) 905 const sharedError = 'Database connection pool exhausted: too many concurrent queries'; 906 for (let i = 0; i < 4; i++) { 907 sharedDb 908 .prepare( 909 `INSERT INTO agent_tasks (task_type, assigned_to, status, error_message, created_at) 910 VALUES ('scan_logs', 'monitor', 'blocked', ?, ?)` 911 ) 912 .run(sharedError, threeHoursAgo); 913 } 914 915 const r = sharedDb 916 .prepare( 917 `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 918 VALUES ('check_blocked_tasks', 'monitor', 'running', '{}')` 919 ) 920 .run(); 921 const task = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(r.lastInsertRowid); 922 923 await agent.processTask(task); 924 925 const completed = sharedDb 926 .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?') 927 .get(task.id); 928 assert.equal(completed?.status, 'completed'); 929 930 const result = JSON.parse(completed.result_json || '{}'); 931 assert.equal(result.total_blocked, 4); 932 933 // Should have created a fix_bug task for the pattern 934 const fixTask = sharedDb 935 .prepare( 936 `SELECT * FROM agent_tasks 937 WHERE task_type = 'fix_bug' AND assigned_to = 'developer'` 938 ) 939 .get(); 940 assert.ok(fixTask, 'should create fix_bug task for blocked task 
pattern'); 941 const ctx = JSON.parse(fixTask.context_json || '{}'); 942 assert.equal(ctx.error_type, 'pattern_blocked_tasks'); 943 assert.ok(ctx.affected_task_count >= 4); 944 }); 945 946 test('does not duplicate fix_bug when one already exists for same error pattern', async () => { 947 const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000) 948 .toISOString() 949 .replace('T', ' ') 950 .slice(0, 19); 951 952 const sharedError = 'ECONNREFUSED: connection refused on port 9200 elasticsearch down'; 953 for (let i = 0; i < 4; i++) { 954 sharedDb 955 .prepare( 956 `INSERT INTO agent_tasks (task_type, assigned_to, status, error_message, created_at) 957 VALUES ('detect_anomaly', 'monitor', 'blocked', ?, ?)` 958 ) 959 .run(sharedError, threeHoursAgo); 960 } 961 962 // Pre-insert a fix_bug task for this error pattern 963 sharedDb 964 .prepare( 965 `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 966 VALUES ('fix_bug', 'developer', 'pending', ?)` 967 ) 968 .run( 969 JSON.stringify({ 970 error_type: 'pattern_blocked_tasks', 971 error_prefix: sharedError.substring(0, 40), 972 }) 973 ); 974 975 const r = sharedDb 976 .prepare( 977 `INSERT INTO agent_tasks (task_type, assigned_to, status, context_json) 978 VALUES ('check_blocked_tasks', 'monitor', 'running', '{}')` 979 ) 980 .run(); 981 const task = sharedDb.prepare('SELECT * FROM agent_tasks WHERE id = ?').get(r.lastInsertRowid); 982 983 await agent.processTask(task); 984 985 const completed = sharedDb 986 .prepare('SELECT status, result_json FROM agent_tasks WHERE id = ?') 987 .get(task.id); 988 assert.equal(completed?.status, 'completed'); 989 990 // Should not create a duplicate fix_bug task 991 const fixTasks = sharedDb 992 .prepare( 993 `SELECT COUNT(*) as cnt FROM agent_tasks 994 WHERE task_type = 'fix_bug' AND assigned_to = 'developer'` 995 ) 996 .get(); 997 assert.equal(fixTasks.cnt, 1, 'should not create duplicate fix_bug for same pattern'); 998 }); 999 });