// classify-unknown-errors-supplement2.test.js
/**
 * classify-unknown-errors-supplement2.test.js
 *
 * Covers lines 179-235, 260-262 (applyProposals with actual proposals),
 * and exercises the direct-run guard via indirect testing.
 *
 * Uses mock.module to intercept fs (including fs.promises) and child_process.
 *
 * Run with:
 *   NODE_ENV=test node --test --experimental-test-module-mocks \
 *     tests/cron/classify-unknown-errors-supplement2.test.js
 */

import { test, describe } from 'node:test';
import assert from 'node:assert/strict';
import Database from 'better-sqlite3';
import { tmpdir } from 'os';
import { join } from 'path';
import * as realFs from 'fs';
const {
  mkdirSync,
  mkdtempSync,
  rmSync,
  readFileSync,
  existsSync,
  writeFileSync,
  readdirSync,
  unlinkSync,
  createWriteStream,
} = realFs;
import { createPgMock } from '../helpers/pg-mock.js';

// Disable Logger file I/O for all tests in this file
process.env.NODE_ENV = 'test';

// ── Schema helper ──────────────────────────────────────────────────────────────

/**
 * Create the tables these tests rely on (sites, messages,
 * error_pattern_proposals, cron_jobs) if they do not exist yet.
 *
 * @param {Database} db - Open better-sqlite3 connection.
 */
function createSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS sites (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      domain TEXT NOT NULL DEFAULT 'example.com',
      landing_page_url TEXT,
      status TEXT NOT NULL DEFAULT 'found',
      error_message TEXT,
      retry_count INTEGER DEFAULT 0,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      rescored_at DATETIME
    );

    CREATE TABLE IF NOT EXISTS messages (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      site_id INTEGER,
      direction TEXT DEFAULT 'outbound',
      contact_method TEXT DEFAULT 'email',
      message_body TEXT,
      delivery_status TEXT DEFAULT 'pending',
      error_message TEXT,
      retry_at DATETIME,
      read_at TEXT,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      message_type TEXT DEFAULT 'outreach',
      raw_payload TEXT
    );

    CREATE TABLE IF NOT EXISTS error_pattern_proposals (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      pattern TEXT NOT NULL,
      label TEXT NOT NULL,
      group_name TEXT NOT NULL CHECK(group_name IN ('terminal', 'retriable')),
      context TEXT NOT NULL CHECK(context IN ('site', 'outreach')),
      example_errors TEXT,
      occurrence_count INTEGER,
      status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'approved', 'rejected')),
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      reviewed_at DATETIME,
      reviewed_by TEXT
    );

    CREATE TABLE IF NOT EXISTS cron_jobs (
      id TEXT PRIMARY KEY,
      last_run_at DATETIME,
      status TEXT DEFAULT 'idle'
    );
  `);
}

// ── Helper: fresh isolated DB dir ─────────────────────────────────────────────

/**
 * Create a unique temp directory containing an empty `test.db` with the schema
 * applied.
 *
 * @param {string} tag - Short label embedded in the directory name.
 * @returns {{ dir: string, dbPath: string }} Directory and database file paths.
 */
function makeTempDb(tag) {
  // mkdtempSync appends a random suffix, so two runs can never share a directory
  // (a Date.now() suffix can collide when runs start within the same millisecond).
  const dir = mkdtempSync(join(tmpdir(), `cue-s2-${tag}-`));
  const dbPath = join(dir, 'test.db');
  const db = new Database(dbPath);
  try {
    createSchema(db);
  } finally {
    db.close();
  }
  return { dir, dbPath };
}

// Read real error-categories.js source once at module load time (before any mocks)
const realCategoriesPath = join(process.cwd(), 'src/utils/error-categories.js');
const realCategoriesSrc = readFileSync(realCategoriesPath, 'utf8');

// ── Per-test fixtures ─────────────────────────────────────────────────────────

/**
 * Provision a temp database for one test, point DATABASE_PATH at it and
 * register cleanup on the test context.
 *
 * Cleanup runs from `t.after`, so it also happens when an assertion throws:
 * tracked connections are closed, DATABASE_PATH is removed and the temp
 * directory is deleted.
 *
 * @param {import('node:test').TestContext} t - Context of the running test.
 * @param {string} tag - Short label for the temp directory.
 * @returns {{ dir: string, dbPath: string, open: () => Database }}
 *   Paths plus `open()`, which returns a new connection that is closed
 *   automatically at the end of the test if still open.
 */
function useTempDb(t, tag) {
  const { dir, dbPath } = makeTempDb(tag);
  const connections = [];
  process.env.DATABASE_PATH = dbPath;

  t.after(() => {
    // Close handles before removing the directory that contains the DB file.
    for (const connection of connections) {
      if (connection.open) {
        connection.close();
      }
    }
    delete process.env.DATABASE_PATH;
    rmSync(dir, { recursive: true, force: true });
  });

  return {
    dir,
    dbPath,
    open() {
      const connection = new Database(dbPath);
      connections.push(connection);
      return connection;
    },
  };
}

/**
 * Insert one row into error_pattern_proposals (status defaults to 'pending').
 *
 * @param {Database} db - Open connection to the test database.
 * @param {{ pattern: string, label: string, groupName: string, context: string }} proposal
 */
function insertProposal(db, { pattern, label, groupName, context }) {
  db.prepare(
    `INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
     VALUES (?, ?, ?, ?)`
  ).run(pattern, label, groupName, context);
}

/**
 * Build the options object for `t.mock.module('fs', ...)`.
 *
 * The stubs are exposed both as the named `promises` export and under
 * `default.promises`, and the sync helpers (`existsSync`, `mkdirSync`,
 * `createWriteStream`) are replaced with inert stand-ins at both levels.
 *
 * @param {{ readFile: Function, writeFile?: Function }} handlers
 *   `readFile` is required; `writeFile` defaults to a no-op.
 * @returns {{ namedExports: object }} Options for `mock.module`.
 */
function fsMockOptions({ readFile, writeFile = async () => {} }) {
  const promises = { readFile, writeFile };
  const syncStubs = {
    existsSync: () => false,
    mkdirSync: () => {},
    createWriteStream: () => ({ write: () => {}, end: () => {} }),
  };
  return {
    namedExports: {
      ...syncStubs,
      default: { promises, ...syncStubs },
      promises,
    },
  };
}

/** Replace `fs` for the duration of the test with the given stubs. */
function mockFs(t, handlers) {
  return t.mock.module('fs', fsMockOptions(handlers));
}

/** Replace `child_process.execSync` for the duration of the test. */
function mockExecSync(t, execSync) {
  return t.mock.module('child_process', { namedExports: { execSync } });
}

/** Route `../../src/utils/db.js` to this test's SQLite DB (not real PG). */
function mockDb(t, db) {
  return t.mock.module('../../src/utils/db.js', {
    namedExports: createPgMock(db),
  });
}

/**
 * Fake stdout of the phase-3 retry subprocess: one JSON line with the counts.
 *
 * @param {number} sitesRetried
 * @param {number} outreachesRetried
 * @returns {string}
 */
function retryOutput(sitesRetried, outreachesRetried) {
  return `${JSON.stringify({ sites_retried: sitesRetried, outreaches_retried: outreachesRetried })}\n`;
}

/**
 * Import a fresh copy of the module under test. The query string defeats the
 * ESM cache so each test gets an instance bound to its own mocks.
 *
 * @param {string} tag - Label included in the cache-busting query.
 * @returns {Promise<object>} Module namespace (exposes `classifyUnknownErrors`).
 */
function loadClassifier(tag) {
  return import(`../../src/cron/classify-unknown-errors.js?v=${tag}-${Date.now()}`);
}

// ── Tests ──────────────────────────────────────────────────────────────────────

describe('applyProposals — pending proposals path (lines 179-258)', () => {
  test('applies a valid pending proposal: file is rewritten and proposal marked approved', async t => {
    const tmp = useTempDb(t, 'apply-valid');

    // Keep a single connection for both test data insertion and db.js mock
    const db = tmp.open();
    insertProposal(db, {
      pattern: 'ERR_UNIQUE_TEST_PATTERN_XYZ',
      label: 'Test pattern',
      groupName: 'retriable',
      context: 'site',
    });

    await mockDb(t, db);

    let writtenContent = null;
    await mockFs(t, {
      readFile: async () => realCategoriesSrc,
      writeFile: async (_path, content) => {
        writtenContent = content;
      },
    });
    await mockExecSync(t, () => retryOutput(1, 0));

    const { classifyUnknownErrors } = await loadClassifier('apply-valid');
    const result = await classifyUnknownErrors();

    // applied should be at least 1 since we inserted a valid proposal
    assert.ok(
      result.patterns_applied >= 1,
      `Expected patterns_applied >= 1, got ${result.patterns_applied}`
    );

    // The rewritten source handed to writeFile must carry the new pattern.
    assert.equal(typeof writtenContent, 'string', 'writeFile should receive the rewritten source');
    assert.ok(
      writtenContent.includes('ERR_UNIQUE_TEST_PATTERN_XYZ'),
      'Rewritten source should contain the applied pattern'
    );

    // Check the proposal was marked approved in DB (re-read using same db connection)
    const row = db
      .prepare(
        `SELECT status, reviewed_by FROM error_pattern_proposals WHERE pattern = 'ERR_UNIQUE_TEST_PATTERN_XYZ'`
      )
      .get();

    assert.equal(row.status, 'approved', 'Proposal should be marked approved after apply');
    assert.ok(
      row.reviewed_by.includes('classifyUnknownErrors'),
      'reviewed_by should reference classifyUnknownErrors'
    );
  });

  test('skips proposal with invalid regex — applies=0, proposal stays pending', async t => {
    const tmp = useTempDb(t, 'invalid-regex');

    const db = tmp.open();
    insertProposal(db, {
      pattern: '[invalid(regex',
      label: 'Bad regex',
      groupName: 'retriable',
      context: 'site',
    });
    db.close();

    // NOTE(review): unlike the sibling tests, db.js is not mocked here. Confirm the
    // module under test actually reads this SQLite file; otherwise the assertions
    // below would hold without the invalid-regex branch ever running.
    await mockFs(t, { readFile: async () => realCategoriesSrc });
    await mockExecSync(t, () => retryOutput(0, 0));

    const { classifyUnknownErrors } = await loadClassifier('invalid-regex');
    const result = await classifyUnknownErrors();

    assert.equal(result.patterns_applied, 0, 'Invalid regex proposal should not be applied');

    const db2 = tmp.open();
    const row = db2
      .prepare(`SELECT status FROM error_pattern_proposals WHERE pattern = '[invalid(regex'`)
      .get();
    db2.close();
    assert.equal(row.status, 'pending', 'Invalid-regex proposal should remain pending');
  });

  test('skips proposal when array name not found in source file', async t => {
    const tmp = useTempDb(t, 'no-array');

    const db = tmp.open();
    insertProposal(db, {
      pattern: 'ERR_ARRAY_NOT_FOUND',
      label: 'Missing array',
      groupName: 'retriable',
      context: 'site',
    });
    db.close();

    // Return a src that does NOT contain SITE_RETRIABLE_PATTERNS
    const fakeSrc = '// empty file with no pattern arrays\n';

    // NOTE(review): db.js is not mocked in this test either — confirm the proposal
    // inserted above is really what the module sees.
    await mockFs(t, { readFile: async () => fakeSrc });
    await mockExecSync(t, () => retryOutput(0, 0));

    const { classifyUnknownErrors } = await loadClassifier('no-array');
    const result = await classifyUnknownErrors();

    assert.equal(
      result.patterns_applied,
      0,
      'Proposal should be skipped when array not found in source'
    );
  });

  test('phase 3 retry subprocess failure returns applied count with zeros for retry counts (lines 260-262)', async t => {
    const tmp = useTempDb(t, 'subprocess-fail');

    const db = tmp.open();
    insertProposal(db, {
      pattern: 'ERR_SUBPROCESS_FAIL_TEST',
      label: 'Subprocess fail',
      groupName: 'retriable',
      context: 'site',
    });

    await mockDb(t, db);
    await mockFs(t, { readFile: async () => realCategoriesSrc });

    // Make execSync throw to exercise the catch block (lines 260-262)
    await mockExecSync(t, () => {
      throw new Error('Subprocess crashed intentionally');
    });

    const { classifyUnknownErrors } = await loadClassifier('subprocess-fail');
    const result = await classifyUnknownErrors();

    assert.ok(
      result.patterns_applied >= 1,
      `Expected patterns_applied >= 1, got ${result.patterns_applied}`
    );
    assert.equal(
      result.phase3_sites_retried,
      0,
      'Should return 0 sites_retried on subprocess failure'
    );
    assert.equal(
      result.phase3_outreaches_retried,
      0,
      'Should return 0 outreaches_retried on subprocess failure'
    );
  });

  test('arrayName mapping covers all four group_name/context combinations', async t => {
    const tmp = useTempDb(t, 'four-arrays');

    const db = tmp.open();
    const proposals = [
      { pattern: 'ERR_SITE_TERMINAL_X1', label: 'Site terminal', groupName: 'terminal', context: 'site' },
      { pattern: 'ERR_SITE_RETRIABLE_X1', label: 'Site retriable', groupName: 'retriable', context: 'site' },
      { pattern: 'ERR_OUT_TERMINAL_X1', label: 'Outreach terminal', groupName: 'terminal', context: 'outreach' },
      { pattern: 'ERR_OUT_RETRIABLE_X1', label: 'Outreach retriable', groupName: 'retriable', context: 'outreach' },
    ];
    for (const proposal of proposals) {
      insertProposal(db, proposal);
    }

    await mockDb(t, db);

    let appliedCount = 0;
    await mockFs(t, {
      readFile: async () => realCategoriesSrc,
      writeFile: async () => {
        appliedCount++;
      },
    });
    await mockExecSync(t, () => retryOutput(0, 0));

    const { classifyUnknownErrors } = await loadClassifier('four-arrays');
    const result = await classifyUnknownErrors();

    assert.equal(result.patterns_applied, 4, `Expected 4 applied, got ${result.patterns_applied}`);
    // writeFile should have been called once (one write for all patterns)
    assert.equal(appliedCount, 1, 'writeFile should be called once after all patterns applied');
  });

  test('no-op when pending proposals list is empty — readFile is never called', async t => {
    useTempDb(t, 'no-proposals');

    let readFileCalled = false;

    // NOTE(review): db.js is not mocked here — confirm the "no proposals" state comes
    // from this empty SQLite file rather than from whatever backend db.js reaches.
    await mockFs(t, {
      readFile: async () => {
        readFileCalled = true;
        return '';
      },
    });
    await mockExecSync(t, () => retryOutput(0, 0));

    const { classifyUnknownErrors } = await loadClassifier('no-proposals');
    const result = await classifyUnknownErrors();

    assert.equal(result.patterns_applied, 0);
    assert.equal(
      readFileCalled,
      false,
      'readFile should not be called when no proposals (early return at line 178)'
    );
  });
});

describe('classifyUnknownErrors — result shape and phases combined', () => {
  test('returned object has all expected keys', async t => {
    useTempDb(t, 'shape');

    // NOTE(review): db.js is not mocked in this test — confirm that is intentional.
    await mockFs(t, { readFile: async () => '' });
    await mockExecSync(t, () => retryOutput(0, 0));

    const { classifyUnknownErrors } = await loadClassifier('shape');
    const result = await classifyUnknownErrors();

    const expectedKeys = [
      'sites_retried',
      'outreaches_retried',
      'patterns_applied',
      'phase3_sites_retried',
      'phase3_outreaches_retried',
    ];
    for (const key of expectedKeys) {
      assert.ok(key in result, `Result missing key: ${key}`);
      assert.ok(typeof result[key] === 'number', `${key} should be a number`);
    }
  });

  test('phase3_sites_retried and phase3_outreaches_retried come from subprocess output', async t => {
    const tmp = useTempDb(t, 'subprocess-counts');

    const db = tmp.open();
    insertProposal(db, {
      pattern: 'ERR_SUBPROCESS_COUNT_TEST',
      label: 'Count test',
      groupName: 'retriable',
      context: 'outreach',
    });

    await mockDb(t, db);
    await mockFs(t, { readFile: async () => realCategoriesSrc });
    await mockExecSync(t, () => retryOutput(3, 7));

    const { classifyUnknownErrors } = await loadClassifier('subprocess-counts');
    const result = await classifyUnknownErrors();

    assert.equal(
      result.phase3_sites_retried,
      3,
      'phase3_sites_retried should equal subprocess output'
    );
    assert.equal(
      result.phase3_outreaches_retried,
      7,
      'phase3_outreaches_retried should equal subprocess output'
    );
  });
});