/ tests / cron / classify-unknown-errors-supplement2.test.js
classify-unknown-errors-supplement2.test.js
  1  /**
  2   * classify-unknown-errors-supplement2.test.js
  3   *
  4   * Covers lines 179-235, 260-262 (applyProposals with actual proposals),
  5   * and exercises the direct-run guard via indirect testing.
  6   *
 * Uses mock.module to intercept fs (including its `promises` property), child_process and, in some tests, src/utils/db.js.
  8   *
  9   * Run with:
 10   *   NODE_ENV=test node --test --experimental-test-module-mocks \
 11   *     tests/cron/classify-unknown-errors-supplement2.test.js
 12   */
 13  
 14  import { test, describe } from 'node:test';
 15  import assert from 'node:assert/strict';
 16  import Database from 'better-sqlite3';
 17  import { tmpdir } from 'os';
 18  import { join } from 'path';
 19  import * as realFs from 'fs';
 20  const { mkdirSync, rmSync, readFileSync, existsSync, writeFileSync, readdirSync, unlinkSync, createWriteStream } = realFs;
 21  import { createPgMock } from '../helpers/pg-mock.js';
 22  
// Disable Logger file I/O for all tests in this file.
// Static imports are evaluated before this assignment runs, so it only affects
// code loaded afterwards — the module under test is pulled in with a dynamic
// import() inside each test.
process.env.NODE_ENV = 'test';
 25  
 26  // ── Schema helper ──────────────────────────────────────────────────────────────
 27  
 28  function createSchema(db) {
 29    db.exec(`
 30      CREATE TABLE IF NOT EXISTS sites (
 31        id INTEGER PRIMARY KEY AUTOINCREMENT,
 32        domain TEXT NOT NULL DEFAULT 'example.com',
 33        landing_page_url TEXT,
 34        status TEXT NOT NULL DEFAULT 'found',
 35        error_message TEXT,
 36        retry_count INTEGER DEFAULT 0,
 37        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 38        rescored_at DATETIME
 39      );
 40  
 41      CREATE TABLE IF NOT EXISTS messages (
 42        id INTEGER PRIMARY KEY AUTOINCREMENT,
 43        site_id INTEGER,
 44        direction TEXT DEFAULT 'outbound',
 45        contact_method TEXT DEFAULT 'email',
 46        message_body TEXT,
 47        delivery_status TEXT DEFAULT 'pending',
 48        error_message TEXT,
 49        retry_at DATETIME,
 50        read_at TEXT,
 51        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 52        message_type TEXT DEFAULT 'outreach',
 53        raw_payload TEXT
 54      );
 55  
 56      CREATE TABLE IF NOT EXISTS error_pattern_proposals (
 57        id INTEGER PRIMARY KEY AUTOINCREMENT,
 58        pattern TEXT NOT NULL,
 59        label TEXT NOT NULL,
 60        group_name TEXT NOT NULL CHECK(group_name IN ('terminal', 'retriable')),
 61        context TEXT NOT NULL CHECK(context IN ('site', 'outreach')),
 62        example_errors TEXT,
 63        occurrence_count INTEGER,
 64        status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'approved', 'rejected')),
 65        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
 66        reviewed_at DATETIME,
 67        reviewed_by TEXT
 68      );
 69  
 70      CREATE TABLE IF NOT EXISTS cron_jobs (
 71        id TEXT PRIMARY KEY,
 72        last_run_at DATETIME,
 73        status TEXT DEFAULT 'idle'
 74      );
 75    `);
 76  }
 77  
 78  // ── Helper: fresh isolated DB dir ─────────────────────────────────────────────
 79  
 80  function makeTempDb(tag) {
 81    const dir = join(tmpdir(), `cue-s2-${tag}-${Date.now()}`);
 82    mkdirSync(dir, { recursive: true });
 83    const dbPath = join(dir, 'test.db');
 84    const db = new Database(dbPath);
 85    createSchema(db);
 86    db.close();
 87    return { dir, dbPath };
 88  }
 89  
// Read the real error-categories.js source once at module load time (before any
// mocks are installed). Tests hand this text back from their stubbed readFile.
// The path is resolved against process.cwd(), so the suite has to be started
// from a directory that contains src/utils/error-categories.js.
const realCategoriesPath = join(process.cwd(), 'src/utils/error-categories.js');
const realCategoriesSrc = readFileSync(realCategoriesPath, 'utf8');
 93  
 94  // ── Tests ──────────────────────────────────────────────────────────────────────
 95  
 96  describe('applyProposals — pending proposals path (lines 179-258)', () => {
 97    test('applies a valid pending proposal: file is rewritten and proposal marked approved', async t => {
 98      const { dir, dbPath } = makeTempDb('apply-valid');
 99      process.env.DATABASE_PATH = dbPath;
100  
101      // Keep a single connection for both test data insertion and db.js mock
102      const db = new Database(dbPath);
103      db.prepare(
104        `
105        INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
106        VALUES ('ERR_UNIQUE_TEST_PATTERN_XYZ', 'Test pattern', 'retriable', 'site')
107      `
108      ).run();
109  
110      // Mock db.js to use this test's SQLite DB (not real PG)
111      await t.mock.module('../../src/utils/db.js', {
112        namedExports: createPgMock(db),
113      });
114  
115      let writtenContent = null;
116  
117      await t.mock.module('fs', {
118        namedExports: {
119          existsSync: () => false,
120          mkdirSync: () => {},
121          createWriteStream: () => ({ write: () => {}, end: () => {} }),
122          default: {
123            promises: {
124              readFile: async (_path, _enc) => realCategoriesSrc,
125              writeFile: async (_path, content, _enc) => {
126                writtenContent = content;
127              },
128            },
129            existsSync: () => false,
130            mkdirSync: () => {},
131            createWriteStream: () => ({ write: () => {}, end: () => {} }),
132          },
133          promises: {
134            readFile: async (_path, _enc) => realCategoriesSrc,
135            writeFile: async (_path, content, _enc) => {
136              writtenContent = content;
137            },
138          },
139        },
140      });
141  
142      await t.mock.module('child_process', {
143        namedExports: {
144          execSync: (_cmd, _opts) => {
145            return `${JSON.stringify({ sites_retried: 1, outreaches_retried: 0 })}\n`;
146          },
147        },
148      });
149  
150      const { classifyUnknownErrors } = await import(
151        `../../src/cron/classify-unknown-errors.js?v=apply-valid-${Date.now()}`
152      );
153      const result = await classifyUnknownErrors();
154  
155      // applied should be at least 1 since we inserted a valid proposal
156      assert.ok(
157        result.patterns_applied >= 1,
158        `Expected patterns_applied >= 1, got ${result.patterns_applied}`
159      );
160  
161      // Check the proposal was marked approved in DB (re-read using same db connection)
162      const row = db
163        .prepare(
164          `SELECT status, reviewed_by FROM error_pattern_proposals WHERE pattern = 'ERR_UNIQUE_TEST_PATTERN_XYZ'`
165        )
166        .get();
167  
168      assert.equal(row.status, 'approved', 'Proposal should be marked approved after apply');
169      assert.ok(
170        row.reviewed_by.includes('classifyUnknownErrors'),
171        'reviewed_by should reference classifyUnknownErrors'
172      );
173  
174      db.close();
175      delete process.env.DATABASE_PATH;
176      rmSync(dir, { recursive: true, force: true });
177    });
178  
179    test('skips proposal with invalid regex — applies=0, proposal stays pending', async t => {
180      const { dir, dbPath } = makeTempDb('invalid-regex');
181      process.env.DATABASE_PATH = dbPath;
182  
183      const db = new Database(dbPath);
184      db.prepare(
185        `
186        INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
187        VALUES ('[invalid(regex', 'Bad regex', 'retriable', 'site')
188      `
189      ).run();
190      db.close();
191  
192      await t.mock.module('fs', {
193        namedExports: {
194          existsSync: () => false,
195          mkdirSync: () => {},
196          createWriteStream: () => ({ write: () => {}, end: () => {} }),
197          default: {
198            promises: {
199              readFile: async (_path, _enc) => realCategoriesSrc,
200              writeFile: async (_path, _content, _enc) => {},
201            },
202            existsSync: () => false,
203            mkdirSync: () => {},
204            createWriteStream: () => ({ write: () => {}, end: () => {} }),
205          },
206          promises: {
207            readFile: async (_path, _enc) => realCategoriesSrc,
208            writeFile: async (_path, _content, _enc) => {},
209          },
210        },
211      });
212  
213      await t.mock.module('child_process', {
214        namedExports: {
215          execSync: (_cmd, _opts) =>
216            `${JSON.stringify({ sites_retried: 0, outreaches_retried: 0 })}\n`,
217        },
218      });
219  
220      const { classifyUnknownErrors } = await import(
221        `../../src/cron/classify-unknown-errors.js?v=invalid-regex-${Date.now()}`
222      );
223      const result = await classifyUnknownErrors();
224  
225      assert.equal(result.patterns_applied, 0, 'Invalid regex proposal should not be applied');
226  
227      const db2 = new Database(dbPath);
228      const row = db2
229        .prepare(`SELECT status FROM error_pattern_proposals WHERE pattern = '[invalid(regex'`)
230        .get();
231      db2.close();
232      assert.equal(row.status, 'pending', 'Invalid-regex proposal should remain pending');
233  
234      delete process.env.DATABASE_PATH;
235      rmSync(dir, { recursive: true, force: true });
236    });
237  
238    test('skips proposal when array name not found in source file', async t => {
239      const { dir, dbPath } = makeTempDb('no-array');
240      process.env.DATABASE_PATH = dbPath;
241  
242      const db = new Database(dbPath);
243      db.prepare(
244        `
245        INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
246        VALUES ('ERR_ARRAY_NOT_FOUND', 'Missing array', 'retriable', 'site')
247      `
248      ).run();
249      db.close();
250  
251      // Return a src that does NOT contain SITE_RETRIABLE_PATTERNS
252      const fakeSrc = '// empty file with no pattern arrays\n';
253  
254      await t.mock.module('fs', {
255        namedExports: {
256          existsSync: () => false,
257          mkdirSync: () => {},
258          createWriteStream: () => ({ write: () => {}, end: () => {} }),
259          default: {
260            promises: {
261              readFile: async (_path, _enc) => fakeSrc,
262              writeFile: async (_path, _content, _enc) => {},
263            },
264            existsSync: () => false,
265            mkdirSync: () => {},
266            createWriteStream: () => ({ write: () => {}, end: () => {} }),
267          },
268          promises: {
269            readFile: async (_path, _enc) => fakeSrc,
270            writeFile: async (_path, _content, _enc) => {},
271          },
272        },
273      });
274  
275      await t.mock.module('child_process', {
276        namedExports: {
277          execSync: (_cmd, _opts) =>
278            `${JSON.stringify({ sites_retried: 0, outreaches_retried: 0 })}\n`,
279        },
280      });
281  
282      const { classifyUnknownErrors } = await import(
283        `../../src/cron/classify-unknown-errors.js?v=no-array-${Date.now()}`
284      );
285      const result = await classifyUnknownErrors();
286  
287      assert.equal(
288        result.patterns_applied,
289        0,
290        'Proposal should be skipped when array not found in source'
291      );
292  
293      delete process.env.DATABASE_PATH;
294      rmSync(dir, { recursive: true, force: true });
295    });
296  
297    test('phase 3 retry subprocess failure returns applied count with zeros for retry counts (lines 260-262)', async t => {
298      const { dir, dbPath } = makeTempDb('subprocess-fail');
299      process.env.DATABASE_PATH = dbPath;
300  
301      const db = new Database(dbPath);
302      db.prepare(
303        `
304        INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
305        VALUES ('ERR_SUBPROCESS_FAIL_TEST', 'Subprocess fail', 'retriable', 'site')
306      `
307      ).run();
308  
309      // Mock db.js to use this test's SQLite DB (not real PG)
310      await t.mock.module('../../src/utils/db.js', {
311        namedExports: createPgMock(db),
312      });
313  
314      await t.mock.module('fs', {
315        namedExports: {
316          existsSync: () => false,
317          mkdirSync: () => {},
318          createWriteStream: () => ({ write: () => {}, end: () => {} }),
319          default: {
320            promises: {
321              readFile: async (_path, _enc) => realCategoriesSrc,
322              writeFile: async (_path, _content, _enc) => {},
323            },
324            existsSync: () => false,
325            mkdirSync: () => {},
326            createWriteStream: () => ({ write: () => {}, end: () => {} }),
327          },
328          promises: {
329            readFile: async (_path, _enc) => realCategoriesSrc,
330            writeFile: async (_path, _content, _enc) => {},
331          },
332        },
333      });
334  
335      // Make execSync throw to exercise the catch block (lines 260-262)
336      await t.mock.module('child_process', {
337        namedExports: {
338          execSync: (_cmd, _opts) => {
339            throw new Error('Subprocess crashed intentionally');
340          },
341        },
342      });
343  
344      const { classifyUnknownErrors } = await import(
345        `../../src/cron/classify-unknown-errors.js?v=subprocess-fail-${Date.now()}`
346      );
347      const result = await classifyUnknownErrors();
348  
349      assert.ok(
350        result.patterns_applied >= 1,
351        `Expected patterns_applied >= 1, got ${result.patterns_applied}`
352      );
353      assert.equal(
354        result.phase3_sites_retried,
355        0,
356        'Should return 0 sites_retried on subprocess failure'
357      );
358      assert.equal(
359        result.phase3_outreaches_retried,
360        0,
361        'Should return 0 outreaches_retried on subprocess failure'
362      );
363  
364      db.close();
365      delete process.env.DATABASE_PATH;
366      rmSync(dir, { recursive: true, force: true });
367    });
368  
369    test('arrayName mapping covers all four group_name/context combinations', async t => {
370      const { dir, dbPath } = makeTempDb('four-arrays');
371      process.env.DATABASE_PATH = dbPath;
372  
373      const db = new Database(dbPath);
374      db.prepare(
375        `INSERT INTO error_pattern_proposals (pattern, label, group_name, context) VALUES ('ERR_SITE_TERMINAL_X1', 'Site terminal', 'terminal', 'site')`
376      ).run();
377      db.prepare(
378        `INSERT INTO error_pattern_proposals (pattern, label, group_name, context) VALUES ('ERR_SITE_RETRIABLE_X1', 'Site retriable', 'retriable', 'site')`
379      ).run();
380      db.prepare(
381        `INSERT INTO error_pattern_proposals (pattern, label, group_name, context) VALUES ('ERR_OUT_TERMINAL_X1', 'Outreach terminal', 'terminal', 'outreach')`
382      ).run();
383      db.prepare(
384        `INSERT INTO error_pattern_proposals (pattern, label, group_name, context) VALUES ('ERR_OUT_RETRIABLE_X1', 'Outreach retriable', 'retriable', 'outreach')`
385      ).run();
386  
387      // Mock db.js to use this test's SQLite DB (not real PG)
388      await t.mock.module('../../src/utils/db.js', {
389        namedExports: createPgMock(db),
390      });
391  
392      let appliedCount = 0;
393  
394      await t.mock.module('fs', {
395        namedExports: {
396          existsSync: () => false,
397          mkdirSync: () => {},
398          createWriteStream: () => ({ write: () => {}, end: () => {} }),
399          default: {
400            promises: {
401              readFile: async (_path, _enc) => realCategoriesSrc,
402              writeFile: async (_path, _content, _enc) => {
403                appliedCount++;
404              },
405            },
406            existsSync: () => false,
407            mkdirSync: () => {},
408            createWriteStream: () => ({ write: () => {}, end: () => {} }),
409          },
410          promises: {
411            readFile: async (_path, _enc) => realCategoriesSrc,
412            writeFile: async (_path, _content, _enc) => {
413              appliedCount++;
414            },
415          },
416        },
417      });
418  
419      await t.mock.module('child_process', {
420        namedExports: {
421          execSync: (_cmd, _opts) =>
422            `${JSON.stringify({ sites_retried: 0, outreaches_retried: 0 })}\n`,
423        },
424      });
425  
426      const { classifyUnknownErrors } = await import(
427        `../../src/cron/classify-unknown-errors.js?v=four-arrays-${Date.now()}`
428      );
429      const result = await classifyUnknownErrors();
430  
431      assert.equal(result.patterns_applied, 4, `Expected 4 applied, got ${result.patterns_applied}`);
432      // writeFile should have been called once (one write for all patterns)
433      assert.equal(appliedCount, 1, 'writeFile should be called once after all patterns applied');
434  
435      db.close();
436      delete process.env.DATABASE_PATH;
437      rmSync(dir, { recursive: true, force: true });
438    });
439  
440    test('no-op when pending proposals list is empty — readFile is never called', async t => {
441      const { dir, dbPath } = makeTempDb('no-proposals');
442      process.env.DATABASE_PATH = dbPath;
443  
444      let readFileCalled = false;
445  
446      await t.mock.module('fs', {
447        namedExports: {
448          existsSync: () => false,
449          mkdirSync: () => {},
450          createWriteStream: () => ({ write: () => {}, end: () => {} }),
451          default: {
452            promises: {
453              readFile: async (_path, _enc) => {
454                readFileCalled = true;
455                return '';
456              },
457              writeFile: async () => {},
458            },
459            existsSync: () => false,
460            mkdirSync: () => {},
461            createWriteStream: () => ({ write: () => {}, end: () => {} }),
462          },
463          promises: {
464            readFile: async (_path, _enc) => {
465              readFileCalled = true;
466              return '';
467            },
468            writeFile: async () => {},
469          },
470        },
471      });
472  
473      await t.mock.module('child_process', {
474        namedExports: {
475          execSync: () => `${JSON.stringify({ sites_retried: 0, outreaches_retried: 0 })}\n`,
476        },
477      });
478  
479      const { classifyUnknownErrors } = await import(
480        `../../src/cron/classify-unknown-errors.js?v=no-proposals-${Date.now()}`
481      );
482      const result = await classifyUnknownErrors();
483  
484      assert.equal(result.patterns_applied, 0);
485      assert.equal(
486        readFileCalled,
487        false,
488        'readFile should not be called when no proposals (early return at line 178)'
489      );
490  
491      delete process.env.DATABASE_PATH;
492      rmSync(dir, { recursive: true, force: true });
493    });
494  });
495  
496  describe('classifyUnknownErrors — result shape and phases combined', () => {
497    test('returned object has all expected keys', async t => {
498      const { dir, dbPath } = makeTempDb('shape');
499      process.env.DATABASE_PATH = dbPath;
500  
501      await t.mock.module('fs', {
502        namedExports: {
503          existsSync: () => false,
504          mkdirSync: () => {},
505          createWriteStream: () => ({ write: () => {}, end: () => {} }),
506          default: {
507            promises: {
508              readFile: async () => '',
509              writeFile: async () => {},
510            },
511            existsSync: () => false,
512            mkdirSync: () => {},
513            createWriteStream: () => ({ write: () => {}, end: () => {} }),
514          },
515          promises: {
516            readFile: async () => '',
517            writeFile: async () => {},
518          },
519        },
520      });
521  
522      await t.mock.module('child_process', {
523        namedExports: {
524          execSync: () => `${JSON.stringify({ sites_retried: 0, outreaches_retried: 0 })}\n`,
525        },
526      });
527  
528      const { classifyUnknownErrors } = await import(
529        `../../src/cron/classify-unknown-errors.js?v=shape-${Date.now()}`
530      );
531      const result = await classifyUnknownErrors();
532  
533      const expectedKeys = [
534        'sites_retried',
535        'outreaches_retried',
536        'patterns_applied',
537        'phase3_sites_retried',
538        'phase3_outreaches_retried',
539      ];
540      for (const key of expectedKeys) {
541        assert.ok(key in result, `Result missing key: ${key}`);
542        assert.ok(typeof result[key] === 'number', `${key} should be a number`);
543      }
544  
545      delete process.env.DATABASE_PATH;
546      rmSync(dir, { recursive: true, force: true });
547    });
548  
549    test('phase3_sites_retried and phase3_outreaches_retried come from subprocess output', async t => {
550      const { dir, dbPath } = makeTempDb('subprocess-counts');
551      process.env.DATABASE_PATH = dbPath;
552  
553      const db = new Database(dbPath);
554      db.prepare(
555        `
556        INSERT INTO error_pattern_proposals (pattern, label, group_name, context)
557        VALUES ('ERR_SUBPROCESS_COUNT_TEST', 'Count test', 'retriable', 'outreach')
558      `
559      ).run();
560  
561      // Mock db.js to use this test's SQLite DB (not real PG)
562      await t.mock.module('../../src/utils/db.js', {
563        namedExports: createPgMock(db),
564      });
565  
566      await t.mock.module('fs', {
567        namedExports: {
568          existsSync: () => false,
569          mkdirSync: () => {},
570          createWriteStream: () => ({ write: () => {}, end: () => {} }),
571          default: {
572            promises: {
573              readFile: async (_path, _enc) => realCategoriesSrc,
574              writeFile: async (_path, _content, _enc) => {},
575            },
576            existsSync: () => false,
577            mkdirSync: () => {},
578            createWriteStream: () => ({ write: () => {}, end: () => {} }),
579          },
580          promises: {
581            readFile: async (_path, _enc) => realCategoriesSrc,
582            writeFile: async (_path, _content, _enc) => {},
583          },
584        },
585      });
586  
587      await t.mock.module('child_process', {
588        namedExports: {
589          execSync: (_cmd, _opts) => {
590            return `${JSON.stringify({ sites_retried: 3, outreaches_retried: 7 })}\n`;
591          },
592        },
593      });
594  
595      const { classifyUnknownErrors } = await import(
596        `../../src/cron/classify-unknown-errors.js?v=subprocess-counts-${Date.now()}`
597      );
598      const result = await classifyUnknownErrors();
599  
600      assert.equal(
601        result.phase3_sites_retried,
602        3,
603        'phase3_sites_retried should equal subprocess output'
604      );
605      assert.equal(
606        result.phase3_outreaches_retried,
607        7,
608        'phase3_outreaches_retried should equal subprocess output'
609      );
610  
611      db.close();
612      delete process.env.DATABASE_PATH;
613      rmSync(dir, { recursive: true, force: true });
614    });
615  });