/ scripts / reset-transient-failures.mjs
reset-transient-failures.mjs
  1  #!/usr/bin/env node
  2  /**
  3   * reset-transient-failures.mjs
  4   *
  5   * One-time (re-runnable) script: resets delivery_status='failed' messages
  6   * whose error_message matches a known transient pattern back to NULL so the
  7   * pipeline can retry them.
  8   *
  9   * DOES NOT reset terminal failures (ZeroBounce blocks, invalid addresses,
 10   * per-recipient cooldowns, etc.) — those stay failed permanently.
 11   *
 12   * Transient patterns (mirrors OUTREACH_RETRY_INTERVALS in error-categories.js):
 13   *   - ECONNRESET / ETIMEDOUT / ENOTFOUND  → network error
 14   *   - AbortError / operation was aborted  → HTTP timeout (AbortSignal.timeout)
 15   *   - status code 5xx                     → upstream API error
 16   *   - status code 429 / rate limit        → rate limited
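 *   - Timeout                             → browser/form timeout (listed for
 *                                           reference only: this script has no
 *                                           bare "Timeout" pattern and scans
 *                                           only email/sms messages)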
 18   *
 19   * Usage:
 20   *   node scripts/reset-transient-failures.mjs           # live run
 21   *   node scripts/reset-transient-failures.mjs --dry-run # preview only
 22   */
 23  
 24  import Database from 'better-sqlite3';
 25  import { join, dirname } from 'path';
 26  import { fileURLToPath } from 'url';
 27  
 28  const __dirname = dirname(fileURLToPath(import.meta.url));
 29  const DRY_RUN = process.argv.includes('--dry-run');
 30  const dbPath = process.env.DATABASE_PATH || join(__dirname, '../db/sites.db');
 31  
 32  const db = new Database(dbPath);
 33  db.pragma('journal_mode = WAL');
 34  db.pragma('busy_timeout = 30000');
 35  
 36  // Patterns that indicate a transient/network error that is worth retrying.
 37  // Terminal patterns (ZeroBounce, invalid phone, exclusion list) are NOT included.
 38  const TRANSIENT_PATTERNS = [
 39    /ECONNRESET/i,
 40    /ETIMEDOUT/i,
 41    /ENOTFOUND/i,
 42    /AbortError/i,
 43    /operation was aborted/i,
 44    /status code 5[0-9][0-9]/i,
 45    /Resend API HTTP 5[0-9][0-9]/i,
 46    /status code 429/i,
 47    /rate.?limit/i,
 48  ];
 49  
 50  function isTransient(errorMessage) {
 51    if (!errorMessage) return false;
 52    return TRANSIENT_PATTERNS.some(p => p.test(errorMessage));
 53  }
 54  
 55  const failed = db.prepare(`
 56    SELECT id, contact_method, error_message, contact_uri
 57    FROM messages
 58    WHERE delivery_status = 'failed'
 59      AND contact_method IN ('email', 'sms')
 60  `).all();
 61  
 62  const transient = failed.filter(m => isTransient(m.error_message));
 63  
 64  console.log(`=== reset-transient-failures ===`);
 65  console.log(`Mode:           ${DRY_RUN ? 'DRY RUN' : 'LIVE'}`);
 66  console.log(`Failed msgs:    ${failed.length.toLocaleString()}`);
 67  console.log(`Transient:      ${transient.length.toLocaleString()}`);
 68  console.log(`Terminal:       ${(failed.length - transient.length).toLocaleString()}`);
 69  console.log('');
 70  
 71  if (transient.length === 0) {
 72    console.log('No transient failures found. Nothing to reset.');
 73    db.close();
 74    process.exit(0);
 75  }
 76  
 77  // Show breakdown by error
 78  const byError = {};
 79  for (const m of transient) {
 80    byError[m.error_message] = (byError[m.error_message] || 0) + 1;
 81  }
 82  console.log('Transient breakdown:');
 83  for (const [err, cnt] of Object.entries(byError).sort((a, b) => b[1] - a[1])) {
 84    console.log(`  [${cnt}] ${err}`);
 85  }
 86  console.log('');
 87  
 88  if (DRY_RUN) {
 89    console.log('DRY RUN — no changes made.');
 90    db.close();
 91    process.exit(0);
 92  }
 93  
 94  const ids = transient.map(m => m.id);
 95  // Reset in batches of 500 to avoid large IN clauses
 96  let resetCount = 0;
 97  for (let i = 0; i < ids.length; i += 500) {
 98    const batch = ids.slice(i, i + 500);
 99    const placeholders = batch.map(() => '?').join(',');
100    const result = db.prepare(`
101      UPDATE messages
102      SET delivery_status = NULL,
103          error_message = NULL,
104          updated_at = datetime('now')
105      WHERE id IN (${placeholders})
106    `).run(...batch);
107    resetCount += result.changes;
108  }
109  
110  console.log(`Reset ${resetCount.toLocaleString()} messages to delivery_status=NULL.`);
111  console.log('Pipeline will retry these on next outreach batch.');
112  
113  db.close();