#!/usr/bin/env node
/**
 * reset-transient-failures.mjs
 *
 * One-time (re-runnable) script: resets delivery_status='failed' messages
 * whose error_message matches a known transient pattern back to NULL so the
 * pipeline can retry them.
 *
 * DOES NOT reset terminal failures (ZeroBounce blocks, invalid addresses,
 * per-recipient cooldowns, etc.) — those stay failed permanently.
 *
 * Transient patterns (mirrors OUTREACH_RETRY_INTERVALS in error-categories.js):
 *   - ECONNRESET / ETIMEDOUT / ENOTFOUND     → network error
 *   - AbortError / operation was aborted     → HTTP timeout (AbortSignal.timeout)
 *   - status code 5xx / Resend API HTTP 5xx  → upstream API error
 *   - status code 429 / rate limit           → rate limited
 *
 * NOTE: a generic "Timeout" message (browser/form timeout) is NOT matched by
 * TRANSIENT_PATTERNS below, and only 'email' and 'sms' messages are examined.
 *
 * Usage:
 *   node scripts/reset-transient-failures.mjs            # live run
 *   node scripts/reset-transient-failures.mjs --dry-run  # preview only
 */

import Database from 'better-sqlite3';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const DRY_RUN = process.argv.includes('--dry-run');
const dbPath = process.env.DATABASE_PATH || join(__dirname, '../db/sites.db');

// Number of ids bound per UPDATE statement; keeps the IN (...) clause small.
const BATCH_SIZE = 500;

// Patterns that indicate a transient/network error that is worth retrying.
// Terminal patterns (ZeroBounce, invalid phone, exclusion list) are NOT included.
const TRANSIENT_PATTERNS = [
  /ECONNRESET/i,
  /ETIMEDOUT/i,
  /ENOTFOUND/i,
  /AbortError/i,
  /operation was aborted/i,
  /status code 5[0-9][0-9]/i,
  /Resend API HTTP 5[0-9][0-9]/i,
  /status code 429/i,
  /rate.?limit/i,
];

/**
 * Tells whether a stored error message looks like a retryable failure.
 *
 * @param {string|null|undefined} errorMessage - value of messages.error_message
 * @returns {boolean} true when any TRANSIENT_PATTERNS entry matches; false for
 *   empty/missing messages.
 */
function isTransient(errorMessage) {
  if (!errorMessage) return false;
  return TRANSIENT_PATTERNS.some((pattern) => pattern.test(errorMessage));
}

/**
 * Prints how many transient failures share each distinct error message,
 * most frequent first.
 *
 * @param {Array<{error_message: string}>} transient - rows judged transient
 */
function printBreakdown(transient) {
  // A Map (not a plain object) so that messages such as "constructor" or
  // "__proto__" are counted like any other key.
  const countsByError = new Map();
  for (const { error_message: message } of transient) {
    countsByError.set(message, (countsByError.get(message) ?? 0) + 1);
  }

  console.log('Transient breakdown:');
  const ranked = [...countsByError.entries()].sort((a, b) => b[1] - a[1]);
  for (const [err, cnt] of ranked) {
    console.log(` [${cnt}] ${err}`);
  }
  console.log('');
}

/**
 * Resets the given message ids back to delivery_status=NULL.
 *
 * All batches run inside a single transaction, so an error part-way through
 * rolls everything back instead of leaving a partial reset.
 *
 * @param {object} db - open better-sqlite3 database handle
 * @param {Array<number|string>} ids - messages.id values to reset
 * @returns {number} number of rows actually changed
 */
function resetMessages(db, ids) {
  const resetAll = db.transaction((allIds) => {
    let changed = 0;
    for (let start = 0; start < allIds.length; start += BATCH_SIZE) {
      const batch = allIds.slice(start, start + BATCH_SIZE);
      const placeholders = batch.map(() => '?').join(',');
      // The delivery_status guard skips rows that changed state after the
      // SELECT (e.g. already retried and sent by the pipeline), so they are
      // not queued a second time.
      const result = db.prepare(`
        UPDATE messages
        SET delivery_status = NULL,
            error_message = NULL,
            updated_at = datetime('now')
        WHERE id IN (${placeholders})
          AND delivery_status = 'failed'
      `).run(...batch);
      changed += result.changes;
    }
    return changed;
  });

  return resetAll(ids);
}

/**
 * Script entry point: reports failed email/sms messages, then (unless
 * --dry-run was passed) resets the transient ones.
 */
function main() {
  const db = new Database(dbPath);

  // finally guarantees the handle is closed on every path, including errors.
  try {
    db.pragma('journal_mode = WAL');
    db.pragma('busy_timeout = 30000');

    const failed = db.prepare(`
      SELECT id, contact_method, error_message, contact_uri
      FROM messages
      WHERE delivery_status = 'failed'
        AND contact_method IN ('email', 'sms')
    `).all();

    const transient = failed.filter((m) => isTransient(m.error_message));

    console.log(`=== reset-transient-failures ===`);
    console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : 'LIVE'}`);
    console.log(`Failed msgs: ${failed.length.toLocaleString()}`);
    console.log(`Transient: ${transient.length.toLocaleString()}`);
    console.log(`Terminal: ${(failed.length - transient.length).toLocaleString()}`);
    console.log('');

    if (transient.length === 0) {
      console.log('No transient failures found. Nothing to reset.');
      return;
    }

    printBreakdown(transient);

    if (DRY_RUN) {
      console.log('DRY RUN — no changes made.');
      return;
    }

    const resetCount = resetMessages(db, transient.map((m) => m.id));

    console.log(`Reset ${resetCount.toLocaleString()} messages to delivery_status=NULL.`);
    console.log('Pipeline will retry these on next outreach batch.');
  } finally {
    db.close();
  }
}

main();