validate-email-queue.js
1 #!/usr/bin/env node 2 /** 3 * Bulk email validation script — validate all pending/approved email outreaches 4 * via ZeroBounce batch API before sending, to protect Resend sender reputation. 5 * 6 * Emails already cached in email_validations (not expired) are skipped. 7 * 8 * Usage: 9 * node scripts/validate-email-queue.js [options] 10 * npm run validate:emails 11 * 12 * Options: 13 * --dry-run Show what would be blocked without updating the database 14 * --limit=N Only validate first N unique emails 15 * --batch-size=N ZeroBounce batch size 1-200 (default: 200) 16 * --yes Skip confirmation prompt and proceed automatically 17 * 18 * Examples: 19 * node scripts/validate-email-queue.js --dry-run 20 * node scripts/validate-email-queue.js --yes 21 * node scripts/validate-email-queue.js --limit=100 --dry-run 22 */ 23 24 import { createDatabaseConnection } from '../src/utils/db.js'; 25 import { join, dirname } from 'path'; 26 import { fileURLToPath } from 'url'; 27 import { 28 validateEmailBatchWithApi, 29 BLOCKED_STATUSES, 30 checkCredits, 31 } from '../src/utils/zerobounce.js'; 32 33 const __filename = fileURLToPath(import.meta.url); 34 const __dirname = dirname(__filename); 35 const projectRoot = join(__dirname, '..'); 36 37 const COST_PER_EMAIL = 0.008; // USD 38 39 // Parse CLI args 40 const args = process.argv.slice(2); 41 const DRY_RUN = args.includes('--dry-run'); 42 const YES = args.includes('--yes'); 43 const LIMIT = parseInt(args.find(a => a.startsWith('--limit='))?.split('=')[1] ?? '0', 10); 44 const BATCH_SIZE = Math.min( 45 200, 46 Math.max(1, parseInt(args.find(a => a.startsWith('--batch-size='))?.split('=')[1] ?? '200', 10)) 47 ); 48 49 const dbPath = process.env.DATABASE_PATH || join(projectRoot, 'db/sites.db'); 50 51 async function main() { 52 if (!process.env.ZEROBOUNCE_API_KEY) { 53 console.error('Error: ZEROBOUNCE_API_KEY is not set. Add it to .env.secrets.'); 54 process.exit(1); 55 } 56 57 const db = createDatabaseConnection(dbPath); 58 59 try { 60 // Fetch pending/approved email outreaches not yet cached in email_validations 61 const rows = db 62 .prepare( 63 `SELECT DISTINCT LOWER(o.contact_uri) AS email 64 FROM outreaches o 65 WHERE o.contact_method = 'email' 66 AND o.status IN ('pending', 'approved') 67 AND o.contact_uri NOT LIKE 'PENDING%' 68 AND LOWER(o.contact_uri) NOT IN ( 69 SELECT LOWER(email) FROM email_validations 70 WHERE expires_at > CURRENT_TIMESTAMP 71 ) 72 ORDER BY o.id 73 ${LIMIT > 0 ? `LIMIT ${LIMIT}` : ''}` 74 ) 75 .all(); 76 77 const emails = rows.map(r => r.email); 78 79 console.log( 80 `\nFound ${emails.length} unique uncached email${emails.length !== 1 ? 's' : ''} to validate` 81 ); 82 83 if (emails.length === 0) { 84 console.log('All emails are already validated and cached. Nothing to do.'); 85 db.close(); 86 return; 87 } 88 89 const estimatedCost = (emails.length * COST_PER_EMAIL).toFixed(2); 90 console.log(`Estimated cost: $${estimatedCost} USD (${emails.length} × $${COST_PER_EMAIL})`); 91 console.log(`Batch size: ${BATCH_SIZE} (${Math.ceil(emails.length / BATCH_SIZE)} API calls)`); 92 93 // Check credits 94 try { 95 const credits = await checkCredits(); 96 if (credits === -1 || credits < 1) { 97 console.error(`\nError: ZeroBounce has no credits remaining.`); 98 console.error('Top up credits at https://app.zerobounce.net/billing and re-run.'); 99 db.close(); 100 process.exit(1); 101 } 102 console.log(`ZeroBounce credits remaining: ${credits}`); 103 if (credits < emails.length) { 104 console.warn(`\nWarning: Only ${credits} credits but ${emails.length} emails to validate.`); 105 console.warn(`Will validate as many as possible (partial run).`); 106 } 107 } catch (err) { 108 console.warn(`Warning: Could not check credits: ${err.message}`); 109 } 110 111 if (DRY_RUN) { 112 console.log('\n[DRY RUN] No API calls or database changes will be made.'); 113 console.log( 114 `Would validate ${emails.length} emails across ${Math.ceil(emails.length / BATCH_SIZE)} batch(es).` 115 ); 116 console.log('Run without --dry-run and with --yes to proceed.'); 117 db.close(); 118 return; 119 } else if (!YES) { 120 console.log('\nRun with --yes to proceed, or --dry-run to preview without changes.'); 121 db.close(); 122 return; 123 } 124 125 // Prepare DB statements (used inside transaction) 126 const upsertStmt = db.prepare( 127 `INSERT OR REPLACE INTO email_validations 128 (email, status, sub_status, free_email, mx_found, validated_at, expires_at) 129 VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, 130 datetime('now', '+${parseInt(process.env.ZEROBOUNCE_CACHE_TTL_DAYS || '90', 10)} days'))` 131 ); 132 133 const blockStmt = db.prepare( 134 `UPDATE outreaches 135 SET status = 'failed', error_message = ? 136 WHERE contact_method = 'email' 137 AND LOWER(contact_uri) = ? 138 AND status IN ('pending', 'approved')` 139 ); 140 141 // Process in batches 142 const stats = { valid: 0, catchAll: 0, unknown: 0, blocked: 0, errors: 0 }; 143 const blockedDetails = []; 144 145 for (let i = 0; i < emails.length; i += BATCH_SIZE) { 146 const batch = emails.slice(i, i + BATCH_SIZE); 147 const batchNum = Math.floor(i / BATCH_SIZE) + 1; 148 const totalBatches = Math.ceil(emails.length / BATCH_SIZE); 149 150 process.stdout.write(` Batch ${batchNum}/${totalBatches} (${batch.length} emails)... `); 151 152 try { 153 const results = await validateEmailBatchWithApi(batch); 154 155 if (!DRY_RUN) { 156 // Cache results and apply block decisions in a single transaction 157 db.transaction(() => { 158 for (const [email, result] of results) { 159 upsertStmt.run( 160 email, 161 result.status, 162 result.sub_status ?? null, 163 result.free_email !== null && result.free_email !== undefined 164 ? result.free_email 165 ? 1 166 : 0 167 : null, 168 result.mx_found !== null && result.mx_found !== undefined 169 ? result.mx_found 170 ? 1 171 : 0 172 : null 173 ); 174 175 if (BLOCKED_STATUSES.has(result.status)) { 176 const reason = `ZeroBounce: ${result.status}${result.sub_status ? ` (${result.sub_status})` : ''}`; 177 blockStmt.run(reason, email); 178 } 179 } 180 })(); 181 } 182 183 // Tally results 184 for (const [email, result] of results) { 185 if (BLOCKED_STATUSES.has(result.status)) { 186 stats.blocked++; 187 blockedDetails.push({ 188 email, 189 status: result.status, 190 sub_status: result.sub_status, 191 }); 192 } else if (result.status === 'catch-all') { 193 stats.catchAll++; 194 } else if (result.status === 'unknown') { 195 stats.unknown++; 196 } else { 197 stats.valid++; 198 } 199 } 200 201 // Emails not returned by API (API gap) count as errors 202 for (const email of batch) { 203 if (!results.has(email)) stats.errors++; 204 } 205 206 console.log('done'); 207 } catch (err) { 208 console.error(`FAILED: ${err.message}`); 209 stats.errors += batch.length; 210 } 211 } 212 213 // Summary 214 console.log('\n=== Validation Summary ==='); 215 console.log(` Valid: ${stats.valid}`); 216 console.log(` Catch-all: ${stats.catchAll} (sending — monitor for bounces)`); 217 console.log(` Unknown: ${stats.unknown} (sending)`); 218 console.log(` BLOCKED (failed): ${stats.blocked}`); 219 console.log(` API errors: ${stats.errors} (treated as unknown, not blocked)`); 220 if (DRY_RUN) { 221 console.log('\n[DRY RUN] No changes were made to the database.'); 222 } else { 223 console.log( 224 `\nActual cost: ~$${((stats.valid + stats.catchAll + stats.unknown + stats.blocked) * COST_PER_EMAIL).toFixed(2)} USD` 225 ); 226 } 227 228 if (blockedDetails.length > 0) { 229 console.log('\nBlocked breakdown:'); 230 const byCategory = {}; 231 for (const d of blockedDetails) { 232 const key = d.sub_status ? `${d.status} / ${d.sub_status}` : d.status; 233 byCategory[key] = (byCategory[key] || 0) + 1; 234 } 235 for (const [key, count] of Object.entries(byCategory).sort((a, b) => b[1] - a[1])) { 236 console.log(` ${key}: ${count}`); 237 } 238 } 239 240 if (!DRY_RUN && stats.blocked > 0) { 241 console.log(`\n${stats.blocked} outreaches marked as 'failed'. They will not be sent.`); 242 } 243 } finally { 244 db.close(); 245 } 246 } 247 248 main().catch(err => { 249 console.error(`\nFatal error: ${err.message}`); 250 process.exit(1); 251 });