/ scripts / validate-email-queue.js
validate-email-queue.js
  1  #!/usr/bin/env node
  2  /**
  3   * Bulk email validation script — validate all pending/approved email outreaches
  4   * via ZeroBounce batch API before sending, to protect Resend sender reputation.
  5   *
  6   * Emails already cached in email_validations (not expired) are skipped.
  7   *
  8   * Usage:
  9   *   node scripts/validate-email-queue.js [options]
 10   *   npm run validate:emails
 11   *
 12   * Options:
 13   *   --dry-run     Show what would be blocked without updating the database
 14   *   --limit=N     Only validate first N unique emails
 15   *   --batch-size=N  ZeroBounce batch size 1-200 (default: 200)
 16   *   --yes         Skip confirmation prompt and proceed automatically
 17   *
 18   * Examples:
 19   *   node scripts/validate-email-queue.js --dry-run
 20   *   node scripts/validate-email-queue.js --yes
 21   *   node scripts/validate-email-queue.js --limit=100 --dry-run
 22   */
 23  
 24  import { createDatabaseConnection } from '../src/utils/db.js';
 25  import { join, dirname } from 'path';
 26  import { fileURLToPath } from 'url';
 27  import {
 28    validateEmailBatchWithApi,
 29    BLOCKED_STATUSES,
 30    checkCredits,
 31  } from '../src/utils/zerobounce.js';
 32  
 33  const __filename = fileURLToPath(import.meta.url);
 34  const __dirname = dirname(__filename);
 35  const projectRoot = join(__dirname, '..');
 36  
 37  const COST_PER_EMAIL = 0.008; // USD
 38  
 39  // Parse CLI args
 40  const args = process.argv.slice(2);
 41  const DRY_RUN = args.includes('--dry-run');
 42  const YES = args.includes('--yes');
 43  const LIMIT = parseInt(args.find(a => a.startsWith('--limit='))?.split('=')[1] ?? '0', 10);
 44  const BATCH_SIZE = Math.min(
 45    200,
 46    Math.max(1, parseInt(args.find(a => a.startsWith('--batch-size='))?.split('=')[1] ?? '200', 10))
 47  );
 48  
 49  const dbPath = process.env.DATABASE_PATH || join(projectRoot, 'db/sites.db');
 50  
 51  async function main() {
 52    if (!process.env.ZEROBOUNCE_API_KEY) {
 53      console.error('Error: ZEROBOUNCE_API_KEY is not set. Add it to .env.secrets.');
 54      process.exit(1);
 55    }
 56  
 57    const db = createDatabaseConnection(dbPath);
 58  
 59    try {
 60      // Fetch pending/approved email outreaches not yet cached in email_validations
 61      const rows = db
 62        .prepare(
 63          `SELECT DISTINCT LOWER(o.contact_uri) AS email
 64           FROM outreaches o
 65           WHERE o.contact_method = 'email'
 66             AND o.status IN ('pending', 'approved')
 67             AND o.contact_uri NOT LIKE 'PENDING%'
 68             AND LOWER(o.contact_uri) NOT IN (
 69               SELECT LOWER(email) FROM email_validations
 70               WHERE expires_at > CURRENT_TIMESTAMP
 71             )
 72           ORDER BY o.id
 73           ${LIMIT > 0 ? `LIMIT ${LIMIT}` : ''}`
 74        )
 75        .all();
 76  
 77      const emails = rows.map(r => r.email);
 78  
 79      console.log(
 80        `\nFound ${emails.length} unique uncached email${emails.length !== 1 ? 's' : ''} to validate`
 81      );
 82  
 83      if (emails.length === 0) {
 84        console.log('All emails are already validated and cached. Nothing to do.');
 85        db.close();
 86        return;
 87      }
 88  
 89      const estimatedCost = (emails.length * COST_PER_EMAIL).toFixed(2);
 90      console.log(`Estimated cost: $${estimatedCost} USD (${emails.length} × $${COST_PER_EMAIL})`);
 91      console.log(`Batch size: ${BATCH_SIZE} (${Math.ceil(emails.length / BATCH_SIZE)} API calls)`);
 92  
 93      // Check credits
 94      try {
 95        const credits = await checkCredits();
 96        if (credits === -1 || credits < 1) {
 97          console.error(`\nError: ZeroBounce has no credits remaining.`);
 98          console.error('Top up credits at https://app.zerobounce.net/billing and re-run.');
 99          db.close();
100          process.exit(1);
101        }
102        console.log(`ZeroBounce credits remaining: ${credits}`);
103        if (credits < emails.length) {
104          console.warn(`\nWarning: Only ${credits} credits but ${emails.length} emails to validate.`);
105          console.warn(`Will validate as many as possible (partial run).`);
106        }
107      } catch (err) {
108        console.warn(`Warning: Could not check credits: ${err.message}`);
109      }
110  
111      if (DRY_RUN) {
112        console.log('\n[DRY RUN] No API calls or database changes will be made.');
113        console.log(
114          `Would validate ${emails.length} emails across ${Math.ceil(emails.length / BATCH_SIZE)} batch(es).`
115        );
116        console.log('Run without --dry-run and with --yes to proceed.');
117        db.close();
118        return;
119      } else if (!YES) {
120        console.log('\nRun with --yes to proceed, or --dry-run to preview without changes.');
121        db.close();
122        return;
123      }
124  
125      // Prepare DB statements (used inside transaction)
126      const upsertStmt = db.prepare(
127        `INSERT OR REPLACE INTO email_validations
128           (email, status, sub_status, free_email, mx_found, validated_at, expires_at)
129         VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP,
130           datetime('now', '+${parseInt(process.env.ZEROBOUNCE_CACHE_TTL_DAYS || '90', 10)} days'))`
131      );
132  
133      const blockStmt = db.prepare(
134        `UPDATE outreaches
135         SET status = 'failed', error_message = ?
136         WHERE contact_method = 'email'
137           AND LOWER(contact_uri) = ?
138           AND status IN ('pending', 'approved')`
139      );
140  
141      // Process in batches
142      const stats = { valid: 0, catchAll: 0, unknown: 0, blocked: 0, errors: 0 };
143      const blockedDetails = [];
144  
145      for (let i = 0; i < emails.length; i += BATCH_SIZE) {
146        const batch = emails.slice(i, i + BATCH_SIZE);
147        const batchNum = Math.floor(i / BATCH_SIZE) + 1;
148        const totalBatches = Math.ceil(emails.length / BATCH_SIZE);
149  
150        process.stdout.write(`  Batch ${batchNum}/${totalBatches} (${batch.length} emails)... `);
151  
152        try {
153          const results = await validateEmailBatchWithApi(batch);
154  
155          if (!DRY_RUN) {
156            // Cache results and apply block decisions in a single transaction
157            db.transaction(() => {
158              for (const [email, result] of results) {
159                upsertStmt.run(
160                  email,
161                  result.status,
162                  result.sub_status ?? null,
163                  result.free_email !== null && result.free_email !== undefined
164                    ? result.free_email
165                      ? 1
166                      : 0
167                    : null,
168                  result.mx_found !== null && result.mx_found !== undefined
169                    ? result.mx_found
170                      ? 1
171                      : 0
172                    : null
173                );
174  
175                if (BLOCKED_STATUSES.has(result.status)) {
176                  const reason = `ZeroBounce: ${result.status}${result.sub_status ? ` (${result.sub_status})` : ''}`;
177                  blockStmt.run(reason, email);
178                }
179              }
180            })();
181          }
182  
183          // Tally results
184          for (const [email, result] of results) {
185            if (BLOCKED_STATUSES.has(result.status)) {
186              stats.blocked++;
187              blockedDetails.push({
188                email,
189                status: result.status,
190                sub_status: result.sub_status,
191              });
192            } else if (result.status === 'catch-all') {
193              stats.catchAll++;
194            } else if (result.status === 'unknown') {
195              stats.unknown++;
196            } else {
197              stats.valid++;
198            }
199          }
200  
201          // Emails not returned by API (API gap) count as errors
202          for (const email of batch) {
203            if (!results.has(email)) stats.errors++;
204          }
205  
206          console.log('done');
207        } catch (err) {
208          console.error(`FAILED: ${err.message}`);
209          stats.errors += batch.length;
210        }
211      }
212  
213      // Summary
214      console.log('\n=== Validation Summary ===');
215      console.log(`  Valid:             ${stats.valid}`);
216      console.log(`  Catch-all:         ${stats.catchAll}  (sending — monitor for bounces)`);
217      console.log(`  Unknown:           ${stats.unknown}  (sending)`);
218      console.log(`  BLOCKED (failed):  ${stats.blocked}`);
219      console.log(`  API errors:        ${stats.errors}  (treated as unknown, not blocked)`);
220      if (DRY_RUN) {
221        console.log('\n[DRY RUN] No changes were made to the database.');
222      } else {
223        console.log(
224          `\nActual cost: ~$${((stats.valid + stats.catchAll + stats.unknown + stats.blocked) * COST_PER_EMAIL).toFixed(2)} USD`
225        );
226      }
227  
228      if (blockedDetails.length > 0) {
229        console.log('\nBlocked breakdown:');
230        const byCategory = {};
231        for (const d of blockedDetails) {
232          const key = d.sub_status ? `${d.status} / ${d.sub_status}` : d.status;
233          byCategory[key] = (byCategory[key] || 0) + 1;
234        }
235        for (const [key, count] of Object.entries(byCategory).sort((a, b) => b[1] - a[1])) {
236          console.log(`  ${key}: ${count}`);
237        }
238      }
239  
240      if (!DRY_RUN && stats.blocked > 0) {
241        console.log(`\n${stats.blocked} outreaches marked as 'failed'. They will not be sent.`);
242      }
243    } finally {
244      db.close();
245    }
246  }
247  
248  main().catch(err => {
249    console.error(`\nFatal error: ${err.message}`);
250    process.exit(1);
251  });