/ scripts / validate-contacts.js
validate-contacts.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Batch Contact Validator
  5   * Validates contacts_json for all sites and reports (or repairs) invalid entries.
  6   *
  7   * Usage:
  8   *   node scripts/validate-contacts.js              # Report only
  9   *   node scripts/validate-contacts.js --repair     # Report + attempt LLM repair
 10   *   node scripts/validate-contacts.js --limit 50   # Limit to 50 sites
 11   */
 12  
 13  import { createDatabaseConnection } from '../src/utils/db.js';
 14  import { join, dirname } from 'path';
 15  import { fileURLToPath } from 'url';
 16  import { validatePhone, validateEmail } from '../src/utils/contact-validator.js';
 17  import { validateAndRepairContacts } from '../src/utils/contact-repair.js';
 18  import { getContactsDataWithFallback, setContactsJson } from '../src/utils/contacts-storage.js';
 19  import dotenv from 'dotenv';
 20  
 21  const FS_SENTINEL = '{"_fs":true}';
 22  
 23  dotenv.config();
 24  
 25  const __filename = fileURLToPath(import.meta.url);
 26  const __dirname = dirname(__filename);
 27  
 28  const dbPath = process.env.DATABASE_PATH || join(__dirname, '../db/sites.db');
 29  const args = process.argv.slice(2);
 30  const doRepair = args.includes('--repair');
 31  const limitArg = args.find(
 32    a => a.startsWith('--limit=') || args[args.indexOf(a) - 1] === '--limit'
 33  );
 34  const limit = limitArg
 35    ? parseInt(limitArg.replace('--limit=', '') || args[args.indexOf('--limit') + 1], 10)
 36    : null;
 37  
 38  const db = createDatabaseConnection(dbPath);
 39  
 40  async function main() {
 41    const rows = db
 42      .prepare(
 43        `SELECT id, domain, country_code, contacts_json
 44         FROM sites
 45         WHERE contacts_json IS NOT NULL AND contacts_json != '{}'
 46         ORDER BY id DESC
 47         ${limit ? `LIMIT ${limit}` : ''}`
 48      )
 49      .all();
 50  
 51    console.log(
 52      `\nValidating contacts for ${rows.length} sites${doRepair ? ' (repair mode)' : ''}...\n`
 53    );
 54  
 55    let totalSites = 0;
 56    let sitesWithErrors = 0;
 57    let totalErrors = 0;
 58    let repaired = 0;
 59  
 60    for (const row of rows) {
 61      let contacts;
 62      try {
 63        contacts = getContactsDataWithFallback(row.id, row);
 64      } catch {
 65        console.warn(`  Site ${row.id} (${row.domain}): could not read contacts`);
 66        continue;
 67      }
 68      if (!contacts) continue;
 69  
 70      totalSites++;
 71      const errors = [];
 72  
 73      // Validate phones
 74      for (const phone of contacts.phone_numbers ?? []) {
 75        const { valid, reason } = validatePhone(phone.number, row.country_code);
 76        if (!valid) errors.push({ field: 'phone', value: phone.number, reason });
 77      }
 78  
 79      // Validate emails (format only for batch run — MX checks are slow)
 80      for (const emailEntry of contacts.email_addresses ?? []) {
 81        const { valid, reason } = await validateEmail(emailEntry.email, { checkMx: false });
 82        if (!valid) errors.push({ field: 'email', value: emailEntry.email, reason });
 83      }
 84  
 85      if (errors.length === 0) continue;
 86  
 87      sitesWithErrors++;
 88      totalErrors += errors.length;
 89  
 90      console.log(`  ✗ Site ${row.id} (${row.domain}) [${row.country_code || 'unknown'}]:`);
 91      for (const err of errors) {
 92        console.log(`      ${err.field}: "${err.value}" — ${err.reason}`);
 93      }
 94  
 95      if (doRepair) {
 96        try {
 97          const repaired_contacts = await validateAndRepairContacts(contacts, {
 98            countryCode: row.country_code,
 99          });
100          const newJson = JSON.stringify(repaired_contacts);
101          const currentJson = JSON.stringify(contacts);
102          if (newJson !== currentJson) {
103            setContactsJson(row.id, newJson);
104            db.prepare('UPDATE sites SET contacts_json = ? WHERE id = ?').run(FS_SENTINEL, row.id);
105            console.log(`      → Repaired and saved`);
106            repaired++;
107          } else {
108            console.log(`      → No change after repair`);
109          }
110        } catch (err) {
111          console.warn(`      → Repair failed: ${err.message}`);
112        }
113      }
114    }
115  
116    console.log('\n─────────────────────────────────────────');
117    console.log(`Sites checked:       ${totalSites}`);
118    console.log(`Sites with errors:   ${sitesWithErrors}`);
119    console.log(`Total errors:        ${totalErrors}`);
120    if (doRepair) console.log(`Sites repaired:      ${repaired}`);
121    console.log('─────────────────────────────────────────\n');
122  }
123  
/**
 * Last-resort handler for a rejected main(): print the error and exit with
 * status 1.
 *
 * @param {*} error - Whatever main() rejected with.
 */
function reportFatalAndExit(error) {
  console.error(error);
  process.exit(1);
}

main().catch(reportFatalAndExit);