#!/usr/bin/env node

/**
 * validate-contacts.js — Batch Contact Validator
 * Validates contacts_json for all sites and reports (or repairs) invalid entries.
 *
 * Usage:
 *   node scripts/validate-contacts.js             # Report only
 *   node scripts/validate-contacts.js --repair    # Report + attempt LLM repair
 *   node scripts/validate-contacts.js --limit 50  # Limit to 50 sites
 *   node scripts/validate-contacts.js --limit=50  # Same, inline form
 */

import { createDatabaseConnection } from '../src/utils/db.js';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { validatePhone, validateEmail } from '../src/utils/contact-validator.js';
import { validateAndRepairContacts } from '../src/utils/contact-repair.js';
import { getContactsDataWithFallback, setContactsJson } from '../src/utils/contacts-storage.js';
import dotenv from 'dotenv';

// Value written to sites.contacts_json once setContactsJson() has stored the
// repaired payload. NOTE(review): presumably marks the contacts as living in
// the storage layer rather than inline in the column — confirm against
// contacts-storage.js.
const FS_SENTINEL = '{"_fs":true}';

dotenv.config();

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const dbPath = process.env.DATABASE_PATH || join(__dirname, '../db/sites.db');
const args = process.argv.slice(2);
const doRepair = args.includes('--repair');

/**
 * Extracts the site limit from the command-line arguments.
 *
 * Accepts both `--limit=N` and `--limit N`. A malformed value is rejected
 * instead of being ignored: silently dropping the limit would make the script
 * process every site, which in `--repair` mode means an unbounded number of
 * repair calls.
 *
 * @param {string[]} argv - Command-line arguments without the node/script prefix.
 * @returns {number|null} The positive integer limit, or null when no limit was given.
 * @throws {Error} When `--limit` is present but its value is not a positive integer.
 */
function parseLimit(argv) {
  const inlinePrefix = '--limit=';
  let raw = null;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg.startsWith(inlinePrefix)) {
      raw = arg.slice(inlinePrefix.length);
      break;
    }
    if (arg === '--limit') {
      // A trailing `--limit` with no value becomes '' and fails validation below.
      raw = argv[i + 1] ?? '';
      break;
    }
  }

  if (raw === null) return null;

  // Digits only: rejects negatives, decimals and trailing garbage ("50abc")
  // that a bare parseInt() would accept.
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value) || value < 1) {
    throw new Error(`Invalid --limit value "${raw}": expected a positive integer`);
  }
  return value;
}

let limit;
try {
  limit = parseLimit(args);
} catch (err) {
  // Fail before opening the database so a typo never triggers a full run.
  console.error(err.message);
  process.exit(1);
}

const db = createDatabaseConnection(dbPath);

/**
 * Runs phone and email validation over one site's contacts.
 *
 * @param {object} contacts - Contacts object; `phone_numbers` and
 *   `email_addresses` are read as optional lists of `{ number }` / `{ email }` entries.
 * @param {string} [countryCode] - Site country code, forwarded to validatePhone.
 * @returns {Promise<Array<{field: string, value: *, reason: *}>>} One entry per invalid contact.
 */
async function collectContactErrors(contacts, countryCode) {
  const errors = [];

  // Validate phones
  for (const phone of contacts.phone_numbers ?? []) {
    const { valid, reason } = validatePhone(phone.number, countryCode);
    if (!valid) errors.push({ field: 'phone', value: phone.number, reason });
  }

  // Validate emails (format only for batch run — MX checks are slow)
  for (const emailEntry of contacts.email_addresses ?? []) {
    const { valid, reason } = await validateEmail(emailEntry.email, { checkMx: false });
    if (!valid) errors.push({ field: 'email', value: emailEntry.email, reason });
  }

  return errors;
}

/**
 * Attempts to repair one site's contacts and persists the result when it differs.
 * Failures are logged and reported as "not repaired" so one bad site does not
 * abort the batch.
 *
 * @param {object} row - Site row; `id` and `country_code` are read.
 * @param {object} contacts - The contacts currently stored for the site.
 * @param {object} markStoredStmt - Prepared `UPDATE sites SET contacts_json = ? WHERE id = ?`.
 * @returns {Promise<boolean>} True when a changed payload was saved.
 */
async function repairSite(row, contacts, markStoredStmt) {
  try {
    const repairedContacts = await validateAndRepairContacts(contacts, {
      countryCode: row.country_code,
    });
    const newJson = JSON.stringify(repairedContacts);
    const currentJson = JSON.stringify(contacts);
    if (newJson === currentJson) {
      console.log(` → No change after repair`);
      return false;
    }
    // Store the payload first, then flip the column to the sentinel.
    setContactsJson(row.id, newJson);
    markStoredStmt.run(FS_SENTINEL, row.id);
    console.log(` → Repaired and saved`);
    return true;
  } catch (err) {
    console.warn(` → Repair failed: ${err.message}`);
    return false;
  }
}

/**
 * Validates the contacts of every selected site, prints a per-site report and
 * a summary, and repairs invalid sites when `--repair` was passed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // `limit` is a validated positive integer (or null), so interpolating it
    // into the SQL text cannot inject anything.
    const limitClause = limit === null ? '' : `LIMIT ${limit}`;
    const rows = db
      .prepare(
        `SELECT id, domain, country_code, contacts_json
         FROM sites
         WHERE contacts_json IS NOT NULL AND contacts_json != '{}'
         ORDER BY id DESC
         ${limitClause}`
      )
      .all();

    console.log(
      `\nValidating contacts for ${rows.length} sites${doRepair ? ' (repair mode)' : ''}...\n`
    );

    // Prepared once instead of once per repaired site.
    const markStoredStmt = doRepair
      ? db.prepare('UPDATE sites SET contacts_json = ? WHERE id = ?')
      : null;

    let totalSites = 0;
    let sitesWithErrors = 0;
    let totalErrors = 0;
    let repaired = 0;

    for (const row of rows) {
      let contacts;
      try {
        contacts = getContactsDataWithFallback(row.id, row);
      } catch (err) {
        console.warn(` Site ${row.id} (${row.domain}): could not read contacts: ${err.message}`);
        continue;
      }
      if (!contacts) continue;

      totalSites++;
      const errors = await collectContactErrors(contacts, row.country_code);
      if (errors.length === 0) continue;

      sitesWithErrors++;
      totalErrors += errors.length;

      console.log(` ✗ Site ${row.id} (${row.domain}) [${row.country_code || 'unknown'}]:`);
      for (const err of errors) {
        console.log(` ${err.field}: "${err.value}" — ${err.reason}`);
      }

      if (doRepair && (await repairSite(row, contacts, markStoredStmt))) {
        repaired++;
      }
    }

    console.log('\n─────────────────────────────────────────');
    console.log(`Sites checked: ${totalSites}`);
    console.log(`Sites with errors: ${sitesWithErrors}`);
    console.log(`Total errors: ${totalErrors}`);
    if (doRepair) console.log(`Sites repaired: ${repaired}`);
    console.log('─────────────────────────────────────────\n');
  } finally {
    // Release the connection if the object exposes close(); the optional call
    // keeps this a no-op for connection objects that do not.
    db.close?.();
  }
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});