#!/usr/bin/env node
/**
 * cleanup-bad-phone-contacts.js
 *
 * One-off cleanup: scan all sites' contacts_json and remove phone numbers
 * that fail isValidSmsNumber() validation. Also removes the known garbage
 * number fingerprint (+01520153, +2000206, etc.) injected by a bad scrape.
 * NOTE(review): no fingerprint-specific check is visible in this script;
 * presumably those numbers are rejected by isValidSmsNumber() — confirm.
 *
 * With --dry-run nothing is written; the script only reports what it would
 * remove.
 *
 * Run: node scripts/cleanup-bad-phone-contacts.js [--dry-run]
 */

import { createDatabaseConnection } from '../src/utils/db.js';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { isValidSmsNumber } from '../src/utils/phone-normalizer.js';
import { getContactsDataWithFallback, setContactsJson } from '../src/utils/contacts-storage.js';
import '../src/utils/load-env.js';

const __dirname = dirname(fileURLToPath(import.meta.url));
const dbPath = process.env.DATABASE_PATH || join(__dirname, '../db/sites.db');
const dryRun = process.argv.includes('--dry-run');

/**
 * Extract the raw number from a phone entry.
 * Entries are either plain strings or objects carrying a `number` property.
 *
 * @param {unknown} entry - One element of `contacts.phone_numbers`.
 * @returns {unknown} The string itself, or `entry.number` (may be undefined).
 */
const phoneNumberOf = (entry) => (typeof entry === 'string' ? entry : entry?.number);

const db = createDatabaseConnection(dbPath);

// try/finally guarantees the handle is closed even if the scan or the
// transaction throws part-way through.
try {
  db.pragma('journal_mode = WAL');

  const sites = db
    .prepare(
      `SELECT id, domain, status
     FROM sites`
    )
    .all();

  // NOTE(review): the query above selects every site and does not filter on
  // phone_numbers, so this count is "all sites" despite the wording.
  console.log(`Scanning ${sites.length} sites with phone_numbers in contacts_json...`);

  let sitesUpdated = 0;
  let phonesRemoved = 0;
  let sitesSkipped = 0;

  // Only bumps the timestamp; the contacts payload itself is persisted
  // through setContactsJson().
  const update = db.prepare(`UPDATE sites SET updated_at = datetime('now') WHERE id = ?`);

  db.transaction(() => {
    for (const site of sites) {
      let contacts;
      try {
        contacts = getContactsDataWithFallback(site.id, site);
      } catch (err) {
        // Best-effort: one unreadable site must not abort the whole run,
        // but the failure is reported instead of being dropped silently.
        sitesSkipped++;
        console.warn(
          `  [${site.domain}] skipped: could not load contacts (${err?.message ?? err})`
        );
        continue;
      }
      if (!contacts) continue;

      const phones = contacts.phone_numbers;
      if (!Array.isArray(phones) || phones.length === 0) continue;

      const clean = phones.filter((entry) => {
        const num = phoneNumberOf(entry);
        // Entries without a usable number are dropped (and counted as removed).
        if (!num) return false;

        // NOTE(review): a truthy return is treated as the *rejection reason*
        // and printed as such. The helper's name suggests a boolean "is valid"
        // result — confirm its contract, otherwise this filter is inverted.
        const reason = isValidSmsNumber(num);
        if (reason) {
          console.log(`  [${site.domain}] removing ${num}: ${reason}`);
          return false;
        }
        return true;
      });

      const removed = phones.length - clean.length;
      if (removed === 0) continue;

      phonesRemoved += removed;
      sitesUpdated++;

      if (dryRun) {
        console.log(
          `  DRY-RUN: would remove ${removed} phone(s) from ${site.domain} (${site.status})`
        );
        continue;
      }

      contacts.phone_numbers = clean;
      setContactsJson(site.id, JSON.stringify(contacts));
      update.run(site.id);
    }
  })();

  if (sitesSkipped > 0) {
    console.warn(`${sitesSkipped} site(s) skipped because their contacts could not be loaded.`);
  }

  console.log(
    `\nDone. ${dryRun ? '[DRY RUN] ' : ''}${sitesUpdated} sites updated, ${phonesRemoved} invalid phone numbers removed.`
  );
} finally {
  db.close();
}