backfill-contacts.js
1 #!/usr/bin/env node 2 3 /** 4 * Backfill Contacts Script 5 * Extracts contact_details from conversion_score_json and populates contacts_json 6 */ 7 8 /* eslint-disable max-depth */ 9 10 import { createDatabaseConnection } from '../src/utils/db.js'; 11 import { join, dirname } from 'path'; 12 import { fileURLToPath } from 'url'; 13 import Logger from '../src/utils/logger.js'; 14 import { normalizePhoneNumbers } from '../src/utils/phone-normalizer.js'; 15 import { getScoreDataWithFallback } from '../src/utils/score-storage.js'; 16 import { setContactsJson } from '../src/utils/contacts-storage.js'; 17 import dotenv from 'dotenv'; 18 19 const FS_SENTINEL = '{"_fs":true}'; 20 21 dotenv.config(); 22 23 const __filename = fileURLToPath(import.meta.url); 24 const __dirname = dirname(__filename); 25 const projectRoot = join(__dirname, '..'); 26 27 const logger = new Logger('BackfillContacts'); 28 const dbPath = process.env.DATABASE_PATH || join(projectRoot, 'db/sites.db'); 29 30 /** 31 * Backfill contacts_json from conversion_score_json.contact_details 32 */ 33 function backfillContacts() { 34 const db = createDatabaseConnection(dbPath); 35 db.pragma('foreign_keys = ON'); // Enforce foreign key constraints 36 37 try { 38 logger.info('Starting contacts backfill...'); 39 40 // Get ALL sites that have score data ā contact_details presence is checked in JS 41 // (json_extract on sentinel '{"_fs":true}' always returns NULL, so WHERE clause 42 // would miss filesystem-backed scores; instead we load all sites with score_json 43 // and check contact_details via getScoreDataWithFallback) 44 const sites = db 45 .prepare( 46 `SELECT 47 id, 48 domain, 49 score_json, 50 contacts_json 51 FROM sites 52 WHERE score_json IS NOT NULL` 53 ) 54 .all(); 55 56 logger.info(`Found ${sites.length} sites with missing contacts_json`); 57 58 if (sites.length === 0) { 59 logger.success('No sites need backfilling'); 60 db.close(); 61 return { total: 0, updated: 0 }; 62 } 63 64 let updated = 0; 65 const updateStmt = db.prepare( 66 `UPDATE sites 67 SET contacts_json = ?, 68 city = ?, 69 country_code = ?, 70 updated_at = CURRENT_TIMESTAMP 71 WHERE id = ?` 72 ); 73 74 for (const site of sites) { 75 try { 76 const scoreData = getScoreDataWithFallback(site.id, site); 77 if (!scoreData) continue; 78 const contactDetails = scoreData.contact_details; 79 80 // Only update if contact_details is a non-empty object 81 if (contactDetails && typeof contactDetails === 'object') { 82 const hasContent = Array.isArray(contactDetails) 83 ? contactDetails.length > 0 84 : Object.keys(contactDetails).length > 0; 85 86 if (hasContent) { 87 // Normalize phone_numbers: convert to object format and E.164 format 88 if (contactDetails.phone_numbers && Array.isArray(contactDetails.phone_numbers)) { 89 contactDetails.phone_numbers = contactDetails.phone_numbers.map(phone => { 90 if (typeof phone === 'string') { 91 return { number: phone, label: null }; 92 } 93 return phone; 94 }); 95 96 // Normalize to E.164 format (remove dashes, spaces, parentheses) 97 contactDetails.phone_numbers = normalizePhoneNumbers(contactDetails.phone_numbers); 98 } 99 100 const contactsJson = JSON.stringify(contactDetails); 101 const city = contactDetails.city || null; 102 const countryCode = contactDetails.country_code || null; 103 104 setContactsJson(site.id, contactsJson); 105 updateStmt.run(FS_SENTINEL, city, countryCode, site.id); 106 updated++; 107 108 if (city && countryCode) { 109 logger.success( 110 `Updated ${site.domain} (ID: ${site.id}) - Location: ${city}, ${countryCode}` 111 ); 112 } else { 113 logger.success(`Updated ${site.domain} (ID: ${site.id})`); 114 } 115 } else { 116 logger.debug(`Skipped ${site.domain} - empty contact_details`); 117 } 118 } 119 } catch (error) { 120 logger.error(`Failed to process ${site.domain}`, error); 121 } 122 } 123 124 logger.success(`Backfill complete: ${updated}/${sites.length} sites updated`); 125 126 db.close(); 127 return { total: sites.length, updated }; 128 } catch (error) { 129 logger.error('Backfill failed', error); 130 db.close(); 131 throw error; 132 } 133 } 134 135 // CLI execution 136 if (import.meta.url === `file://${process.argv[1]}`) { 137 try { 138 const result = backfillContacts(); 139 console.log(`\nā Backfill complete: ${result.updated}/${result.total} sites updated\n`); 140 process.exit(0); 141 } catch (error) { 142 console.error(`\nā Backfill failed: ${error.message}\n`); 143 process.exit(1); 144 } 145 } 146 147 export default { backfillContacts };