/ scripts / backfill-contacts.js
backfill-contacts.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Backfill Contacts Script
  5   * Extracts contact_details from conversion_score_json and populates contacts_json
  6   */
  7  
  8  /* eslint-disable max-depth */
  9  
 10  import { createDatabaseConnection } from '../src/utils/db.js';
 11  import { join, dirname } from 'path';
 12  import { fileURLToPath } from 'url';
 13  import Logger from '../src/utils/logger.js';
 14  import { normalizePhoneNumbers } from '../src/utils/phone-normalizer.js';
 15  import { getScoreDataWithFallback } from '../src/utils/score-storage.js';
 16  import { setContactsJson } from '../src/utils/contacts-storage.js';
 17  import dotenv from 'dotenv';
 18  
 19  const FS_SENTINEL = '{"_fs":true}';
 20  
 21  dotenv.config();
 22  
 23  const __filename = fileURLToPath(import.meta.url);
 24  const __dirname = dirname(__filename);
 25  const projectRoot = join(__dirname, '..');
 26  
 27  const logger = new Logger('BackfillContacts');
 28  const dbPath = process.env.DATABASE_PATH || join(projectRoot, 'db/sites.db');
 29  
 30  /**
 31   * Backfill contacts_json from conversion_score_json.contact_details
 32   */
 33  function backfillContacts() {
 34    const db = createDatabaseConnection(dbPath);
 35    db.pragma('foreign_keys = ON'); // Enforce foreign key constraints
 36  
 37    try {
 38      logger.info('Starting contacts backfill...');
 39  
 40      // Get ALL sites that have score data — contact_details presence is checked in JS
 41      // (json_extract on sentinel '{"_fs":true}' always returns NULL, so WHERE clause
 42      // would miss filesystem-backed scores; instead we load all sites with score_json
 43      // and check contact_details via getScoreDataWithFallback)
 44      const sites = db
 45        .prepare(
 46          `SELECT
 47            id,
 48            domain,
 49            score_json,
 50            contacts_json
 51          FROM sites
 52          WHERE score_json IS NOT NULL`
 53        )
 54        .all();
 55  
 56      logger.info(`Found ${sites.length} sites with missing contacts_json`);
 57  
 58      if (sites.length === 0) {
 59        logger.success('No sites need backfilling');
 60        db.close();
 61        return { total: 0, updated: 0 };
 62      }
 63  
 64      let updated = 0;
 65      const updateStmt = db.prepare(
 66        `UPDATE sites
 67         SET contacts_json = ?,
 68             city = ?,
 69             country_code = ?,
 70             updated_at = CURRENT_TIMESTAMP
 71         WHERE id = ?`
 72      );
 73  
 74      for (const site of sites) {
 75        try {
 76          const scoreData = getScoreDataWithFallback(site.id, site);
 77          if (!scoreData) continue;
 78          const contactDetails = scoreData.contact_details;
 79  
 80          // Only update if contact_details is a non-empty object
 81          if (contactDetails && typeof contactDetails === 'object') {
 82            const hasContent = Array.isArray(contactDetails)
 83              ? contactDetails.length > 0
 84              : Object.keys(contactDetails).length > 0;
 85  
 86            if (hasContent) {
 87              // Normalize phone_numbers: convert to object format and E.164 format
 88              if (contactDetails.phone_numbers && Array.isArray(contactDetails.phone_numbers)) {
 89                contactDetails.phone_numbers = contactDetails.phone_numbers.map(phone => {
 90                  if (typeof phone === 'string') {
 91                    return { number: phone, label: null };
 92                  }
 93                  return phone;
 94                });
 95  
 96                // Normalize to E.164 format (remove dashes, spaces, parentheses)
 97                contactDetails.phone_numbers = normalizePhoneNumbers(contactDetails.phone_numbers);
 98              }
 99  
100              const contactsJson = JSON.stringify(contactDetails);
101              const city = contactDetails.city || null;
102              const countryCode = contactDetails.country_code || null;
103  
104              setContactsJson(site.id, contactsJson);
105              updateStmt.run(FS_SENTINEL, city, countryCode, site.id);
106              updated++;
107  
108              if (city && countryCode) {
109                logger.success(
110                  `Updated ${site.domain} (ID: ${site.id}) - Location: ${city}, ${countryCode}`
111                );
112              } else {
113                logger.success(`Updated ${site.domain} (ID: ${site.id})`);
114              }
115            } else {
116              logger.debug(`Skipped ${site.domain} - empty contact_details`);
117            }
118          }
119        } catch (error) {
120          logger.error(`Failed to process ${site.domain}`, error);
121        }
122      }
123  
124      logger.success(`Backfill complete: ${updated}/${sites.length} sites updated`);
125  
126      db.close();
127      return { total: sites.length, updated };
128    } catch (error) {
129      logger.error('Backfill failed', error);
130      db.close();
131      throw error;
132    }
133  }
134  
135  // CLI execution
136  if (import.meta.url === `file://${process.argv[1]}`) {
137    try {
138      const result = backfillContacts();
139      console.log(`\nāœ… Backfill complete: ${result.updated}/${result.total} sites updated\n`);
140      process.exit(0);
141    } catch (error) {
142      console.error(`\nāŒ Backfill failed: ${error.message}\n`);
143      process.exit(1);
144    }
145  }
146  
147  export default { backfillContacts };