/ scripts / compact-contacts-json.js
compact-contacts-json.js
  1  #!/usr/bin/env node
  2  
/**
 * One-off script to compact all data/contacts/*.json files.
 *
 * For each file:
 *   - Deduplicates email_addresses, phone_numbers, social_profiles
 *   - Removes keys with null values, empty arrays, or empty strings
 *   - Writes the compacted JSON back (unless --dry-run)
 *
 * Usage:
 *   node scripts/compact-contacts-json.js           # compact all files
 *   node scripts/compact-contacts-json.js --dry-run  # preview without writing
 */
 15  
 16  import { readFileSync, writeFileSync, readdirSync, statSync } from 'fs';
 17  import { join } from 'path';
 18  import { compactContacts } from '../src/utils/compact-contacts.js';
 19  
 20  const CONTACTS_DIR = join(process.cwd(), 'data', 'contacts');
 21  const BATCH_SIZE = 500;
 22  const dryRun = process.argv.includes('--dry-run');
 23  
 24  function run() {
 25    if (dryRun) {
 26      console.log('[DRY RUN] No files will be modified.\n');
 27    }
 28  
 29    // List all .json files
 30    let files;
 31    try {
 32      files = readdirSync(CONTACTS_DIR).filter(f => f.endsWith('.json'));
 33    } catch (err) {
 34      console.error(`Failed to read contacts directory: ${err.message}`);
 35      process.exit(1);
 36    }
 37  
 38    console.log(`Found ${files.length} contacts files in ${CONTACTS_DIR}\n`);
 39  
 40    let totalProcessed = 0;
 41    let totalModified = 0;
 42    let totalBytesSaved = 0;
 43  
 44    // Process in batches to avoid memory pressure
 45    for (let i = 0; i < files.length; i += BATCH_SIZE) {
 46      const batch = files.slice(i, i + BATCH_SIZE);
 47  
 48      for (const file of batch) {
 49        const filePath = join(CONTACTS_DIR, file);
 50        totalProcessed++;
 51  
 52        let raw;
 53        try {
 54          raw = readFileSync(filePath, 'utf8');
 55        } catch (err) {
 56          console.error(`  SKIP ${file}: read error — ${err.message}`);
 57          continue;
 58        }
 59  
 60        let parsed;
 61        try {
 62          parsed = JSON.parse(raw);
 63        } catch (err) {
 64          console.error(`  SKIP ${file}: invalid JSON — ${err.message}`);
 65          continue;
 66        }
 67  
 68        // Skip non-object contacts (some legacy files are arrays)
 69        if (Array.isArray(parsed) || typeof parsed !== 'object') {
 70          continue;
 71        }
 72  
 73        const compacted = compactContacts(parsed);
 74        const compactedJson = JSON.stringify(compacted);
 75        const originalSize = Buffer.byteLength(raw, 'utf8');
 76        const compactedSize = Buffer.byteLength(compactedJson, 'utf8');
 77  
 78        if (compactedJson !== JSON.stringify(parsed)) {
 79          totalModified++;
 80          const saved = originalSize - compactedSize;
 81          totalBytesSaved += saved;
 82  
 83          if (!dryRun) {
 84            writeFileSync(filePath, compactedJson, 'utf8');
 85          }
 86        }
 87      }
 88  
 89      // Progress report every batch
 90      if (files.length > BATCH_SIZE) {
 91        const processed = Math.min(i + BATCH_SIZE, files.length);
 92        console.log(`  Progress: ${processed}/${files.length} files processed...`);
 93      }
 94    }
 95  
 96    console.log('\n--- Summary ---');
 97    console.log(`Files processed: ${totalProcessed}`);
 98    console.log(`Files modified:  ${totalModified}`);
 99    console.log(`Bytes saved:     ${totalBytesSaved.toLocaleString()} (${(totalBytesSaved / 1024).toFixed(1)} KB)`);
100    if (dryRun) {
101      console.log('\n[DRY RUN] No files were written. Run without --dry-run to apply changes.');
102    }
103  }
104  
// Script entry point: the compaction starts as soon as the module is executed.
run();