dedupe-domains.js
#!/usr/bin/env node

/**
 * Manual domain deduplication script
 * Removes duplicate domains from sites table, keeping only the entry
 * with the keyword that has the highest search volume
 *
 * Options:
 *   --dry-run   forwarded to dedupeDomains as `{ dryRun: true }`; the script
 *               then only reports how many duplicate domains were found.
 *
 * Environment:
 *   DATABASE_PATH   path of the database to open (default: ./db/sites.db)
 *
 * Exit status: 0 on success (including "nothing to do"), 1 on failure.
 */

import { createDatabaseConnection } from '../src/utils/db.js';
import { dedupeDomains } from '../src/utils/dedupe-domains.js';
import Logger from '../src/utils/logger.js';

const logger = new Logger('Dedupe');

// `||` (not `??`) is deliberate: an empty DATABASE_PATH also falls back to the default.
const dbPath = process.env.DATABASE_PATH || './db/sites.db';

// Parse command line args
const args = process.argv.slice(2);
const dryRun = args.includes('--dry-run');

/**
 * Opens the database, runs the deduplication once and logs a summary.
 *
 * The outcome is reported through `process.exitCode` rather than
 * `process.exit()`: `process.exit()` terminates the process immediately, which
 * would skip the `finally` block and leave the database connection unclosed.
 *
 * @returns {Promise<void>} Resolves once the connection has been closed.
 */
async function main() {
  // Opened outside the try block so `finally` never runs against an unopened connection.
  const db = createDatabaseConnection(dbPath);

  try {
    if (dryRun) {
      logger.info('Running in dry-run mode (no changes will be made)');
    }

    const stats = dedupeDomains(db, { dryRun });

    if (stats.duplicateDomains === 0) {
      logger.info('\nNo duplicate domains found - database is clean!');
      process.exitCode = 0;
      return;
    }

    if (dryRun) {
      logger.info(
        `\n${stats.duplicateDomains} duplicate domains found. Run without --dry-run to deduplicate.`
      );
    } else {
      logger.success(
        `\n✓ Successfully deduplicated ${stats.duplicateDomains} domains (${stats.sitesMarkedIgnored} duplicates marked as ignored)`
      );
    }
  } catch (err) {
    logger.error(`Deduplication failed: ${err.message}`);
    process.exitCode = 1;
  } finally {
    // Reached on every path now that nothing above calls process.exit().
    db.close();
  }
}

// Errors raised outside main's try block (opening or closing the database)
// surface here instead of becoming an unhandled rejection.
main().catch((err) => {
  logger.error(`Deduplication failed: ${err?.message ?? err}`);
  process.exitCode = 1;
});