deduplicate-regions.js
/**
 * Removes duplicate entries from the `regions.txt` file of every listed
 * country under `../data/<country>/`, rewriting a file only when at least one
 * duplicate was found, and collecting per-country counts in `results`.
 *
 * Note: the `#!/usr/bin/env node` hashbang must be the very first line of the
 * file to be valid, so it is intentionally not repeated at this position.
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __dirname; derive it from the module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const dataDir = path.join(__dirname, '..', 'data');

// Country directory names (under dataDir) whose regions.txt is processed.
const countries = [
  'at',
  'au',
  'be',
  'ca',
  'ch',
  'cn',
  'de',
  'dk',
  'es',
  'fr',
  'id',
  'ie',
  'in',
  'it',
  'jp',
  'kr',
  'mx',
  'nl',
  'no',
  'nz',
  'pl',
  'se',
  'sg',
  'uk',
  'us',
];

/**
 * Reads a text file and drops every line that repeats an earlier line.
 *
 * Lines are compared after trimming surrounding whitespace and lower-casing,
 * so `Bavaria`, `bavaria` and `  BAVARIA  ` count as the same entry; the first
 * occurrence is kept exactly as written. Blank lines and lines whose trimmed
 * text starts with `#` are always kept and never counted as duplicates.
 *
 * The file itself is not modified; the caller decides whether to write the
 * result back.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {{ deduplicated: string, duplicatesCount: number }} The remaining
 *   lines joined with `\n`, and the number of lines that were dropped.
 * @throws {Error} Whatever `fs.readFileSync` throws if the file cannot be read.
 */
function deduplicateRegions(filePath) {
  const content = fs.readFileSync(filePath, 'utf-8');

  const seen = new Set();
  const deduplicated = [];
  let duplicatesCount = 0;

  for (const line of content.split('\n')) {
    // Trim once; this also strips a trailing '\r' left over from CRLF files.
    const trimmed = line.trim();

    // Preserve comments and blank lines untouched.
    if (trimmed === '' || trimmed.startsWith('#')) {
      deduplicated.push(line);
      continue;
    }

    // Case-insensitive duplicate detection on the trimmed text.
    const normalized = trimmed.toLowerCase();
    if (seen.has(normalized)) {
      duplicatesCount++;
      continue; // Skip duplicate
    }

    seen.add(normalized);
    deduplicated.push(line); // Keep the original spelling and whitespace.
  }

  return { deduplicated: deduplicated.join('\n'), duplicatesCount };
}

console.log('Deduplicating regions.txt files...\n');

// One { country, duplicatesCount } entry per file that was actually processed.
const results = [];

for (const country of countries) {
  const filePath = path.join(dataDir, country, 'regions.txt');

  if (!fs.existsSync(filePath)) {
    console.log(`⚠️ ${country.toUpperCase()}: File not found`);
    continue;
  }

  const { deduplicated, duplicatesCount } = deduplicateRegions(filePath);

  if (duplicatesCount > 0) {
    // Only touch the file when something actually changed.
    fs.writeFileSync(filePath, deduplicated, 'utf-8');
    console.log(`✅ ${country.toUpperCase()}: Removed ${duplicatesCount} duplicate(s)`);
  } else {
    console.log(`✓ ${country.toUpperCase()}: No duplicates found`);
  }

  results.push({ country, duplicatesCount });
}

// ---------------------------------------------------------------------------
// Final report: a banner, then per-country and overall duplicate counts taken
// from the `results` entries collected while processing the files.
// ---------------------------------------------------------------------------
const separator = '='.repeat(50);

console.log(`\n${separator}`);
console.log('SUMMARY');
console.log(separator);

// Sum of removed lines across every processed file.
const totalDuplicates = results.reduce((sum, entry) => sum + entry.duplicatesCount, 0);
// Only the files in which at least one duplicate was removed.
const countriesWithDuplicates = results.filter((entry) => entry.duplicatesCount > 0);

if (countriesWithDuplicates.length > 0) {
  console.log('\nCountries with duplicates removed:');
  for (const { country, duplicatesCount } of countriesWithDuplicates) {
    console.log(` ${country.toUpperCase()}: ${duplicatesCount} duplicate(s)`);
  }
}

console.log(`\nTotal duplicates removed: ${totalDuplicates}`);
console.log(`Files processed: ${results.length}`);
console.log(`Files with duplicates: ${countriesWithDuplicates.length}`);