compare-keywords.js
#!/usr/bin/env node

/**
 * Compare original keywords vs filtered keywords after applying cutoff
 *
 * Shows:
 * - What keywords were kept vs removed
 * - Search volume statistics for each group
 * - Impact of cutoff threshold on keyword count
 * - Top removed keywords (high potential missed opportunities)
 *
 * Usage:
 *   node scripts/compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt
 *   node scripts/compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt --show-removed
 */

import fs from 'fs';
import { parse } from 'csv-parse/sync';

// NOTE(review): the leading glyphs in several log messages below look
// mis-encoded in this copy of the file. They are reproduced unchanged;
// confirm the file's encoding before "fixing" them.

/** Horizontal rule printed between report sections. */
const SEPARATOR = '='.repeat(80);

/** Number of rows shown in each "Top N" listing. */
const TOP_COUNT = 10;

/** Removed keywords above this monthly volume trigger a warning. */
const HIGH_VOLUME_WARNING = 5000;

// Parse arguments. Flags (anything starting with "--") are separated from the
// positional paths so that `--show-removed` may appear anywhere on the
// command line without being mistaken for a file path.
const args = process.argv.slice(2);
const positionalArgs = args.filter(arg => !arg.startsWith('--'));
if (positionalArgs.length < 2) {
  console.error(
    '\nā Usage: node compare-keywords.js <csv-path> <filtered-txt-path> [--show-removed]'
  );
  console.error(
    ' Example: node compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt\n'
  );
  process.exit(1);
}

const csvPath = positionalArgs[0];
const txtPath = positionalArgs[1];
const showRemoved = args.includes('--show-removed');

/**
 * Read and parse CSV file
 *
 * Exits the process with status 1 when the file does not exist.
 *
 * @param {string} csvPath - Path to a CSV file with a header row. The columns
 *   read are `keyword`, `search_volume`, `competition` and `cpc_low` / `cpc`.
 * @returns {{keyword: string, searchVolume: number, competition: number, cpc: number}[]}
 *   One entry per CSV record. `searchVolume` is 0 when the column is missing
 *   or not numeric, so that a single bad row cannot turn every statistic
 *   into NaN.
 */
function readCSV(csvPath) {
  if (!fs.existsSync(csvPath)) {
    console.error(`\nā CSV file not found: ${csvPath}\n`);
    process.exit(1);
  }

  const content = fs.readFileSync(csvPath, 'utf-8');
  const records = parse(content, {
    columns: true,
    skip_empty_lines: true,
    // Strip a UTF-8 byte-order mark; otherwise it becomes part of the first
    // header name and `record.keyword` is undefined for every row.
    bom: true,
  });

  return records.map(record => {
    const searchVolume = Number.parseInt(record.search_volume, 10);
    return {
      // Trimmed so it compares equal to the (trimmed) lines of the filtered file.
      keyword: String(record.keyword || '').trim(),
      searchVolume: Number.isNaN(searchVolume) ? 0 : searchVolume,
      competition: Number.parseInt(record.competition, 10),
      cpc: Number.parseFloat(record.cpc_low || record.cpc || 0),
    };
  });
}

/**
 * Read filtered keywords from text file
 *
 * Exits the process with status 1 when the file does not exist.
 *
 * @param {string} txtPath - Path to a text file with one keyword per line.
 * @returns {string[]} Trimmed, non-empty lines in file order.
 */
function readFilteredKeywords(txtPath) {
  if (!fs.existsSync(txtPath)) {
    console.error(`\nā Filtered keywords file not found: ${txtPath}\n`);
    process.exit(1);
  }

  return fs
    .readFileSync(txtPath, 'utf-8')
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.length > 0);
}

/**
 * Calculate statistics for a set of keywords
 *
 * @param {{searchVolume: number}[]} keywords - Keyword records to summarise.
 * @returns {{count: number, totalSearchVolume: number, meanSearchVolume: number,
 *   medianSearchVolume: number, minSearchVolume: number, maxSearchVolume: number}}
 *   All fields are 0 for an empty input. The mean is rounded to an integer;
 *   the median of an even-sized set is the average of the two middle values.
 */
function calculateStats(keywords) {
  if (keywords.length === 0) {
    return {
      count: 0,
      totalSearchVolume: 0,
      meanSearchVolume: 0,
      medianSearchVolume: 0,
      minSearchVolume: 0,
      maxSearchVolume: 0,
    };
  }

  const searchVolumes = keywords.map(k => k.searchVolume).sort((a, b) => a - b);
  const totalSearchVolume = searchVolumes.reduce((sum, sv) => sum + sv, 0);

  const middle = Math.floor(searchVolumes.length / 2);
  const medianSearchVolume =
    searchVolumes.length % 2 === 1
      ? searchVolumes[middle]
      : (searchVolumes[middle - 1] + searchVolumes[middle]) / 2;

  return {
    count: keywords.length,
    totalSearchVolume,
    meanSearchVolume: Math.round(totalSearchVolume / keywords.length),
    medianSearchVolume,
    minSearchVolume: searchVolumes[0],
    maxSearchVolume: searchVolumes[searchVolumes.length - 1],
  };
}

/**
 * Format `part / whole` as a percentage with one decimal place.
 *
 * @param {number} part
 * @param {number} whole
 * @returns {string} e.g. "42.5"; "0.0" when `whole` is not positive, so a
 *   zero denominator never prints "NaN".
 */
function formatPercent(part, whole) {
  return whole > 0 ? ((part / whole) * 100).toFixed(1) : '0.0';
}

/**
 * Format one keyword record as a table row: volume | competition | cpc | keyword.
 *
 * @param {{keyword: string, searchVolume: number, competition: number, cpc: number}} kw
 * @returns {string}
 */
function formatKeywordRow(kw) {
  return ` ${kw.searchVolume.toLocaleString().padStart(10)} | ${kw.competition.toString().padStart(3)}% | $${kw.cpc.toFixed(2).padStart(6)} | ${kw.keyword}`;
}

/** Comparator: highest search volume first. */
function byVolumeDescending(a, b) {
  return b.searchVolume - a.searchVolume;
}

/**
 * Main comparison function
 *
 * Reads the CSV and the filtered list named on the command line, splits the
 * CSV keywords into "kept" (present in the filtered list) and "removed", and
 * prints the comparison report to stdout.
 */
function main() {
  console.log('\nš Keyword Comparison Analysis\n');
  console.log(SEPARATOR);

  // Read data
  console.log(`\nš Reading files...`);
  const allKeywords = readCSV(csvPath);
  const filteredKeywordsList = readFilteredKeywords(txtPath);
  const filteredSet = new Set(filteredKeywordsList);

  console.log(` CSV: ${csvPath}`);
  console.log(` Filtered: ${txtPath}`);

  // Nothing to compare: every ratio below would divide by zero.
  if (allKeywords.length === 0) {
    console.log('\nNo keywords found in CSV - nothing to compare.');
    console.log(`\n${SEPARATOR}\n`);
    return;
  }

  // Split into kept and removed
  const kept = allKeywords.filter(k => filteredSet.has(k.keyword));
  const removed = allKeywords.filter(k => !filteredSet.has(k.keyword));

  // Calculate stats
  const keptStats = calculateStats(kept);
  const removedStats = calculateStats(removed);
  const allStats = calculateStats(allKeywords);

  // Display results
  console.log(`\n${SEPARATOR}`);
  console.log('\nš OVERALL STATISTICS\n');

  console.log(`Total keywords in CSV: ${allStats.count.toLocaleString()}`);
  console.log(
    `Keywords kept: ${keptStats.count.toLocaleString()} (${formatPercent(keptStats.count, allStats.count)}%)`
  );
  console.log(
    `Keywords removed: ${removedStats.count.toLocaleString()} (${formatPercent(removedStats.count, allStats.count)}%)`
  );

  // Infer threshold: one above the largest removed volume, or, when nothing
  // with a positive volume was removed, the smallest kept volume.
  const inferredThreshold =
    removedStats.maxSearchVolume > 0 ? removedStats.maxSearchVolume + 1 : keptStats.minSearchVolume;
  console.log(`\nInferred cutoff threshold: ${inferredThreshold.toLocaleString()} searches/month`);

  // Kept keywords stats
  console.log(`\n${SEPARATOR}`);
  console.log('\nā KEPT KEYWORDS\n');
  console.log(`Count: ${keptStats.count.toLocaleString()}`);
  console.log(
    `Search volume range: ${keptStats.minSearchVolume.toLocaleString()} - ${keptStats.maxSearchVolume.toLocaleString()}`
  );
  console.log(
    `Mean: ${keptStats.meanSearchVolume.toLocaleString()} | Median: ${keptStats.medianSearchVolume.toLocaleString()}`
  );
  console.log(`Total monthly searches: ${keptStats.totalSearchVolume.toLocaleString()}`);

  // Top kept keywords (sorted copy: `kept` itself is left in CSV order)
  const topKept = [...kept].sort(byVolumeDescending).slice(0, TOP_COUNT);
  console.log('\nTop 10 by search volume:');
  for (const kw of topKept) {
    console.log(formatKeywordRow(kw));
  }

  // Removed keywords stats
  console.log(`\n${SEPARATOR}`);
  console.log('\nā REMOVED KEYWORDS\n');
  console.log(`Count: ${removedStats.count.toLocaleString()}`);

  if (removedStats.count > 0) {
    console.log(
      `Search volume range: ${removedStats.minSearchVolume.toLocaleString()} - ${removedStats.maxSearchVolume.toLocaleString()}`
    );
    console.log(
      `Mean: ${removedStats.meanSearchVolume.toLocaleString()} | Median: ${removedStats.medianSearchVolume.toLocaleString()}`
    );
    console.log(`Total monthly searches lost: ${removedStats.totalSearchVolume.toLocaleString()}`);

    // Sorted once and reused for both the top-10 and the full listing.
    const removedByVolume = [...removed].sort(byVolumeDescending);

    // Top removed keywords (potential missed opportunities)
    console.log('\nTop 10 removed (potential missed opportunities):');
    for (const kw of removedByVolume.slice(0, TOP_COUNT)) {
      console.log(formatKeywordRow(kw));
    }

    // Show all removed if requested
    if (showRemoved) {
      console.log(`\n${SEPARATOR}`);
      console.log('\nš ALL REMOVED KEYWORDS\n');
      for (const kw of removedByVolume) {
        console.log(formatKeywordRow(kw));
      }
    }
  } else {
    console.log('No keywords were removed (threshold may be too low).');
  }

  // Impact analysis
  console.log(`\n${SEPARATOR}`);
  console.log('\nš” IMPACT ANALYSIS\n');

  const searchVolumeRetention = formatPercent(
    keptStats.totalSearchVolume,
    allStats.totalSearchVolume
  );

  console.log(`Keywords retained: ${formatPercent(keptStats.count, allStats.count)}%`);
  console.log(`Search volume retained: ${searchVolumeRetention}%`);
  console.log(
    `Average search volume per keyword: ${keptStats.meanSearchVolume.toLocaleString()} (vs ${allStats.meanSearchVolume.toLocaleString()} overall)`
  );

  // Recommendations
  console.log('\nš” RECOMMENDATIONS\n');

  if (keptStats.count < allStats.count * 0.1) {
    console.log('ā ļø Very aggressive filtering (>90% removed)');
    console.log(' Consider lowering the threshold to retain more keywords');
  } else if (keptStats.count > allStats.count * 0.9) {
    console.log('ā ļø Very lenient filtering (<10% removed)');
    console.log(' Consider raising the threshold to focus on high-volume keywords');
  } else {
    console.log('ā Balanced filtering - good keyword/volume retention ratio');
  }

  if (removed.length > 0 && removedStats.maxSearchVolume > HIGH_VOLUME_WARNING) {
    console.log(
      `\nā ļø Some high-volume keywords removed (max: ${removedStats.maxSearchVolume.toLocaleString()})`
    );
    console.log(' Review removed keywords above to avoid missing opportunities');
  }

  console.log(`\n${SEPARATOR}\n`);
}

main();