/ scripts / compare-keywords.js
compare-keywords.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Compare original keywords vs filtered keywords after applying cutoff
  5   *
  6   * Shows:
  7   * - What keywords were kept vs removed
  8   * - Search volume statistics for each group
  9   * - Impact of cutoff threshold on keyword count
 10   * - Top removed keywords (high potential missed opportunities)
 11   *
 12   * Usage:
 13   *   node scripts/compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt
 14   *   node scripts/compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt --show-removed
 15   */
 16  
 17  import fs from 'fs';
 18  import { parse } from 'csv-parse/sync';
 19  
 20  // Parse arguments
 21  const args = process.argv.slice(2);
 22  if (args.length < 2) {
 23    console.error(
 24      '\nāŒ Usage: node compare-keywords.js <csv-path> <filtered-txt-path> [--show-removed]'
 25    );
 26    console.error(
 27      '   Example: node compare-keywords.js data/au/businesses-search-volume.csv data/au/businesses.txt\n'
 28    );
 29    process.exit(1);
 30  }
 31  
 32  const csvPath = args[0];
 33  const txtPath = args[1];
 34  const showRemoved = args.includes('--show-removed');
 35  
 36  /**
 37   * Read and parse CSV file
 38   */
 39  function readCSV(csvPath) {
 40    if (!fs.existsSync(csvPath)) {
 41      console.error(`\nāŒ CSV file not found: ${csvPath}\n`);
 42      process.exit(1);
 43    }
 44  
 45    const content = fs.readFileSync(csvPath, 'utf-8');
 46    const records = parse(content, {
 47      columns: true,
 48      skip_empty_lines: true,
 49    });
 50  
 51    return records.map(record => ({
 52      keyword: record.keyword,
 53      searchVolume: parseInt(record.search_volume),
 54      competition: parseInt(record.competition),
 55      cpc: parseFloat(record.cpc_low || record.cpc || 0),
 56    }));
 57  }
 58  
 59  /**
 60   * Read filtered keywords from text file
 61   */
 62  function readFilteredKeywords(txtPath) {
 63    if (!fs.existsSync(txtPath)) {
 64      console.error(`\nāŒ Filtered keywords file not found: ${txtPath}\n`);
 65      process.exit(1);
 66    }
 67  
 68    return fs
 69      .readFileSync(txtPath, 'utf-8')
 70      .split('\n')
 71      .map(line => line.trim())
 72      .filter(line => line.length > 0);
 73  }
 74  
 75  /**
 76   * Calculate statistics for a set of keywords
 77   */
 78  function calculateStats(keywords) {
 79    if (keywords.length === 0) {
 80      return {
 81        count: 0,
 82        totalSearchVolume: 0,
 83        meanSearchVolume: 0,
 84        medianSearchVolume: 0,
 85        minSearchVolume: 0,
 86        maxSearchVolume: 0,
 87      };
 88    }
 89  
 90    const searchVolumes = keywords.map(k => k.searchVolume).sort((a, b) => a - b);
 91    const totalSearchVolume = searchVolumes.reduce((sum, sv) => sum + sv, 0);
 92  
 93    return {
 94      count: keywords.length,
 95      totalSearchVolume,
 96      meanSearchVolume: Math.round(totalSearchVolume / keywords.length),
 97      medianSearchVolume: searchVolumes[Math.floor(searchVolumes.length / 2)],
 98      minSearchVolume: searchVolumes[0],
 99      maxSearchVolume: searchVolumes[searchVolumes.length - 1],
100    };
101  }
102  
/**
 * Format a single keyword as one fixed-width report row:
 * search volume | competition % | CPC | keyword text.
 */
function formatKeywordRow(kw) {
  const volume = kw.searchVolume.toLocaleString().padStart(10);
  const competition = kw.competition.toString().padStart(3);
  const cpc = kw.cpc.toFixed(2).padStart(6);
  return `  ${volume} | ${competition}% | $${cpc} | ${kw.keyword}`;
}

/**
 * Express `part` as a percentage of `whole`, with one decimal place.
 */
function formatShare(part, whole) {
  return ((part / whole) * 100).toFixed(1);
}

/**
 * Main comparison function.
 *
 * Reads the CSV and the filtered keyword list, splits the CSV keywords into
 * "kept" (present in the filtered list) and "removed" (absent from it), and
 * prints overall counts, an inferred cutoff threshold, per-group search-volume
 * statistics, the top 10 keywords of each group, an impact summary and
 * recommendations. When `showRemoved` is set, every removed keyword is listed.
 *
 * An empty CSV ends the report early with a notice, because every percentage
 * below would otherwise divide by zero.
 */
function main() {
  const separator = '='.repeat(80);
  const byVolumeDesc = (a, b) => b.searchVolume - a.searchVolume;

  console.log('\nšŸ” Keyword Comparison Analysis\n');
  console.log(separator);

  // Read data
  console.log(`\nšŸ“‚ Reading files...`);
  const allKeywords = readCSV(csvPath);
  const filteredKeywordsList = readFilteredKeywords(txtPath);
  const filteredSet = new Set(filteredKeywordsList);

  console.log(`   CSV: ${csvPath}`);
  console.log(`   Filtered: ${txtPath}`);

  if (allKeywords.length === 0) {
    console.log('\nNo keywords found in CSV - nothing to compare.');
    console.log(`\n${separator}\n`);
    return;
  }

  // Split into kept and removed
  const kept = allKeywords.filter(k => filteredSet.has(k.keyword));
  const removed = allKeywords.filter(k => !filteredSet.has(k.keyword));

  // Calculate stats
  const keptStats = calculateStats(kept);
  const removedStats = calculateStats(removed);
  const allStats = calculateStats(allKeywords);

  // Sort each group once, on a copy, and reuse the result for every listing.
  const sortedKept = [...kept].sort(byVolumeDesc);
  const sortedRemoved = [...removed].sort(byVolumeDesc);

  // Display results
  console.log(`\n${separator}`);
  console.log('\nšŸ“Š OVERALL STATISTICS\n');

  console.log(`Total keywords in CSV: ${allStats.count.toLocaleString()}`);
  console.log(
    `Keywords kept: ${keptStats.count.toLocaleString()} (${formatShare(keptStats.count, allStats.count)}%)`
  );
  console.log(
    `Keywords removed: ${removedStats.count.toLocaleString()} (${formatShare(removedStats.count, allStats.count)}%)`
  );

  // Infer threshold: one above the largest removed volume, or, when no
  // removed keyword has a positive volume, the smallest kept volume.
  const inferredThreshold =
    removedStats.maxSearchVolume > 0 ? removedStats.maxSearchVolume + 1 : keptStats.minSearchVolume;
  console.log(`\nInferred cutoff threshold: ${inferredThreshold.toLocaleString()} searches/month`);

  // Kept keywords stats
  console.log(`\n${separator}`);
  console.log('\nāœ… KEPT KEYWORDS\n');
  console.log(`Count: ${keptStats.count.toLocaleString()}`);
  console.log(
    `Search volume range: ${keptStats.minSearchVolume.toLocaleString()} - ${keptStats.maxSearchVolume.toLocaleString()}`
  );
  console.log(
    `Mean: ${keptStats.meanSearchVolume.toLocaleString()} | Median: ${keptStats.medianSearchVolume.toLocaleString()}`
  );
  console.log(`Total monthly searches: ${keptStats.totalSearchVolume.toLocaleString()}`);

  // Top kept keywords
  console.log('\nTop 10 by search volume:');
  for (const kw of sortedKept.slice(0, 10)) {
    console.log(formatKeywordRow(kw));
  }

  // Removed keywords stats
  console.log(`\n${separator}`);
  console.log('\nāŒ REMOVED KEYWORDS\n');
  console.log(`Count: ${removedStats.count.toLocaleString()}`);

  if (removedStats.count > 0) {
    console.log(
      `Search volume range: ${removedStats.minSearchVolume.toLocaleString()} - ${removedStats.maxSearchVolume.toLocaleString()}`
    );
    console.log(
      `Mean: ${removedStats.meanSearchVolume.toLocaleString()} | Median: ${removedStats.medianSearchVolume.toLocaleString()}`
    );
    console.log(`Total monthly searches lost: ${removedStats.totalSearchVolume.toLocaleString()}`);

    // Top removed keywords (potential missed opportunities)
    console.log('\nTop 10 removed (potential missed opportunities):');
    for (const kw of sortedRemoved.slice(0, 10)) {
      console.log(formatKeywordRow(kw));
    }

    // Show all removed if requested
    if (showRemoved) {
      console.log(`\n${separator}`);
      console.log('\nšŸ“‹ ALL REMOVED KEYWORDS\n');
      for (const kw of sortedRemoved) {
        console.log(formatKeywordRow(kw));
      }
    }
  } else {
    console.log('No keywords were removed (threshold may be too low).');
  }

  // Impact analysis
  console.log(`\n${separator}`);
  console.log('\nšŸ’” IMPACT ANALYSIS\n');

  const searchVolumeRetention =
    allStats.totalSearchVolume > 0
      ? formatShare(keptStats.totalSearchVolume, allStats.totalSearchVolume)
      : 0;

  console.log(`Keywords retained: ${formatShare(keptStats.count, allStats.count)}%`);
  console.log(`Search volume retained: ${searchVolumeRetention}%`);
  console.log(
    `Average search volume per keyword: ${keptStats.meanSearchVolume.toLocaleString()} (vs ${allStats.meanSearchVolume.toLocaleString()} overall)`
  );

  // Recommendations
  console.log('\nšŸ’” RECOMMENDATIONS\n');

  if (keptStats.count < allStats.count * 0.1) {
    console.log('āš ļø  Very aggressive filtering (>90% removed)');
    console.log('   Consider lowering the threshold to retain more keywords');
  } else if (keptStats.count > allStats.count * 0.9) {
    console.log('āš ļø  Very lenient filtering (<10% removed)');
    console.log('   Consider raising the threshold to focus on high-volume keywords');
  } else {
    console.log('āœ… Balanced filtering - good keyword/volume retention ratio');
  }

  if (removed.length > 0 && removedStats.maxSearchVolume > 5000) {
    console.log(
      `\nāš ļø  Some high-volume keywords removed (max: ${removedStats.maxSearchVolume.toLocaleString()})`
    );
    console.log('   Review removed keywords above to avoid missing opportunities');
  }

  console.log(`\n${separator}\n`);
}

// Script entry point: run the comparison as soon as the file is executed.
main();