/ scripts / recommend-threshold.js
recommend-threshold.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Recommend optimal search volume cutoff thresholds
  5   *
  6   * Given a target number of keywords, finds the optimal cutoff threshold
  7   * that achieves that target while maximizing total search volume.
  8   *
  9   * Also provides scenario analysis showing impact of different thresholds.
 10   *
 11   * Usage:
 12   *   node scripts/recommend-threshold.js data/au/businesses-search-volume.csv --target 100
 13   *   node scripts/recommend-threshold.js data/au/businesses-search-volume.csv --scenarios
 14   */
 15  
 16  import fs from 'fs';
 17  import { parse } from 'csv-parse/sync';
 18  
 19  // Parse arguments
 20  const args = process.argv.slice(2);
 21  if (args.length < 1) {
 22    console.error('\n❌ Usage: node recommend-threshold.js <csv-path> [--target N | --scenarios]');
 23    console.error('   Examples:');
 24    console.error(
 25      '     node recommend-threshold.js data/au/businesses-search-volume.csv --target 100'
 26    );
 27    console.error(
 28      '     node recommend-threshold.js data/au/businesses-search-volume.csv --scenarios\n'
 29    );
 30    process.exit(1);
 31  }
 32  
 33  const csvPath = args[0];
 34  const targetCount = args.includes('--target') ? parseInt(args[args.indexOf('--target') + 1]) : null;
 35  const showScenarios = args.includes('--scenarios');
 36  
 37  /**
 38   * Read and parse CSV file
 39   */
 40  function readCSV(csvPath) {
 41    if (!fs.existsSync(csvPath)) {
 42      console.error(`\n❌ CSV file not found: ${csvPath}\n`);
 43      process.exit(1);
 44    }
 45  
 46    const content = fs.readFileSync(csvPath, 'utf-8');
 47    const records = parse(content, {
 48      columns: true,
 49      skip_empty_lines: true,
 50    });
 51  
 52    return records
 53      .map(record => ({
 54        keyword: record.keyword,
 55        searchVolume: parseInt(record.search_volume),
 56        competition: parseInt(record.competition),
 57        cpc: parseFloat(record.cpc_low || record.cpc || 0),
 58      }))
 59      .sort((a, b) => b.searchVolume - a.searchVolume); // Sort descending by search volume
 60  }
 61  
 62  /**
 63   * Find threshold that gives target keyword count
 64   */
 65  function findThresholdForTarget(keywords, target) {
 66    if (target >= keywords.length) {
 67      return {
 68        threshold: 0,
 69        count: keywords.length,
 70        exactMatch: false,
 71      };
 72    }
 73  
 74    // Find the search volume of the keyword at target position
 75    const targetKeyword = keywords[target - 1];
 76    const threshold = targetKeyword.searchVolume;
 77  
 78    // Count how many keywords meet this threshold
 79    const actualCount = keywords.filter(k => k.searchVolume >= threshold).length;
 80  
 81    return {
 82      threshold,
 83      count: actualCount,
 84      exactMatch: actualCount === target,
 85    };
 86  }
 87  
 88  /**
 89   * Analyze impact of a threshold
 90   */
 91  function analyzeThreshold(keywords, threshold) {
 92    const kept = keywords.filter(k => k.searchVolume >= threshold);
 93    const removed = keywords.filter(k => k.searchVolume < threshold);
 94  
 95    const totalSearchVolume = keywords.reduce((sum, k) => sum + k.searchVolume, 0);
 96    const keptSearchVolume = kept.reduce((sum, k) => sum + k.searchVolume, 0);
 97  
 98    return {
 99      threshold,
100      totalKeywords: keywords.length,
101      keptCount: kept.length,
102      removedCount: removed.length,
103      keptPercent: ((kept.length / keywords.length) * 100).toFixed(1),
104      keptSearchVolume,
105      keptSearchVolumePercent: ((keptSearchVolume / totalSearchVolume) * 100).toFixed(1),
106      avgSearchVolume: kept.length > 0 ? Math.round(keptSearchVolume / kept.length) : 0,
107      minSearchVolume: kept.length > 0 ? kept[kept.length - 1].searchVolume : 0,
108      maxSearchVolume: kept.length > 0 ? kept[0].searchVolume : 0,
109    };
110  }
111  
112  /**
113   * Main analysis function
114   */
115  function main() {
116    console.log('\n🎯 Threshold Recommendation Analysis\n');
117    console.log('='.repeat(80));
118  
119    // Read data
120    console.log(`\n📂 Reading CSV: ${csvPath}`);
121    const keywords = readCSV(csvPath);
122    console.log(`   Total keywords: ${keywords.length.toLocaleString()}`);
123  
124    const totalSearchVolume = keywords.reduce((sum, k) => sum + k.searchVolume, 0);
125    console.log(`   Total search volume: ${totalSearchVolume.toLocaleString()}`);
126  
127    // Calculate percentiles for reference
128    const p50 = keywords[Math.floor(keywords.length * 0.5)].searchVolume;
129    const p75 = keywords[Math.floor(keywords.length * 0.25)].searchVolume;
130    const p90 = keywords[Math.floor(keywords.length * 0.1)].searchVolume;
131    const p95 = keywords[Math.floor(keywords.length * 0.05)].searchVolume;
132  
133    console.log(`\n📊 Percentiles:`);
134    console.log(`   p50 (top 50%): ${p50.toLocaleString()}`);
135    console.log(`   p75 (top 25%): ${p75.toLocaleString()}`);
136    console.log(`   p90 (top 10%): ${p90.toLocaleString()}`);
137    console.log(`   p95 (top 5%):  ${p95.toLocaleString()}`);
138  
139    // Target-based recommendation
140    if (targetCount) {
141      console.log(`\n${'='.repeat(80)}`);
142      console.log(`\n🎯 TARGET: ${targetCount} keywords\n`);
143  
144      const result = findThresholdForTarget(keywords, targetCount);
145      console.log(`Recommended threshold: ${result.threshold.toLocaleString()} searches/month`);
146      console.log(`Actual keyword count: ${result.count.toLocaleString()}`);
147  
148      if (!result.exactMatch) {
149        console.log(`   ⚠️  Exact match not possible (multiple keywords at threshold)`);
150      }
151  
152      // Show impact
153      const impact = analyzeThreshold(keywords, result.threshold);
154      console.log(`\nImpact:`);
155      console.log(`   Keywords retained: ${impact.keptPercent}%`);
156      console.log(`   Search volume retained: ${impact.keptSearchVolumePercent}%`);
157      console.log(`   Average search volume: ${impact.avgSearchVolume.toLocaleString()}`);
158      console.log(
159        `   Range: ${impact.minSearchVolume.toLocaleString()} - ${impact.maxSearchVolume.toLocaleString()}`
160      );
161  
162      console.log(`\nCommand to apply:`);
163      console.log(
164        `   npm run keywords apply-cutoff -- --csv ${csvPath} --threshold ${result.threshold}`
165      );
166    }
167  
168    // Scenario analysis
169    if (showScenarios || !targetCount) {
170      console.log(`\n${'='.repeat(80)}`);
171      console.log('\n📈 SCENARIO ANALYSIS\n');
172  
173      const scenarios = [
174        { name: 'Very Aggressive (p95)', threshold: p95 },
175        { name: 'Aggressive (p90)', threshold: p90 },
176        { name: 'Balanced (p75)', threshold: p75 },
177        { name: 'Lenient (p50)', threshold: p50 },
178        { name: 'Top 100', threshold: keywords[99]?.searchVolume || 0 },
179        { name: 'Top 50', threshold: keywords[49]?.searchVolume || 0 },
180        { name: 'Top 25', threshold: keywords[24]?.searchVolume || 0 },
181      ];
182  
183      console.log(
184        'Threshold'.padEnd(25) +
185          'Keywords'.padStart(10) +
186          'Search Vol%'.padStart(13) +
187          'Avg Vol'.padStart(12)
188      );
189      console.log('-'.repeat(80));
190  
191      for (const scenario of scenarios) {
192        if (scenario.threshold === 0) continue;
193  
194        const impact = analyzeThreshold(keywords, scenario.threshold);
195        console.log(
196          `${scenario.name.padEnd(25)}${impact.keptCount.toString().padStart(10)}${`${impact.keptSearchVolumePercent}%`.padStart(13)}${impact.avgSearchVolume.toLocaleString().padStart(12)}`
197        );
198      }
199  
200      console.log('\n💡 Interpretation:');
201      console.log('   - Keywords: Number of keywords retained');
202      console.log('   - Search Vol%: Percentage of total search volume retained');
203      console.log('   - Avg Vol: Average monthly searches per retained keyword');
204    }
205  
206    // Strategic recommendations
207    console.log(`\n${'='.repeat(80)}`);
208    console.log('\n💡 STRATEGIC RECOMMENDATIONS\n');
209  
210    const top10Percent = Math.floor(keywords.length * 0.1);
211    const top25Percent = Math.floor(keywords.length * 0.25);
212    const top50Percent = Math.floor(keywords.length * 0.5);
213  
214    console.log('Based on your goals:\n');
215  
216    console.log('1. Maximum Coverage (Cast wide net)');
217    console.log(`   Target: ${top50Percent} keywords (top 50%)`);
218    console.log(`   Threshold: ${p50.toLocaleString()} searches/month`);
219    console.log(`   Use case: Initial testing, exploring new markets\n`);
220  
221    console.log('2. Balanced Approach (Quality + quantity)');
222    console.log(`   Target: ${top25Percent} keywords (top 25%)`);
223    console.log(`   Threshold: ${p75.toLocaleString()} searches/month`);
224    console.log(`   Use case: General production use, cost-effective ⭐\n`);
225  
226    console.log('3. High-Value Focus (Premium traffic only)');
227    console.log(`   Target: ${top10Percent} keywords (top 10%)`);
228    console.log(`   Threshold: ${p90.toLocaleString()} searches/month`);
229    console.log(`   Use case: Established markets, maximizing ROI\n`);
230  
231    console.log('4. Ultra-Selective (Top performers)');
232    console.log(`   Target: ${Math.floor(keywords.length * 0.05)} keywords (top 5%)`);
233    console.log(`   Threshold: ${p95.toLocaleString()} searches/month`);
234    console.log(`   Use case: Saturated markets, limited resources\n`);
235  
236    console.log(`${'='.repeat(80)}\n`);
237  }
238  
239  main();