#!/usr/bin/env node

/**
 * recommend-threshold.js
 *
 * Recommend optimal search volume cutoff thresholds
 *
 * Given a target number of keywords, finds the optimal cutoff threshold
 * that achieves that target while maximizing total search volume.
 *
 * Also provides scenario analysis showing impact of different thresholds.
 *
 * Usage:
 *   node scripts/recommend-threshold.js data/au/businesses-search-volume.csv --target 100
 *   node scripts/recommend-threshold.js data/au/businesses-search-volume.csv --scenarios
 */

import fs from 'fs';
import { parse } from 'csv-parse/sync';

// Parse arguments
const args = process.argv.slice(2);
if (args.length < 1) {
  console.error('\n❌ Usage: node recommend-threshold.js <csv-path> [--target N | --scenarios]');
  console.error(' Examples:');
  console.error(
    ' node recommend-threshold.js data/au/businesses-search-volume.csv --target 100'
  );
  console.error(
    ' node recommend-threshold.js data/au/businesses-search-volume.csv --scenarios\n'
  );
  process.exit(1);
}

const csvPath = args[0];
const targetCount = parseTargetArg(args);
const showScenarios = args.includes('--scenarios');

/**
 * Extract the value following `--target` from the argument list.
 *
 * A missing, non-numeric or non-positive value is reported on stderr and
 * ignored (null is returned), so the script falls back to scenario analysis
 * instead of crashing later on an out-of-range array index.
 *
 * @param {string[]} argv - Command-line arguments (without node/script path).
 * @returns {number|null} Positive integer target, or null when not usable.
 */
function parseTargetArg(argv) {
  const flagIndex = argv.indexOf('--target');
  if (flagIndex === -1) {
    return null;
  }

  const raw = argv[flagIndex + 1];
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value) || value < 1) {
    console.error(
      `\n⚠️ Ignoring invalid --target value: ${raw ?? '(missing)'} (expected a positive integer)`
    );
    return null;
  }

  return value;
}

/**
 * Read and parse CSV file
 *
 * Exits the process with status 1 when the file does not exist.
 *
 * @param {string} csvPath - Path to a CSV with a header row; the columns read
 *   are `keyword`, `search_volume`, `competition` and `cpc_low` / `cpc`.
 * @returns {{keyword: string, searchVolume: number, competition: number, cpc: number}[]}
 *   Keyword rows sorted by search volume, highest first.
 */
function readCSV(csvPath) {
  if (!fs.existsSync(csvPath)) {
    console.error(`\n❌ CSV file not found: ${csvPath}\n`);
    process.exit(1);
  }

  const content = fs.readFileSync(csvPath, 'utf-8');
  const records = parse(content, {
    columns: true,
    skip_empty_lines: true,
  });

  return records
    .map(record => {
      const searchVolume = Number.parseInt(record.search_volume, 10);
      return {
        keyword: record.keyword,
        // A blank or non-numeric volume counts as 0: NaN would make the sort
        // comparator inconsistent and turn every total below into NaN.
        searchVolume: Number.isNaN(searchVolume) ? 0 : searchVolume,
        competition: Number.parseInt(record.competition, 10),
        cpc: Number.parseFloat(record.cpc_low || record.cpc || 0),
      };
    })
    .sort((a, b) => b.searchVolume - a.searchVolume); // Sort descending by search volume
}

/**
 * Find threshold that gives target keyword count
 *
 * @param {{searchVolume: number}[]} keywords - Keywords sorted by search
 *   volume, highest first.
 * @param {number} target - Desired number of keywords (integer >= 1).
 * @returns {{threshold: number, count: number, exactMatch: boolean}}
 *   `threshold` is the minimum search volume to keep, `count` the number of
 *   keywords at or above it, and `exactMatch` whether `count` equals `target`.
 * @throws {RangeError} When `target` is not an integer >= 1.
 */
function findThresholdForTarget(keywords, target) {
  if (!Number.isInteger(target) || target < 1) {
    throw new RangeError(`target must be a positive integer, got ${target}`);
  }

  // Asking for everything (or more than exists): keep all keywords.
  if (target >= keywords.length) {
    return {
      threshold: 0,
      count: keywords.length,
      exactMatch: target === keywords.length,
    };
  }

  // Find the search volume of the keyword at target position
  const threshold = keywords[target - 1].searchVolume;

  // Count how many keywords meet this threshold; ties at the threshold value
  // can push the count above the target.
  const actualCount = keywords.filter(k => k.searchVolume >= threshold).length;

  return {
    threshold,
    count: actualCount,
    exactMatch: actualCount === target,
  };
}

/**
 * Format `part / whole` as a percentage string with one decimal place.
 * Returns '0.0' when `whole` is 0 instead of 'NaN'.
 */
function formatPercent(part, whole) {
  return (whole === 0 ? 0 : (part / whole) * 100).toFixed(1);
}

/**
 * Analyze impact of a threshold
 *
 * @param {{searchVolume: number}[]} keywords - All keywords (any order).
 * @param {number} threshold - Minimum search volume a keyword needs to be kept.
 * @returns {object} Counts, percentages (strings with one decimal place) and
 *   search volume statistics for the keywords at or above the threshold.
 */
function analyzeThreshold(keywords, threshold) {
  const kept = keywords.filter(k => k.searchVolume >= threshold);

  const addVolume = (sum, k) => sum + k.searchVolume;
  const totalSearchVolume = keywords.reduce(addVolume, 0);
  const keptSearchVolume = kept.reduce(addVolume, 0);

  // Computed directly rather than read from the first/last element, so the
  // result does not depend on the caller having pre-sorted the array.
  let minSearchVolume = 0;
  let maxSearchVolume = 0;
  if (kept.length > 0) {
    minSearchVolume = kept.reduce((min, k) => Math.min(min, k.searchVolume), Infinity);
    maxSearchVolume = kept.reduce((max, k) => Math.max(max, k.searchVolume), -Infinity);
  }

  return {
    threshold,
    totalKeywords: keywords.length,
    keptCount: kept.length,
    removedCount: keywords.length - kept.length,
    keptPercent: formatPercent(kept.length, keywords.length),
    keptSearchVolume,
    keptSearchVolumePercent: formatPercent(keptSearchVolume, totalSearchVolume),
    avgSearchVolume: kept.length > 0 ? Math.round(keptSearchVolume / kept.length) : 0,
    minSearchVolume,
    maxSearchVolume,
  };
}

/**
 * Main analysis function
 *
 * Reads the CSV named on the command line and prints percentile reference
 * values, the target-based recommendation (with `--target N`), the scenario
 * table (with `--scenarios`, or when no usable target was given) and the
 * strategic recommendations.
 */
function main() {
  console.log('\n🎯 Threshold Recommendation Analysis\n');
  console.log('='.repeat(80));

  // Read data
  console.log(`\n📂 Reading CSV: ${csvPath}`);
  const keywords = readCSV(csvPath);
  console.log(` Total keywords: ${keywords.length.toLocaleString()}`);

  // Every lookup below indexes into `keywords`, so an empty file cannot be analysed.
  if (keywords.length === 0) {
    console.error(`\n❌ No keyword rows found in CSV: ${csvPath}\n`);
    process.exit(1);
  }

  const totalSearchVolume = keywords.reduce((sum, k) => sum + k.searchVolume, 0);
  console.log(` Total search volume: ${totalSearchVolume.toLocaleString()}`);

  // Calculate percentiles for reference. The list is sorted descending, so
  // the value at index floor(n * f) is the cutoff for the top fraction f.
  const volumeAtTopFraction = fraction =>
    keywords[Math.floor(keywords.length * fraction)].searchVolume;
  const p50 = volumeAtTopFraction(0.5);
  const p75 = volumeAtTopFraction(0.25);
  const p90 = volumeAtTopFraction(0.1);
  const p95 = volumeAtTopFraction(0.05);

  console.log(`\n📊 Percentiles:`);
  console.log(` p50 (top 50%): ${p50.toLocaleString()}`);
  console.log(` p75 (top 25%): ${p75.toLocaleString()}`);
  console.log(` p90 (top 10%): ${p90.toLocaleString()}`);
  console.log(` p95 (top 5%): ${p95.toLocaleString()}`);

  // Target-based recommendation
  if (targetCount !== null) {
    console.log(`\n${'='.repeat(80)}`);
    console.log(`\n🎯 TARGET: ${targetCount} keywords\n`);

    const result = findThresholdForTarget(keywords, targetCount);
    console.log(`Recommended threshold: ${result.threshold.toLocaleString()} searches/month`);
    console.log(`Actual keyword count: ${result.count.toLocaleString()}`);

    if (!result.exactMatch) {
      if (result.count < targetCount) {
        // The file simply has fewer keywords than requested.
        console.log(
          ` ⚠️ Only ${result.count.toLocaleString()} keywords available (fewer than target)`
        );
      } else {
        console.log(` ⚠️ Exact match not possible (multiple keywords at threshold)`);
      }
    }

    // Show impact
    const impact = analyzeThreshold(keywords, result.threshold);
    console.log(`\nImpact:`);
    console.log(` Keywords retained: ${impact.keptPercent}%`);
    console.log(` Search volume retained: ${impact.keptSearchVolumePercent}%`);
    console.log(` Average search volume: ${impact.avgSearchVolume.toLocaleString()}`);
    console.log(
      ` Range: ${impact.minSearchVolume.toLocaleString()} - ${impact.maxSearchVolume.toLocaleString()}`
    );

    console.log(`\nCommand to apply:`);
    console.log(
      ` npm run keywords apply-cutoff -- --csv ${csvPath} --threshold ${result.threshold}`
    );
  }

  // Scenario analysis
  if (showScenarios || targetCount === null) {
    console.log(`\n${'='.repeat(80)}`);
    console.log('\n📈 SCENARIO ANALYSIS\n');

    const scenarios = [
      { name: 'Very Aggressive (p95)', threshold: p95 },
      { name: 'Aggressive (p90)', threshold: p90 },
      { name: 'Balanced (p75)', threshold: p75 },
      { name: 'Lenient (p50)', threshold: p50 },
      { name: 'Top 100', threshold: keywords[99]?.searchVolume ?? 0 },
      { name: 'Top 50', threshold: keywords[49]?.searchVolume ?? 0 },
      { name: 'Top 25', threshold: keywords[24]?.searchVolume ?? 0 },
    ];

    console.log(
      'Threshold'.padEnd(25) +
        'Keywords'.padStart(10) +
        'Search Vol%'.padStart(13) +
        'Avg Vol'.padStart(12)
    );
    console.log('-'.repeat(80));

    for (const scenario of scenarios) {
      // A zero threshold keeps everything (or the list is too short for a
      // "Top N" row), so the row carries no information.
      if (scenario.threshold === 0) continue;

      const impact = analyzeThreshold(keywords, scenario.threshold);
      console.log(
        `${scenario.name.padEnd(25)}${impact.keptCount.toString().padStart(10)}${`${impact.keptSearchVolumePercent}%`.padStart(13)}${impact.avgSearchVolume.toLocaleString().padStart(12)}`
      );
    }

    console.log('\n💡 Interpretation:');
    console.log(' - Keywords: Number of keywords retained');
    console.log(' - Search Vol%: Percentage of total search volume retained');
    console.log(' - Avg Vol: Average monthly searches per retained keyword');
  }

  // Strategic recommendations
  console.log(`\n${'='.repeat(80)}`);
  console.log('\n💡 STRATEGIC RECOMMENDATIONS\n');

  const top10Percent = Math.floor(keywords.length * 0.1);
  const top25Percent = Math.floor(keywords.length * 0.25);
  const top50Percent = Math.floor(keywords.length * 0.5);

  console.log('Based on your goals:\n');

  console.log('1. Maximum Coverage (Cast wide net)');
  console.log(` Target: ${top50Percent} keywords (top 50%)`);
  console.log(` Threshold: ${p50.toLocaleString()} searches/month`);
  console.log(` Use case: Initial testing, exploring new markets\n`);

  console.log('2. Balanced Approach (Quality + quantity)');
  console.log(` Target: ${top25Percent} keywords (top 25%)`);
  console.log(` Threshold: ${p75.toLocaleString()} searches/month`);
  console.log(` Use case: General production use, cost-effective ⭐\n`);

  console.log('3. High-Value Focus (Premium traffic only)');
  console.log(` Target: ${top10Percent} keywords (top 10%)`);
  console.log(` Threshold: ${p90.toLocaleString()} searches/month`);
  console.log(` Use case: Established markets, maximizing ROI\n`);

  console.log('4. Ultra-Selective (Top performers)');
  console.log(` Target: ${Math.floor(keywords.length * 0.05)} keywords (top 5%)`);
  console.log(` Threshold: ${p95.toLocaleString()} searches/month`);
  console.log(` Use case: Saturated markets, limited resources\n`);

  console.log(`${'='.repeat(80)}\n`);
}

main();