// analyze-keyword-coverage.js
/**
 * Analyze keyword coverage across countries and business types
 *
 * Shows:
 * - Total keywords per country
 * - Distribution of search volumes
 * - Coverage gaps (business types with low keyword counts)
 * - Recommendations for improving coverage
 *
 * Usage:
 *   node scripts/analyze-keyword-coverage.js
 *   node scripts/analyze-keyword-coverage.js --country AU
 *   node scripts/analyze-keyword-coverage.js --type businesses
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const projectRoot = path.join(__dirname, '..');

/**
 * Return the value following `flag` in `argv`.
 *
 * @param {string[]} argv - Command line arguments (without node/script path).
 * @param {string} flag - Flag to look up, e.g. `--country`.
 * @returns {string|null} The value, or null when the flag is absent, is the
 *   last argument, or is directly followed by another `--flag`.
 */
function readFlagValue(argv, flag) {
  const index = argv.indexOf(flag);
  if (index === -1) {
    return null;
  }
  const value = argv[index + 1];
  if (value === undefined || value.startsWith('--')) {
    return null;
  }
  return value;
}

// Parse command line arguments
const args = process.argv.slice(2);
const countryFilter = readFlagValue(args, '--country')?.toUpperCase() ?? null;
const typeFilter = readFlagValue(args, '--type');

// Country codes
const COUNTRIES = [
  'AU',
  'NZ',
  'GB',
  'US',
  'CA',
  'IE',
  'ZA',
  'SG',
  'MY',
  'PH',
  'IN',
  'AE',
  'ES',
  'FR',
  'DE',
  'IT',
  'PT',
  'NL',
  'BE',
  'AT',
  'CH',
  'SE',
  'NO',
  'DK',
  'FI',
];

// Keyword types
const TYPES = ['businesses', 'regions'];

// Expansion ratios (rounded to one decimal) below this trigger a warning.
const LOW_EXPANSION_THRESHOLD = 5;

// Status glyphs used in the report.
// NOTE(review): the string literals in the reviewed copy of this file were
// mis-encoded (UTF-8 emoji decoded as a single-byte charset). `warning` and
// `tip` decode unambiguously; the remaining glyphs are best-effort
// reconstructions - confirm against the intended output.
const ICONS = Object.freeze({
  report: '📊',
  country: '📍',
  ok: '✅',
  missing: '❌',
  warning: '⚠️',
  summary: '📈',
  tip: '💡',
});

/**
 * Read keywords from file
 *
 * @param {string} filePath - Path to a text file with one keyword per line.
 * @returns {string[]} Trimmed, non-empty lines; an empty array when the file
 *   does not exist.
 */
function readKeywords(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }
  return fs
    .readFileSync(filePath, 'utf-8')
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

/**
 * Read CSV and get keyword count + search volume stats
 *
 * The first line is treated as a header and skipped. The search volume of a
 * row is the first field that consists only of digits and is enclosed by
 * commas on both sides; rows without such a field count as volume 0.
 * NOTE(review): this assumes search_volume is never the first or last column -
 * confirm against the CSV generator.
 *
 * @param {string} csvPath - Path to the search-volume CSV.
 * @returns {{count: number, mean: number, median: number, min: number, max: number}|null}
 *   Statistics over all data rows (mean is rounded to an integer), or null
 *   when the file is missing or has no data rows.
 */
function analyzeCSV(csvPath) {
  if (!fs.existsSync(csvPath)) {
    return null;
  }

  const content = fs.readFileSync(csvPath, 'utf-8');
  const dataRows = content
    .trim()
    .split(/\r?\n/) // tolerate Windows line endings
    .slice(1) // Skip header
    .filter((line) => line.trim().length > 0); // blank rows are not keywords

  if (dataRows.length === 0) {
    return null;
  }

  const searchVolumes = dataRows
    .map((line) => {
      const match = line.match(/,(\d+),/); // Extract search_volume column
      return match ? Number.parseInt(match[1], 10) : 0;
    })
    .sort((a, b) => a - b);

  const count = searchVolumes.length;
  const total = searchVolumes.reduce((sum, sv) => sum + sv, 0);
  const middle = Math.floor(count / 2);
  // Even-sized samples have no single middle element: average the two
  // central values instead of picking the upper one.
  const median =
    count % 2 === 1
      ? searchVolumes[middle]
      : (searchVolumes[middle - 1] + searchVolumes[middle]) / 2;

  return {
    count,
    mean: Math.round(total / count),
    median,
    min: searchVolumes[0],
    max: searchVolumes[count - 1],
  };
}

/**
 * Analyze coverage for a country
 *
 * Looks in `<projectRoot>/data/<country lowercased>/` for `<type>.txt` and
 * `<type>-search-volume.csv` for every entry of TYPES.
 *
 * @param {string} country - Country code, e.g. `AU`.
 * @returns {{country: string, types: Object<string, {originalCount: number, csvStats: object|null, hasCSV: boolean}>}}
 */
function analyzeCountry(country) {
  const countryDir = path.join(projectRoot, 'data', country.toLowerCase());
  const analysis = {
    country,
    types: {},
  };

  for (const type of TYPES) {
    const txtFile = path.join(countryDir, `${type}.txt`);
    const csvFile = path.join(countryDir, `${type}-search-volume.csv`);

    const originalKeywords = readKeywords(txtFile);
    const csvStats = analyzeCSV(csvFile);

    analysis.types[type] = {
      originalCount: originalKeywords.length,
      csvStats,
      hasCSV: csvStats !== null,
    };
  }

  return analysis;
}

/**
 * Divide two counts, guarding against a zero denominator.
 *
 * @param {number} numerator
 * @param {number} denominator
 * @returns {number|null} The ratio, or null when `denominator` is not positive.
 */
function computeRatio(numerator, denominator) {
  return denominator > 0 ? numerator / denominator : null;
}

/**
 * Aggregate totals over all analysed country/type combinations.
 *
 * @param {Array<{types: object}>} analyses - Results of analyzeCountry.
 * @param {string[]} types - Keyword types included in the report.
 * @returns {{totalOriginal: number, totalCSV: number, datasetsWithCSV: number, totalDatasets: number, coveragePercent: number}}
 */
function summarizeCoverage(analyses, types) {
  let totalOriginal = 0;
  let totalCSV = 0;
  let datasetsWithCSV = 0;

  for (const analysis of analyses) {
    for (const type of types) {
      const data = analysis.types[type];
      if (!data) continue;

      totalOriginal += data.originalCount;
      if (data.hasCSV) {
        totalCSV += data.csvStats.count;
        datasetsWithCSV += 1;
      }
    }
  }

  const totalDatasets = analyses.length * types.length;
  const coveragePercent = totalDatasets > 0 ? (datasetsWithCSV / totalDatasets) * 100 : 0;

  return { totalOriginal, totalCSV, datasetsWithCSV, totalDatasets, coveragePercent };
}

/**
 * Build the recommendation lines for a coverage summary.
 *
 * Completeness is decided on the integer counts rather than on a formatted
 * percentage, so the "all done" message is actually reachable.
 *
 * @param {{totalCSV: number, datasetsWithCSV: number, totalDatasets: number}} summary
 * @returns {string[]} Lines to print, in order.
 */
function buildRecommendations({ totalCSV, datasetsWithCSV, totalDatasets }) {
  const lines = [];
  const isComplete = totalDatasets > 0 && datasetsWithCSV === totalDatasets;

  if (!isComplete) {
    lines.push(
      '1. Generate CSVs for remaining countries:',
      ' npm run keywords generate-csv -- --type businesses',
      ' npm run keywords generate-csv -- --type regions'
    );
  }

  if (totalCSV === 0) {
    lines.push(
      '2. Start with one country to test:',
      ' npm run keywords generate-csv -- --type businesses --country AU'
    );
  }

  if (totalCSV > 0 && isComplete) {
    lines.push(
      `${ICONS.ok} All countries have CSV data generated!`,
      ' Next: Analyze search volumes and apply cutoffs',
      ' npm run keywords analyze -- --csv data/au/businesses-search-volume.csv'
    );
  }

  return lines;
}

/**
 * Print the per-type report for one country.
 *
 * @param {{country: string, types: object}} analysis - Result of analyzeCountry.
 * @param {string[]} types - Keyword types to display.
 */
function printCountryReport(analysis, types) {
  console.log(`\n${ICONS.country} ${analysis.country}`);
  console.log('-'.repeat(80));

  for (const type of types) {
    const data = analysis.types[type];
    if (!data) continue;

    console.log(`\n ${type.toUpperCase()}`);
    console.log(` Original keywords: ${data.originalCount}`);

    if (!data.hasCSV) {
      console.log(` CSV generated: ${ICONS.missing} Not found`);
      console.log(
        ` Run: npm run keywords generate-csv -- --type ${type} --country ${analysis.country}`
      );
      continue;
    }

    const stats = data.csvStats;
    console.log(` CSV generated: ${ICONS.ok} ${stats.count} unique keywords`);
    console.log(
      ` Search volume range: ${stats.min.toLocaleString()} - ${stats.max.toLocaleString()}`
    );
    console.log(
      ` Mean: ${stats.mean.toLocaleString()} | Median: ${stats.median.toLocaleString()}`
    );

    // Coverage analysis
    const ratio = computeRatio(stats.count, data.originalCount);
    if (ratio === null) {
      console.log(' Expansion ratio: n/a (no original keywords)');
      continue;
    }
    const displayed = ratio.toFixed(1);
    console.log(` Expansion ratio: ${displayed}x`);

    // Recommendations: compare the rounded value so the warning always
    // agrees with the number shown above.
    if (Number(displayed) < LOW_EXPANSION_THRESHOLD) {
      console.log(` ${ICONS.warning} Low expansion - consider reviewing seed keywords`);
    }
  }
}

/**
 * Print the summary statistics section.
 *
 * @param {string[]} countries - Countries included in the report.
 * @param {string[]} types - Keyword types included in the report.
 * @param {ReturnType<typeof summarizeCoverage>} summary
 */
function printSummary(countries, types, summary) {
  const { totalOriginal, totalCSV, datasetsWithCSV, totalDatasets, coveragePercent } = summary;

  console.log(`Countries analyzed: ${countries.length}`);
  console.log(`Types analyzed: ${types.join(', ')}`);
  console.log(`Total original keywords: ${totalOriginal.toLocaleString()}`);
  console.log(`Total unique keywords (CSV): ${totalCSV.toLocaleString()}`);
  console.log(
    `CSV coverage: ${coveragePercent.toFixed(1)}% (${datasetsWithCSV}/${totalDatasets})`
  );

  const overallRatio = computeRatio(totalCSV, totalOriginal);
  if (totalCSV > 0 && overallRatio !== null) {
    console.log(`Overall expansion ratio: ${overallRatio.toFixed(1)}x`);
  }
}

/**
 * Main analysis function
 *
 * Analyses the selected countries/types, prints a per-country report, a
 * summary and recommendations to stdout.
 */
function main() {
  console.log(`\n${ICONS.report} Keyword Coverage Analysis\n`);
  console.log('='.repeat(80));

  const countries = countryFilter ? [countryFilter] : COUNTRIES;
  const types = typeFilter ? [typeFilter] : TYPES;

  // analyzeCountry only produces data for entries of TYPES, so any other
  // value yields an empty report; say so instead of failing silently.
  if (typeFilter && !TYPES.includes(typeFilter)) {
    console.warn(`Unknown type "${typeFilter}". Expected one of: ${TYPES.join(', ')}`);
  }

  // Analyze each country
  const allAnalyses = countries.map((country) => analyzeCountry(country));

  // Filter by type if specified
  if (typeFilter) {
    console.log(`\nFiltering by type: ${typeFilter}`);
  }

  // Display results
  for (const analysis of allAnalyses) {
    printCountryReport(analysis, types);
  }

  // Summary statistics
  console.log(`\n${'='.repeat(80)}`);
  console.log(`\n${ICONS.summary} SUMMARY\n`);

  const summary = summarizeCoverage(allAnalyses, types);
  printSummary(countries, types, summary);

  // Recommendations
  console.log(`\n${ICONS.tip} RECOMMENDATIONS\n`);
  for (const line of buildRecommendations(summary)) {
    console.log(line);
  }

  console.log(`\n${'='.repeat(80)}\n`);
}

main();