/ scripts / analyze-keyword-coverage.js
analyze-keyword-coverage.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Analyze keyword coverage across countries and business types
  5   *
  6   * Shows:
  7   * - Total keywords per country
  8   * - Distribution of search volumes
  9   * - Coverage gaps (business types with low keyword counts)
 10   * - Recommendations for improving coverage
 11   *
 12   * Usage:
 13   *   node scripts/analyze-keyword-coverage.js
 14   *   node scripts/analyze-keyword-coverage.js --country AU
 15   *   node scripts/analyze-keyword-coverage.js --type businesses
 16   */
 17  
 18  import fs from 'fs';
 19  import path from 'path';
 20  import { fileURLToPath } from 'url';
 21  
 22  const __filename = fileURLToPath(import.meta.url);
 23  const __dirname = path.dirname(__filename);
 24  const projectRoot = path.join(__dirname, '..');
 25  
 26  // Parse command line arguments
 27  const args = process.argv.slice(2);
 28  const countryFilter = args.includes('--country')
 29    ? args[args.indexOf('--country') + 1]?.toUpperCase()
 30    : null;
 31  const typeFilter = args.includes('--type') ? args[args.indexOf('--type') + 1] : null;
 32  
 33  // Country codes
 34  const COUNTRIES = [
 35    'AU',
 36    'NZ',
 37    'GB',
 38    'US',
 39    'CA',
 40    'IE',
 41    'ZA',
 42    'SG',
 43    'MY',
 44    'PH',
 45    'IN',
 46    'AE',
 47    'ES',
 48    'FR',
 49    'DE',
 50    'IT',
 51    'PT',
 52    'NL',
 53    'BE',
 54    'AT',
 55    'CH',
 56    'SE',
 57    'NO',
 58    'DK',
 59    'FI',
 60  ];
 61  
 62  // Keyword types
 63  const TYPES = ['businesses', 'regions'];
 64  
 65  /**
 66   * Read keywords from file
 67   */
 68  function readKeywords(filePath) {
 69    if (!fs.existsSync(filePath)) {
 70      return [];
 71    }
 72    return fs
 73      .readFileSync(filePath, 'utf-8')
 74      .split('\n')
 75      .map(line => line.trim())
 76      .filter(line => line.length > 0);
 77  }
 78  
 79  /**
 80   * Read CSV and get keyword count + search volume stats
 81   */
 82  function analyzeCSV(csvPath) {
 83    if (!fs.existsSync(csvPath)) {
 84      return null;
 85    }
 86  
 87    const content = fs.readFileSync(csvPath, 'utf-8');
 88    const lines = content.trim().split('\n');
 89  
 90    if (lines.length <= 1) {
 91      return null;
 92    }
 93  
 94    const dataRows = lines.slice(1); // Skip header
 95    const searchVolumes = dataRows
 96      .map(line => {
 97        const match = line.match(/,(\d+),/); // Extract search_volume column
 98        return match ? parseInt(match[1]) : 0;
 99      })
100      .sort((a, b) => a - b);
101  
102    if (searchVolumes.length === 0) {
103      return null;
104    }
105  
106    const total = searchVolumes.reduce((sum, sv) => sum + sv, 0);
107    const mean = Math.round(total / searchVolumes.length);
108    const median = searchVolumes[Math.floor(searchVolumes.length / 2)];
109    const min = searchVolumes[0];
110    const max = searchVolumes[searchVolumes.length - 1];
111  
112    return {
113      count: searchVolumes.length,
114      mean,
115      median,
116      min,
117      max,
118    };
119  }
120  
/**
 * Collect keyword coverage data for one country.
 *
 * For every entry in TYPES this reads `data/<country>/<type>.txt` (the
 * original keyword list) and `data/<country>/<type>-search-volume.csv`
 * under the project root, with the country code lower-cased in the path.
 *
 * @param {string} country - Country code, e.g. `AU`.
 * @returns {{country: string, types: Object<string, {originalCount: number, csvStats: (Object|null), hasCSV: boolean}>}}
 *   The country code plus, per keyword type, the original keyword count, the
 *   CSV statistics (null when unavailable) and whether CSV statistics exist.
 */
function analyzeCountry(country) {
  const countryDir = path.join(projectRoot, 'data', country.toLowerCase());

  const typeEntries = TYPES.map(type => {
    const seedKeywords = readKeywords(path.join(countryDir, `${type}.txt`));
    const volumeStats = analyzeCSV(path.join(countryDir, `${type}-search-volume.csv`));

    const summary = {
      originalCount: seedKeywords.length,
      csvStats: volumeStats,
      hasCSV: Boolean(volumeStats),
    };
    return [type, summary];
  });

  return {
    country,
    types: Object.fromEntries(typeEntries),
  };
}
147  
/**
 * Ratio of generated (CSV) keywords to original keywords.
 *
 * @param {number} csvCount - Number of keywords found in the CSV.
 * @param {number} originalCount - Number of original keywords.
 * @returns {number|null} The ratio, or null when there are no original
 *   keywords to compare against (a division would give Infinity or NaN).
 */
function computeExpansionRatio(csvCount, originalCount) {
  return originalCount > 0 ? csvCount / originalCount : null;
}

/**
 * Main analysis function.
 *
 * Analyzes every selected country (all of COUNTRIES, or only the --country
 * filter), prints a per-country / per-type report, then prints summary totals
 * and recommendations for next steps.
 */
function main() {
  console.log('\n📊 Keyword Coverage Analysis\n');
  console.log('='.repeat(80));

  const countries = countryFilter ? [countryFilter] : COUNTRIES;
  const types = typeFilter ? [typeFilter] : TYPES;

  // Analyze each country
  const allAnalyses = countries.map(country => analyzeCountry(country));

  // Filter by type if specified
  if (typeFilter) {
    console.log(`\nFiltering by type: ${typeFilter}`);
    if (!TYPES.includes(typeFilter)) {
      // Only the types in TYPES are ever analyzed, so the report would be empty.
      console.warn(`Unknown type "${typeFilter}" - expected one of: ${TYPES.join(', ')}`);
    }
  }

  // Display results
  for (const analysis of allAnalyses) {
    console.log(`\n📍 ${analysis.country}`);
    console.log('-'.repeat(80));

    for (const type of types) {
      const data = analysis.types[type];
      if (!data) continue;

      console.log(`\n  ${type.toUpperCase()}`);
      console.log(`    Original keywords: ${data.originalCount}`);

      if (data.hasCSV) {
        const stats = data.csvStats;
        console.log(`    CSV generated: ✅ ${stats.count} unique keywords`);
        console.log(
          `    Search volume range: ${stats.min.toLocaleString()} - ${stats.max.toLocaleString()}`
        );
        console.log(
          `    Mean: ${stats.mean.toLocaleString()} | Median: ${stats.median.toLocaleString()}`
        );

        // Coverage analysis
        const expansionRatio = computeExpansionRatio(stats.count, data.originalCount);
        if (expansionRatio === null) {
          console.log('    Expansion ratio: n/a (no original keywords)');
        } else {
          const displayedRatio = expansionRatio.toFixed(1);
          console.log(`    Expansion ratio: ${displayedRatio}x`);

          // Recommendations - compare the rounded value so the warning always
          // agrees with the ratio printed above.
          if (Number(displayedRatio) < 5) {
            console.log(`    ⚠️  Low expansion - consider reviewing seed keywords`);
          }
        }
      } else {
        console.log(`    CSV generated: ❌ Not found`);
        console.log(
          `    Run: npm run keywords generate-csv -- --type ${type} --country ${analysis.country}`
        );
      }
    }
  }

  // Summary statistics
  console.log(`\n${'='.repeat(80)}`);
  console.log('\n📈 SUMMARY\n');

  let totalOriginal = 0;
  let totalCSV = 0;
  // Counts country/type combinations, not distinct countries.
  let combinationsWithCSV = 0;

  for (const analysis of allAnalyses) {
    for (const type of types) {
      const data = analysis.types[type];
      if (!data) continue;

      totalOriginal += data.originalCount;
      if (data.hasCSV) {
        totalCSV += data.csvStats.count;
        combinationsWithCSV++;
      }
    }
  }

  const totalCombinations = countries.length * types.length;
  const coveragePercent = (combinationsWithCSV / totalCombinations) * 100;
  // Compare the counts directly; the formatted percentage is a string and
  // must not be used for numeric comparisons.
  const hasFullCoverage = combinationsWithCSV === totalCombinations;

  console.log(`Countries analyzed: ${countries.length}`);
  console.log(`Types analyzed: ${types.join(', ')}`);
  console.log(`Total original keywords: ${totalOriginal.toLocaleString()}`);
  console.log(`Total unique keywords (CSV): ${totalCSV.toLocaleString()}`);
  console.log(
    `CSV coverage: ${coveragePercent.toFixed(1)}% (${combinationsWithCSV}/${totalCombinations})`
  );

  const overallExpansion = computeExpansionRatio(totalCSV, totalOriginal);
  if (totalCSV > 0 && overallExpansion !== null) {
    console.log(`Overall expansion ratio: ${overallExpansion.toFixed(1)}x`);
  }

  // Recommendations
  console.log('\n💡 RECOMMENDATIONS\n');

  if (!hasFullCoverage) {
    console.log('1. Generate CSVs for remaining countries:');
    console.log('   npm run keywords generate-csv -- --type businesses');
    console.log('   npm run keywords generate-csv -- --type regions');
  }

  if (totalCSV === 0) {
    console.log('2. Start with one country to test:');
    console.log('   npm run keywords generate-csv -- --type businesses --country AU');
  }

  if (totalCSV > 0 && hasFullCoverage) {
    console.log('✅ All countries have CSV data generated!');
    console.log('   Next: Analyze search volumes and apply cutoffs');
    console.log('   npm run keywords analyze -- --csv data/au/businesses-search-volume.csv');
  }

  console.log(`\n${'='.repeat(80)}\n`);
}

main();