generate-all-keywords.js
1 #!/usr/bin/env node 2 3 /** 4 * Generate All Keyword Combinations 5 * 6 * Creates business × region combinations for all countries and loads them into 7 * the keywords table. Search volumes are set to null initially and will be 8 * populated later via DataForSEO API. 9 * 10 * For India, also generates Hindi keyword combinations. 11 */ 12 13 import { createDatabaseConnection } from '../src/utils/db.js'; 14 import Logger from '../src/utils/logger.js'; 15 import { generateKeywordCombinations } from '../src/utils/keyword-manager.js'; 16 import { getSupportedCountries, getCountryByCode } from '../src/config/countries.js'; 17 import fs from 'fs'; 18 import { parse } from 'csv-parse/sync'; 19 20 const logger = new Logger('GenerateAllKeywords'); 21 22 /** 23 * Generate Hindi keyword combinations for India 24 * Loads from data/in/businesses-hindi-final-filtered.csv and regions-hindi-final-filtered.csv 25 * Search volumes and priority will be populated later via DataForSEO API 26 */ 27 function generateHindiCombinations() { 28 logger.info('Generating Hindi keyword combinations for India...'); 29 30 const businessPath = './data/in/businesses-hindi-final-filtered.csv'; 31 const regionPath = './data/in/regions-hindi-final-filtered.csv'; 32 33 if (!fs.existsSync(businessPath) || !fs.existsSync(regionPath)) { 34 logger.warn('Hindi CSV files not found, skipping Hindi combinations'); 35 return []; 36 } 37 38 // Load businesses 39 const businessContent = fs.readFileSync(businessPath, 'utf-8'); 40 const businessRows = parse(businessContent, { columns: true, skip_empty_lines: true }); 41 const businesses = businessRows.map(row => row.keyword); 42 43 // Load regions 44 const regionContent = fs.readFileSync(regionPath, 'utf-8'); 45 const regionRows = parse(regionContent, { columns: true, skip_empty_lines: true }); 46 const regions = regionRows.map(row => row.keyword); 47 48 const combinations = []; 49 50 // Generate combinations (business × region) 51 for (const business of businesses) { 52 for (const region of regions) { 53 combinations.push({ 54 keyword: `${business} ${region}`, 55 countryCode: 'IN', 56 googleDomain: 'google.co.in', 57 searchVolume: null, 58 priority: null, 59 }); 60 } 61 } 62 63 logger.success( 64 `Generated ${combinations.length} Hindi keyword combinations (${businesses.length} businesses × ${regions.length} regions)` 65 ); 66 67 return combinations; 68 } 69 70 async function main() { 71 const args = process.argv.slice(2); 72 const dryRun = args.includes('--dry-run'); 73 const countryFilter = args.find(arg => arg.startsWith('--country='))?.split('=')[1]; 74 const limitPerCountry = args.find(arg => arg.startsWith('--limit='))?.split('=')[1]; 75 const limit = limitPerCountry ? parseInt(limitPerCountry) : null; 76 77 if (dryRun) { 78 logger.info('DRY RUN MODE - no database changes will be made\n'); 79 } 80 81 const dbPath = process.env.DATABASE_PATH || './db/sites.db'; 82 const db = createDatabaseConnection(dbPath); 83 db.pragma('journal_mode = WAL'); 84 85 try { 86 const allCountryCodes = getSupportedCountries(); 87 const countryCodes = countryFilter 88 ? allCountryCodes.filter(code => code.toLowerCase() === countryFilter.toLowerCase()) 89 : allCountryCodes; 90 91 const countries = countryCodes.map(code => getCountryByCode(code)); 92 93 if (countries.length === 0) { 94 logger.error(`No countries found matching filter: ${countryFilter}`); 95 process.exit(1); 96 } 97 98 logger.info(`Generating keywords for ${countries.length} countries...\n`); 99 100 let totalInserted = 0; 101 let totalUpdated = 0; 102 let totalSkipped = 0; 103 104 for (const country of countries) { 105 logger.info(`\n${'='.repeat(60)}`); 106 logger.info(`Processing ${country.name} (${country.code})...`); 107 108 try { 109 // Generate English/romanized combinations 110 const combinations = generateKeywordCombinations(country.code); 111 112 // Override search volumes and priority to null (will be populated via DataForSEO later) 113 for (const combo of combinations) { 114 combo.searchVolume = null; 115 combo.priority = null; 116 } 117 118 // For India, also add Hindi combinations 119 let hindiCombinations = []; 120 if (country.code === 'IN') { 121 hindiCombinations = generateHindiCombinations(); 122 } 123 124 const allCombinations = [...combinations, ...hindiCombinations]; 125 const toProcess = limit ? allCombinations.slice(0, limit) : allCombinations; 126 127 logger.info( 128 `Inserting ${toProcess.length} keywords${limit ? ` (limited from ${allCombinations.length})` : ''}...` 129 ); 130 131 let inserted = 0; 132 let updated = 0; 133 let skipped = 0; 134 135 if (!dryRun) { 136 const upsertStmt = db.prepare(` 137 INSERT INTO keywords (keyword, country_code, google_domain, search_volume, priority, status) 138 VALUES (?, ?, ?, ?, ?, 'active') 139 ON CONFLICT(keyword, country_code) 140 DO UPDATE SET 141 search_volume = excluded.search_volume, 142 priority = excluded.priority, 143 google_domain = excluded.google_domain 144 WHERE search_volume != excluded.search_volume 145 OR priority != excluded.priority 146 OR google_domain != excluded.google_domain 147 `); 148 149 const checkStmt = db.prepare( 150 'SELECT id, search_volume, priority FROM keywords WHERE keyword = ? AND country_code = ?' 151 ); 152 153 const upsertMany = db.transaction(keywords => { 154 for (const kw of keywords) { 155 const existing = checkStmt.get(kw.keyword, kw.countryCode); 156 157 if (!existing) { 158 upsertStmt.run( 159 kw.keyword, 160 kw.countryCode, 161 kw.googleDomain, 162 kw.searchVolume, 163 kw.priority 164 ); 165 inserted++; 166 } else if ( 167 existing.search_volume !== kw.searchVolume || 168 existing.priority !== kw.priority 169 ) { 170 upsertStmt.run( 171 kw.keyword, 172 kw.countryCode, 173 kw.googleDomain, 174 kw.searchVolume, 175 kw.priority 176 ); 177 updated++; 178 } else { 179 skipped++; 180 } 181 } 182 }); 183 184 upsertMany(toProcess); 185 } else { 186 // Dry run - just count 187 for (const kw of toProcess) { 188 const existing = db 189 .prepare('SELECT id FROM keywords WHERE keyword = ? AND country_code = ?') 190 .get(kw.keyword, kw.countryCode); 191 if (existing) { 192 skipped++; 193 } else { 194 inserted++; 195 } 196 } 197 } 198 199 logger.success( 200 `✓ ${country.code}: ${inserted} inserted, ${updated} updated, ${skipped} skipped` 201 ); 202 203 totalInserted += inserted; 204 totalUpdated += updated; 205 totalSkipped += skipped; 206 } catch (error) { 207 logger.error(`✗ ${country.code}: ${error.message}`); 208 } 209 } 210 211 logger.info(`\n${'='.repeat(60)}`); 212 logger.success('Keyword generation complete!'); 213 logger.info(` Total inserted: ${totalInserted}`); 214 logger.info(` Total updated: ${totalUpdated}`); 215 logger.info(` Total skipped: ${totalSkipped}`); 216 217 // Show final keyword count 218 const count = db 219 .prepare("SELECT COUNT(*) as count FROM keywords WHERE status = 'active'") 220 .get(); 221 logger.info(` Database total: ${count.count} active keywords`); 222 } catch (error) { 223 logger.error(`Fatal error: ${error.message}`); 224 console.error(error); 225 process.exit(1); 226 } finally { 227 db.close(); 228 } 229 } 230 231 main();