/ scripts / generate-all-keywords.js
generate-all-keywords.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Generate All Keyword Combinations
  5   *
  6   * Creates business × region combinations for all countries and loads them into
  7   * the keywords table. Search volumes are set to null initially and will be
  8   * populated later via DataForSEO API.
  9   *
 10   * For India, also generates Hindi keyword combinations.
 11   */
 12  
 13  import { createDatabaseConnection } from '../src/utils/db.js';
 14  import Logger from '../src/utils/logger.js';
 15  import { generateKeywordCombinations } from '../src/utils/keyword-manager.js';
 16  import { getSupportedCountries, getCountryByCode } from '../src/config/countries.js';
 17  import fs from 'fs';
 18  import { parse } from 'csv-parse/sync';
 19  
 20  const logger = new Logger('GenerateAllKeywords');
 21  
 22  /**
 23   * Generate Hindi keyword combinations for India
 24   * Loads from data/in/businesses-hindi-final-filtered.csv and regions-hindi-final-filtered.csv
 25   * Search volumes and priority will be populated later via DataForSEO API
 26   */
 27  function generateHindiCombinations() {
 28    logger.info('Generating Hindi keyword combinations for India...');
 29  
 30    const businessPath = './data/in/businesses-hindi-final-filtered.csv';
 31    const regionPath = './data/in/regions-hindi-final-filtered.csv';
 32  
 33    if (!fs.existsSync(businessPath) || !fs.existsSync(regionPath)) {
 34      logger.warn('Hindi CSV files not found, skipping Hindi combinations');
 35      return [];
 36    }
 37  
 38    // Load businesses
 39    const businessContent = fs.readFileSync(businessPath, 'utf-8');
 40    const businessRows = parse(businessContent, { columns: true, skip_empty_lines: true });
 41    const businesses = businessRows.map(row => row.keyword);
 42  
 43    // Load regions
 44    const regionContent = fs.readFileSync(regionPath, 'utf-8');
 45    const regionRows = parse(regionContent, { columns: true, skip_empty_lines: true });
 46    const regions = regionRows.map(row => row.keyword);
 47  
 48    const combinations = [];
 49  
 50    // Generate combinations (business × region)
 51    for (const business of businesses) {
 52      for (const region of regions) {
 53        combinations.push({
 54          keyword: `${business} ${region}`,
 55          countryCode: 'IN',
 56          googleDomain: 'google.co.in',
 57          searchVolume: null,
 58          priority: null,
 59        });
 60      }
 61    }
 62  
 63    logger.success(
 64      `Generated ${combinations.length} Hindi keyword combinations (${businesses.length} businesses × ${regions.length} regions)`
 65    );
 66  
 67    return combinations;
 68  }
 69  
 70  async function main() {
 71    const args = process.argv.slice(2);
 72    const dryRun = args.includes('--dry-run');
 73    const countryFilter = args.find(arg => arg.startsWith('--country='))?.split('=')[1];
 74    const limitPerCountry = args.find(arg => arg.startsWith('--limit='))?.split('=')[1];
 75    const limit = limitPerCountry ? parseInt(limitPerCountry) : null;
 76  
 77    if (dryRun) {
 78      logger.info('DRY RUN MODE - no database changes will be made\n');
 79    }
 80  
 81    const dbPath = process.env.DATABASE_PATH || './db/sites.db';
 82    const db = createDatabaseConnection(dbPath);
 83    db.pragma('journal_mode = WAL');
 84  
 85    try {
 86      const allCountryCodes = getSupportedCountries();
 87      const countryCodes = countryFilter
 88        ? allCountryCodes.filter(code => code.toLowerCase() === countryFilter.toLowerCase())
 89        : allCountryCodes;
 90  
 91      const countries = countryCodes.map(code => getCountryByCode(code));
 92  
 93      if (countries.length === 0) {
 94        logger.error(`No countries found matching filter: ${countryFilter}`);
 95        process.exit(1);
 96      }
 97  
 98      logger.info(`Generating keywords for ${countries.length} countries...\n`);
 99  
100      let totalInserted = 0;
101      let totalUpdated = 0;
102      let totalSkipped = 0;
103  
104      for (const country of countries) {
105        logger.info(`\n${'='.repeat(60)}`);
106        logger.info(`Processing ${country.name} (${country.code})...`);
107  
108        try {
109          // Generate English/romanized combinations
110          const combinations = generateKeywordCombinations(country.code);
111  
112          // Override search volumes and priority to null (will be populated via DataForSEO later)
113          for (const combo of combinations) {
114            combo.searchVolume = null;
115            combo.priority = null;
116          }
117  
118          // For India, also add Hindi combinations
119          let hindiCombinations = [];
120          if (country.code === 'IN') {
121            hindiCombinations = generateHindiCombinations();
122          }
123  
124          const allCombinations = [...combinations, ...hindiCombinations];
125          const toProcess = limit ? allCombinations.slice(0, limit) : allCombinations;
126  
127          logger.info(
128            `Inserting ${toProcess.length} keywords${limit ? ` (limited from ${allCombinations.length})` : ''}...`
129          );
130  
131          let inserted = 0;
132          let updated = 0;
133          let skipped = 0;
134  
135          if (!dryRun) {
136            const upsertStmt = db.prepare(`
137              INSERT INTO keywords (keyword, country_code, google_domain, search_volume, priority, status)
138              VALUES (?, ?, ?, ?, ?, 'active')
139              ON CONFLICT(keyword, country_code)
140              DO UPDATE SET
141                search_volume = excluded.search_volume,
142                priority = excluded.priority,
143                google_domain = excluded.google_domain
144              WHERE search_volume != excluded.search_volume
145                 OR priority != excluded.priority
146                 OR google_domain != excluded.google_domain
147            `);
148  
149            const checkStmt = db.prepare(
150              'SELECT id, search_volume, priority FROM keywords WHERE keyword = ? AND country_code = ?'
151            );
152  
153            const upsertMany = db.transaction(keywords => {
154              for (const kw of keywords) {
155                const existing = checkStmt.get(kw.keyword, kw.countryCode);
156  
157                if (!existing) {
158                  upsertStmt.run(
159                    kw.keyword,
160                    kw.countryCode,
161                    kw.googleDomain,
162                    kw.searchVolume,
163                    kw.priority
164                  );
165                  inserted++;
166                } else if (
167                  existing.search_volume !== kw.searchVolume ||
168                  existing.priority !== kw.priority
169                ) {
170                  upsertStmt.run(
171                    kw.keyword,
172                    kw.countryCode,
173                    kw.googleDomain,
174                    kw.searchVolume,
175                    kw.priority
176                  );
177                  updated++;
178                } else {
179                  skipped++;
180                }
181              }
182            });
183  
184            upsertMany(toProcess);
185          } else {
186            // Dry run - just count
187            for (const kw of toProcess) {
188              const existing = db
189                .prepare('SELECT id FROM keywords WHERE keyword = ? AND country_code = ?')
190                .get(kw.keyword, kw.countryCode);
191              if (existing) {
192                skipped++;
193              } else {
194                inserted++;
195              }
196            }
197          }
198  
199          logger.success(
200            `✓ ${country.code}: ${inserted} inserted, ${updated} updated, ${skipped} skipped`
201          );
202  
203          totalInserted += inserted;
204          totalUpdated += updated;
205          totalSkipped += skipped;
206        } catch (error) {
207          logger.error(`✗ ${country.code}: ${error.message}`);
208        }
209      }
210  
211      logger.info(`\n${'='.repeat(60)}`);
212      logger.success('Keyword generation complete!');
213      logger.info(`  Total inserted: ${totalInserted}`);
214      logger.info(`  Total updated: ${totalUpdated}`);
215      logger.info(`  Total skipped: ${totalSkipped}`);
216  
217      // Show final keyword count
218      const count = db
219        .prepare("SELECT COUNT(*) as count FROM keywords WHERE status = 'active'")
220        .get();
221      logger.info(`  Database total: ${count.count} active keywords`);
222    } catch (error) {
223      logger.error(`Fatal error: ${error.message}`);
224      console.error(error);
225      process.exit(1);
226    } finally {
227      db.close();
228    }
229  }
230  
231  main();