/ scripts / sync-keyword-counters.js
sync-keyword-counters.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Sync Keyword Counters from Sites Table
  5   *
  6   * Rebuilds operational counters in the keywords table by aggregating
  7   * data from the sites table. This ensures counters stay accurate even
  8   * if keywords table is cleared/regenerated.
  9   *
 10   * Counters synced:
 11   * - zenrows_count: Total sites found via ZenRows for this keyword
 12   * - assets_scraped_count: Sites where assets (screenshots) were captured
 13   * - low_scoring_count: Sites with score < 82
 14   * - rescored_count: Sites that have been rescored
 15   * - last_searched_at: When keyword was last used for SERP search
 16   * - last_scraped_at: Same as last_searched_at (when SERP was last scraped)
 17   *
 18   * Usage:
 19   *   node scripts/sync-keyword-counters.js [options]
 20   *
 21   * Options:
 22   *   --keyword "keyword"  Only sync specific keyword
 23   *   --dry-run           Show what would be synced without making changes
 24   *   --verbose           Show detailed progress
 25   */
 26  
 27  import 'dotenv/config';
 28  import { createDatabaseConnection } from '../src/utils/db.js';
 29  import Logger from '../src/utils/logger.js';
 30  
 31  const logger = new Logger('SyncCounters');
 32  
 33  // Parse command line arguments
 34  const args = process.argv.slice(2);
 35  const dryRun = args.includes('--dry-run');
 36  const verbose = args.includes('--verbose');
 37  const keywordIndex = args.indexOf('--keyword');
 38  const keywordFilter = keywordIndex >= 0 ? args[keywordIndex + 1] : null;
 39  
 40  /**
 41   * Aggregate keyword stats from sites table
 42   */
 43  function getKeywordStats(db, keyword = null) {
 44    let query = `
 45      SELECT
 46        keyword,
 47        COUNT(*) as zenrows_count,
 48        SUM(CASE
 49          WHEN status IN ('assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_sent')
 50          THEN 1
 51          ELSE 0
 52        END) as assets_scraped_count,
 53        SUM(CASE
 54          WHEN score > 0 AND score < 82
 55          THEN 1
 56          ELSE 0
 57        END) as low_scoring_count,
 58        SUM(CASE
 59          WHEN rescored_at IS NOT NULL
 60          THEN 1
 61          ELSE 0
 62        END) as rescored_count,
 63        MAX(created_at) as last_searched_at,
 64        MAX(created_at) as last_scraped_at
 65      FROM sites
 66      WHERE keyword IS NOT NULL
 67    `;
 68  
 69    if (keyword) {
 70      query += ` AND keyword = ?`;
 71    }
 72  
 73    query += ` GROUP BY keyword`;
 74  
 75    if (keyword) {
 76      return db.prepare(query).all(keyword);
 77    } else {
 78      return db.prepare(query).all();
 79    }
 80  }
 81  
 82  /**
 83   * Update keyword counters in batch
 84   */
 85  function updateKeywordCounters(db, stats) {
 86    const stmt = db.prepare(`
 87      UPDATE keywords
 88      SET
 89        zenrows_count = ?,
 90        assets_scraped_count = ?,
 91        low_scoring_count = ?,
 92        rescored_count = ?,
 93        last_searched_at = ?,
 94        last_scraped_at = ?,
 95        updated_at = CURRENT_TIMESTAMP
 96      WHERE keyword = ? AND country_code = ?
 97    `);
 98  
 99    const updateMany = db.transaction(updates => {
100      let updated = 0;
101      for (const update of updates) {
102        const result = stmt.run(
103          update.zenrows_count,
104          update.assets_scraped_count,
105          update.low_scoring_count,
106          update.rescored_count,
107          update.last_searched_at,
108          update.last_scraped_at,
109          update.keyword,
110          update.country_code
111        );
112        if (result.changes > 0) {
113          updated++;
114        }
115      }
116      return updated;
117    });
118  
119    return updateMany(stats);
120  }
121  
122  /**
123   * Extract country code from keyword
124   * Keywords are in format: "business region" where region determines country
125   */
126  function inferCountryCode(db, keyword) {
127    // Query sites table to find country_code for this keyword
128    const site = db
129      .prepare(
130        `
131      SELECT country_code
132      FROM sites
133      WHERE keyword = ?
134      AND country_code IS NOT NULL
135      LIMIT 1
136    `
137      )
138      .get(keyword);
139  
140    return site?.country_code || null;
141  }
142  
143  /**
144   * Main sync process
145   */
146  async function main() {
147    logger.info('Keyword Counter Sync');
148    logger.info('='.repeat(60));
149  
150    if (dryRun) logger.warn('⚠️  DRY RUN: No changes will be made');
151    if (keywordFilter) logger.info(`Filtering to keyword: "${keywordFilter}"`);
152  
153    // Open database
154    const dbPath = process.env.DATABASE_PATH || './db/sites.db';
155    const db = createDatabaseConnection(dbPath);
156  
157    // Get aggregated stats from sites table
158    logger.info('Aggregating stats from sites table...');
159    const stats = getKeywordStats(db, keywordFilter);
160  
161    if (stats.length === 0) {
162      logger.warn('No keywords found in sites table');
163      db.close();
164      return;
165    }
166  
167    logger.success(`Found stats for ${stats.length} keywords`);
168  
169    // Enrich with country codes
170    logger.info('Inferring country codes...');
171    const enrichedStats = stats.map(stat => ({
172      ...stat,
173      country_code: inferCountryCode(db, stat.keyword),
174    }));
175  
176    // Filter out keywords without country codes
177    const validStats = enrichedStats.filter(s => s.country_code !== null);
178    const invalidStats = enrichedStats.filter(s => s.country_code === null);
179  
180    if (invalidStats.length > 0) {
181      logger.warn(`Skipping ${invalidStats.length} keywords without country codes`);
182      if (verbose) {
183        invalidStats.slice(0, 5).forEach(s => {
184          logger.debug(`  - ${s.keyword}`);
185        });
186      }
187    }
188  
189    if (validStats.length === 0) {
190      logger.warn('No valid keywords to sync');
191      db.close();
192      return;
193    }
194  
195    // Show sample of what will be synced
196    if (verbose || dryRun) {
197      logger.info('\nSample of stats to sync (first 5):');
198      validStats.slice(0, 5).forEach(stat => {
199        logger.info(`  ${stat.keyword} (${stat.country_code}):`);
200        logger.info(`    zenrows: ${stat.zenrows_count}, assets: ${stat.assets_scraped_count}`);
201        logger.info(`    low_scoring: ${stat.low_scoring_count}, rescored: ${stat.rescored_count}`);
202        logger.info(`    last_searched: ${stat.last_searched_at}`);
203      });
204      logger.info('');
205    }
206  
207    if (dryRun) {
208      logger.info(`[DRY RUN] Would update ${validStats.length} keywords`);
209      db.close();
210      return;
211    }
212  
213    // Update keywords table
214    logger.info('Updating keywords table...');
215    const updated = updateKeywordCounters(db, validStats);
216    logger.success(`Updated ${updated} keywords`);
217  
218    // Summary
219    logger.info('');
220    logger.info('='.repeat(60));
221    logger.info('SUMMARY');
222    logger.info('='.repeat(60));
223    logger.info(`Keywords in sites table: ${stats.length}`);
224    logger.info(`Valid keywords synced: ${updated}`);
225    logger.info(`Skipped (no country): ${invalidStats.length}`);
226  
227    // Show totals
228    const totals = {
229      zenrows: validStats.reduce((sum, s) => sum + s.zenrows_count, 0),
230      assets: validStats.reduce((sum, s) => sum + s.assets_scraped_count, 0),
231      low_scoring: validStats.reduce((sum, s) => sum + s.low_scoring_count, 0),
232      rescored: validStats.reduce((sum, s) => sum + s.rescored_count, 0),
233    };
234  
235    logger.info('');
236    logger.info('Total sites tracked:');
237    logger.info(`  ZenRows scraped: ${totals.zenrows}`);
238    logger.info(`  Assets captured: ${totals.assets}`);
239    logger.info(`  Low scoring: ${totals.low_scoring}`);
240    logger.info(`  Rescored: ${totals.rescored}`);
241  
242    db.close();
243    logger.success('✓ Sync complete!');
244  }
245  
246  main().catch(error => {
247    logger.error(`Fatal error: ${error.message}`);
248    if (error.stack) {
249      logger.debug(error.stack);
250    }
251    process.exit(1);
252  });