/ scripts / backfill-zenrows-count.js
backfill-zenrows-count.js
  1  #!/usr/bin/env node
  2  /**
  3   * Backfill zenrows_count for existing keywords
  4   * Sets zenrows_count = total number of sites in sites table for each keyword
  5   * Purpose: Track cumulative sites returned to prioritize high-performing keywords
  6   */
  7  
  8  import { createDatabaseConnection } from '../src/utils/db.js';
  9  import { config } from 'dotenv';
 10  
 11  // Load environment variables
 12  config();
 13  
 14  const dbPath = process.env.DATABASE_PATH || './db/sites.db';
 15  
 16  function backfillZenRowsCount() {
 17    const db = createDatabaseConnection(dbPath);
 18  
 19    try {
 20      console.log('Starting zenrows_count backfill from sites table...');
 21  
 22      // Show examples before updating (match on keyword AND country_code)
 23      const examples = db
 24        .prepare(
 25          `
 26        SELECT
 27          k.keyword,
 28          k.country_code,
 29          k.zenrows_count as old_count,
 30          COUNT(s.id) as site_count
 31        FROM keywords k
 32        LEFT JOIN sites s ON k.keyword = s.keyword AND k.country_code = s.country_code
 33        GROUP BY k.id, k.keyword, k.country_code, k.zenrows_count
 34        HAVING site_count > 0
 35        LIMIT 5
 36      `
 37        )
 38        .all();
 39  
 40      if (examples.length > 0) {
 41        console.log('\nExample updates:');
 42        examples.forEach(kw => {
 43          console.log(
 44            `  "${kw.keyword}" (${kw.country_code}): ${kw.old_count} → ${kw.site_count} sites`
 45          );
 46        });
 47        console.log();
 48      }
 49  
 50      // Update all keywords in a single SQL statement
 51      // Match on both keyword AND country_code since keywords can exist in multiple countries
 52      // This sets zenrows_count = 0 for keywords with no sites, or actual count for keywords with sites
 53      const result = db
 54        .prepare(
 55          `
 56        UPDATE keywords
 57        SET zenrows_count = (
 58          SELECT COUNT(*)
 59          FROM sites s
 60          WHERE s.keyword = keywords.keyword
 61            AND s.country_code = keywords.country_code
 62        )
 63      `
 64        )
 65        .run();
 66  
 67      console.log(`✓ Successfully backfilled zenrows_count for ${result.changes} keywords`);
 68  
 69      // Show summary
 70      const summary = db
 71        .prepare(
 72          `
 73        SELECT
 74          COUNT(*) as total_keywords,
 75          SUM(search_count) as total_searches,
 76          SUM(zenrows_count) as total_zenrows_calls,
 77          COUNT(CASE WHEN search_count > 0 THEN 1 END) as searched_keywords
 78        FROM keywords
 79      `
 80        )
 81        .get();
 82  
 83      const siteCount = db.prepare(`SELECT COUNT(*) as total FROM sites`).get();
 84  
 85      console.log('\nSummary:');
 86      console.log(`  Total keywords: ${summary.total_keywords}`);
 87      console.log(`  Keywords searched: ${summary.searched_keywords}`);
 88      console.log(`  Total sites: ${siteCount.total}`);
 89      console.log(`  Total search_count: ${summary.total_searches}`);
 90      console.log(`  Total zenrows_count: ${summary.total_zenrows_calls}`);
 91      console.log(
 92        `  Average sites per keyword: ${(siteCount.total / summary.searched_keywords).toFixed(1)}`
 93      );
 94    } catch (err) {
 95      console.error('Backfill failed:', err.message);
 96      throw err;
 97    } finally {
 98      db.close();
 99    }
100  }
101  
102  backfillZenRowsCount();