sync-keyword-counters.js
1 #!/usr/bin/env node 2 3 /** 4 * Sync Keyword Counters from Sites Table 5 * 6 * Rebuilds operational counters in the keywords table by aggregating 7 * data from the sites table. This ensures counters stay accurate even 8 * if keywords table is cleared/regenerated. 9 * 10 * Counters synced: 11 * - zenrows_count: Total sites found via ZenRows for this keyword 12 * - assets_scraped_count: Sites where assets (screenshots) were captured 13 * - low_scoring_count: Sites with score < 82 14 * - rescored_count: Sites that have been rescored 15 * - last_searched_at: When keyword was last used for SERP search 16 * - last_scraped_at: Same as last_searched_at (when SERP was last scraped) 17 * 18 * Usage: 19 * node scripts/sync-keyword-counters.js [options] 20 * 21 * Options: 22 * --keyword "keyword" Only sync specific keyword 23 * --dry-run Show what would be synced without making changes 24 * --verbose Show detailed progress 25 */ 26 27 import 'dotenv/config'; 28 import { createDatabaseConnection } from '../src/utils/db.js'; 29 import Logger from '../src/utils/logger.js'; 30 31 const logger = new Logger('SyncCounters'); 32 33 // Parse command line arguments 34 const args = process.argv.slice(2); 35 const dryRun = args.includes('--dry-run'); 36 const verbose = args.includes('--verbose'); 37 const keywordIndex = args.indexOf('--keyword'); 38 const keywordFilter = keywordIndex >= 0 ? args[keywordIndex + 1] : null; 39 40 /** 41 * Aggregate keyword stats from sites table 42 */ 43 function getKeywordStats(db, keyword = null) { 44 let query = ` 45 SELECT 46 keyword, 47 COUNT(*) as zenrows_count, 48 SUM(CASE 49 WHEN status IN ('assets_captured', 'prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_sent') 50 THEN 1 51 ELSE 0 52 END) as assets_scraped_count, 53 SUM(CASE 54 WHEN score > 0 AND score < 82 55 THEN 1 56 ELSE 0 57 END) as low_scoring_count, 58 SUM(CASE 59 WHEN rescored_at IS NOT NULL 60 THEN 1 61 ELSE 0 62 END) as rescored_count, 63 MAX(created_at) as last_searched_at, 64 MAX(created_at) as last_scraped_at 65 FROM sites 66 WHERE keyword IS NOT NULL 67 `; 68 69 if (keyword) { 70 query += ` AND keyword = ?`; 71 } 72 73 query += ` GROUP BY keyword`; 74 75 if (keyword) { 76 return db.prepare(query).all(keyword); 77 } else { 78 return db.prepare(query).all(); 79 } 80 } 81 82 /** 83 * Update keyword counters in batch 84 */ 85 function updateKeywordCounters(db, stats) { 86 const stmt = db.prepare(` 87 UPDATE keywords 88 SET 89 zenrows_count = ?, 90 assets_scraped_count = ?, 91 low_scoring_count = ?, 92 rescored_count = ?, 93 last_searched_at = ?, 94 last_scraped_at = ?, 95 updated_at = CURRENT_TIMESTAMP 96 WHERE keyword = ? AND country_code = ? 97 `); 98 99 const updateMany = db.transaction(updates => { 100 let updated = 0; 101 for (const update of updates) { 102 const result = stmt.run( 103 update.zenrows_count, 104 update.assets_scraped_count, 105 update.low_scoring_count, 106 update.rescored_count, 107 update.last_searched_at, 108 update.last_scraped_at, 109 update.keyword, 110 update.country_code 111 ); 112 if (result.changes > 0) { 113 updated++; 114 } 115 } 116 return updated; 117 }); 118 119 return updateMany(stats); 120 } 121 122 /** 123 * Extract country code from keyword 124 * Keywords are in format: "business region" where region determines country 125 */ 126 function inferCountryCode(db, keyword) { 127 // Query sites table to find country_code for this keyword 128 const site = db 129 .prepare( 130 ` 131 SELECT country_code 132 FROM sites 133 WHERE keyword = ? 134 AND country_code IS NOT NULL 135 LIMIT 1 136 ` 137 ) 138 .get(keyword); 139 140 return site?.country_code || null; 141 } 142 143 /** 144 * Main sync process 145 */ 146 async function main() { 147 logger.info('Keyword Counter Sync'); 148 logger.info('='.repeat(60)); 149 150 if (dryRun) logger.warn('⚠️ DRY RUN: No changes will be made'); 151 if (keywordFilter) logger.info(`Filtering to keyword: "${keywordFilter}"`); 152 153 // Open database 154 const dbPath = process.env.DATABASE_PATH || './db/sites.db'; 155 const db = createDatabaseConnection(dbPath); 156 157 // Get aggregated stats from sites table 158 logger.info('Aggregating stats from sites table...'); 159 const stats = getKeywordStats(db, keywordFilter); 160 161 if (stats.length === 0) { 162 logger.warn('No keywords found in sites table'); 163 db.close(); 164 return; 165 } 166 167 logger.success(`Found stats for ${stats.length} keywords`); 168 169 // Enrich with country codes 170 logger.info('Inferring country codes...'); 171 const enrichedStats = stats.map(stat => ({ 172 ...stat, 173 country_code: inferCountryCode(db, stat.keyword), 174 })); 175 176 // Filter out keywords without country codes 177 const validStats = enrichedStats.filter(s => s.country_code !== null); 178 const invalidStats = enrichedStats.filter(s => s.country_code === null); 179 180 if (invalidStats.length > 0) { 181 logger.warn(`Skipping ${invalidStats.length} keywords without country codes`); 182 if (verbose) { 183 invalidStats.slice(0, 5).forEach(s => { 184 logger.debug(` - ${s.keyword}`); 185 }); 186 } 187 } 188 189 if (validStats.length === 0) { 190 logger.warn('No valid keywords to sync'); 191 db.close(); 192 return; 193 } 194 195 // Show sample of what will be synced 196 if (verbose || dryRun) { 197 logger.info('\nSample of stats to sync (first 5):'); 198 validStats.slice(0, 5).forEach(stat => { 199 logger.info(` ${stat.keyword} (${stat.country_code}):`); 200 logger.info(` zenrows: ${stat.zenrows_count}, assets: ${stat.assets_scraped_count}`); 201 logger.info(` low_scoring: ${stat.low_scoring_count}, rescored: ${stat.rescored_count}`); 202 logger.info(` last_searched: ${stat.last_searched_at}`); 203 }); 204 logger.info(''); 205 } 206 207 if (dryRun) { 208 logger.info(`[DRY RUN] Would update ${validStats.length} keywords`); 209 db.close(); 210 return; 211 } 212 213 // Update keywords table 214 logger.info('Updating keywords table...'); 215 const updated = updateKeywordCounters(db, validStats); 216 logger.success(`Updated ${updated} keywords`); 217 218 // Summary 219 logger.info(''); 220 logger.info('='.repeat(60)); 221 logger.info('SUMMARY'); 222 logger.info('='.repeat(60)); 223 logger.info(`Keywords in sites table: ${stats.length}`); 224 logger.info(`Valid keywords synced: ${updated}`); 225 logger.info(`Skipped (no country): ${invalidStats.length}`); 226 227 // Show totals 228 const totals = { 229 zenrows: validStats.reduce((sum, s) => sum + s.zenrows_count, 0), 230 assets: validStats.reduce((sum, s) => sum + s.assets_scraped_count, 0), 231 low_scoring: validStats.reduce((sum, s) => sum + s.low_scoring_count, 0), 232 rescored: validStats.reduce((sum, s) => sum + s.rescored_count, 0), 233 }; 234 235 logger.info(''); 236 logger.info('Total sites tracked:'); 237 logger.info(` ZenRows scraped: ${totals.zenrows}`); 238 logger.info(` Assets captured: ${totals.assets}`); 239 logger.info(` Low scoring: ${totals.low_scoring}`); 240 logger.info(` Rescored: ${totals.rescored}`); 241 242 db.close(); 243 logger.success('✓ Sync complete!'); 244 } 245 246 main().catch(error => { 247 logger.error(`Fatal error: ${error.message}`); 248 if (error.stack) { 249 logger.debug(error.stack); 250 } 251 process.exit(1); 252 });