backfill-screenshots.js
/**
 * Backfill Screenshots Utility
 * Finds sites with missing screenshots and recaptures them
 *
 * DEPRECATED: The active pipeline uses backfillScreenshots() defined in
 * src/stages/assets.js, which queries PostgreSQL directly. This file is only
 * referenced by scripts/backfill-screenshots.js (a standalone CLI script) and
 * its test files. It still uses the SQLite .prepare() API and is not called
 * by the pipeline or cron. Do not use for new code.
 */

import { launchBrowser, createStealthContext, captureScreenshots } from '../capture.js';
import Logger from './logger.js';

const logger = new Logger('BackfillScreenshots');

/**
 * Find sites with missing screenshots.
 *
 * A site qualifies when at least one of its six screenshot columns (cropped
 * and uncropped variants of desktop-above, desktop-below and mobile-above) is
 * NULL. Rows are returned newest first (by created_at).
 *
 * @param {Database} db - SQLite database instance (must expose prepare())
 * @param {number} limit - Maximum number of sites to return
 * @returns {Array} Rows with id, domain, landing_page_url, keyword and one
 *   missing_* flag per screenshot column
 */
export function findSitesWithMissingScreenshots(db, limit = 100) {
  const query = `
    SELECT
      id,
      domain,
      landing_page_url,
      keyword,
      screenshot_above_desktop IS NULL as missing_desktop_above,
      screenshot_below_desktop IS NULL as missing_desktop_below,
      screenshot_above_mobile IS NULL as missing_mobile_above,
      screenshot_above_desktop_uncropped IS NULL as missing_desktop_above_uncropped,
      screenshot_below_desktop_uncropped IS NULL as missing_desktop_below_uncropped,
      screenshot_above_mobile_uncropped IS NULL as missing_mobile_above_uncropped
    FROM sites
    WHERE
      screenshot_above_desktop IS NULL
      OR screenshot_below_desktop IS NULL
      OR screenshot_above_mobile IS NULL
      OR screenshot_above_desktop_uncropped IS NULL
      OR screenshot_below_desktop_uncropped IS NULL
      OR screenshot_above_mobile_uncropped IS NULL
    ORDER BY created_at DESC
    LIMIT ?
  `;

  return db.prepare(query).all(limit);
}

/**
 * Update site with new screenshots.
 *
 * Writes all six screenshot columns and bumps updated_at for the given site.
 * Any falsy/missing entry is stored as NULL.
 *
 * NOTE(review): every column is overwritten, so a capture that returns only
 * some of the screenshots will replace previously stored ones with NULL.
 * Confirm this is intended before reusing this function.
 *
 * @param {Database} db - SQLite database instance (must expose prepare())
 * @param {number} siteId - Site ID
 * @param {Object} screenshots - Screenshot buffers keyed by desktop_above,
 *   desktop_below and mobile_above
 * @param {Object} screenshotsUncropped - Uncropped screenshot buffers, same keys
 * @throws {TypeError} If screenshots or screenshotsUncropped is null/undefined
 */
export function updateSiteScreenshots(db, siteId, screenshots, screenshotsUncropped) {
  const stmt = db.prepare(`
    UPDATE sites
    SET
      screenshot_above_desktop = ?,
      screenshot_below_desktop = ?,
      screenshot_above_mobile = ?,
      screenshot_above_desktop_uncropped = ?,
      screenshot_below_desktop_uncropped = ?,
      screenshot_above_mobile_uncropped = ?,
      updated_at = CURRENT_TIMESTAMP
    WHERE id = ?
  `);

  // Positional parameters: order must match the SET clause above.
  stmt.run(
    screenshots.desktop_above || null,
    screenshots.desktop_below || null,
    screenshots.mobile_above || null,
    screenshotsUncropped.desktop_above || null,
    screenshotsUncropped.desktop_below || null,
    screenshotsUncropped.mobile_above || null,
    siteId
  );

  logger.success(`Updated screenshots for site ID ${siteId}`);
}

/**
 * Backfill screenshots for sites with missing data.
 *
 * Looks up sites with at least one NULL screenshot column, recaptures each one
 * sequentially in a single shared browser context, and writes the results back.
 * A failure on one site is logged and recorded in the summary; it does not
 * abort the remaining sites. No browser is launched when nothing is missing.
 *
 * @param {Database} db - SQLite database instance (must expose prepare())
 * @param {number} limit - Maximum number of sites to process
 * @returns {Promise<Object>} Results summary: { total, success, failed, sites },
 *   where sites holds one { id, domain, status, error? } entry per processed site
 */
export async function backfillScreenshots(db, limit = 100) {
  logger.info('Starting screenshot backfill...');

  const sites = findSitesWithMissingScreenshots(db, limit);

  if (sites.length === 0) {
    logger.success('No sites with missing screenshots found');
    return {
      total: 0,
      success: 0,
      failed: 0,
      sites: [],
    };
  }

  logger.info(`Found ${sites.length} sites with missing screenshots`);

  const results = {
    total: sites.length,
    success: 0,
    failed: 0,
    sites: [],
  };

  const browser = await launchBrowser({ headless: true });
  let context;

  try {
    // Created inside the try so the browser is still closed if this rejects.
    context = await createStealthContext(browser);

    for (const site of sites) {
      logger.info(`Processing ${site.domain} (ID: ${site.id})...`);

      try {
        const captureResult = await captureScreenshots(context, site.landing_page_url, site.domain);

        // captureScreenshots reports failure via an error field; promote it to
        // an exception so it is handled by the per-site catch below.
        if (captureResult.error) {
          throw new Error(captureResult.error);
        }

        // Update database with new screenshots
        updateSiteScreenshots(
          db,
          site.id,
          captureResult.screenshots,
          captureResult.screenshotsUncropped
        );

        results.success++;
        results.sites.push({
          id: site.id,
          domain: site.domain,
          status: 'success',
        });

        logger.success(`Successfully backfilled screenshots for ${site.domain}`);
      } catch (error) {
        logger.error(`Failed to backfill screenshots for ${site.domain}`, error);

        results.failed++;
        results.sites.push({
          id: site.id,
          domain: site.domain,
          status: 'failed',
          error: error.message,
        });
      }
    }
  } finally {
    // Nested finally: a failing context.close() must not skip browser.close().
    try {
      if (context) {
        await context.close();
      }
    } finally {
      await browser.close();
    }
  }

  logger.success(
    `Screenshot backfill complete: ${results.success} success, ${results.failed} failed`
  );

  return results;
}

export default {
  findSitesWithMissingScreenshots,
  updateSiteScreenshots,
  backfillScreenshots,
};