retry-failed.js
#!/usr/bin/env node

/**
 * Retry Failed Captures
 * Retries sites that failed due to navigation timeouts or other errors
 */

import { pathToFileURL } from 'node:url';

import Logger from './utils/logger.js';
import { launchBrowser, createStealthContext, captureScreenshots } from './capture.js';
import { scoreWebsite } from './score.js';
import { setContactsJson } from './utils/contacts-storage.js';
// NOTE(review): the env loader runs after capture.js/score.js are evaluated;
// confirm none of those modules read environment variables at import time.
import './utils/load-env.js';
import { run, getOne, getAll, query, withTransaction, closePool, getPool } from './utils/db.js';

/**
 * Marker stored in `sites.contacts_json` instead of the contacts payload when
 * the payload itself was handed to `setContactsJson`.
 */
const FS_SENTINEL = '{"_fs":true}';

/** Pause between consecutive site retries, in milliseconds. */
const RETRY_DELAY_MS = 2000;

const logger = new Logger('RetryFailed');

/**
 * Get all failed sites from database, newest first.
 * Includes:
 * - Sites with processing_status = 'failed'
 * - Sites with error_log set
 * - Sites with HTTP errors (status code present and != 200)
 * - Sites where both processing_status and http_status_code are NULL
 *   (incomplete processing)
 *
 * @returns {Promise<Array<object>>} Rows with domain, landing_page_url,
 *   keyword, error_log, processing_status and http_status_code.
 */
async function getFailedSites() {
  return await getAll(
    `SELECT
      domain,
      landing_page_url,
      keyword,
      error_log,
      processing_status,
      http_status_code
    FROM sites
    WHERE processing_status = 'failed'
       OR error_log IS NOT NULL
       OR (http_status_code IS NOT NULL AND http_status_code != 200)
       OR (processing_status IS NULL AND http_status_code IS NULL)
    ORDER BY created_at DESC`
  );
}

/**
 * Serialize contact details, refusing to produce empty results.
 *
 * @param {*} contacts - Value of `scoreData.contact_details`.
 * @returns {string|null} JSON text for a non-empty object/array, else null.
 */
function serializeContacts(contacts) {
  if (!contacts || typeof contacts !== 'object') {
    return null;
  }

  const hasContent = Array.isArray(contacts)
    ? contacts.length > 0
    : Object.keys(contacts).length > 0;

  return hasContent ? JSON.stringify(contacts) : null;
}

/**
 * Store site data after successful capture/scoring.
 *
 * Marks the matching `sites` row as 'scored', clears its error log and saves
 * screenshots, HTML, HTTP status and the score payload. Non-empty contacts are
 * passed to `setContactsJson`, and the row only receives FS_SENTINEL.
 *
 * @param {string} domain - Site domain (row key together with keyword).
 * @param {string} keyword - Keyword the site was found for.
 * @param {object} captureData - Capture result providing `screenshots`,
 *   `screenshotsUncropped`, `html` and `httpStatusCode`.
 * @param {object} scoreData - Scoring result; stored as JSON.
 * @returns {Promise<void>}
 */
async function storeSiteData(domain, keyword, captureData, scoreData) {
  // Extract contacts from score data - never save empty results
  const contactsJson = serializeContacts(scoreData.contact_details);

  // `??` rather than `||`: a conversion score of 0 is a real score and must
  // not be stored as NULL.
  const conversionScore = scoreData?.overall_calculation?.conversion_score ?? null;

  // Write contacts to filesystem if present
  if (contactsJson) {
    // Need site id to write to filesystem — look it up
    const siteRow = await getOne(
      'SELECT id FROM sites WHERE domain = $1 AND keyword = $2',
      [domain, keyword]
    );
    if (siteRow) {
      setContactsJson(siteRow.id, contactsJson);
    }
  }

  await run(
    `UPDATE sites
     SET
       screenshot_above_desktop = $1,
       screenshot_below_desktop = $2,
       screenshot_above_mobile = $3,
       screenshot_above_desktop_uncropped = $4,
       screenshot_below_desktop_uncropped = $5,
       screenshot_above_mobile_uncropped = $6,
       html_dom = $7,
       http_status_code = $8,
       conversion_score_json = $9,
       conversion_score = $10,
       contacts_json = $11,
       processing_status = 'scored',
       error_log = NULL,
       updated_at = NOW()
     WHERE domain = $12 AND keyword = $13`,
    [
      captureData.screenshots.desktop_above,
      captureData.screenshots.desktop_below,
      captureData.screenshots.mobile_above,
      captureData.screenshotsUncropped.desktop_above,
      captureData.screenshotsUncropped.desktop_below,
      captureData.screenshotsUncropped.mobile_above,
      captureData.html,
      captureData.httpStatusCode,
      JSON.stringify(scoreData),
      conversionScore,
      contactsJson ? FS_SENTINEL : null,
      domain,
      keyword,
    ]
  );

  logger.success(`Stored data for ${domain}`);
}

/**
 * Update site error status.
 *
 * Best-effort: a failure to write the error is logged and not rethrown, so
 * callers can report the original problem.
 *
 * @param {string} domain - Site domain.
 * @param {string} keyword - Keyword the site was found for.
 * @param {string} error - Message to store in `error_log`.
 * @returns {Promise<void>}
 */
async function updateSiteError(domain, keyword, error) {
  try {
    await run(
      `UPDATE sites
       SET error_log = $1,
           processing_status = 'failed',
           updated_at = NOW()
       WHERE domain = $2 AND keyword = $3`,
      [error, domain, keyword]
    );
  } catch (err) {
    logger.error('Failed to update error status', err);
  }
}

/**
 * Retry a single failed site: capture, score, then store.
 *
 * Never rejects for capture/scoring/storage errors; they are recorded on the
 * site row and reported in the returned object.
 *
 * @param {object} context - Browser context passed through to
 *   `captureScreenshots`.
 * @param {{domain: string, landing_page_url: string, keyword: string}} site
 * @returns {Promise<object>} `{ domain, keyword, success: true, grade, score }`
 *   on success, otherwise `{ domain, keyword, success: false, error }` with
 *   `httpError: true` when the site answered with a 4xx/5xx status.
 */
async function retrySite(context, site) {
  const { domain, landing_page_url, keyword } = site;

  logger.info(`Retrying ${domain} (${keyword})...`);

  try {
    // Step 1: Capture screenshots
    const captureResult = await captureScreenshots(context, landing_page_url, domain);

    if (captureResult.error) {
      throw new Error(`Capture failed: ${captureResult.error}`);
    }

    // Check if site returned 4xx/5xx error - set aside for later retry
    if (captureResult.httpStatusCode >= 400) {
      const errorMsg = `HTTP ${captureResult.httpStatusCode} - Site returned error status, set aside for later retry`;
      logger.warn(errorMsg);
      await updateSiteError(domain, keyword, errorMsg);

      return {
        domain,
        keyword,
        success: false,
        error: errorMsg,
        httpError: true,
      };
    }

    // Step 2: Score the website
    const scoreData = await scoreWebsite({
      url: landing_page_url,
      domain,
      screenshots: captureResult.screenshots,
      screenshotsUncropped: captureResult.screenshotsUncropped,
      html: captureResult.html,
    });

    // Step 3: Store in database
    await storeSiteData(domain, keyword, captureResult, scoreData);

    const grade = scoreData?.overall_calculation?.letter_grade || 'N/A';
    const score = scoreData?.overall_calculation?.conversion_score ?? 0;

    logger.success(`✅ ${domain}: ${grade} (${score})`);

    return {
      domain,
      keyword,
      success: true,
      grade,
      score,
    };
  } catch (error) {
    logger.error(`❌ ${domain}: ${error.message}`);
    await updateSiteError(domain, keyword, error.message);

    return {
      domain,
      keyword,
      success: false,
      error: error.message,
    };
  }
}

/**
 * Retry all failed sites, one after another.
 *
 * @param {number|null} [limit=null] - Maximum number of sites to retry. A
 *   missing, non-numeric or non-positive value means "no limit".
 * @returns {Promise<Array<object>>} One result object per retried site (see
 *   `retrySite`); empty when nothing needs retrying.
 */
async function retryAllFailed(limit = null) {
  logger.info('Starting retry of failed sites...');

  const failedSites = await getFailedSites();

  if (failedSites.length === 0) {
    logger.info('No failed sites to retry');
    return [];
  }

  logger.info(`Found ${failedSites.length} failed sites`);

  // Guard against negative limits, which `slice` would interpret as
  // "drop the last n entries".
  const maxSites = Number(limit);
  const sitesToRetry = maxSites > 0 ? failedSites.slice(0, maxSites) : failedSites;

  logger.info(`Retrying ${sitesToRetry.length} sites...`);

  // Launch browser
  const browser = await launchBrowser({ headless: true, slowMo: 0 });
  const results = [];

  try {
    const context = await createStealthContext(browser);

    for (const [index, site] of sitesToRetry.entries()) {
      results.push(await retrySite(context, site));

      // Add delay between retries (nothing to wait for after the last one)
      if (index < sitesToRetry.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS));
      }
    }
  } finally {
    // Always release the browser, even if context creation or a retry throws.
    await browser.close();
  }

  const successCount = results.filter((r) => r.success).length;
  const failCount = results.length - successCount;

  logger.success(`\nRetry complete: ${successCount} succeeded, ${failCount} failed`);

  return results;
}

/**
 * CLI entry point: `retry-failed.js [limit]`.
 * Prints a per-site summary and exits 0 only when every retry succeeded.
 *
 * @param {string[]} args - Command-line arguments after the script path.
 * @returns {Promise<void>}
 */
async function runCli(args) {
  const parsedLimit = args[0] ? Number.parseInt(args[0], 10) : null;
  if (Number.isNaN(parsedLimit)) {
    console.warn(`Ignoring invalid limit "${args[0]}"; retrying all failed sites`);
  }
  const limit = Number.isNaN(parsedLimit) ? null : parsedLimit;

  try {
    const results = await retryAllFailed(limit);

    console.log(`\n${'='.repeat(80)}`);
    console.log('RETRY RESULTS:');
    console.log('='.repeat(80));

    for (const result of results) {
      if (result.success) {
        console.log(`✅ ${result.domain} (${result.keyword}): ${result.grade} (${result.score})`);
      } else {
        console.log(`❌ ${result.domain} (${result.keyword}): ${result.error}`);
      }
    }

    const successCount = results.filter((r) => r.success).length;
    console.log(`\n${'='.repeat(80)}`);
    console.log(`Total: ${successCount}/${results.length} successful`);
    console.log('='.repeat(80));

    process.exit(successCount === results.length ? 0 : 1);
  } catch (error) {
    console.error('\n❌ Retry failed:', error.message);
    process.exit(1);
  }
}

// CLI functionality
// Compare against a properly encoded file URL: import.meta.url is
// percent-encoded, so a hand-built `file://${path}` string does not match for
// paths containing spaces or other special characters.
const isMainModule =
  Boolean(process.argv[1]) && import.meta.url === pathToFileURL(process.argv[1]).href;

if (isMainModule) {
  runCli(process.argv.slice(2));
}

export default {
  retryAllFailed,
  retrySite,
  getFailedSites,
};