/ src / retry-failed.js
retry-failed.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Retry Failed Captures
  5   * Retries sites that failed due to navigation timeouts or other errors
  6   */
  7  
  8  import Logger from './utils/logger.js';
  9  import { launchBrowser, createStealthContext, captureScreenshots } from './capture.js';
 10  import { scoreWebsite } from './score.js';
 11  import { setContactsJson } from './utils/contacts-storage.js';
 12  import './utils/load-env.js';
 13  import { run, getOne, getAll, query, withTransaction, closePool, getPool } from './utils/db.js';
 14  
 // Logger shared by every function in this module, created with the 'RetryFailed' tag.
const logger = new Logger('RetryFailed');

// Marker that storeSiteData writes into sites.contacts_json once the real
// contacts payload has been passed to setContactsJson: the column then holds
// only this sentinel instead of the contacts themselves.
const FS_SENTINEL = '{"_fs":true}';
 18  
 /**
 * Load every site row that is a candidate for a retry.
 *
 * A row qualifies when at least one of the following holds:
 * - processing_status is 'failed'
 * - error_log is set
 * - http_status_code is set and is anything other than 200
 * - processing_status and http_status_code are both NULL
 *
 * Rows are ordered newest first (created_at DESC) and carry the columns
 * domain, landing_page_url, keyword, error_log, processing_status and
 * http_status_code.
 *
 * @returns {Promise<*>} whatever getAll resolves with for this query
 */
async function getFailedSites() {
  const retryCandidatesSql = `SELECT
      domain,
      landing_page_url,
      keyword,
      error_log,
      processing_status,
      http_status_code
    FROM sites
    WHERE processing_status = 'failed'
    OR error_log IS NOT NULL
    OR (http_status_code IS NOT NULL AND http_status_code != 200)
    OR (processing_status IS NULL AND http_status_code IS NULL)
    ORDER BY created_at DESC`;

  const rows = await getAll(retryCandidatesSql);
  return rows;
}
 44  
 /**
 * Store site data after a successful capture/scoring run.
 *
 * Updates the sites row matching (domain, keyword) with the screenshots, HTML,
 * HTTP status and score, sets processing_status to 'scored' and clears
 * error_log. Non-empty contact details are handed to setContactsJson (keyed by
 * the site id); the contacts_json column itself only receives FS_SENTINEL.
 *
 * @param {string} domain - site domain, first half of the row key
 * @param {string} keyword - keyword, second half of the row key
 * @param {object} captureData - capture result; must provide `screenshots` and
 *   `screenshotsUncropped` (each with desktop_above, desktop_below,
 *   mobile_above), `html` and `httpStatusCode`
 * @param {object} scoreData - scoring result, stored as JSON; `contact_details`
 *   and `overall_calculation.conversion_score` are read from it
 * @returns {Promise<void>}
 */
async function storeSiteData(domain, keyword, captureData, scoreData) {
  // Serialise contacts only when they actually contain something -
  // empty objects/arrays and non-object values are never saved.
  const contacts = scoreData.contact_details;
  let contactsJson = null;
  if (contacts && typeof contacts === 'object') {
    const hasContent = Array.isArray(contacts)
      ? contacts.length > 0
      : Object.keys(contacts).length > 0;
    if (hasContent) {
      contactsJson = JSON.stringify(contacts);
    }
  }

  // 0 is a real score and must reach the database as 0, not NULL. Every other
  // falsy value (undefined, null, NaN, '') still means "no score".
  const rawScore = scoreData.overall_calculation?.conversion_score;
  const conversionScore = rawScore === 0 || rawScore ? rawScore : null;

  // Hand the contacts over before touching the row, so the sentinel is only
  // written once the payload has really been stored.
  let contactsStored = false;
  if (contactsJson) {
    // The contacts store is keyed by site id, which has to be looked up first.
    const siteRow = await getOne(
      'SELECT id FROM sites WHERE domain = $1 AND keyword = $2',
      [domain, keyword]
    );
    if (siteRow) {
      // Awaiting is harmless if setContactsJson is synchronous, and makes an
      // asynchronous failure surface here instead of as an unhandled rejection.
      await setContactsJson(siteRow.id, contactsJson);
      contactsStored = true;
    }
  }

  const { screenshots, screenshotsUncropped } = captureData;

  await run(
    `UPDATE sites
     SET
       screenshot_above_desktop = $1,
       screenshot_below_desktop = $2,
       screenshot_above_mobile = $3,
       screenshot_above_desktop_uncropped = $4,
       screenshot_below_desktop_uncropped = $5,
       screenshot_above_mobile_uncropped = $6,
       html_dom = $7,
       http_status_code = $8,
       conversion_score_json = $9,
       conversion_score = $10,
       contacts_json = $11,
       processing_status = 'scored',
       error_log = NULL,
       updated_at = NOW()
     WHERE domain = $12 AND keyword = $13`,
    [
      screenshots.desktop_above,
      screenshots.desktop_below,
      screenshots.mobile_above,
      screenshotsUncropped.desktop_above,
      screenshotsUncropped.desktop_below,
      screenshotsUncropped.mobile_above,
      captureData.html,
      captureData.httpStatusCode,
      JSON.stringify(scoreData),
      conversionScore,
      contactsStored ? FS_SENTINEL : null,
      domain,
      keyword,
    ]
  );

  logger.success(`Stored data for ${domain}`);
}
113  
/**
 * Flag a site as failed and record the reason.
 *
 * Sets error_log to the given text, processing_status to 'failed' and bumps
 * updated_at on the row matching (domain, keyword). This is best-effort: a
 * database failure is logged and swallowed, so the function does not reject
 * because of it.
 *
 * @param {string} domain - site domain, first half of the row key
 * @param {string} keyword - keyword, second half of the row key
 * @param {string} error - text to store in error_log
 * @returns {Promise<void>}
 */
async function updateSiteError(domain, keyword, error) {
  const markFailedSql = `UPDATE sites
       SET error_log = $1,
           processing_status = 'failed',
           updated_at = NOW()
       WHERE domain = $2 AND keyword = $3`;
  const markFailedParams = [error, domain, keyword];

  try {
    await run(markFailedSql, markFailedParams);
  } catch (dbError) {
    // Deliberately not rethrown: failing to record an error must not mask it.
    logger.error('Failed to update error status', dbError);
  }
}
131  
/**
 * Retry a single failed site: capture it, score it and store the result.
 *
 * Per-site failures are reported in the returned object rather than thrown:
 * each one is logged and written to the site's row through updateSiteError.
 *
 * @param {*} context - browser context, passed straight to captureScreenshots
 * @param {{domain: string, landing_page_url: string, keyword: string}} site -
 *   row describing the site to retry
 * @returns {Promise<object>} `{ domain, keyword, success: true, grade, score }`
 *   on success; `{ domain, keyword, success: false, error }` on failure, with
 *   `httpError: true` added when the site answered with a status >= 400
 */
async function retrySite(context, site) {
  const { domain, landing_page_url, keyword } = site;

  logger.info(`Retrying ${domain} (${keyword})...`);

  try {
    // Step 1: Capture screenshots
    const captureResult = await captureScreenshots(context, landing_page_url, domain);

    if (captureResult.error) {
      throw new Error(`Capture failed: ${captureResult.error}`);
    }

    // A 4xx/5xx answer is not scored - the site is set aside for a later retry
    const { httpStatusCode } = captureResult;
    if (httpStatusCode && httpStatusCode >= 400) {
      const errorMsg = `HTTP ${httpStatusCode} - Site returned error status, set aside for later retry`;
      logger.warn(errorMsg);
      await updateSiteError(domain, keyword, errorMsg);

      return {
        domain,
        keyword,
        success: false,
        error: errorMsg,
        httpError: true,
      };
    }

    // Step 2: Score the website
    const scoreData = await scoreWebsite({
      url: landing_page_url,
      domain,
      screenshots: captureResult.screenshots,
      screenshotsUncropped: captureResult.screenshotsUncropped,
      html: captureResult.html,
    });

    // Step 3: Store in database
    await storeSiteData(domain, keyword, captureResult, scoreData);

    const grade = scoreData?.overall_calculation?.letter_grade || 'N/A';
    const score = scoreData?.overall_calculation?.conversion_score || 0;

    logger.success(`✅ ${domain}: ${grade} (${score})`);

    return {
      domain,
      keyword,
      success: true,
      grade,
      score,
    };
  } catch (error) {
    // Not everything that gets thrown is an Error: a string or null must still
    // yield a usable message, and must never make this handler throw itself.
    const message = error?.message ?? String(error);

    logger.error(`❌ ${domain}: ${message}`);
    await updateSiteError(domain, keyword, message);

    return {
      domain,
      keyword,
      success: false,
      error: message,
    };
  }
}
199  
/**
 * Retry all failed sites, one after another, in a single browser session.
 *
 * @param {?number} [limit=null] - maximum number of sites to retry; any value
 *   that is not greater than 0 (null, 0, NaN, negatives) means "all of them"
 * @param {{delayMs?: number}} [options] - `delayMs` is the pause between two
 *   consecutive sites in milliseconds (default 2000)
 * @returns {Promise<object[]>} one result object per retried site, in order;
 *   an empty array when there is nothing to retry
 */
async function retryAllFailed(limit = null, { delayMs = 2000 } = {}) {
  logger.info('Starting retry of failed sites...');

  const failedSites = await getFailedSites();

  if (failedSites.length === 0) {
    logger.info('No failed sites to retry');
    return [];
  }

  logger.info(`Found ${failedSites.length} failed sites`);

  // Only a positive limit truncates the list; a negative one used to be passed
  // to slice() and silently dropped sites from the END of the list.
  const sitesToRetry = limit > 0 ? failedSites.slice(0, limit) : failedSites;

  logger.info(`Retrying ${sitesToRetry.length} sites...`);

  const results = [];

  // Launch browser
  const browser = await launchBrowser({ headless: true, slowMo: 0 });

  try {
    const context = await createStealthContext(browser);

    for (const [index, site] of sitesToRetry.entries()) {
      results.push(await retrySite(context, site));

      // Pause between retries - but not after the final one, where it would
      // only postpone the summary.
      const isLastSite = index === sitesToRetry.length - 1;
      if (!isLastSite && delayMs > 0) {
        await new Promise(resolve => setTimeout(resolve, delayMs));
      }
    }
  } finally {
    // Always release the browser, even if context creation or a retry throws.
    await browser.close();
  }

  const successCount = results.filter(r => r.success).length;
  const failCount = results.length - successCount;

  logger.success(`\nRetry complete: ${successCount} succeeded, ${failCount} failed`);

  return results;
}
242  
/**
 * Parse the optional "limit" command-line argument.
 *
 * @param {string|undefined} rawLimit - first CLI argument, if any
 * @returns {?number} the limit as a positive integer, or null when no
 *   argument (or an empty one) was given
 * @throws {RangeError} when the argument is not a positive decimal integer
 */
function parseLimitArg(rawLimit) {
  if (rawLimit === undefined || rawLimit === '') {
    return null;
  }

  // Digits only: parseInt alone would accept "5abc" as 5 and turn "abc" into
  // NaN, which the retry loop would then treat as "no limit at all".
  const parsed = /^\d+$/.test(rawLimit) ? Number.parseInt(rawLimit, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new RangeError(`Invalid limit "${rawLimit}": expected a positive integer`);
  }

  return parsed;
}

// CLI functionality: runs only when this file is the script being executed.
// NOTE(review): this compares a file: URL with a raw path, so it presumably
// does not match when the script path contains characters that get
// percent-encoded in import.meta.url (e.g. spaces) - confirm if that matters.
if (import.meta.url === `file://${process.argv[1]}`) {
  let limit;
  try {
    limit = parseLimitArg(process.argv[2]);
  } catch (error) {
    console.error(`\n❌ ${error.message}`);
    console.error('Usage: retry-failed.js [limit]');
    process.exit(1);
  }

  retryAllFailed(limit)
    .then(results => {
      const separator = '='.repeat(80);

      console.log(`\n${separator}`);
      console.log('RETRY RESULTS:');
      console.log(separator);

      for (const result of results) {
        if (result.success) {
          console.log(`✅ ${result.domain} (${result.keyword}): ${result.grade} (${result.score})`);
        } else {
          console.log(`❌ ${result.domain} (${result.keyword}): ${result.error}`);
        }
      }

      const successCount = results.filter(r => r.success).length;
      console.log(`\n${separator}`);
      console.log(`Total: ${successCount}/${results.length} successful`);
      console.log(separator);

      // Exit code 0 only when every retried site succeeded.
      process.exit(successCount === results.length ? 0 : 1);
    })
    .catch(error => {
      console.error('\n❌ Retry failed:', error.message);
      process.exit(1);
    });
}
274  
// Public surface of this module, exposed as a single default-exported object:
// the batch entry point, the per-site retry and the failed-site query.
const retryFailedApi = {
  retryAllFailed,
  retrySite,
  getFailedSites,
};

export default retryFailedApi;