Cradicle Explorer

/ scripts / log-error-autofix.js
log-error-autofix.js
  1  #!/usr/bin/env node
/**
 * Log Error Auto-Fix CLI
 * Parses logs, classifies errors, and applies fixes automatically.
 *
 * Handles TWO categories of errors:
 * 1. Site-related errors (network timeouts, DNS failures) → database fixes
 * 2. Code bugs (TypeErrors, unhandled exceptions) → intended for Claude AI code fixes;
 *    this script currently only reports them as requiring manual review
 *
 * Usage:
 *   node scripts/log-error-autofix.js [--dry-run] [--days=2] [--limit=N] [--skip-retried]
 *     [--db-only] [--code-only] [--report-path=FILE]
 */
 13  
import { createHash } from 'crypto';
import { createReadStream, existsSync, mkdirSync, readdirSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
import { createInterface } from 'readline';
import { fileURLToPath } from 'url';
import { createDatabaseConnection } from '../src/utils/db.js';
 21  
 22  const __filename = fileURLToPath(import.meta.url);
 23  const __dirname = dirname(__filename);
 24  const projectRoot = join(__dirname, '..');
 25  
 26  // Configuration
 27  const MAX_RETRIES = 3;
 28  const REPORT_DIR = join(projectRoot, '.analysis-reports');
 29  
 30  // Parse CLI arguments
 31  function parseArgs() {
 32    const args = process.argv.slice(2);
 33    const options = {
 34      dryRun: args.includes('--dry-run'),
 35      days: 2,
 36      domain: null,
 37      limit: null,
 38      skipRetried: args.includes('--skip-retried'),
 39      reportPath: null,
 40      dbOnly: args.includes('--db-only'),
 41      codeOnly: args.includes('--code-only'),
 42    };
 43  
 44    args.forEach(arg => {
 45      if (arg.startsWith('--days=')) options.days = parseInt(arg.split('=')[1]);
 46      if (arg.startsWith('--domain=')) options.domain = arg.split('=')[1];
 47      if (arg.startsWith('--limit=')) options.limit = parseInt(arg.split('=')[1]);
 48      if (arg.startsWith('--report-path=')) options.reportPath = arg.split('=')[1];
 49    });
 50  
 51    return options;
 52  }
 53  
 54  // Parse log file for error entries
 55  async function parseLogFile(filePath, dateFilter) {
 56    const errors = [];
 57    const fileStream = createReadStream(filePath);
 58    const rl = createInterface({ input: fileStream, crlfDelay: Infinity });
 59  
 60    let currentError = null;
 61    let stackTraceLines = [];
 62  
 63    for await (const line of rl) {
 64      // Check for error line: [timestamp] [context] [ERROR] message
 65      const errorMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[\d:.]+Z)\] \[([^\]]+)\] \[ERROR\] (.+)/);
 66  
 67      if (errorMatch) {
 68        // Save previous error if exists
 69        if (currentError) {
 70          currentError.stackTrace = stackTraceLines.join('\n');
 71          errors.push(currentError);
 72        }
 73  
 74        // Start new error
 75        const [, timestamp, context, message] = errorMatch;
 76        const errorDate = new Date(timestamp);
 77  
 78        if (dateFilter && errorDate < dateFilter) {
 79          currentError = null;
 80          stackTraceLines = [];
 81          continue;
 82        }
 83  
 84        currentError = {
 85          timestamp,
 86          context,
 87          message,
 88          logFile: filePath,
 89          hash: null,
 90        };
 91        stackTraceLines = [];
 92      } else if (currentError && (line.startsWith('  at ') || line.trim().startsWith('{'))) {
 93        // Stack trace line or JSON data
 94        stackTraceLines.push(line);
 95      }
 96    }
 97  
 98    // Save last error
 99    if (currentError) {
100      currentError.stackTrace = stackTraceLines.join('\n');
101      errors.push(currentError);
102    }
103  
104    // Generate hashes and extract context
105    return errors.map(error => ({
106      ...error,
107      hash: createHash('md5')
108        .update(`${error.message}${error.context}`)
109        .digest('hex')
110        .substring(0, 8),
111      ...extractErrorContext(error),
112    }));
113  }
114  
/**
 * Derive structured context (site id, domain, source location) from an error entry.
 *
 * @param {{message: string, stackTrace?: string}} error - Parsed error entry.
 * @returns {{siteId?: number, domain?: string, filePath?: string, lineNumber?: number}}
 *   Only the fields that could be extracted are present. `filePath` is the raw
 *   location text from the first matching stack frame, so it may be a plain path
 *   or a `file://` URL.
 */
function extractErrorContext(error) {
  const { message, stackTrace } = error;
  const context = {};

  // site_id: prefer the message ("site_id: 12", "site id 12"), else a JSON-ish "site_id": 12.
  const siteIdMatch =
    message.match(/site[_ ]?id[:\s]+(\d+)/i) || stackTrace?.match(/site_id["']?\s*:\s*(\d+)/);
  if (siteIdMatch) context.siteId = Number.parseInt(siteIdMatch[1], 10);

  // Domain: accept any number of dot-separated labels so that hosts such as
  // "www.example.co.uk" are captured whole rather than cut after the second label.
  const domainMatch =
    message.match(/(?:domain|for)\s+([a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,})/i) ||
    stackTrace?.match(/domain["']?\s*:\s*["']([a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,})/);
  if (domainMatch) context.domain = domainMatch[1];

  // Source location from the first "at fn (location:line:col)" frame. The optional
  // "file://" prefix and drive letter let ES-module and Windows frames match; other
  // colon-containing locations (e.g. "node:internal/...") are still skipped.
  if (stackTrace) {
    const frameMatch = stackTrace.match(
      /at [^(]+ \(((?:file:\/\/\/?)?(?:[A-Za-z]:)?[^:]+):(\d+):\d+\)/
    );
    if (frameMatch) {
      context.filePath = frameMatch[1];
      context.lineNumber = Number.parseInt(frameMatch[2], 10);
    }
  }

  return context;
}
142  
/**
 * Classify an error entry by matching its lower-cased message (and, for promise
 * rejections, its stack trace) against an ordered list of rules.
 *
 * Rules are evaluated top to bottom and the first match wins, so site-related
 * rules take precedence over code-bug rules.
 *
 * @param {{message: string, stackTrace?: string}} error - Error entry to classify.
 * @returns {{type: string, category: ('SITE_ERROR'|'CODE_BUG'|'UNKNOWN'), fixable: boolean}}
 */
function classifyError(error) {
  const msg = error.message.toLowerCase();
  const trace = (error.stackTrace || '').toLowerCase();
  const mentions = (...needles) => needles.some((needle) => msg.includes(needle));

  // [type, category, fixable, predicate] — order matters.
  const rules = [
    // Category 1: Site-related errors (database fixes)
    ['NETWORK_TIMEOUT', 'SITE_ERROR', true, () => mentions('timeout', 'etimedout')],
    ['DNS_FAILURE', 'SITE_ERROR', true, () => mentions('enotfound', 'dns lookup')],
    ['HTTP_ERROR', 'SITE_ERROR', true, () => /http (500|503|504)/.test(msg)],
    [
      'DUPLICATE_OUTREACH',
      'SITE_ERROR',
      false,
      () => msg.includes('unique constraint') && msg.includes('outreach'),
    ],
    [
      'PROPOSAL_FORMAT_ERROR',
      'SITE_ERROR',
      true,
      () => msg.includes('proposal') && msg.includes('invalid'),
    ],
    ['STALE_KEYWORD', 'SITE_ERROR', true, () => mentions('0 results', 'stale keyword')],

    // Category 2: Code bugs
    ['TYPE_ERROR', 'CODE_BUG', true, () => mentions('typeerror:', 'is not a function')],
    ['REFERENCE_ERROR', 'CODE_BUG', true, () => mentions('referenceerror', 'not defined')],
    [
      'NULL_POINTER',
      'CODE_BUG',
      true,
      // "propert" covers both the legacy "Cannot read property 'x' of null" and the
      // newer "Cannot read properties of undefined (reading 'x')" wording.
      () => msg.includes('cannot read propert') && mentions('of null', 'of undefined'),
    ],
    [
      'PROMISE_REJECTION',
      'CODE_BUG',
      true,
      () =>
        msg.includes('unhandledpromiserejection') || trace.includes('unhandledpromiserejection'),
    ],
    ['SYNTAX_ERROR', 'CODE_BUG', true, () => msg.includes('syntaxerror')],
    ['UNHANDLED_EXCEPTION', 'CODE_BUG', true, () => mentions('unhandled', 'uncaught')],
  ];

  for (const [type, category, fixable, matches] of rules) {
    if (matches()) return { type, category, fixable };
  }

  return { type: 'UNKNOWN', category: 'UNKNOWN', fixable: false };
}
178  
/**
 * Decide whether a site error should be skipped because its site has already
 * reached the retry ceiling (`MAX_RETRIES` recaptures).
 *
 * Only applies when `--skip-retried` is active, the error is a SITE_ERROR and a
 * site id is known; every other case returns `false`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get(...params)`.
 * @param {{siteId?: number}} error - Error entry being considered.
 * @param {{category: string}} classification - Result of `classifyError`.
 * @param {boolean} skipRetried - Whether the skip behaviour is enabled.
 * @returns {boolean} Always a strict boolean, including when the site row is missing.
 */
function shouldSkipError(db, error, classification, skipRetried) {
  if (!skipRetried) return false;
  if (classification.category !== 'SITE_ERROR' || !error.siteId) return false;

  const site = db.prepare('SELECT recapture_count FROM sites WHERE id = ?').get(error.siteId);

  // A missing row or a NULL counter means the site has not been retried yet.
  return Boolean(site) && (site.recapture_count ?? 0) >= MAX_RETRIES;
}
188  
/**
 * Apply (or, in dry-run mode, describe) the database fix for a site error.
 *
 * Recapture-based fixes (NETWORK_TIMEOUT, DNS_FAILURE, HTTP_ERROR) reschedule the
 * site, bump its `recapture_count` and clear its `error_message`. STALE_KEYWORD and
 * PROPOSAL_FORMAT_ERROR have a described action but no database change implemented
 * here, so they are reported as NOT successful and recorded with
 * `fix_succeeded = 0` instead of being counted as applied fixes.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get/run`.
 * @param {{siteId?: number, message: string}} error - Error entry to fix.
 * @param {{type: string, category: string}} classification - Result of `classifyError`.
 * @param {boolean} dryRun - When true, nothing is written to the database.
 * @returns {{success: boolean, message: string, action?: string}}
 */
function applyDatabaseFix(db, error, classification, dryRun) {
  if (!error.siteId) {
    return { success: false, message: 'No site_id found' };
  }

  const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(error.siteId);
  if (!site) {
    return { success: false, message: 'Site not found' };
  }

  const DAY_MS = 24 * 60 * 60 * 1000;
  const recaptureCount = (site.recapture_count || 0) + 1;

  let action;
  // Days until the next recapture; stays null for actions with no implemented DB change.
  let recaptureDays = null;

  switch (classification.type) {
    case 'NETWORK_TIMEOUT':
      recaptureDays = 1;
      action = 'Schedule 1-day recapture';
      break;
    case 'DNS_FAILURE':
      recaptureDays = 7;
      action = 'Schedule 7-day recapture';
      break;
    case 'HTTP_ERROR':
      // Stepped backoff by attempt number: 1, 3, then 7 days.
      recaptureDays = recaptureCount === 1 ? 1 : recaptureCount === 2 ? 3 : 7;
      action = `Schedule ${recaptureDays}-day recapture`;
      break;
    case 'STALE_KEYWORD':
      action = 'Mark keyword inactive';
      break;
    case 'PROPOSAL_FORMAT_ERROR':
      action = 'Reset to enriched status, delete failed outreaches';
      break;
    default:
      return { success: false, message: 'Unknown site error type' };
  }

  const implemented = recaptureDays !== null;

  if (dryRun) {
    return implemented
      ? { success: true, message: `[DRY RUN] ${action}`, action }
      : {
          success: false,
          message: `[DRY RUN] ${action} (not implemented; nothing would change)`,
          action,
        };
  }

  if (implemented) {
    const recaptureAt = new Date(Date.now() + recaptureDays * DAY_MS).toISOString();
    db.prepare(
      `UPDATE sites
       SET recapture_at = ?, recapture_count = ?, error_message = NULL
       WHERE id = ?`
    ).run(recaptureAt, recaptureCount, error.siteId);
  }

  // Track the attempt, recording whether a change was actually applied.
  db.prepare(
    `INSERT INTO error_fix_history
     (site_id, error_type, error_category, error_message, fix_action, fix_succeeded)
     VALUES (?, ?, ?, ?, ?, ?)`
  ).run(
    error.siteId,
    classification.type,
    classification.category,
    error.message.substring(0, 200),
    action,
    implemented ? 1 : 0
  );

  return implemented
    ? { success: true, message: action, action }
    : { success: false, message: `${action} (not implemented; no change applied)`, action };
}
259  
/** Sort a `{ key: count }` tally into `[key, count]` pairs, highest count first. */
function sortByCountDesc(tally) {
  return Object.entries(tally).sort(([, a], [, b]) => b - a);
}

/** Format `count / total` as a percentage with one decimal; `'0'` when `total` is 0. */
function formatPercent(count, total) {
  return total > 0 ? ((count / total) * 100).toFixed(1) : '0';
}

/** Count one classified error in the running totals (mutates `results`). */
function recordClassification(results, classification) {
  const { type, category, fixable } = classification;
  const bump = (tally, key) => {
    tally[key] = (tally[key] || 0) + 1;
  };

  results.totalErrors++;
  bump(results.byType, type);
  bump(results.byCategory, category);
  bump(fixable ? results.fixableByType : results.unfixableByType, type);
}

/** Render the Markdown report for a finished run. */
function renderReport(results, { dryRun, days }, reportPath) {
  const recommendations = {
    UNKNOWN: 'Review error patterns and add classification rules to autofix script',
    DUPLICATE_OUTREACH: 'Check for race conditions in outreach generation. Consider adding locks.',
    PROMISE_REJECTION: 'Add proper error handling to async functions',
    TYPE_ERROR: 'Review code for null/undefined checks before property access',
    REFERENCE_ERROR: 'Check for missing imports or typos in variable names',
    NULL_POINTER: 'Add null checks before accessing properties',
    SYNTAX_ERROR: 'Review recent code changes for syntax issues',
    UNHANDLED_EXCEPTION: 'Wrap risky operations in try-catch blocks',
  };

  const sortedFixable = sortByCountDesc(results.fixableByType);
  const sortedUnfixable = sortByCountDesc(results.unfixableByType);
  const topUnfixable = sortedUnfixable.slice(0, 5);
  const unfixableTotal = sortedUnfixable.reduce((sum, [, count]) => sum + count, 0);

  const fixablePercent = formatPercent(results.fixable, results.totalErrors);
  const fixedPercent = formatPercent(results.fixed, results.fixable);

  const categoryLines = sortByCountDesc(results.byCategory)
    .map(
      ([category, count]) =>
        `- **${category}**: ${count} (${formatPercent(count, results.totalErrors)}%)`
    )
    .join('\n');

  const frequencyLines = sortByCountDesc(results.byType)
    .map(([type, count], index) => {
      const percent = formatPercent(count, results.totalErrors);
      const isFixable = results.fixableByType[type] ? '✅ Auto-fixable' : '⚠️ Requires manual review';
      return `${index + 1}. **${type}**: ${count} occurrences (${percent}%) — ${isFixable}`;
    })
    .join('\n');

  // results.fixable can be 0 while fixable types were tallied (e.g. code bugs found
  // in the database scan), so the percentage helper guards the division.
  const fixableLines =
    sortedFixable.length > 0
      ? sortedFixable
          .map(
            ([type, count], index) =>
              `${index + 1}. **${type}**: ${count} (${formatPercent(count, results.fixable)}% of fixable)`
          )
          .join('\n')
      : '_No auto-fixable errors found_';

  const unfixableLines =
    topUnfixable.length > 0
      ? topUnfixable
          .map(([type, count], index) => {
            const percent = formatPercent(count, results.totalErrors);
            const recommendation = recommendations[type] || 'Investigate and fix manually';
            return `${index + 1}. **${type}**: ${count} occurrences (${percent}% of all errors)\n   - **Action**: ${recommendation}`;
          })
          .join('\n\n')
      : '_All errors are auto-fixable! 🎉_';

  return `# Log Error Auto-Fix Report

**Generated**: ${new Date().toISOString()}
**Mode**: ${dryRun ? 'DRY RUN' : 'LIVE'}
**Days Analyzed**: ${days}

## Summary

- **Total Errors Found**: ${results.totalErrors}
- **Auto-Fixable Errors**: ${results.fixable} (${fixablePercent}%)
- **Errors Fixed**: ${results.fixed} (${fixedPercent}% of fixable)
- **Errors Skipped**: ${results.skipped}
- **Failed Fixes**: ${results.failed}

## Error Categories

${categoryLines}

## All Errors by Frequency

${frequencyLines}

## Auto-Fixable Errors (${results.fixable} total)

${fixableLines}

## ⚠️ Top Errors Requiring Manual Attention (${unfixableTotal} total)

${unfixableLines}

${sortedUnfixable.length > 5 ? `\n_${sortedUnfixable.length - 5} additional unfixable error types not shown_\n` : ''}

## Next Steps

${results.fixed > 0 ? '✅ **Fixes Applied**: Run pipeline stages to process fixed sites' : '⚠️ **No Fixes Applied**: All errors were either unfixable or already at max retries'}
${results.failed > 0 ? `\n⚠️ **Failed Fixes**: Review ${results.failed} failed fixes manually` : ''}
${topUnfixable.length > 0 ? `\n🔍 **Manual Review Required**: Prioritize fixing the top ${topUnfixable.length} most common unfixable errors above` : ''}
${results.skipped > 0 ? `\n📊 **Skipped**: ${results.skipped} errors (max retries reached or not fixable)` : ''}

---

**Full Report Path**: \`${reportPath}\`
`;
}

/**
 * CLI entry point: scan recent log files and the `sites` table for errors,
 * apply database fixes for site errors, write a Markdown report and print a
 * console summary.
 *
 * Exits the process with status 1 when any fix failed, otherwise 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = parseArgs();
  // Guard against a non-numeric --days so the cutoff date is always valid.
  const days = Number.isFinite(options.days) ? options.days : 2;

  console.log('🔧 Log Error Auto-Fix\n');
  console.log(`Mode: ${options.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Analyzing last ${days} days of logs\n`);

  // Open database
  const dbPath = process.env.DATABASE_PATH || join(projectRoot, 'db/sites.db');
  const db = createDatabaseConnection(dbPath);

  const results = {
    totalErrors: 0,
    fixable: 0,
    fixed: 0,
    skipped: 0,
    failed: 0,
    byType: {},
    byCategory: {},
    fixableByType: {},
    unfixableByType: {},
  };
  const limitReached = () => Boolean(options.limit) && results.totalErrors >= options.limit;

  try {
    // Find log files; a missing logs directory simply means there is nothing to parse.
    const logDir = join(projectRoot, 'logs');
    const cutoffDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000);
    const logFiles = existsSync(logDir)
      ? readdirSync(logDir)
          .filter((f) => f.endsWith('.log'))
          .map((f) => join(logDir, f))
      : [];

    console.log(`Found ${logFiles.length} log files\n`);

    for (const logFile of logFiles) {
      if (limitReached()) break;

      console.log(`Parsing ${logFile}...`);
      const errors = await parseLogFile(logFile, cutoffDate);
      console.log(`  Found ${errors.length} errors\n`);

      for (const error of errors) {
        if (limitReached()) break;

        const classification = classifyError(error);
        recordClassification(results, classification);

        if (!classification.fixable) {
          results.skipped++;
          continue;
        }

        results.fixable++;

        if (shouldSkipError(db, error, classification, options.skipRetried)) {
          console.log(`  Skipping ${classification.type} (already retried 3+ times)`);
          results.skipped++;
          continue;
        }

        if (classification.category === 'SITE_ERROR' && !options.codeOnly) {
          const result = applyDatabaseFix(db, error, classification, options.dryRun);
          if (result.success) {
            console.log(`  ✓ ${classification.type}: ${result.message}`);
            results.fixed++;
          } else {
            console.log(`  ✗ ${classification.type}: ${result.message}`);
            results.failed++;
          }
        } else if (classification.category === 'CODE_BUG' && !options.dbOnly) {
          console.log(`  ⚠ ${classification.type}: Code fix requires manual review`);
          results.skipped++;
        } else {
          // Excluded by --code-only / --db-only: count it so the totals add up.
          results.skipped++;
        }
      }
    }

    // Also check database for sites with error_message
    if (!options.codeOnly) {
      console.log('\n📊 Checking database for sites with active errors...');
      const sitesWithErrors = db
        .prepare(
          `SELECT id, domain, error_message, recapture_count
           FROM sites
           WHERE error_message IS NOT NULL AND status != 'ignored'`
        )
        .all();

      console.log(`Found ${sitesWithErrors.length} sites with errors\n`);

      for (const site of sitesWithErrors) {
        if (limitReached()) break;

        const error = {
          siteId: site.id,
          domain: site.domain,
          message: site.error_message,
          context: 'Database',
          stackTrace: '',
        };

        const classification = classifyError(error);
        recordClassification(results, classification);

        if (!classification.fixable || classification.category !== 'SITE_ERROR') {
          results.skipped++;
          continue;
        }

        results.fixable++;

        if (shouldSkipError(db, error, classification, options.skipRetried)) {
          console.log(`  Skipping ${classification.type} for ${site.domain} (max retries)`);
          results.skipped++;
          continue;
        }

        const result = applyDatabaseFix(db, error, classification, options.dryRun);
        if (result.success) {
          console.log(`  ✓ ${classification.type} for ${site.domain}: ${result.message}`);
          results.fixed++;
        } else {
          console.log(`  ✗ ${classification.type} for ${site.domain}: ${result.message}`);
          results.failed++;
        }
      }
    }
  } finally {
    // Release the database handle even when parsing or fixing throws.
    db.close();
  }

  // Generate report
  const reportPath =
    options.reportPath ||
    join(REPORT_DIR, `log-autofix-${new Date().toISOString().split('T')[0]}.md`);
  const report = renderReport(results, { dryRun: options.dryRun, days }, reportPath);

  // Create the target DIRECTORY (including parents); also covers a custom --report-path.
  mkdirSync(dirname(reportPath), { recursive: true });
  writeFileSync(reportPath, report);

  const fixablePercent = formatPercent(results.fixable, results.totalErrors);
  const topUnfixable = sortByCountDesc(results.unfixableByType).slice(0, 5);

  console.log(`\n✅ Report generated: ${reportPath}\n`);
  console.log('═'.repeat(60));
  console.log('Summary:');
  console.log('═'.repeat(60));
  console.log(`  Total errors: ${results.totalErrors}`);
  console.log(`  Auto-fixable: ${results.fixable} (${fixablePercent}%)`);
  console.log(`  Fixed: ${results.fixed}`);
  console.log(`  Skipped: ${results.skipped}`);
  console.log(`  Failed: ${results.failed}`);

  if (topUnfixable.length > 0) {
    console.log('\n⚠️  Top errors requiring manual attention:');
    topUnfixable.slice(0, 3).forEach(([type, count], index) => {
      console.log(`  ${index + 1}. ${type}: ${count} occurrences`);
    });
  }

  if (results.fixed > 0) {
    console.log('\n✅ Next: Run pipeline stages to process fixed sites');
  } else {
    console.log('\n⚠️  No fixes applied. Review unfixable errors above.');
  }

  console.log('═'.repeat(60));

  process.exit(results.failed > 0 ? 1 : 0);
}
548  
// Run the CLI; any rejection from main() is logged and turned into exit status 1.
function exitWithError(failure) {
  console.error('Error:', failure);
  process.exit(1);
}

main().catch(exitWithError);