// src/utils/error-handler.js
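/**
 * Error-handling utilities.
 *
 * Provides retry with exponential backoff for API failures, plus related helpers:
 * retryable-error classification, promise timeouts, sleep, tolerant JSON parsing,
 * domain extraction, and a concurrency-limited batch processor.
 */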
  5  
  6  import Logger from './logger.js';
  7  
// Module-level logger used by the helpers in this file for warnings and errors.
// It is constructed with the label 'ErrorHandler' (presumably used by Logger to
// tag its output — confirm in ./logger.js).
const logger = new Logger('ErrorHandler');
  9  
 10  /**
 11   * Retry a function with exponential backoff
 12   * @param {Function} fn - Async function to retry
 13   * @param {Object} options - Retry options
 14   * @returns {Promise} - Result of successful function call
 15   */
 16  export async function retryWithBackoff(fn, options = {}) {
 17    const {
 18      maxRetries = 3,
 19      initialDelay = 1000,
 20      maxDelay = 30000,
 21      backoffFactor = 2,
 22      onRetry = null,
 23      shouldRetry = () => true,
 24    } = options;
 25  
 26    let lastError;
 27    let delay = initialDelay;
 28  
 29    for (let attempt = 0; attempt <= maxRetries; attempt++) {
 30      try {
 31        return await fn();
 32      } catch (error) {
 33        lastError = error;
 34  
 35        // Check if we should retry this error
 36        if (!shouldRetry(error)) {
 37          throw error;
 38        }
 39  
 40        // Don't wait after last attempt
 41        if (attempt === maxRetries) {
 42          break;
 43        }
 44  
 45        logger.warn(`Attempt ${attempt + 1}/${maxRetries + 1} failed: ${error.message}`);
 46  
 47        if (onRetry) {
 48          onRetry(attempt, error);
 49        }
 50  
 51        // Wait with exponential backoff
 52        await sleep(delay);
 53        delay = Math.min(delay * backoffFactor, maxDelay);
 54      }
 55    }
 56  
 57    // All retries exhausted
 58    logger.error(`All ${maxRetries + 1} attempts failed`, lastError);
 59    throw lastError;
 60  }
 61  
 62  /**
 63   * Check if error is retryable (network/rate limit errors).
 64   *
 65   * Non-retryable by design:
 66   *   402 Payment Required — quota exhausted; retrying immediately won't restore credits.
 67   *   401 Unauthorized — bad API key; retrying won't fix auth.
 68   *   403 Forbidden — permanent access denial.
 69   *   404 Not Found — resource doesn't exist; retrying is pointless.
 70   *   422 Unprocessable Entity — validation failure; request must be fixed first.
 71   * These are let through so the circuit breaker can classify them and set rate limits.
 72   */
 73  export function isRetryableError(error) {
 74    const status = error?.response?.status ?? error?.status ?? 0;
 75    if (status === 402 || status === 401 || status === 403 || status === 404 || status === 422) {
 76      return false;
 77    }
 78  
 79    const retryableMessages = [
 80      'ENOTFOUND',
 81      'ECONNRESET',
 82      'ETIMEDOUT',
 83      'ECONNREFUSED',
 84      'rate limit',
 85      'too many requests',
 86      '429',
 87      '500',
 88      '503',
 89      '504',
 90    ];
 91  
 92    const errorString = error.toString().toLowerCase();
 93    return retryableMessages.some(msg => errorString.includes(msg.toLowerCase()));
 94  }
 95  
 96  /**
 97   * Timeout wrapper for promises
 98   */
 99  export function withTimeout(promise, ms, errorMessage = 'Operation timed out') {
100    return Promise.race([
101      promise,
102      new Promise((_, reject) => setTimeout(() => reject(new Error(errorMessage)), ms)),
103    ]);
104  }
105  
/**
 * Pause for a given duration.
 *
 * @param {number} ms - Time to wait in milliseconds.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
export function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
112  
/**
 * Remove a markdown code fence that some LLMs add despite json_mode.
 * Text after the closing fence (trailing notes) is dropped as well. Surrounding
 * whitespace is trimmed first so a fence preceded by blank lines is still recognised.
 */
function stripMarkdownFence(text) {
  const trimmed = text.trim();
  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)```/i);
  if (fenced) {
    return fenced[1].trim();
  }
  // No complete fence: strip a dangling opening and/or closing fence if present.
  return trimmed.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/, '').trim();
}

/**
 * Escape raw control characters (U+0000–U+001F) that appear inside JSON string values.
 * Scans character-by-character so only characters between unescaped quotes are touched;
 * whitespace between tokens is left alone.
 */
function escapeControlCharsInStrings(text) {
  const shortEscapes = { '\n': '\\n', '\r': '\\r', '\t': '\\t' };
  let repaired = '';
  let inString = false;
  let escaped = false;

  for (const ch of text) {
    if (escaped) {
      // Character following a backslash: copy verbatim, it is part of an escape sequence.
      repaired += ch;
      escaped = false;
    } else if (inString && ch === '\\') {
      repaired += ch;
      escaped = true;
    } else if (ch === '"') {
      repaired += ch;
      inString = !inString;
    } else if (inString && ch.charCodeAt(0) < 0x20) {
      repaired += shortEscapes[ch] ?? `\\u${ch.charCodeAt(0).toString(16).padStart(4, '0')}`;
    } else {
      repaired += ch;
    }
  }
  return repaired;
}

/**
 * Safe JSON parse with fallback.
 *
 * Never throws: any input that cannot be parsed (including non-string input such as
 * `null`, `undefined`, numbers or objects) is logged and `fallback` is returned.
 *
 * Parsing strategy:
 *   1. Strip a markdown code fence, if any.
 *   2. Parse as-is.
 *   3. On failure, escape raw control characters inside string values and parse again
 *      (some LLMs output literal newlines/tabs inside strings, which is invalid JSON).
 *
 * @param {string} str - Text expected to contain JSON.
 * @param {*} [fallback=null] - Value returned when parsing fails.
 * @returns {*} The parsed value, or `fallback`.
 */
export function safeJsonParse(str, fallback = null) {
  const isString = typeof str === 'string';

  // Log context: first 100 chars, last 50 chars, total length (diagnose truncation vs malformed).
  const giveUp = (reason) => {
    const len = isString ? str.length : 0;
    const tail = len > 150 ? `...${str.slice(-50)}` : '';
    logger.warn('Failed to parse JSON, returning fallback', {
      str: isString ? str.substring(0, 100) : undefined,
      tail,
      len,
      err: reason,
    });
    return fallback;
  };

  if (!isString) {
    // Guard up front: string methods on a non-string would throw, and JSON.parse(null)
    // would "succeed" with null instead of honouring the caller's fallback.
    return giveUp(`Expected a JSON string but received ${str === null ? 'null' : typeof str}`);
  }

  const clean = stripMarkdownFence(str);

  // First attempt: parse as-is.
  try {
    return JSON.parse(clean);
  } catch {
    // Second attempt below, after repairing raw control characters.
  }

  try {
    return JSON.parse(escapeControlCharsInStrings(clean));
  } catch (parseErr) {
    return giveUp(parseErr?.message);
  }
}
177  
/**
 * Extract the host name from a URL, without a leading "www." prefix.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string} The host name, or the input unchanged (after logging a warning)
 *   when it cannot be parsed as a URL.
 */
export function extractDomain(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    logger.warn(`Failed to extract domain from: ${url}`);
    return url;
  }

  return hostname.startsWith('www.') ? hostname.slice(4) : hostname;
}
190  
/**
 * Batch processor with concurrency control.
 *
 * Runs `processor(item, index)` for every item while keeping at most N calls in flight.
 * N is re-evaluated every time a slot frees up, so `getDynamicConcurrency` can widen
 * or narrow the pool while the batch is running.
 *
 * @param {Array} items - Items to process.
 * @param {Function} processor - Called as `processor(item, index)`; may return a value or a promise.
 * @param {Object} [options]
 * @param {number} [options.concurrency=15] - Static pool size, used when no
 *   `getDynamicConcurrency` is given.
 * @param {Function|null} [options.onProgress=null] - Called as `onProgress(completed, total)`
 *   after every item that has been handled, whether it succeeded or failed. Exceptions
 *   thrown by this callback are logged and ignored.
 * @param {Function|null} [options.onError=null] - Called as `onError(reason)` for every item
 *   whose processor threw or rejected. If this callback throws, the batch rejects with
 *   that exception.
 * @param {Function|null} [options.getDynamicConcurrency=null] - Returns the current pool size.
 * @returns {Promise<{results: Array, errors: Array}>} `results` holds fulfilled values in
 *   completion order (not input order); `errors` holds the rejection reasons.
 */
export async function processBatch(items, processor, options = {}) {
  const {
    concurrency = 15,
    onProgress = null,
    onError = null,
    getDynamicConcurrency = null,
  } = options;

  const results = [];
  const errors = [];
  const active = new Set();
  let completed = 0;
  let nextIndex = 0;

  // A limit of 0, a negative number or NaN would start nothing and await nothing,
  // turning the scheduling loop below into an infinite busy loop. Always allow one.
  const currentLimit = () => {
    const requested = getDynamicConcurrency ? getDynamicConcurrency() : concurrency;
    return requested >= 1 ? requested : 1;
  };

  // Count the item exactly once and notify the observer. The callback is isolated so
  // that a faulty progress handler cannot make a successful item look like a failure.
  const reportProgress = () => {
    completed++;
    if (!onProgress) return;
    try {
      onProgress(completed, items.length);
    } catch (callbackError) {
      logger.warn(`onProgress callback threw: ${callbackError?.message ?? callbackError}`);
    }
  };

  const startNext = () => {
    const idx = nextIndex++;
    const item = items[idx];
    const task = Promise.resolve()
      .then(() => processor(item, idx))
      .then(
        (value) => {
          results.push(value);
        },
        (reason) => {
          errors.push(reason);
          if (onError) onError(reason);
        },
      )
      .then(reportProgress)
      // Free the slot however the chain ended; runs before `task` itself settles,
      // so the scheduler never sees a finished task still occupying the pool.
      .finally(() => active.delete(task));
    active.add(task);
  };

  // Pool-based: re-evaluate concurrency after every completion.
  while (nextIndex < items.length || active.size > 0) {
    const limit = currentLimit();

    // Fill the pool up to the current limit.
    while (nextIndex < items.length && active.size < limit) {
      startNext();
    }

    // Wait for one task to finish, then re-evaluate.
    if (active.size > 0) {
      await Promise.race(active);
    }
  }

  return { results, errors };
}
248  
// Default export: all helpers of this module bundled into one object, for callers
// that prefer `import errorHandler from ...` over the individual named exports.
const errorHandlerUtils = {
  retryWithBackoff,
  isRetryableError,
  withTimeout,
  sleep,
  safeJsonParse,
  extractDomain,
  processBatch,
};

export default errorHandlerUtils;