/ src / utils / error-categories.js
error-categories.js
  1  /**
  2   * Error Categorization Module
  3   * Regex-based categorization of site and outreach error messages.
  4   * Groups errors into terminal (won't succeed on retry) and retriable.
  5   */
  6  
  7  import { getAll, getOne } from './db.js';
  8  
  9  // ──────────────────────────────────────────────────────────────────────────────
 10  // Pattern tables
 11  // ──────────────────────────────────────────────────────────────────────────────
 12  
 13  const SITE_TERMINAL_PATTERNS = [
 14    { pattern: /Social media platform/i, label: 'Social media' },
 15    { pattern: /Business directory/i, label: 'Business directory' },
 16    { pattern: /Not a local business/i, label: 'Not a local business' },
 17    { pattern: /Home service franchise/i, label: 'Franchise' },
 18    { pattern: /Government/i, label: 'Government domain' },
 19    { pattern: /Education domain/i, label: 'Education domain' },
 20    { pattern: /Non-commercial/i, label: 'Non-commercial' },
 21    { pattern: /Cross-border duplicate/i, label: 'Cross-border duplicate' },
 22    { pattern: /Duplicate domain/i, label: 'Duplicate domain' },
 23    { pattern: /Country mismatch/i, label: 'Country mismatch' },
 24    { pattern: /No email template/i, label: 'No template' },
 25    { pattern: /HTTP 404.*[Cc]annot capture/i, label: 'HTTP 404 (page gone)' },
 26    { pattern: /Max recapture attempts/i, label: 'Max recapture retries' },
 27    { pattern: /Ignored: Regulated industry \(.*\)/i, label: 'Regulated Industry Exclusion' },
 28    {
 29      pattern: /Max retries \(\d+\) exceeded: No contacts or key_pages found/i,
 30      label: 'No Contacts Found',
 31    },
 32    { pattern: /Ignored: Regulated industry \(\w+\)/i, label: 'Regulated Industry Exclusion' },
 33    {
 34      pattern: /Ignored: (Regulated industry|Law firm|Legal site detected).*/i,
 35      label: 'Restricted Industry',
 36    },
 37    {
 38      pattern:
 39        /(Page|The page) (not found|does not exist|you're looking for doesn't exist|appears to be an error page|is a security verification screen|indicates an error).*/i,
 40      label: 'Page Not Found',
 41    },
 42    {
 43      pattern:
 44        /Max retries \(\d+\) exceeded: (url\.toLowerCase is not a function|HTTP \d+ - Cannot capture assets for error response)/i,
 45      label: 'Capture Processing Error',
 46    },
 47    { pattern: /The page with the name '.*' could not be found/i, label: 'Page Not Found' },
 48    {
 49      pattern:
 50        /Max retries \(\d+\) exceeded: LLM daily budget exceeded: \$\d+\.\d+ spent today \(budget: \$\d+\)/i,
 51      label: 'LLM Daily Budget Exceeded',
 52    },
 53    {
 54      pattern: /The page displays a 'no posts matched your criteria' message/i,
 55      label: 'No Posts Found',
 56    },
 57    {
 58      pattern:
 59        /Access Denied|Access to the page (is forbidden|was denied)|access denied.*HTTP Error 403/i,
 60      label: 'Access Denied',
 61    },
 62    { pattern: /False-positive error page/i, label: 'Error page (false positive)' },
 63    { pattern: /CHECK constraint failed/i, label: 'DB constraint error' },
 64    { pattern: /^(Franchise site|Social media:\s+\S+)$/i, label: 'Site Filter - Franchise/Social Media' },
 65    { pattern: /Max retries.*could not determine data type of parameter/i, label: 'Database Parameter Type Mismatch' },
 66    { pattern: /^Access forbidden$/i, label: 'HTTP 403 Forbidden' },
 67    { pattern: /^Permanent error: 410$/i, label: 'HTTP 410 Gone' },
 68  ];
 69  
 70  const SITE_RETRIABLE_PATTERNS = [
 71    { pattern: /HTTP 403.*[Cc]annot capture/i, label: 'HTTP 403 (access denied)' },
 72    { pattern: /userDataDir|launchPersistentContext/i, label: 'Browser launch conflict' },
 73    { pattern: /status code 400/i, label: 'API bad request' },
 74    { pattern: /status code 401/i, label: 'API auth error' },
 75    { pattern: /status code 429/i, label: 'Rate limited' },
 76    { pattern: /Incomplete LLM response/i, label: 'Incomplete LLM' },
 77    { pattern: /Timed out|Timeout/i, label: 'Timeout' },
 78    { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' },
 79    { pattern: /navigating and changing/i, label: 'Page navigating' },
 80    { pattern: /browser has been closed/i, label: 'Browser crashed' },
 81    { pattern: /EACCES/i, label: 'Permission denied' },
 82    { pattern: /screenshot_path is NULL/i, label: 'Missing screenshot' },
 83    { pattern: /database is locked/i, label: 'DB lock' },
 84    { pattern: /chromium-nice ENOENT/i, label: 'Chromium not found' },
 85    { pattern: /HTTP (5\d\d|null)/i, label: 'Server error' },
 86    {
 87      pattern:
 88        /Max retries \(3\) exceeded: (No contacts or key_pages found|Country code is required|Failed to parse JSON response)/i,
 89      label: 'Max Retries Extraction Failure',
 90    },
 91    {
 92      pattern:
 93        /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_TOO_MANY_REDIRECTS|net::ERR_TIMED_OUT|net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH).*/i,
 94      label: 'Page Navigation Error',
 95    },
 96    {
 97      pattern: /(Website is currently not reachable|No content available for evaluation|directory)/i,
 98      label: 'Site Unreachable',
 99    },
100    {
101      pattern:
102        /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_SSL_UNRECOGNIZED_NAME_ALERT|net::ERR_SSL_PROTOCOL_ERROR) at https?:/i,
103      label: 'SSL Connection Error',
104    },
105    {
106      pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_NAME_NOT_RESOLVED/i,
107      label: 'DNS / domain not found',
108    },
109    {
110      pattern:
111        /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CERT_(DATE_INVALID|COMMON_NAME_INVALID|AUTHORITY_INVALID)/i,
112      label: 'Invalid SSL certificate',
113    },
114    {
115      pattern:
116        /Max retries \(\d+\) exceeded: page\.goto: net::ERR_(CONNECTION_REFUSED|CONNECTION_CLOSED|CONNECTION_RESET|SOCKET_NOT_CONNECTED|ADDRESS_UNREACHABLE|ABORTED)/i,
117      label: 'Connection refused / closed',
118    },
119    {
120      pattern:
121        /Max retries \(\d+\) exceeded: page\.goto: net::ERR_HTTP(2_PROTOCOL_ERROR|_RESPONSE_CODE_FAILURE)/i,
122      label: 'HTTP protocol error',
123    },
124    {
125      pattern: /Max retries \(\d+\) exceeded: page\.goto: (Download is starting|net::ERR_ABORTED)/i,
126      label: 'URL is a file download',
127    },
128    {
129      pattern: /Max retries \(\d+\) exceeded: page\.goto: Protocol error/i,
130      label: 'Browser protocol error',
131    },
132    {
133      pattern: /Max retries \(\d+\) exceeded: page\.addStyleTag:.*Content Security Policy/i,
134      label: 'CSP blocks screenshot',
135    },
136    {
137      pattern: /Max retries \(\d+\) exceeded: page\.(addStyleTag|evaluate): TypeError/i,
138      label: 'Page JS error',
139    },
140    {
141      pattern:
142        /Max retries \(3\) exceeded: page\.goto: (net::ERR_INVALID_AUTH_CREDENTIALS|Page crashed|HTTP \d{3}:)/i,
143      label: 'Page Load Failure',
144    },
145    { pattern: /Max retries \(3\) exceeded: HTTP (700|405):/i, label: 'HTTP Max Retries Exceeded' },
146    {
147      pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CONNECTION_TIMED_OUT/i,
148      label: 'Connection timed out',
149    },
150    {
151      pattern: /Max retries \(\d+\) exceeded: page\.content: Target crashed/i,
152      label: 'Browser crashed (target)',
153    },
154    {
155      pattern: /Max retries \(\d+\) exceeded: Request failed with status code 5\d\d/i,
156      label: 'Server error (5xx)',
157    },
158  ];
159  
160  const OUTREACH_TERMINAL_PATTERNS = [
161    { pattern: /opted out/i, label: 'Opted out' },
162    { pattern: /Fake.*phone|invalid.*phone/i, label: 'Invalid phone' },
163    { pattern: /gdpr_blocked/i, label: 'GDPR blocked' },
164    { pattern: /Cloudflare.*blocked/i, label: 'Cloudflare blocked' },
165    { pattern: /ZeroBounce/i, label: 'Invalid email (ZeroBounce)' },
166    { pattern: /gov_blocked/i, label: 'Gov domain blocked' },
167    { pattern: /no_message_button/i, label: 'No DM button' },
168    { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' },
169    {
170      pattern: /Permission to send an SMS has not been enabled for the region.*/i,
171      label: 'SMS Region Blocked',
172    },
173    {
174      pattern: /No Twilio number configured for country \w+ and TWILIO_PHONE_NUMBER not set/i,
175      label: 'Twilio Number Configuration Missing',
176    },
177    { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' },
178    { pattern: /\+\d{10,12}X{3,4} is not a valid mobile number/i, label: 'Invalid Phone Number' },
179    {
180      pattern: /'To' number: \+\d{11}XXXX, is not a valid mobile number/i,
181      label: 'Invalid Mobile Number',
182    },
183    {
184      pattern: /'To' number cannot be a Short Code: \+\d{3}XXXX/i,
185      label: 'Invalid Short Code Number',
186    },
187    {
188      pattern: /'From' \+\d{11} is not a Twilio phone number or Short Code country mismatch/i,
189      label: 'Invalid Twilio Phone Number',
190    },
191    {
192      pattern: /'To' number: \+61\d{9}XXXX, is not a valid mobile number/i,
193      label: 'Invalid Mobile Number',
194    },
195    { pattern: /\+?\d{10,12}XXXX is not a valid mobile number/i, label: 'Invalid Phone Number' },
196    { pattern: /\+\d+\d{4}XXXX, is not a valid mobile number/i, label: 'Invalid Phone Number' },
197    {
198      pattern: /'To' number cannot be a Short Code: \+61\d{2}XXXX/i,
199      label: 'Invalid Short Code Phone Number',
200    },
201    {
202      pattern: /'To' number cannot be a Short Code: \+\d{4}XXXX/i,
203      label: 'Invalid Short Code Phone Number',
204    },
205    {
206      pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i,
207      label: 'Invalid Short Code Number',
208    },
209    {
210      pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i,
211      label: 'Invalid Short Code Number',
212    },
213    {
214      pattern: /'To' number cannot be a Short Code: \+61(490|472)XXXX/i,
215      label: 'Invalid Short Code Number',
216    },
217    {
218      pattern: /Invalid E\.164: starts with \+0 \(\+0\d+\)/i,
219      label: 'Invalid Phone Number Zero Prefix',
220    },
221    {
222      pattern: /Short code detected — not a mobile number \(\+\d{8}\)/i,
223      label: 'Short Code Phone Number',
224    },
225    { pattern: /Outreach #\d+ is for \w+, not \w+/i, label: 'Channel mismatch' },
226    { pattern: /Contact email is a placeholder/i, label: 'Placeholder email' },
227    { pattern: /Toll-free number — cannot receive SMS/i, label: 'Toll-free number' },
228    { pattern: /cannot be a Short Code/i, label: 'Short code number' },
229    {
230      pattern: /(placeholder|test).*(email|address).*(permanently|exclude)/i,
231      label: 'Placeholder/Test Email Permanently Excluded',
232    },
233    { pattern: /email.*on.*exclusion list/i, label: 'Email Exclusion List' },
234    { pattern: /Too short to be a real phone number \(\+\d+\)/i, label: 'Invalid Phone Number - Too Short' },
235    { pattern: /\+1\d{3}555\d{4}/i, label: 'US 555 Placeholder Number' },
236    { pattern: /^Authentication Error\s*-\s*invalid username$/i, label: 'Auth Error - Invalid Username' },
237    { pattern: /'From' phone number provided.*is not a valid message-capable Twilio phone number/i, label: 'Twilio Invalid Sender Number' },
238    { pattern: /^US 555 placeholder number/i, label: 'Test Number Detection - 555' },
239  ];
240  
241  const OUTREACH_RETRIABLE_PATTERNS = [
242    {
243      pattern: /under.?construction|coming soon|site maintenance|parked domain/i,
244      label: 'Under construction',
245    },
246    { pattern: /business hours/i, label: 'Business hours block' },
247    { pattern: /status.?code 429|rate limit/i, label: 'Rate limited' },
248    { pattern: /Timeout/i, label: 'Form timeout' },
249    { pattern: /net::ERR_NAME_NOT_RESOLVED/i, label: 'DNS failure' },
250    { pattern: /per.recipient cooldown/i, label: 'Per-recipient cooldown' },
251    { pattern: /Breaker is open/i, label: 'Circuit breaker open' },
252    { pattern: /landline/i, label: 'Landline number' },
253    {
254      pattern: /browser.*crash|Target page.*closed|browser has been closed/i,
255      label: 'Browser crash',
256    },
257    { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' },
258    {
259      pattern:
260        /Invalid Resend API response: .*"statusCode":429,"message":"You have reached your daily email sending quota"/i,
261      label: 'Daily Email Quota Exceeded',
262    },
263    {
264      pattern: /Form page failed to load.*net::ERR_NAME_NOT_RESOLVED/i,
265      label: 'Form page DNS failure',
266    },
267    {
268      pattern: /Form page failed to load/i,
269      label: 'Form page load failure',
270    },
271    {
272      pattern: /Invalid Resend API response:.+"statusCode":429,.+"daily_quota_exceeded"/i,
273      label: 'Email Sending Quota Exceeded',
274    },
275    {
276      pattern: /Invalid Resend API response:.*"statusCode"\s*:\s*429/i,
277      label: 'Resend Rate Limited',
278    },
279    { pattern: /Timed out after \d+ms/i, label: 'Request Timeout' },
280    { pattern: /zb_unknown:/i, label: 'ZeroBounce unknown (re-check pending)' },
281    {
282      pattern: /No local phone number for country \w+ — parked/i,
283      label: 'No local phone number (parked)',
284    },
285    {
286      pattern: /server error|internal server error|status.?code[: ]+5\d{2}|HTTP 5\d{2}|form.*submission.*failed|submission.*error/i,
287      label: 'Server/submission error (transient)',
288    },
289  ];
290  
291  /**
292   * Suggested retry_at offsets (in seconds) for known retriable error patterns.
293   * Used by outreach send functions to set retry_at when transitioning to retry_later.
294   *
295   * Rationale:
296   *   Rate limited (429)      → 1 hour  (most APIs reset hourly)
297   *   Form timeout            → 30 min  (transient load spike)
298   *   DNS failure             → 24 hours (DNS outage / propagation)
299   *   Business hours block    → 8 hours  (conservative; covers most time zones)
300   *   Per-recipient cooldown  → 72 hours (full cooldown window)
301   *   Browser crash           → 15 min   (process restart window)
302   *   Network error           → 1 hour   (general network recovery)
303   *   Unknown retriable       → 1 hour   (safe default)
304   */
305  export const OUTREACH_RETRY_INTERVALS = [
306    {
307      pattern: /under.?construction|coming soon|site maintenance|parked domain/i,
308      seconds: 604800,
309      label: 'Under construction',
310    }, // 7 days
311    { pattern: /status.?code 429|rate limit/i, seconds: 3600, label: 'Rate limited' },
312    { pattern: /Timeout/i, seconds: 1800, label: 'Form timeout' },
313    { pattern: /net::ERR_NAME_NOT_RESOLVED/i, seconds: 86400, label: 'DNS failure' },
314    { pattern: /business hours/i, seconds: 28800, label: 'Business hours' },
315    { pattern: /per.recipient cooldown/i, seconds: 259200, label: 'Per-recipient cooldown' },
316    {
317      pattern: /browser.*crash|Target page.*closed|browser has been closed/i,
318      seconds: 900,
319      label: 'Browser crash',
320    },
321    { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, seconds: 3600, label: 'Network error' },
322  ];
323  
324  /**
325   * Compute retry_at datetime string for a retriable error message.
326   * Returns an ISO-8601 string (UTC) for use in SQLite DATETIME fields.
327   * Falls back to 1 hour if no pattern matches.
328   *
329   * @param {string} errorMessage
330   * @returns {string} ISO datetime string
331   */
332  export function computeRetryAt(errorMessage) {
333    const msg = errorMessage || '';
334    for (const { pattern, seconds } of OUTREACH_RETRY_INTERVALS) {
335      if (pattern.test(msg)) {
336        return new Date(Date.now() + seconds * 1000).toISOString();
337      }
338    }
339    // Default: 1 hour
340    return new Date(Date.now() + 3600 * 1000).toISOString();
341  }
342  
343  /**
344   * Determine whether an outreach error is retriable (should become retry_later)
345   * or terminal (should become failed).
346   *
347   * @param {string|null} errorMessage
348   * @returns {boolean} true if error is retriable
349   */
350  export function isOutreachRetriable(errorMessage) {
351    const { group } = categorizeError(errorMessage, 'outreach');
352    return group === 'retriable';
353  }
354  
355  // markOutreachResult() was removed — it used SQLite .prepare().run() and had no active
356  // callers. Each outreach module (email.js, sms.js, form.js, linkedin.js, x.js) has an
357  // inline async markOutreachResultAsync() that uses the PG run() helper from db.js.
358  
359  // ──────────────────────────────────────────────────────────────────────────────
360  // Core categorization function
361  // ──────────────────────────────────────────────────────────────────────────────
362  
363  /**
364   * Categorize an error message into a group and label.
365   * @param {string|null} errorMessage
366   * @param {'site'|'outreach'} context
367   * @returns {{ group: 'terminal'|'retriable'|'unknown', label: string }}
368   */
369  export function categorizeError(errorMessage, context = 'site') {
370    if (!errorMessage || errorMessage.trim() === '') {
371      return { group: 'retriable', label: 'Unknown (no error stored)' };
372    }
373  
374    const terminalPatterns =
375      context === 'outreach' ? OUTREACH_TERMINAL_PATTERNS : SITE_TERMINAL_PATTERNS;
376    const retriablePatterns =
377      context === 'outreach' ? OUTREACH_RETRIABLE_PATTERNS : SITE_RETRIABLE_PATTERNS;
378  
379    for (const { pattern, label } of terminalPatterns) {
380      if (pattern.test(errorMessage)) {
381        return { group: 'terminal', label };
382      }
383    }
384  
385    for (const { pattern, label } of retriablePatterns) {
386      if (pattern.test(errorMessage)) {
387        return { group: 'retriable', label };
388      }
389    }
390  
391    return { group: 'unknown', label: 'Unknown' };
392  }
393  
394  // ──────────────────────────────────────────────────────────────────────────────
395  // Tree builders
396  // ──────────────────────────────────────────────────────────────────────────────
397  
398  /**
399   * Pipeline stage order for display
400   */
401  const SITE_STATUS_ORDER = [
402    'found',
403    'ignored',
404    'assets_captured',
405    'prog_scored',
406    'semantic_scored',
407    'vision_scored',
408    'enriched',
409    'enriched_regex',
410    'enriched_llm',
411    'proposals_drafted',
412    'outreach_partial',
413    'outreach_sent',
414    'high_score',
415    'failing',
416  ];
417  
418  const OUTREACH_STATUS_ORDER = [
419    'pending',
420    'approved',
421    'scheduled',
422    'sent',
423    'delivered',
424    'opened',
425    'clicked',
426    'replied',
427    'rework',
428    'retry_later',
429    'failed',
430    'bounced',
431    'gdpr_blocked',
432    'rejected',
433  ];
434  
435  /**
436   * Pipeline stages in order (index = pipeline depth).
437   * For a given status, all statuses at higher indexes are "downstream" —
438   * sites that passed through this status but have since advanced further.
439   */
440  const PIPELINE_STAGE_DEPTH = [
441    'found',
442    'ignored',
443    'assets_captured',
444    'prog_scored',
445    'semantic_scored',
446    'vision_scored',
447    'enriched',
448    'enriched_regex',
449    'enriched_llm',
450    'proposals_drafted',
451    'outreach_partial',
452    'outreach_sent',
453  ];
454  
455  /**
456   * Build status tree for sites with error breakdowns and throughput.
457   * @returns {Promise<Array>} status tree
458   */
459  export async function buildStatusTree() {
460    // Split into separate queries to avoid full table scan with conditional COUNTs.
461  
462    // 1. Totals by status (uses idx_sites_status)
463    const totalRows = await getAll('SELECT status, COUNT(*) AS total FROM sites GROUP BY status');
464  
465    // 2. Deltas: sites that entered this status recently AND are still here.
466    //    Uses site_status transition log joined to sites current status.
467    //    Avoids the inflation from sites.updated_at which gets bumped by updateLastOutreachAt()
468    //    and other non-status-change updates (would show 2.3x actual throughput for outreach_sent).
469    // Rewritten to avoid COUNT(DISTINCT) full JOIN on 1.3M×1.2M rows.
470    // Subquery filters site_status by time (uses idx_site_status_created_at) first,
471    // then joins the small result set to sites for current-status confirmation.
472    const delta24hRows = await getAll(
473      `SELECT recent.status, COUNT(*) AS delta
474       FROM (
475         SELECT DISTINCT site_id, status FROM site_status
476         WHERE created_at > NOW() - INTERVAL '24 hours'
477       ) recent
478       JOIN sites s ON recent.site_id = s.id AND s.status = recent.status
479       GROUP BY recent.status`
480    );
481    const delta1hRows = await getAll(
482      `SELECT recent.status, COUNT(*) AS delta
483       FROM (
484         SELECT DISTINCT site_id, status FROM site_status
485         WHERE created_at > NOW() - INTERVAL '1 hour'
486       ) recent
487       JOIN sites s ON recent.site_id = s.id AND s.status = recent.status
488       GROUP BY recent.status`
489    );
490  
491    // Merge into topLevel shape
492    const d24map = Object.fromEntries(delta24hRows.map(r => [r.status, Number(r.delta)]));
493    const d1map = Object.fromEntries(delta1hRows.map(r => [r.status, Number(r.delta)]));
494    const topLevel = totalRows.map(r => ({
495      status: r.status,
496      total: Number(r.total),
497      delta_24h: d24map[r.status] || 0,
498      delta_1h: d1map[r.status] || 0,
499    }));
500  
501    // Build a lookup of total per status for cumulative calculation
502    const totalByStatus = {};
503    for (const row of topLevel) {
504      totalByStatus[row.status] = row.total;
505    }
506  
507    // Throughput from site_status transition log (split queries)
508    const thruTotal = await getAll('SELECT status, COUNT(*) AS total FROM site_status GROUP BY status');
509    const thru24h = await getAll(
510      `SELECT status, COUNT(*) AS cnt
511       FROM site_status WHERE created_at > NOW() - INTERVAL '24 hours'
512       GROUP BY status`
513    );
514    const thru1h = await getAll(
515      `SELECT status, COUNT(*) AS cnt
516       FROM site_status WHERE created_at > NOW() - INTERVAL '1 hour'
517       GROUP BY status`
518    );
519  
520    const thru24map = Object.fromEntries(thru24h.map(r => [r.status, Number(r.cnt)]));
521    const thru1map = Object.fromEntries(thru1h.map(r => [r.status, Number(r.cnt)]));
522  
523    const throughputByStatus = {};
524    for (const row of thruTotal) {
525      throughputByStatus[row.status] = {
526        thru_24h: thru24map[row.status] || 0,
527        thru_1h: thru1map[row.status] || 0,
528      };
529    }
530  
531    // Error breakdown for failing only (ignore has 800K+ rows dominated by known categories
532    // like "Social media platform" / cross-border duplicates — not actionable in status view).
533    // Fetch raw rows and group in JS to avoid slow PG substr() GROUP BY on large table.
534    const ERR_PREFIX = 200;
535    const failingRaw = await getAll(`SELECT error_message FROM sites WHERE status = 'failing'`);
536    const failingMap = {};
537    for (const r of failingRaw) {
538      const key = (r.error_message || '').substring(0, ERR_PREFIX);
539      failingMap[key] = (failingMap[key] || 0) + 1;
540    }
541    const errorRows = Object.entries(failingMap).map(([error_message, total]) => ({
542      status: 'failing',
543      error_message,
544      total,
545    }));
546  
547    // Group error rows by status
548    const errorsByStatus = {};
549    for (const row of errorRows) {
550      if (!errorsByStatus[row.status]) errorsByStatus[row.status] = [];
551      errorsByStatus[row.status].push(row);
552    }
553  
554    // Build result in pipeline order
555    const statusMap = {};
556    for (const row of topLevel) {
557      statusMap[row.status] = row;
558    }
559  
560    const tree = [];
561    const seen = new Set();
562  
563    for (const status of SITE_STATUS_ORDER) {
564      if (statusMap[status]) {
565        seen.add(status);
566        const row = statusMap[status];
567  
568        // Cumulative = this status + all downstream pipeline stages
569        const myDepth = PIPELINE_STAGE_DEPTH.indexOf(status);
570        let cumulative = null;
571        if (myDepth !== -1) {
572          cumulative = row.total;
573          for (let d = myDepth + 1; d < PIPELINE_STAGE_DEPTH.length; d++) {
574            cumulative += totalByStatus[PIPELINE_STAGE_DEPTH[d]] || 0;
575          }
576          // Only show cumulative when it differs from total (i.e. there are downstream sites)
577          if (cumulative === row.total) cumulative = null;
578        }
579  
580        const thru = throughputByStatus[status] || { thru_24h: 0, thru_1h: 0 };
581        const entry = {
582          status,
583          total: row.total,
584          cumulative,
585          delta_24h: row.delta_24h,
586          delta_1h: row.delta_1h,
587          thru_24h: thru.thru_24h,
588          thru_1h: thru.thru_1h,
589          children: null,
590        };
591  
592        if (errorsByStatus[status]) {
593          entry.children = buildErrorChildren(errorsByStatus[status], 'site');
594        }
595  
596        tree.push(entry);
597      }
598    }
599  
600    // Any statuses not in our order list go at the end
601    for (const row of topLevel) {
602      if (!seen.has(row.status)) {
603        const thru = throughputByStatus[row.status] || { thru_24h: 0, thru_1h: 0 };
604        tree.push({
605          status: row.status,
606          total: row.total,
607          cumulative: null,
608          delta_24h: row.delta_24h,
609          delta_1h: row.delta_1h,
610          thru_24h: thru.thru_24h,
611          thru_1h: thru.thru_1h,
612          children: null,
613        });
614      }
615    }
616  
617    return tree;
618  }
619  
620  /**
621   * Build status tree for outbound messages with error breakdowns.
622   * Combines approval_status and delivery_status into a synthetic display status:
623   *   - If delivery_status is set, use it
624   *   - Otherwise, use approval_status
625   * @returns {Promise<Array>} outreach status tree
626   */
627  export async function buildOutreachTree() {
628    const STATUS_EXPR = `COALESCE(delivery_status, approval_status)`;
629    const BASE_WHERE = `direction = 'outbound'`;
630  
631    // Split queries to avoid full table scans with conditional COUNTs.
632  
633    // 1. Totals
634    const totalRows = await getAll(
635      `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS total
636       FROM messages WHERE ${BASE_WHERE}
637       GROUP BY status`
638    );
639    // Use updated_at for all delta queries — tracks the most recent status change.
640    const d24Rows = await getAll(
641      `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta
642       FROM messages
643       WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '24 hours'
644       GROUP BY status`
645    );
646    const d1Rows = await getAll(
647      `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta
648       FROM messages
649       WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '1 hour'
650       GROUP BY status`
651    );
652    const d24map = Object.fromEntries(d24Rows.map(r => [r.status, Number(r.delta)]));
653    const d1map = Object.fromEntries(d1Rows.map(r => [r.status, Number(r.delta)]));
654    const topLevel = totalRows.map(r => ({
655      status: r.status,
656      total: Number(r.total),
657      delta_24h: d24map[r.status] || 0,
658      delta_1h: d1map[r.status] || 0,
659    }));
660  
661    // 2. Channel breakdown for pending, rework, approved + sent (split queries)
662    const chanStatuses = `('pending', 'rework', 'approved', 'sent', 'delivered', 'opened', 'clicked', 'replied')`;
663    const chanTotals = await getAll(
664      `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS total
665       FROM messages WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses}
666       GROUP BY status, contact_method`
667    );
668    const chanD24 = await getAll(
669      `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta
670       FROM messages
671       WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses}
672         AND updated_at > NOW() - INTERVAL '24 hours'
673       GROUP BY status, contact_method`
674    );
675    const chanD1 = await getAll(
676      `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta
677       FROM messages
678       WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses}
679         AND updated_at > NOW() - INTERVAL '1 hour'
680       GROUP BY status, contact_method`
681    );
682    const chanD24map = Object.fromEntries(
683      chanD24.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)])
684    );
685    const chanD1map = Object.fromEntries(
686      chanD1.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)])
687    );
688    const channelRows = chanTotals.map(r => ({
689      status: r.status,
690      contact_method: r.contact_method,
691      total: Number(r.total),
692      delta_24h: chanD24map[`${r.status}|${r.contact_method}`] || 0,
693      delta_1h: chanD1map[`${r.status}|${r.contact_method}`] || 0,
694    }));
695  
696    // 3. Error breakdown for failed + retry_later (split queries)
697    const errTotals = await getAll(
698      `SELECT ${STATUS_EXPR} AS status, error_message, COUNT(*) AS total
699       FROM messages
700       WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ('failed', 'retry_later')
701       GROUP BY status, error_message`
702    );
703    // delta_24h/delta_1h per error label are not displayed — skip extra queries
704    const errorRows = errTotals.map(r => ({
705      status: r.status,
706      error_message: r.error_message,
707      total: Number(r.total),
708    }));
709  
710    // 4. retry_later: count due now vs still waiting
711    const retryDueRow = await getOne(
712      `SELECT
713         COUNT(*) AS due_now,
714         MIN(retry_at) AS next_retry_at
715       FROM messages
716       WHERE ${BASE_WHERE} AND delivery_status = 'retry_later'`
717    ) || { due_now: 0, next_retry_at: null };
718    const retryStats = {
719      due_now: Number(retryDueRow.due_now),
720      next_retry_at: retryDueRow.next_retry_at,
721    };
722  
723    // Group channel rows by status
724    const channelsByStatus = {};
725    for (const row of channelRows) {
726      if (!channelsByStatus[row.status]) channelsByStatus[row.status] = [];
727      channelsByStatus[row.status].push(row);
728    }
729  
730    const statusMap = {};
731    for (const row of topLevel) {
732      statusMap[row.status] = row;
733    }
734  
735    const tree = [];
736    const seen = new Set();
737  
738    for (const status of OUTREACH_STATUS_ORDER) {
739      if (statusMap[status]) {
740        seen.add(status);
741        const row = statusMap[status];
742        const entry = {
743          status,
744          total: row.total,
745          delta_24h: row.delta_24h,
746          delta_1h: row.delta_1h,
747          children: null,
748        };
749  
750        if (channelsByStatus[status]) {
751          // Channel breakdown (sorted by total desc)
752          entry.children = {
753            type: 'channels',
754            rows: channelsByStatus[status]
755              .sort((a, b) => b.total - a.total)
756              .map(r => ({
757                label: r.contact_method,
758                total: r.total,
759                delta_24h: r.delta_24h,
760                delta_1h: r.delta_1h,
761              })),
762          };
763        } else if (status === 'failed') {
764          entry.children = buildErrorChildren(
765            errorRows.filter(r => r.status === 'failed'),
766            'outreach'
767          );
768        } else if (status === 'retry_later') {
769          entry.retryStats = retryStats;
770          entry.children = buildErrorChildren(
771            errorRows.filter(r => r.status === 'retry_later'),
772            'outreach'
773          );
774        }
775  
776        tree.push(entry);
777      }
778    }
779  
780    // Any statuses not in our order list
781    for (const row of topLevel) {
782      if (!seen.has(row.status)) {
783        tree.push({
784          status: row.status,
785          total: row.total,
786          delta_24h: row.delta_24h,
787          delta_1h: row.delta_1h,
788          children: null,
789        });
790      }
791    }
792  
793    return tree;
794  }
795  
796  // ──────────────────────────────────────────────────────────────────────────────
797  // Helpers
798  // ──────────────────────────────────────────────────────────────────────────────
799  
800  /**
801   * Collapse and categorize error rows into retriable/terminal/unknown groups.
802   * Cross-border duplicates are folded into a single entry.
803   * @param {Array} rows - DB rows with error_message, total, delta_24h, delta_1h
804   * @param {'site'|'outreach'} context
805   * @returns {{ type: 'errors', retriable: Array, terminal: Array, unknown: Array }}
806   */
807  function buildErrorChildren(rows, context) {
808    const retriable = {};
809    const terminal = {};
810    const unknown = {};
811  
812    for (const row of rows) {
813      const { group, label } = categorizeError(row.error_message, context);
814      const bucket = group === 'terminal' ? terminal : group === 'retriable' ? retriable : unknown;
815  
816      if (!bucket[label]) {
817        bucket[label] = { label, total: 0, delta_24h: 0, delta_1h: 0 };
818      }
819      bucket[label].total += row.total;
820      bucket[label].delta_24h += row.delta_24h;
821      bucket[label].delta_1h += row.delta_1h;
822    }
823  
824    const toSortedArray = obj => Object.values(obj).sort((a, b) => b.total - a.total);
825  
826    return {
827      type: 'errors',
828      retriable: toSortedArray(retriable),
829      terminal: toSortedArray(terminal),
830      unknown: toSortedArray(unknown),
831    };
832  }
833  
834  // ──────────────────────────────────────────────────────────────────────────────
835  // Conversations tree
836  // ──────────────────────────────────────────────────────────────────────────────
837  
838  const SENTIMENT_ORDER = ['positive', 'neutral', 'negative', 'objection'];
839  
840  /**
841   * Build inbound messages summary grouped by sentiment, with intent breakdown.
842   * @returns {Promise<Array>} sentiment rows with intent children
843   */
844  export async function buildConversationsTree() {
845    const topLevel = await getAll(
846      `SELECT
847         COALESCE(sentiment, 'unknown') AS sentiment,
848         COUNT(*) AS total,
849         COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h,
850         COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h
851       FROM messages
852       WHERE direction = 'inbound'
853       GROUP BY sentiment`
854    );
855  
856    const intentRows = await getAll(
857      `SELECT
858         COALESCE(sentiment, 'unknown') AS sentiment,
859         COALESCE(intent, 'unknown') AS intent,
860         COUNT(*) AS total,
861         COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h,
862         COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h
863       FROM messages
864       WHERE direction = 'inbound'
865       GROUP BY sentiment, intent`
866    );
867  
868    // PG COUNT() returns bigint strings — coerce to Number throughout
869    const intentsBySentiment = {};
870    for (const row of intentRows) {
871      if (!intentsBySentiment[row.sentiment]) intentsBySentiment[row.sentiment] = [];
872      intentsBySentiment[row.sentiment].push({
873        ...row,
874        total: Number(row.total),
875        delta_24h: Number(row.delta_24h),
876        delta_1h: Number(row.delta_1h),
877      });
878    }
879  
880    const statusMap = {};
881    for (const row of topLevel) {
882      statusMap[row.sentiment] = {
883        ...row,
884        total: Number(row.total),
885        delta_24h: Number(row.delta_24h),
886        delta_1h: Number(row.delta_1h),
887      };
888    }
889  
890    const tree = [];
891    const seen = new Set();
892  
893    for (const sentiment of SENTIMENT_ORDER) {
894      if (statusMap[sentiment]) {
895        seen.add(sentiment);
896        const row = statusMap[sentiment];
897        const intents = (intentsBySentiment[sentiment] || [])
898          .sort((a, b) => b.total - a.total)
899          .map(r => ({
900            label: r.intent,
901            total: r.total,
902            delta_24h: r.delta_24h,
903            delta_1h: r.delta_1h,
904          }));
905        tree.push({
906          sentiment,
907          total: row.total,
908          delta_24h: row.delta_24h,
909          delta_1h: row.delta_1h,
910          intents,
911        });
912      }
913    }
914  
915    // Any sentiments not in our order list
916    for (const row of topLevel) {
917      if (!seen.has(row.sentiment)) {
918        tree.push({
919          sentiment: row.sentiment,
920          total: row.total,
921          delta_24h: row.delta_24h,
922          delta_1h: row.delta_1h,
923          intents: (intentsBySentiment[row.sentiment] || [])
924            .sort((a, b) => b.total - a.total)
925            .map(r => ({
926              label: r.intent,
927              total: r.total,
928              delta_24h: r.delta_24h,
929              delta_1h: r.delta_1h,
930            })),
931        });
932      }
933    }
934  
935    return tree;
936  }