error-categories.js
1 /** 2 * Error Categorization Module 3 * Regex-based categorization of site and outreach error messages. 4 * Groups errors into terminal (won't succeed on retry) and retriable. 5 */ 6 7 import { getAll, getOne } from './db.js'; 8 9 // ────────────────────────────────────────────────────────────────────────────── 10 // Pattern tables 11 // ────────────────────────────────────────────────────────────────────────────── 12 13 const SITE_TERMINAL_PATTERNS = [ 14 { pattern: /Social media platform/i, label: 'Social media' }, 15 { pattern: /Business directory/i, label: 'Business directory' }, 16 { pattern: /Not a local business/i, label: 'Not a local business' }, 17 { pattern: /Home service franchise/i, label: 'Franchise' }, 18 { pattern: /Government/i, label: 'Government domain' }, 19 { pattern: /Education domain/i, label: 'Education domain' }, 20 { pattern: /Non-commercial/i, label: 'Non-commercial' }, 21 { pattern: /Cross-border duplicate/i, label: 'Cross-border duplicate' }, 22 { pattern: /Duplicate domain/i, label: 'Duplicate domain' }, 23 { pattern: /Country mismatch/i, label: 'Country mismatch' }, 24 { pattern: /No email template/i, label: 'No template' }, 25 { pattern: /HTTP 404.*[Cc]annot capture/i, label: 'HTTP 404 (page gone)' }, 26 { pattern: /Max recapture attempts/i, label: 'Max recapture retries' }, 27 { pattern: /Ignored: Regulated industry \(.*\)/i, label: 'Regulated Industry Exclusion' }, 28 { 29 pattern: /Max retries \(\d+\) exceeded: No contacts or key_pages found/i, 30 label: 'No Contacts Found', 31 }, 32 { pattern: /Ignored: Regulated industry \(\w+\)/i, label: 'Regulated Industry Exclusion' }, 33 { 34 pattern: /Ignored: (Regulated industry|Law firm|Legal site detected).*/i, 35 label: 'Restricted Industry', 36 }, 37 { 38 pattern: 39 /(Page|The page) (not found|does not exist|you're looking for doesn't exist|appears to be an error page|is a security verification screen|indicates an error).*/i, 40 label: 'Page Not Found', 41 }, 42 { 43 pattern: 44 /Max retries \(\d+\) exceeded: (url\.toLowerCase is not a function|HTTP \d+ - Cannot capture assets for error response)/i, 45 label: 'Capture Processing Error', 46 }, 47 { pattern: /The page with the name '.*' could not be found/i, label: 'Page Not Found' }, 48 { 49 pattern: 50 /Max retries \(\d+\) exceeded: LLM daily budget exceeded: \$\d+\.\d+ spent today \(budget: \$\d+\)/i, 51 label: 'LLM Daily Budget Exceeded', 52 }, 53 { 54 pattern: /The page displays a 'no posts matched your criteria' message/i, 55 label: 'No Posts Found', 56 }, 57 { 58 pattern: 59 /Access Denied|Access to the page (is forbidden|was denied)|access denied.*HTTP Error 403/i, 60 label: 'Access Denied', 61 }, 62 { pattern: /False-positive error page/i, label: 'Error page (false positive)' }, 63 { pattern: /CHECK constraint failed/i, label: 'DB constraint error' }, 64 { pattern: /^(Franchise site|Social media:\s+\S+)$/i, label: 'Site Filter - Franchise/Social Media' }, 65 { pattern: /Max retries.*could not determine data type of parameter/i, label: 'Database Parameter Type Mismatch' }, 66 { pattern: /^Access forbidden$/i, label: 'HTTP 403 Forbidden' }, 67 { pattern: /^Permanent error: 410$/i, label: 'HTTP 410 Gone' }, 68 ]; 69 70 const SITE_RETRIABLE_PATTERNS = [ 71 { pattern: /HTTP 403.*[Cc]annot capture/i, label: 'HTTP 403 (access denied)' }, 72 { pattern: /userDataDir|launchPersistentContext/i, label: 'Browser launch conflict' }, 73 { pattern: /status code 400/i, label: 'API bad request' }, 74 { pattern: /status code 401/i, label: 'API auth error' }, 75 { pattern: /status code 429/i, label: 'Rate limited' }, 76 { pattern: /Incomplete LLM response/i, label: 'Incomplete LLM' }, 77 { pattern: /Timed out|Timeout/i, label: 'Timeout' }, 78 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' }, 79 { pattern: /navigating and changing/i, label: 'Page navigating' }, 80 { pattern: /browser has been closed/i, label: 'Browser crashed' }, 81 { pattern: /EACCES/i, label: 'Permission denied' }, 82 { pattern: /screenshot_path is NULL/i, label: 'Missing screenshot' }, 83 { pattern: /database is locked/i, label: 'DB lock' }, 84 { pattern: /chromium-nice ENOENT/i, label: 'Chromium not found' }, 85 { pattern: /HTTP (5\d\d|null)/i, label: 'Server error' }, 86 { 87 pattern: 88 /Max retries \(3\) exceeded: (No contacts or key_pages found|Country code is required|Failed to parse JSON response)/i, 89 label: 'Max Retries Extraction Failure', 90 }, 91 { 92 pattern: 93 /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_TOO_MANY_REDIRECTS|net::ERR_TIMED_OUT|net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH).*/i, 94 label: 'Page Navigation Error', 95 }, 96 { 97 pattern: /(Website is currently not reachable|No content available for evaluation|directory)/i, 98 label: 'Site Unreachable', 99 }, 100 { 101 pattern: 102 /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_SSL_UNRECOGNIZED_NAME_ALERT|net::ERR_SSL_PROTOCOL_ERROR) at https?:/i, 103 label: 'SSL Connection Error', 104 }, 105 { 106 pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_NAME_NOT_RESOLVED/i, 107 label: 'DNS / domain not found', 108 }, 109 { 110 pattern: 111 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CERT_(DATE_INVALID|COMMON_NAME_INVALID|AUTHORITY_INVALID)/i, 112 label: 'Invalid SSL certificate', 113 }, 114 { 115 pattern: 116 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_(CONNECTION_REFUSED|CONNECTION_CLOSED|CONNECTION_RESET|SOCKET_NOT_CONNECTED|ADDRESS_UNREACHABLE|ABORTED)/i, 117 label: 'Connection refused / closed', 118 }, 119 { 120 pattern: 121 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_HTTP(2_PROTOCOL_ERROR|_RESPONSE_CODE_FAILURE)/i, 122 label: 'HTTP protocol error', 123 }, 124 { 125 pattern: /Max retries \(\d+\) exceeded: page\.goto: (Download is starting|net::ERR_ABORTED)/i, 126 label: 'URL is a file download', 127 }, 128 { 129 pattern: /Max retries \(\d+\) exceeded: page\.goto: Protocol error/i, 130 label: 'Browser protocol error', 131 }, 132 { 133 pattern: /Max retries \(\d+\) exceeded: page\.addStyleTag:.*Content Security Policy/i, 134 label: 'CSP blocks screenshot', 135 }, 136 { 137 pattern: /Max retries \(\d+\) exceeded: page\.(addStyleTag|evaluate): TypeError/i, 138 label: 'Page JS error', 139 }, 140 { 141 pattern: 142 /Max retries \(3\) exceeded: page\.goto: (net::ERR_INVALID_AUTH_CREDENTIALS|Page crashed|HTTP \d{3}:)/i, 143 label: 'Page Load Failure', 144 }, 145 { pattern: /Max retries \(3\) exceeded: HTTP (700|405):/i, label: 'HTTP Max Retries Exceeded' }, 146 { 147 pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CONNECTION_TIMED_OUT/i, 148 label: 'Connection timed out', 149 }, 150 { 151 pattern: /Max retries \(\d+\) exceeded: page\.content: Target crashed/i, 152 label: 'Browser crashed (target)', 153 }, 154 { 155 pattern: /Max retries \(\d+\) exceeded: Request failed with status code 5\d\d/i, 156 label: 'Server error (5xx)', 157 }, 158 ]; 159 160 const OUTREACH_TERMINAL_PATTERNS = [ 161 { pattern: /opted out/i, label: 'Opted out' }, 162 { pattern: /Fake.*phone|invalid.*phone/i, label: 'Invalid phone' }, 163 { pattern: /gdpr_blocked/i, label: 'GDPR blocked' }, 164 { pattern: /Cloudflare.*blocked/i, label: 'Cloudflare blocked' }, 165 { pattern: /ZeroBounce/i, label: 'Invalid email (ZeroBounce)' }, 166 { pattern: /gov_blocked/i, label: 'Gov domain blocked' }, 167 { pattern: /no_message_button/i, label: 'No DM button' }, 168 { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 169 { 170 pattern: /Permission to send an SMS has not been enabled for the region.*/i, 171 label: 'SMS Region Blocked', 172 }, 173 { 174 pattern: /No Twilio number configured for country \w+ and TWILIO_PHONE_NUMBER not set/i, 175 label: 'Twilio Number Configuration Missing', 176 }, 177 { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 178 { pattern: /\+\d{10,12}X{3,4} is not a valid mobile number/i, label: 'Invalid Phone Number' }, 179 { 180 pattern: /'To' number: \+\d{11}XXXX, is not a valid mobile number/i, 181 label: 'Invalid Mobile Number', 182 }, 183 { 184 pattern: /'To' number cannot be a Short Code: \+\d{3}XXXX/i, 185 label: 'Invalid Short Code Number', 186 }, 187 { 188 pattern: /'From' \+\d{11} is not a Twilio phone number or Short Code country mismatch/i, 189 label: 'Invalid Twilio Phone Number', 190 }, 191 { 192 pattern: /'To' number: \+61\d{9}XXXX, is not a valid mobile number/i, 193 label: 'Invalid Mobile Number', 194 }, 195 { pattern: /\+?\d{10,12}XXXX is not a valid mobile number/i, label: 'Invalid Phone Number' }, 196 { pattern: /\+\d+\d{4}XXXX, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 197 { 198 pattern: /'To' number cannot be a Short Code: \+61\d{2}XXXX/i, 199 label: 'Invalid Short Code Phone Number', 200 }, 201 { 202 pattern: /'To' number cannot be a Short Code: \+\d{4}XXXX/i, 203 label: 'Invalid Short Code Phone Number', 204 }, 205 { 206 pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i, 207 label: 'Invalid Short Code Number', 208 }, 209 { 210 pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i, 211 label: 'Invalid Short Code Number', 212 }, 213 { 214 pattern: /'To' number cannot be a Short Code: \+61(490|472)XXXX/i, 215 label: 'Invalid Short Code Number', 216 }, 217 { 218 pattern: /Invalid E\.164: starts with \+0 \(\+0\d+\)/i, 219 label: 'Invalid Phone Number Zero Prefix', 220 }, 221 { 222 pattern: /Short code detected — not a mobile number \(\+\d{8}\)/i, 223 label: 'Short Code Phone Number', 224 }, 225 { pattern: /Outreach #\d+ is for \w+, not \w+/i, label: 'Channel mismatch' }, 226 { pattern: /Contact email is a placeholder/i, label: 'Placeholder email' }, 227 { pattern: /Toll-free number — cannot receive SMS/i, label: 'Toll-free number' }, 228 { pattern: /cannot be a Short Code/i, label: 'Short code number' }, 229 { 230 pattern: /(placeholder|test).*(email|address).*(permanently|exclude)/i, 231 label: 'Placeholder/Test Email Permanently Excluded', 232 }, 233 { pattern: /email.*on.*exclusion list/i, label: 'Email Exclusion List' }, 234 { pattern: /Too short to be a real phone number \(\+\d+\)/i, label: 'Invalid Phone Number - Too Short' }, 235 { pattern: /\+1\d{3}555\d{4}/i, label: 'US 555 Placeholder Number' }, 236 { pattern: /^Authentication Error\s*-\s*invalid username$/i, label: 'Auth Error - Invalid Username' }, 237 { pattern: /'From' phone number provided.*is not a valid message-capable Twilio phone number/i, label: 'Twilio Invalid Sender Number' }, 238 { pattern: /^US 555 placeholder number/i, label: 'Test Number Detection - 555' }, 239 ]; 240 241 const OUTREACH_RETRIABLE_PATTERNS = [ 242 { 243 pattern: /under.?construction|coming soon|site maintenance|parked domain/i, 244 label: 'Under construction', 245 }, 246 { pattern: /business hours/i, label: 'Business hours block' }, 247 { pattern: /status.?code 429|rate limit/i, label: 'Rate limited' }, 248 { pattern: /Timeout/i, label: 'Form timeout' }, 249 { pattern: /net::ERR_NAME_NOT_RESOLVED/i, label: 'DNS failure' }, 250 { pattern: /per.recipient cooldown/i, label: 'Per-recipient cooldown' }, 251 { pattern: /Breaker is open/i, label: 'Circuit breaker open' }, 252 { pattern: /landline/i, label: 'Landline number' }, 253 { 254 pattern: /browser.*crash|Target page.*closed|browser has been closed/i, 255 label: 'Browser crash', 256 }, 257 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' }, 258 { 259 pattern: 260 /Invalid Resend API response: .*"statusCode":429,"message":"You have reached your daily email sending quota"/i, 261 label: 'Daily Email Quota Exceeded', 262 }, 263 { 264 pattern: /Form page failed to load.*net::ERR_NAME_NOT_RESOLVED/i, 265 label: 'Form page DNS failure', 266 }, 267 { 268 pattern: /Form page failed to load/i, 269 label: 'Form page load failure', 270 }, 271 { 272 pattern: /Invalid Resend API response:.+"statusCode":429,.+"daily_quota_exceeded"/i, 273 label: 'Email Sending Quota Exceeded', 274 }, 275 { 276 pattern: /Invalid Resend API response:.*"statusCode"\s*:\s*429/i, 277 label: 'Resend Rate Limited', 278 }, 279 { pattern: /Timed out after \d+ms/i, label: 'Request Timeout' }, 280 { pattern: /zb_unknown:/i, label: 'ZeroBounce unknown (re-check pending)' }, 281 { 282 pattern: /No local phone number for country \w+ — parked/i, 283 label: 'No local phone number (parked)', 284 }, 285 { 286 pattern: /server error|internal server error|status.?code[: ]+5\d{2}|HTTP 5\d{2}|form.*submission.*failed|submission.*error/i, 287 label: 'Server/submission error (transient)', 288 }, 289 ]; 290 291 /** 292 * Suggested retry_at offsets (in seconds) for known retriable error patterns. 293 * Used by outreach send functions to set retry_at when transitioning to retry_later. 294 * 295 * Rationale: 296 * Rate limited (429) → 1 hour (most APIs reset hourly) 297 * Form timeout → 30 min (transient load spike) 298 * DNS failure → 24 hours (DNS outage / propagation) 299 * Business hours block → 8 hours (conservative; covers most time zones) 300 * Per-recipient cooldown → 72 hours (full cooldown window) 301 * Browser crash → 15 min (process restart window) 302 * Network error → 1 hour (general network recovery) 303 * Unknown retriable → 1 hour (safe default) 304 */ 305 export const OUTREACH_RETRY_INTERVALS = [ 306 { 307 pattern: /under.?construction|coming soon|site maintenance|parked domain/i, 308 seconds: 604800, 309 label: 'Under construction', 310 }, // 7 days 311 { pattern: /status.?code 429|rate limit/i, seconds: 3600, label: 'Rate limited' }, 312 { pattern: /Timeout/i, seconds: 1800, label: 'Form timeout' }, 313 { pattern: /net::ERR_NAME_NOT_RESOLVED/i, seconds: 86400, label: 'DNS failure' }, 314 { pattern: /business hours/i, seconds: 28800, label: 'Business hours' }, 315 { pattern: /per.recipient cooldown/i, seconds: 259200, label: 'Per-recipient cooldown' }, 316 { 317 pattern: /browser.*crash|Target page.*closed|browser has been closed/i, 318 seconds: 900, 319 label: 'Browser crash', 320 }, 321 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, seconds: 3600, label: 'Network error' }, 322 ]; 323 324 /** 325 * Compute retry_at datetime string for a retriable error message. 326 * Returns an ISO-8601 string (UTC) for use in SQLite DATETIME fields. 327 * Falls back to 1 hour if no pattern matches. 328 * 329 * @param {string} errorMessage 330 * @returns {string} ISO datetime string 331 */ 332 export function computeRetryAt(errorMessage) { 333 const msg = errorMessage || ''; 334 for (const { pattern, seconds } of OUTREACH_RETRY_INTERVALS) { 335 if (pattern.test(msg)) { 336 return new Date(Date.now() + seconds * 1000).toISOString(); 337 } 338 } 339 // Default: 1 hour 340 return new Date(Date.now() + 3600 * 1000).toISOString(); 341 } 342 343 /** 344 * Determine whether an outreach error is retriable (should become retry_later) 345 * or terminal (should become failed). 346 * 347 * @param {string|null} errorMessage 348 * @returns {boolean} true if error is retriable 349 */ 350 export function isOutreachRetriable(errorMessage) { 351 const { group } = categorizeError(errorMessage, 'outreach'); 352 return group === 'retriable'; 353 } 354 355 // markOutreachResult() was removed — it used SQLite .prepare().run() and had no active 356 // callers. Each outreach module (email.js, sms.js, form.js, linkedin.js, x.js) has an 357 // inline async markOutreachResultAsync() that uses the PG run() helper from db.js. 358 359 // ────────────────────────────────────────────────────────────────────────────── 360 // Core categorization function 361 // ────────────────────────────────────────────────────────────────────────────── 362 363 /** 364 * Categorize an error message into a group and label. 365 * @param {string|null} errorMessage 366 * @param {'site'|'outreach'} context 367 * @returns {{ group: 'terminal'|'retriable'|'unknown', label: string }} 368 */ 369 export function categorizeError(errorMessage, context = 'site') { 370 if (!errorMessage || errorMessage.trim() === '') { 371 return { group: 'retriable', label: 'Unknown (no error stored)' }; 372 } 373 374 const terminalPatterns = 375 context === 'outreach' ? OUTREACH_TERMINAL_PATTERNS : SITE_TERMINAL_PATTERNS; 376 const retriablePatterns = 377 context === 'outreach' ? OUTREACH_RETRIABLE_PATTERNS : SITE_RETRIABLE_PATTERNS; 378 379 for (const { pattern, label } of terminalPatterns) { 380 if (pattern.test(errorMessage)) { 381 return { group: 'terminal', label }; 382 } 383 } 384 385 for (const { pattern, label } of retriablePatterns) { 386 if (pattern.test(errorMessage)) { 387 return { group: 'retriable', label }; 388 } 389 } 390 391 return { group: 'unknown', label: 'Unknown' }; 392 } 393 394 // ────────────────────────────────────────────────────────────────────────────── 395 // Tree builders 396 // ────────────────────────────────────────────────────────────────────────────── 397 398 /** 399 * Pipeline stage order for display 400 */ 401 const SITE_STATUS_ORDER = [ 402 'found', 403 'ignored', 404 'assets_captured', 405 'prog_scored', 406 'semantic_scored', 407 'vision_scored', 408 'enriched', 409 'enriched_regex', 410 'enriched_llm', 411 'proposals_drafted', 412 'outreach_partial', 413 'outreach_sent', 414 'high_score', 415 'failing', 416 ]; 417 418 const OUTREACH_STATUS_ORDER = [ 419 'pending', 420 'approved', 421 'scheduled', 422 'sent', 423 'delivered', 424 'opened', 425 'clicked', 426 'replied', 427 'rework', 428 'retry_later', 429 'failed', 430 'bounced', 431 'gdpr_blocked', 432 'rejected', 433 ]; 434 435 /** 436 * Pipeline stages in order (index = pipeline depth). 437 * For a given status, all statuses at higher indexes are "downstream" — 438 * sites that passed through this status but have since advanced further. 439 */ 440 const PIPELINE_STAGE_DEPTH = [ 441 'found', 442 'ignored', 443 'assets_captured', 444 'prog_scored', 445 'semantic_scored', 446 'vision_scored', 447 'enriched', 448 'enriched_regex', 449 'enriched_llm', 450 'proposals_drafted', 451 'outreach_partial', 452 'outreach_sent', 453 ]; 454 455 /** 456 * Build status tree for sites with error breakdowns and throughput. 457 * @returns {Promise<Array>} status tree 458 */ 459 export async function buildStatusTree() { 460 // Split into separate queries to avoid full table scan with conditional COUNTs. 461 462 // 1. Totals by status (uses idx_sites_status) 463 const totalRows = await getAll('SELECT status, COUNT(*) AS total FROM sites GROUP BY status'); 464 465 // 2. Deltas: sites that entered this status recently AND are still here. 466 // Uses site_status transition log joined to sites current status. 467 // Avoids the inflation from sites.updated_at which gets bumped by updateLastOutreachAt() 468 // and other non-status-change updates (would show 2.3x actual throughput for outreach_sent). 469 // Rewritten to avoid COUNT(DISTINCT) full JOIN on 1.3M×1.2M rows. 470 // Subquery filters site_status by time (uses idx_site_status_created_at) first, 471 // then joins the small result set to sites for current-status confirmation. 472 const delta24hRows = await getAll( 473 `SELECT recent.status, COUNT(*) AS delta 474 FROM ( 475 SELECT DISTINCT site_id, status FROM site_status 476 WHERE created_at > NOW() - INTERVAL '24 hours' 477 ) recent 478 JOIN sites s ON recent.site_id = s.id AND s.status = recent.status 479 GROUP BY recent.status` 480 ); 481 const delta1hRows = await getAll( 482 `SELECT recent.status, COUNT(*) AS delta 483 FROM ( 484 SELECT DISTINCT site_id, status FROM site_status 485 WHERE created_at > NOW() - INTERVAL '1 hour' 486 ) recent 487 JOIN sites s ON recent.site_id = s.id AND s.status = recent.status 488 GROUP BY recent.status` 489 ); 490 491 // Merge into topLevel shape 492 const d24map = Object.fromEntries(delta24hRows.map(r => [r.status, Number(r.delta)])); 493 const d1map = Object.fromEntries(delta1hRows.map(r => [r.status, Number(r.delta)])); 494 const topLevel = totalRows.map(r => ({ 495 status: r.status, 496 total: Number(r.total), 497 delta_24h: d24map[r.status] || 0, 498 delta_1h: d1map[r.status] || 0, 499 })); 500 501 // Build a lookup of total per status for cumulative calculation 502 const totalByStatus = {}; 503 for (const row of topLevel) { 504 totalByStatus[row.status] = row.total; 505 } 506 507 // Throughput from site_status transition log (split queries) 508 const thruTotal = await getAll('SELECT status, COUNT(*) AS total FROM site_status GROUP BY status'); 509 const thru24h = await getAll( 510 `SELECT status, COUNT(*) AS cnt 511 FROM site_status WHERE created_at > NOW() - INTERVAL '24 hours' 512 GROUP BY status` 513 ); 514 const thru1h = await getAll( 515 `SELECT status, COUNT(*) AS cnt 516 FROM site_status WHERE created_at > NOW() - INTERVAL '1 hour' 517 GROUP BY status` 518 ); 519 520 const thru24map = Object.fromEntries(thru24h.map(r => [r.status, Number(r.cnt)])); 521 const thru1map = Object.fromEntries(thru1h.map(r => [r.status, Number(r.cnt)])); 522 523 const throughputByStatus = {}; 524 for (const row of thruTotal) { 525 throughputByStatus[row.status] = { 526 thru_24h: thru24map[row.status] || 0, 527 thru_1h: thru1map[row.status] || 0, 528 }; 529 } 530 531 // Error breakdown for failing only (ignore has 800K+ rows dominated by known categories 532 // like "Social media platform" / cross-border duplicates — not actionable in status view). 533 // Fetch raw rows and group in JS to avoid slow PG substr() GROUP BY on large table. 534 const ERR_PREFIX = 200; 535 const failingRaw = await getAll(`SELECT error_message FROM sites WHERE status = 'failing'`); 536 const failingMap = {}; 537 for (const r of failingRaw) { 538 const key = (r.error_message || '').substring(0, ERR_PREFIX); 539 failingMap[key] = (failingMap[key] || 0) + 1; 540 } 541 const errorRows = Object.entries(failingMap).map(([error_message, total]) => ({ 542 status: 'failing', 543 error_message, 544 total, 545 })); 546 547 // Group error rows by status 548 const errorsByStatus = {}; 549 for (const row of errorRows) { 550 if (!errorsByStatus[row.status]) errorsByStatus[row.status] = []; 551 errorsByStatus[row.status].push(row); 552 } 553 554 // Build result in pipeline order 555 const statusMap = {}; 556 for (const row of topLevel) { 557 statusMap[row.status] = row; 558 } 559 560 const tree = []; 561 const seen = new Set(); 562 563 for (const status of SITE_STATUS_ORDER) { 564 if (statusMap[status]) { 565 seen.add(status); 566 const row = statusMap[status]; 567 568 // Cumulative = this status + all downstream pipeline stages 569 const myDepth = PIPELINE_STAGE_DEPTH.indexOf(status); 570 let cumulative = null; 571 if (myDepth !== -1) { 572 cumulative = row.total; 573 for (let d = myDepth + 1; d < PIPELINE_STAGE_DEPTH.length; d++) { 574 cumulative += totalByStatus[PIPELINE_STAGE_DEPTH[d]] || 0; 575 } 576 // Only show cumulative when it differs from total (i.e. there are downstream sites) 577 if (cumulative === row.total) cumulative = null; 578 } 579 580 const thru = throughputByStatus[status] || { thru_24h: 0, thru_1h: 0 }; 581 const entry = { 582 status, 583 total: row.total, 584 cumulative, 585 delta_24h: row.delta_24h, 586 delta_1h: row.delta_1h, 587 thru_24h: thru.thru_24h, 588 thru_1h: thru.thru_1h, 589 children: null, 590 }; 591 592 if (errorsByStatus[status]) { 593 entry.children = buildErrorChildren(errorsByStatus[status], 'site'); 594 } 595 596 tree.push(entry); 597 } 598 } 599 600 // Any statuses not in our order list go at the end 601 for (const row of topLevel) { 602 if (!seen.has(row.status)) { 603 const thru = throughputByStatus[row.status] || { thru_24h: 0, thru_1h: 0 }; 604 tree.push({ 605 status: row.status, 606 total: row.total, 607 cumulative: null, 608 delta_24h: row.delta_24h, 609 delta_1h: row.delta_1h, 610 thru_24h: thru.thru_24h, 611 thru_1h: thru.thru_1h, 612 children: null, 613 }); 614 } 615 } 616 617 return tree; 618 } 619 620 /** 621 * Build status tree for outbound messages with error breakdowns. 622 * Combines approval_status and delivery_status into a synthetic display status: 623 * - If delivery_status is set, use it 624 * - Otherwise, use approval_status 625 * @returns {Promise<Array>} outreach status tree 626 */ 627 export async function buildOutreachTree() { 628 const STATUS_EXPR = `COALESCE(delivery_status, approval_status)`; 629 const BASE_WHERE = `direction = 'outbound'`; 630 631 // Split queries to avoid full table scans with conditional COUNTs. 632 633 // 1. Totals 634 const totalRows = await getAll( 635 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS total 636 FROM messages WHERE ${BASE_WHERE} 637 GROUP BY status` 638 ); 639 // Use updated_at for all delta queries — tracks the most recent status change. 640 const d24Rows = await getAll( 641 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta 642 FROM messages 643 WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '24 hours' 644 GROUP BY status` 645 ); 646 const d1Rows = await getAll( 647 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta 648 FROM messages 649 WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '1 hour' 650 GROUP BY status` 651 ); 652 const d24map = Object.fromEntries(d24Rows.map(r => [r.status, Number(r.delta)])); 653 const d1map = Object.fromEntries(d1Rows.map(r => [r.status, Number(r.delta)])); 654 const topLevel = totalRows.map(r => ({ 655 status: r.status, 656 total: Number(r.total), 657 delta_24h: d24map[r.status] || 0, 658 delta_1h: d1map[r.status] || 0, 659 })); 660 661 // 2. Channel breakdown for pending, rework, approved + sent (split queries) 662 const chanStatuses = `('pending', 'rework', 'approved', 'sent', 'delivered', 'opened', 'clicked', 'replied')`; 663 const chanTotals = await getAll( 664 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS total 665 FROM messages WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 666 GROUP BY status, contact_method` 667 ); 668 const chanD24 = await getAll( 669 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta 670 FROM messages 671 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 672 AND updated_at > NOW() - INTERVAL '24 hours' 673 GROUP BY status, contact_method` 674 ); 675 const chanD1 = await getAll( 676 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta 677 FROM messages 678 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 679 AND updated_at > NOW() - INTERVAL '1 hour' 680 GROUP BY status, contact_method` 681 ); 682 const chanD24map = Object.fromEntries( 683 chanD24.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)]) 684 ); 685 const chanD1map = Object.fromEntries( 686 chanD1.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)]) 687 ); 688 const channelRows = chanTotals.map(r => ({ 689 status: r.status, 690 contact_method: r.contact_method, 691 total: Number(r.total), 692 delta_24h: chanD24map[`${r.status}|${r.contact_method}`] || 0, 693 delta_1h: chanD1map[`${r.status}|${r.contact_method}`] || 0, 694 })); 695 696 // 3. Error breakdown for failed + retry_later (split queries) 697 const errTotals = await getAll( 698 `SELECT ${STATUS_EXPR} AS status, error_message, COUNT(*) AS total 699 FROM messages 700 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ('failed', 'retry_later') 701 GROUP BY status, error_message` 702 ); 703 // delta_24h/delta_1h per error label are not displayed — skip extra queries 704 const errorRows = errTotals.map(r => ({ 705 status: r.status, 706 error_message: r.error_message, 707 total: Number(r.total), 708 })); 709 710 // 4. retry_later: count due now vs still waiting 711 const retryDueRow = await getOne( 712 `SELECT 713 COUNT(*) AS due_now, 714 MIN(retry_at) AS next_retry_at 715 FROM messages 716 WHERE ${BASE_WHERE} AND delivery_status = 'retry_later'` 717 ) || { due_now: 0, next_retry_at: null }; 718 const retryStats = { 719 due_now: Number(retryDueRow.due_now), 720 next_retry_at: retryDueRow.next_retry_at, 721 }; 722 723 // Group channel rows by status 724 const channelsByStatus = {}; 725 for (const row of channelRows) { 726 if (!channelsByStatus[row.status]) channelsByStatus[row.status] = []; 727 channelsByStatus[row.status].push(row); 728 } 729 730 const statusMap = {}; 731 for (const row of topLevel) { 732 statusMap[row.status] = row; 733 } 734 735 const tree = []; 736 const seen = new Set(); 737 738 for (const status of OUTREACH_STATUS_ORDER) { 739 if (statusMap[status]) { 740 seen.add(status); 741 const row = statusMap[status]; 742 const entry = { 743 status, 744 total: row.total, 745 delta_24h: row.delta_24h, 746 delta_1h: row.delta_1h, 747 children: null, 748 }; 749 750 if (channelsByStatus[status]) { 751 // Channel breakdown (sorted by total desc) 752 entry.children = { 753 type: 'channels', 754 rows: channelsByStatus[status] 755 .sort((a, b) => b.total - a.total) 756 .map(r => ({ 757 label: r.contact_method, 758 total: r.total, 759 delta_24h: r.delta_24h, 760 delta_1h: r.delta_1h, 761 })), 762 }; 763 } else if (status === 'failed') { 764 entry.children = buildErrorChildren( 765 errorRows.filter(r => r.status === 'failed'), 766 'outreach' 767 ); 768 } else if (status === 'retry_later') { 769 entry.retryStats = retryStats; 770 entry.children = buildErrorChildren( 771 errorRows.filter(r => r.status === 'retry_later'), 772 'outreach' 773 ); 774 } 775 776 tree.push(entry); 777 } 778 } 779 780 // Any statuses not in our order list 781 for (const row of topLevel) { 782 if (!seen.has(row.status)) { 783 tree.push({ 784 status: row.status, 785 total: row.total, 786 delta_24h: row.delta_24h, 787 delta_1h: row.delta_1h, 788 children: null, 789 }); 790 } 791 } 792 793 return tree; 794 } 795 796 // ────────────────────────────────────────────────────────────────────────────── 797 // Helpers 798 // ────────────────────────────────────────────────────────────────────────────── 799 800 /** 801 * Collapse and categorize error rows into retriable/terminal/unknown groups. 802 * Cross-border duplicates are folded into a single entry. 803 * @param {Array} rows - DB rows with error_message, total, delta_24h, delta_1h 804 * @param {'site'|'outreach'} context 805 * @returns {{ type: 'errors', retriable: Array, terminal: Array, unknown: Array }} 806 */ 807 function buildErrorChildren(rows, context) { 808 const retriable = {}; 809 const terminal = {}; 810 const unknown = {}; 811 812 for (const row of rows) { 813 const { group, label } = categorizeError(row.error_message, context); 814 const bucket = group === 'terminal' ? terminal : group === 'retriable' ? retriable : unknown; 815 816 if (!bucket[label]) { 817 bucket[label] = { label, total: 0, delta_24h: 0, delta_1h: 0 }; 818 } 819 bucket[label].total += row.total; 820 bucket[label].delta_24h += row.delta_24h; 821 bucket[label].delta_1h += row.delta_1h; 822 } 823 824 const toSortedArray = obj => Object.values(obj).sort((a, b) => b.total - a.total); 825 826 return { 827 type: 'errors', 828 retriable: toSortedArray(retriable), 829 terminal: toSortedArray(terminal), 830 unknown: toSortedArray(unknown), 831 }; 832 } 833 834 // ────────────────────────────────────────────────────────────────────────────── 835 // Conversations tree 836 // ────────────────────────────────────────────────────────────────────────────── 837 838 const SENTIMENT_ORDER = ['positive', 'neutral', 'negative', 'objection']; 839 840 /** 841 * Build inbound messages summary grouped by sentiment, with intent breakdown. 842 * @returns {Promise<Array>} sentiment rows with intent children 843 */ 844 export async function buildConversationsTree() { 845 const topLevel = await getAll( 846 `SELECT 847 COALESCE(sentiment, 'unknown') AS sentiment, 848 COUNT(*) AS total, 849 COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h, 850 COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h 851 FROM messages 852 WHERE direction = 'inbound' 853 GROUP BY sentiment` 854 ); 855 856 const intentRows = await getAll( 857 `SELECT 858 COALESCE(sentiment, 'unknown') AS sentiment, 859 COALESCE(intent, 'unknown') AS intent, 860 COUNT(*) AS total, 861 COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h, 862 COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h 863 FROM messages 864 WHERE direction = 'inbound' 865 GROUP BY sentiment, intent` 866 ); 867 868 // PG COUNT() returns bigint strings — coerce to Number throughout 869 const intentsBySentiment = {}; 870 for (const row of intentRows) { 871 if (!intentsBySentiment[row.sentiment]) intentsBySentiment[row.sentiment] = []; 872 intentsBySentiment[row.sentiment].push({ 873 ...row, 874 total: Number(row.total), 875 delta_24h: Number(row.delta_24h), 876 delta_1h: Number(row.delta_1h), 877 }); 878 } 879 880 const statusMap = {}; 881 for (const row of topLevel) { 882 statusMap[row.sentiment] = { 883 ...row, 884 total: Number(row.total), 885 delta_24h: Number(row.delta_24h), 886 delta_1h: Number(row.delta_1h), 887 }; 888 } 889 890 const tree = []; 891 const seen = new Set(); 892 893 for (const sentiment of SENTIMENT_ORDER) { 894 if (statusMap[sentiment]) { 895 seen.add(sentiment); 896 const row = statusMap[sentiment]; 897 const intents = (intentsBySentiment[sentiment] || []) 898 .sort((a, b) => b.total - a.total) 899 .map(r => ({ 900 label: r.intent, 901 total: r.total, 902 delta_24h: r.delta_24h, 903 delta_1h: r.delta_1h, 904 })); 905 tree.push({ 906 sentiment, 907 total: row.total, 908 delta_24h: row.delta_24h, 909 delta_1h: row.delta_1h, 910 intents, 911 }); 912 } 913 } 914 915 // Any sentiments not in our order list 916 for (const row of topLevel) { 917 if (!seen.has(row.sentiment)) { 918 tree.push({ 919 sentiment: row.sentiment, 920 total: row.total, 921 delta_24h: row.delta_24h, 922 delta_1h: row.delta_1h, 923 intents: (intentsBySentiment[row.sentiment] || []) 924 .sort((a, b) => b.total - a.total) 925 .map(r => ({ 926 label: r.intent, 927 total: r.total, 928 delta_24h: r.delta_24h, 929 delta_1h: r.delta_1h, 930 })), 931 }); 932 } 933 } 934 935 return tree; 936 }