error-categories.js
1 /** 2 * Error Categorization Module 3 * Regex-based categorization of site and outreach error messages. 4 * Groups errors into terminal (won't succeed on retry) and retriable. 5 */ 6 7 import { getAll, getOne } from './db.js'; 8 9 // ────────────────────────────────────────────────────────────────────────────── 10 // Pattern tables 11 // ────────────────────────────────────────────────────────────────────────────── 12 13 const SITE_TERMINAL_PATTERNS = [ 14 { pattern: /Social media platform/i, label: 'Social media' }, 15 { pattern: /Business directory/i, label: 'Business directory' }, 16 { pattern: /Not a local business/i, label: 'Not a local business' }, 17 { pattern: /Home service franchise/i, label: 'Franchise' }, 18 { pattern: /Government/i, label: 'Government domain' }, 19 { pattern: /Education domain/i, label: 'Education domain' }, 20 { pattern: /Non-commercial/i, label: 'Non-commercial' }, 21 { pattern: /Cross-border duplicate/i, label: 'Cross-border duplicate' }, 22 { pattern: /Duplicate domain/i, label: 'Duplicate domain' }, 23 { pattern: /Country mismatch/i, label: 'Country mismatch' }, 24 { pattern: /No email template/i, label: 'No template' }, 25 { pattern: /HTTP 404.*[Cc]annot capture/i, label: 'HTTP 404 (page gone)' }, 26 { pattern: /Max recapture attempts/i, label: 'Max recapture retries' }, 27 { pattern: /Ignored: Regulated industry \(.*\)/i, label: 'Regulated Industry Exclusion' }, 28 { 29 pattern: /Max retries \(\d+\) exceeded: No contacts or key_pages found/i, 30 label: 'No Contacts Found', 31 }, 32 { pattern: /Ignored: Regulated industry \(\w+\)/i, label: 'Regulated Industry Exclusion' }, 33 { 34 pattern: /Ignored: (Regulated industry|Law firm|Legal site detected).*/i, 35 label: 'Restricted Industry', 36 }, 37 { 38 pattern: 39 /(Page|The page) (not found|does not exist|you're looking for doesn't exist|appears to be an error page|is a security verification screen|indicates an error).*/i, 40 label: 'Page Not Found', 41 }, 42 { 43 pattern: 44 /Max retries \(\d+\) exceeded: (url\.toLowerCase is not a function|HTTP \d+ - Cannot capture assets for error response)/i, 45 label: 'Capture Processing Error', 46 }, 47 { pattern: /The page with the name '.*' could not be found/i, label: 'Page Not Found' }, 48 { 49 pattern: 50 /Max retries \(\d+\) exceeded: LLM daily budget exceeded: \$\d+\.\d+ spent today \(budget: \$\d+\)/i, 51 label: 'LLM Daily Budget Exceeded', 52 }, 53 { 54 pattern: /The page displays a 'no posts matched your criteria' message/i, 55 label: 'No Posts Found', 56 }, 57 { 58 pattern: 59 /Access Denied|Access to the page (is forbidden|was denied)|access denied.*HTTP Error 403/i, 60 label: 'Access Denied', 61 }, 62 { pattern: /False-positive error page/i, label: 'Error page (false positive)' }, 63 { pattern: /CHECK constraint failed/i, label: 'DB constraint error' }, 64 { pattern: /^(Franchise site|Social media:\s+\S+)$/i, label: 'Site Filter - Franchise/Social Media' }, 65 { pattern: /Max retries.*could not determine data type of parameter/i, label: 'Database Parameter Type Mismatch' }, 66 { pattern: /^Access forbidden$/i, label: 'HTTP 403 Forbidden' }, 67 { pattern: /^Permanent error: 410$/i, label: 'HTTP 410 Gone' }, 68 ]; 69 70 const SITE_RETRIABLE_PATTERNS = [ 71 { pattern: /HTTP 403.*[Cc]annot capture/i, label: 'HTTP 403 (access denied)' }, 72 { pattern: /userDataDir|launchPersistentContext/i, label: 'Browser launch conflict' }, 73 { pattern: /status code 400/i, label: 'API bad request' }, 74 { pattern: /status code 401/i, label: 'API auth error' }, 75 { pattern: /status code 429/i, label: 'Rate limited' }, 76 { pattern: /Incomplete LLM response/i, label: 'Incomplete LLM' }, 77 { pattern: /Timed out|Timeout/i, label: 'Timeout' }, 78 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' }, 79 { pattern: /navigating and changing/i, label: 'Page navigating' }, 80 { pattern: /browser has been closed/i, label: 'Browser crashed' }, 81 { pattern: /EACCES/i, label: 'Permission denied' }, 82 { pattern: /screenshot_path is NULL/i, label: 'Missing screenshot' }, 83 { pattern: /database is locked/i, label: 'DB lock' }, 84 { pattern: /chromium-nice ENOENT/i, label: 'Chromium not found' }, 85 { pattern: /HTTP (5\d\d|null)/i, label: 'Server error' }, 86 { 87 pattern: 88 /Max retries \(3\) exceeded: (No contacts or key_pages found|Country code is required|Failed to parse JSON response)/i, 89 label: 'Max Retries Extraction Failure', 90 }, 91 { 92 pattern: 93 /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_TOO_MANY_REDIRECTS|net::ERR_TIMED_OUT|net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH).*/i, 94 label: 'Page Navigation Error', 95 }, 96 { 97 pattern: /(Website is currently not reachable|No content available for evaluation|directory)/i, 98 label: 'Site Unreachable', 99 }, 100 { 101 pattern: 102 /Max retries \(\d+\) exceeded: page\.goto: (net::ERR_SSL_UNRECOGNIZED_NAME_ALERT|net::ERR_SSL_PROTOCOL_ERROR) at https?:/i, 103 label: 'SSL Connection Error', 104 }, 105 { 106 pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_NAME_NOT_RESOLVED/i, 107 label: 'DNS / domain not found', 108 }, 109 { 110 pattern: 111 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CERT_(DATE_INVALID|COMMON_NAME_INVALID|AUTHORITY_INVALID)/i, 112 label: 'Invalid SSL certificate', 113 }, 114 { 115 pattern: 116 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_(CONNECTION_REFUSED|CONNECTION_CLOSED|CONNECTION_RESET|SOCKET_NOT_CONNECTED|ADDRESS_UNREACHABLE|ABORTED)/i, 117 label: 'Connection refused / closed', 118 }, 119 { 120 pattern: 121 /Max retries \(\d+\) exceeded: page\.goto: net::ERR_HTTP(2_PROTOCOL_ERROR|_RESPONSE_CODE_FAILURE)/i, 122 label: 'HTTP protocol error', 123 }, 124 { 125 pattern: /Max retries \(\d+\) exceeded: page\.goto: (Download is starting|net::ERR_ABORTED)/i, 126 label: 'URL is a file download', 127 }, 128 { 129 pattern: /Max retries \(\d+\) exceeded: page\.goto: Protocol error/i, 130 label: 'Browser protocol error', 131 }, 132 { 133 pattern: /Max retries \(\d+\) exceeded: page\.addStyleTag:.*Content Security Policy/i, 134 label: 'CSP blocks screenshot', 135 }, 136 { 137 pattern: /Max retries \(\d+\) exceeded: page\.(addStyleTag|evaluate): TypeError/i, 138 label: 'Page JS error', 139 }, 140 { 141 pattern: 142 /Max retries \(3\) exceeded: page\.goto: (net::ERR_INVALID_AUTH_CREDENTIALS|Page crashed|HTTP \d{3}:)/i, 143 label: 'Page Load Failure', 144 }, 145 { pattern: /Max retries \(3\) exceeded: HTTP (700|405):/i, label: 'HTTP Max Retries Exceeded' }, 146 { 147 pattern: /Max retries \(\d+\) exceeded: page\.goto: net::ERR_CONNECTION_TIMED_OUT/i, 148 label: 'Connection timed out', 149 }, 150 { 151 pattern: /Max retries \(\d+\) exceeded: page\.content: Target crashed/i, 152 label: 'Browser crashed (target)', 153 }, 154 { 155 pattern: /Max retries \(\d+\) exceeded: Request failed with status code 5\d\d/i, 156 label: 'Server error (5xx)', 157 }, 158 ]; 159 160 const OUTREACH_TERMINAL_PATTERNS = [ 161 { pattern: /opted out/i, label: 'Opted out' }, 162 { pattern: /Fake.*phone|invalid.*phone/i, label: 'Invalid phone' }, 163 { pattern: /gdpr_blocked/i, label: 'GDPR blocked' }, 164 { pattern: /Cloudflare.*blocked/i, label: 'Cloudflare blocked' }, 165 { pattern: /ZeroBounce/i, label: 'Invalid email (ZeroBounce)' }, 166 { pattern: /gov_blocked/i, label: 'Gov domain blocked' }, 167 { pattern: /no_message_button/i, label: 'No DM button' }, 168 { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 169 { 170 pattern: /Permission to send an SMS has not been enabled for the region.*/i, 171 label: 'SMS Region Blocked', 172 }, 173 { 174 pattern: /No Twilio number configured for country \w+ and TWILIO_PHONE_NUMBER not set/i, 175 label: 'Twilio Number Configuration Missing', 176 }, 177 { pattern: /'To' number: \+\d+, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 178 { pattern: /\+\d{10,12}X{3,4} is not a valid mobile number/i, label: 'Invalid Phone Number' }, 179 { 180 pattern: /'To' number: \+\d{11}XXXX, is not a valid mobile number/i, 181 label: 'Invalid Mobile Number', 182 }, 183 { 184 pattern: /'To' number cannot be a Short Code: \+\d{3}XXXX/i, 185 label: 'Invalid Short Code Number', 186 }, 187 { 188 pattern: /'From' \+\d{11} is not a Twilio phone number or Short Code country mismatch/i, 189 label: 'Invalid Twilio Phone Number', 190 }, 191 { 192 pattern: /'To' number: \+61\d{9}XXXX, is not a valid mobile number/i, 193 label: 'Invalid Mobile Number', 194 }, 195 { pattern: /\+?\d{10,12}XXXX is not a valid mobile number/i, label: 'Invalid Phone Number' }, 196 { pattern: /\+\d+\d{4}XXXX, is not a valid mobile number/i, label: 'Invalid Phone Number' }, 197 { 198 pattern: /'To' number cannot be a Short Code: \+61\d{2}XXXX/i, 199 label: 'Invalid Short Code Phone Number', 200 }, 201 { 202 pattern: /'To' number cannot be a Short Code: \+\d{4}XXXX/i, 203 label: 'Invalid Short Code Phone Number', 204 }, 205 { 206 pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i, 207 label: 'Invalid Short Code Number', 208 }, 209 { 210 pattern: /'To' number cannot be a Short Code: \+\d{8,12}/i, 211 label: 'Invalid Short Code Number', 212 }, 213 { 214 pattern: /'To' number cannot be a Short Code: \+61(490|472)XXXX/i, 215 label: 'Invalid Short Code Number', 216 }, 217 { 218 pattern: /Invalid E\.164: starts with \+0 \(\+0\d+\)/i, 219 label: 'Invalid Phone Number Zero Prefix', 220 }, 221 { 222 pattern: /Short code detected — not a mobile number \(\+\d{8}\)/i, 223 label: 'Short Code Phone Number', 224 }, 225 { pattern: /Outreach #\d+ is for \w+, not \w+/i, label: 'Channel mismatch' }, 226 { pattern: /Contact email is a placeholder/i, label: 'Placeholder email' }, 227 { pattern: /Toll-free number — cannot receive SMS/i, label: 'Toll-free number' }, 228 { pattern: /cannot be a Short Code/i, label: 'Short code number' }, 229 { 230 pattern: /(placeholder|test).*(email|address).*(permanently|exclude)/i, 231 label: 'Placeholder/Test Email Permanently Excluded', 232 }, 233 { pattern: /email.*on.*exclusion list/i, label: 'Email Exclusion List' }, 234 { pattern: /Too short to be a real phone number \(\+\d+\)/i, label: 'Invalid Phone Number - Too Short' }, 235 { pattern: /\+1\d{3}555\d{4}/i, label: 'US 555 Placeholder Number' }, 236 { pattern: /^Authentication Error\s*-\s*invalid username$/i, label: 'Auth Error - Invalid Username' }, 237 { pattern: /'From' phone number provided.*is not a valid message-capable Twilio phone number/i, label: 'Twilio Invalid Sender Number' }, 238 { pattern: /^US 555 placeholder number/i, label: 'Test Number Detection - 555' }, 239 ]; 240 241 const OUTREACH_RETRIABLE_PATTERNS = [ 242 { 243 pattern: /under.?construction|coming soon|site maintenance|parked domain/i, 244 label: 'Under construction', 245 }, 246 { pattern: /business hours/i, label: 'Business hours block' }, 247 { pattern: /status.?code 429|rate limit/i, label: 'Rate limited' }, 248 { pattern: /Timeout/i, label: 'Form timeout' }, 249 { pattern: /net::ERR_NAME_NOT_RESOLVED/i, label: 'DNS failure' }, 250 { pattern: /per.recipient cooldown/i, label: 'Per-recipient cooldown' }, 251 { pattern: /Breaker is open/i, label: 'Circuit breaker open' }, 252 { pattern: /landline/i, label: 'Landline number' }, 253 { 254 pattern: /browser.*crash|Target page.*closed|browser has been closed/i, 255 label: 'Browser crash', 256 }, 257 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, label: 'Network error' }, 258 { 259 pattern: 260 /Invalid Resend API response: .*"statusCode":429,"message":"You have reached your daily email sending quota"/i, 261 label: 'Daily Email Quota Exceeded', 262 }, 263 { 264 pattern: /Form page failed to load.*net::ERR_NAME_NOT_RESOLVED/i, 265 label: 'Form page DNS failure', 266 }, 267 { 268 pattern: /Form page failed to load/i, 269 label: 'Form page load failure', 270 }, 271 { 272 pattern: /Invalid Resend API response:.+"statusCode":429,.+"daily_quota_exceeded"/i, 273 label: 'Email Sending Quota Exceeded', 274 }, 275 { 276 pattern: /Invalid Resend API response:.*"statusCode"\s*:\s*429/i, 277 label: 'Resend Rate Limited', 278 }, 279 { pattern: /Timed out after \d+ms/i, label: 'Request Timeout' }, 280 { pattern: /zb_unknown:/i, label: 'ZeroBounce unknown (re-check pending)' }, 281 { 282 pattern: /No local phone number for country \w+ — parked/i, 283 label: 'No local phone number (parked)', 284 }, 285 ]; 286 287 /** 288 * Suggested retry_at offsets (in seconds) for known retriable error patterns. 289 * Used by outreach send functions to set retry_at when transitioning to retry_later. 290 * 291 * Rationale: 292 * Rate limited (429) → 1 hour (most APIs reset hourly) 293 * Form timeout → 30 min (transient load spike) 294 * DNS failure → 24 hours (DNS outage / propagation) 295 * Business hours block → 8 hours (conservative; covers most time zones) 296 * Per-recipient cooldown → 72 hours (full cooldown window) 297 * Browser crash → 15 min (process restart window) 298 * Network error → 1 hour (general network recovery) 299 * Unknown retriable → 1 hour (safe default) 300 */ 301 export const OUTREACH_RETRY_INTERVALS = [ 302 { 303 pattern: /under.?construction|coming soon|site maintenance|parked domain/i, 304 seconds: 604800, 305 label: 'Under construction', 306 }, // 7 days 307 { pattern: /status.?code 429|rate limit/i, seconds: 3600, label: 'Rate limited' }, 308 { pattern: /Timeout/i, seconds: 1800, label: 'Form timeout' }, 309 { pattern: /net::ERR_NAME_NOT_RESOLVED/i, seconds: 86400, label: 'DNS failure' }, 310 { pattern: /business hours/i, seconds: 28800, label: 'Business hours' }, 311 { pattern: /per.recipient cooldown/i, seconds: 259200, label: 'Per-recipient cooldown' }, 312 { 313 pattern: /browser.*crash|Target page.*closed|browser has been closed/i, 314 seconds: 900, 315 label: 'Browser crash', 316 }, 317 { pattern: /ECONNRESET|ETIMEDOUT|ENOTFOUND/i, seconds: 3600, label: 'Network error' }, 318 ]; 319 320 /** 321 * Compute retry_at datetime string for a retriable error message. 322 * Returns an ISO-8601 string (UTC) for use in SQLite DATETIME fields. 323 * Falls back to 1 hour if no pattern matches. 324 * 325 * @param {string} errorMessage 326 * @returns {string} ISO datetime string 327 */ 328 export function computeRetryAt(errorMessage) { 329 const msg = errorMessage || ''; 330 for (const { pattern, seconds } of OUTREACH_RETRY_INTERVALS) { 331 if (pattern.test(msg)) { 332 return new Date(Date.now() + seconds * 1000).toISOString(); 333 } 334 } 335 // Default: 1 hour 336 return new Date(Date.now() + 3600 * 1000).toISOString(); 337 } 338 339 /** 340 * Determine whether an outreach error is retriable (should become retry_later) 341 * or terminal (should become failed). 342 * 343 * @param {string|null} errorMessage 344 * @returns {boolean} true if error is retriable 345 */ 346 export function isOutreachRetriable(errorMessage) { 347 const { group } = categorizeError(errorMessage, 'outreach'); 348 return group === 'retriable'; 349 } 350 351 // markOutreachResult() was removed — it used SQLite .prepare().run() and had no active 352 // callers. Each outreach module (email.js, sms.js, form.js, linkedin.js, x.js) has an 353 // inline async markOutreachResultAsync() that uses the PG run() helper from db.js. 354 355 // ────────────────────────────────────────────────────────────────────────────── 356 // Core categorization function 357 // ────────────────────────────────────────────────────────────────────────────── 358 359 /** 360 * Categorize an error message into a group and label. 361 * @param {string|null} errorMessage 362 * @param {'site'|'outreach'} context 363 * @returns {{ group: 'terminal'|'retriable'|'unknown', label: string }} 364 */ 365 export function categorizeError(errorMessage, context = 'site') { 366 if (!errorMessage || errorMessage.trim() === '') { 367 return { group: 'retriable', label: 'Unknown (no error stored)' }; 368 } 369 370 const terminalPatterns = 371 context === 'outreach' ? OUTREACH_TERMINAL_PATTERNS : SITE_TERMINAL_PATTERNS; 372 const retriablePatterns = 373 context === 'outreach' ? OUTREACH_RETRIABLE_PATTERNS : SITE_RETRIABLE_PATTERNS; 374 375 for (const { pattern, label } of terminalPatterns) { 376 if (pattern.test(errorMessage)) { 377 return { group: 'terminal', label }; 378 } 379 } 380 381 for (const { pattern, label } of retriablePatterns) { 382 if (pattern.test(errorMessage)) { 383 return { group: 'retriable', label }; 384 } 385 } 386 387 return { group: 'unknown', label: 'Unknown' }; 388 } 389 390 // ────────────────────────────────────────────────────────────────────────────── 391 // Tree builders 392 // ────────────────────────────────────────────────────────────────────────────── 393 394 /** 395 * Pipeline stage order for display 396 */ 397 const SITE_STATUS_ORDER = [ 398 'found', 399 'ignored', 400 'assets_captured', 401 'prog_scored', 402 'semantic_scored', 403 'vision_scored', 404 'enriched', 405 'enriched_regex', 406 'enriched_llm', 407 'proposals_drafted', 408 'outreach_partial', 409 'outreach_sent', 410 'high_score', 411 'failing', 412 ]; 413 414 const OUTREACH_STATUS_ORDER = [ 415 'pending', 416 'approved', 417 'scheduled', 418 'sent', 419 'delivered', 420 'opened', 421 'clicked', 422 'replied', 423 'rework', 424 'retry_later', 425 'failed', 426 'bounced', 427 'gdpr_blocked', 428 'rejected', 429 ]; 430 431 /** 432 * Pipeline stages in order (index = pipeline depth). 433 * For a given status, all statuses at higher indexes are "downstream" — 434 * sites that passed through this status but have since advanced further. 435 */ 436 const PIPELINE_STAGE_DEPTH = [ 437 'found', 438 'ignored', 439 'assets_captured', 440 'prog_scored', 441 'semantic_scored', 442 'vision_scored', 443 'enriched', 444 'enriched_regex', 445 'enriched_llm', 446 'proposals_drafted', 447 'outreach_partial', 448 'outreach_sent', 449 ]; 450 451 /** 452 * Build status tree for sites with error breakdowns and throughput. 453 * @returns {Promise<Array>} status tree 454 */ 455 export async function buildStatusTree() { 456 // Split into separate queries to avoid full table scan with conditional COUNTs. 457 458 // 1. Totals by status (uses idx_sites_status) 459 const totalRows = await getAll('SELECT status, COUNT(*) AS total FROM sites GROUP BY status'); 460 461 // 2. Deltas: sites that entered this status recently AND are still here. 462 // Uses site_status transition log joined to sites current status. 463 // Avoids the inflation from sites.updated_at which gets bumped by updateLastOutreachAt() 464 // and other non-status-change updates (would show 2.3x actual throughput for outreach_sent). 465 // Rewritten to avoid COUNT(DISTINCT) full JOIN on 1.3M×1.2M rows. 466 // Subquery filters site_status by time (uses idx_site_status_created_at) first, 467 // then joins the small result set to sites for current-status confirmation. 468 const delta24hRows = await getAll( 469 `SELECT recent.status, COUNT(*) AS delta 470 FROM ( 471 SELECT DISTINCT site_id, status FROM site_status 472 WHERE created_at > NOW() - INTERVAL '24 hours' 473 ) recent 474 JOIN sites s ON recent.site_id = s.id AND s.status = recent.status 475 GROUP BY recent.status` 476 ); 477 const delta1hRows = await getAll( 478 `SELECT recent.status, COUNT(*) AS delta 479 FROM ( 480 SELECT DISTINCT site_id, status FROM site_status 481 WHERE created_at > NOW() - INTERVAL '1 hour' 482 ) recent 483 JOIN sites s ON recent.site_id = s.id AND s.status = recent.status 484 GROUP BY recent.status` 485 ); 486 487 // Merge into topLevel shape 488 const d24map = Object.fromEntries(delta24hRows.map(r => [r.status, Number(r.delta)])); 489 const d1map = Object.fromEntries(delta1hRows.map(r => [r.status, Number(r.delta)])); 490 const topLevel = totalRows.map(r => ({ 491 status: r.status, 492 total: Number(r.total), 493 delta_24h: d24map[r.status] || 0, 494 delta_1h: d1map[r.status] || 0, 495 })); 496 497 // Build a lookup of total per status for cumulative calculation 498 const totalByStatus = {}; 499 for (const row of topLevel) { 500 totalByStatus[row.status] = row.total; 501 } 502 503 // Throughput from site_status transition log (split queries) 504 const thruTotal = await getAll('SELECT status, COUNT(*) AS total FROM site_status GROUP BY status'); 505 const thru24h = await getAll( 506 `SELECT status, COUNT(*) AS cnt 507 FROM site_status WHERE created_at > NOW() - INTERVAL '24 hours' 508 GROUP BY status` 509 ); 510 const thru1h = await getAll( 511 `SELECT status, COUNT(*) AS cnt 512 FROM site_status WHERE created_at > NOW() - INTERVAL '1 hour' 513 GROUP BY status` 514 ); 515 516 const thru24map = Object.fromEntries(thru24h.map(r => [r.status, Number(r.cnt)])); 517 const thru1map = Object.fromEntries(thru1h.map(r => [r.status, Number(r.cnt)])); 518 519 const throughputByStatus = {}; 520 for (const row of thruTotal) { 521 throughputByStatus[row.status] = { 522 thru_24h: thru24map[row.status] || 0, 523 thru_1h: thru1map[row.status] || 0, 524 }; 525 } 526 527 // Error breakdown for failing only (ignore has 800K+ rows dominated by known categories 528 // like "Social media platform" / cross-border duplicates — not actionable in status view). 529 // Fetch raw rows and group in JS to avoid slow PG substr() GROUP BY on large table. 530 const ERR_PREFIX = 200; 531 const failingRaw = await getAll(`SELECT error_message FROM sites WHERE status = 'failing'`); 532 const failingMap = {}; 533 for (const r of failingRaw) { 534 const key = (r.error_message || '').substring(0, ERR_PREFIX); 535 failingMap[key] = (failingMap[key] || 0) + 1; 536 } 537 const errorRows = Object.entries(failingMap).map(([error_message, total]) => ({ 538 status: 'failing', 539 error_message, 540 total, 541 })); 542 543 // Group error rows by status 544 const errorsByStatus = {}; 545 for (const row of errorRows) { 546 if (!errorsByStatus[row.status]) errorsByStatus[row.status] = []; 547 errorsByStatus[row.status].push(row); 548 } 549 550 // Build result in pipeline order 551 const statusMap = {}; 552 for (const row of topLevel) { 553 statusMap[row.status] = row; 554 } 555 556 const tree = []; 557 const seen = new Set(); 558 559 for (const status of SITE_STATUS_ORDER) { 560 if (statusMap[status]) { 561 seen.add(status); 562 const row = statusMap[status]; 563 564 // Cumulative = this status + all downstream pipeline stages 565 const myDepth = PIPELINE_STAGE_DEPTH.indexOf(status); 566 let cumulative = null; 567 if (myDepth !== -1) { 568 cumulative = row.total; 569 for (let d = myDepth + 1; d < PIPELINE_STAGE_DEPTH.length; d++) { 570 cumulative += totalByStatus[PIPELINE_STAGE_DEPTH[d]] || 0; 571 } 572 // Only show cumulative when it differs from total (i.e. there are downstream sites) 573 if (cumulative === row.total) cumulative = null; 574 } 575 576 const thru = throughputByStatus[status] || { thru_24h: 0, thru_1h: 0 }; 577 const entry = { 578 status, 579 total: row.total, 580 cumulative, 581 delta_24h: row.delta_24h, 582 delta_1h: row.delta_1h, 583 thru_24h: thru.thru_24h, 584 thru_1h: thru.thru_1h, 585 children: null, 586 }; 587 588 if (errorsByStatus[status]) { 589 entry.children = buildErrorChildren(errorsByStatus[status], 'site'); 590 } 591 592 tree.push(entry); 593 } 594 } 595 596 // Any statuses not in our order list go at the end 597 for (const row of topLevel) { 598 if (!seen.has(row.status)) { 599 const thru = throughputByStatus[row.status] || { thru_24h: 0, thru_1h: 0 }; 600 tree.push({ 601 status: row.status, 602 total: row.total, 603 cumulative: null, 604 delta_24h: row.delta_24h, 605 delta_1h: row.delta_1h, 606 thru_24h: thru.thru_24h, 607 thru_1h: thru.thru_1h, 608 children: null, 609 }); 610 } 611 } 612 613 return tree; 614 } 615 616 /** 617 * Build status tree for outbound messages with error breakdowns. 618 * Combines approval_status and delivery_status into a synthetic display status: 619 * - If delivery_status is set, use it 620 * - Otherwise, use approval_status 621 * @returns {Promise<Array>} outreach status tree 622 */ 623 export async function buildOutreachTree() { 624 const STATUS_EXPR = `COALESCE(delivery_status, approval_status)`; 625 const BASE_WHERE = `direction = 'outbound'`; 626 627 // Split queries to avoid full table scans with conditional COUNTs. 628 629 // 1. Totals 630 const totalRows = await getAll( 631 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS total 632 FROM messages WHERE ${BASE_WHERE} 633 GROUP BY status` 634 ); 635 // Use updated_at for all delta queries — tracks the most recent status change. 636 const d24Rows = await getAll( 637 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta 638 FROM messages 639 WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '24 hours' 640 GROUP BY status` 641 ); 642 const d1Rows = await getAll( 643 `SELECT ${STATUS_EXPR} AS status, COUNT(*) AS delta 644 FROM messages 645 WHERE ${BASE_WHERE} AND updated_at > NOW() - INTERVAL '1 hour' 646 GROUP BY status` 647 ); 648 const d24map = Object.fromEntries(d24Rows.map(r => [r.status, Number(r.delta)])); 649 const d1map = Object.fromEntries(d1Rows.map(r => [r.status, Number(r.delta)])); 650 const topLevel = totalRows.map(r => ({ 651 status: r.status, 652 total: Number(r.total), 653 delta_24h: d24map[r.status] || 0, 654 delta_1h: d1map[r.status] || 0, 655 })); 656 657 // 2. Channel breakdown for pending, rework, approved + sent (split queries) 658 const chanStatuses = `('pending', 'rework', 'approved', 'sent', 'delivered', 'opened', 'clicked', 'replied')`; 659 const chanTotals = await getAll( 660 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS total 661 FROM messages WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 662 GROUP BY status, contact_method` 663 ); 664 const chanD24 = await getAll( 665 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta 666 FROM messages 667 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 668 AND updated_at > NOW() - INTERVAL '24 hours' 669 GROUP BY status, contact_method` 670 ); 671 const chanD1 = await getAll( 672 `SELECT ${STATUS_EXPR} AS status, contact_method, COUNT(*) AS delta 673 FROM messages 674 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ${chanStatuses} 675 AND updated_at > NOW() - INTERVAL '1 hour' 676 GROUP BY status, contact_method` 677 ); 678 const chanD24map = Object.fromEntries( 679 chanD24.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)]) 680 ); 681 const chanD1map = Object.fromEntries( 682 chanD1.map(r => [`${r.status}|${r.contact_method}`, Number(r.delta)]) 683 ); 684 const channelRows = chanTotals.map(r => ({ 685 status: r.status, 686 contact_method: r.contact_method, 687 total: Number(r.total), 688 delta_24h: chanD24map[`${r.status}|${r.contact_method}`] || 0, 689 delta_1h: chanD1map[`${r.status}|${r.contact_method}`] || 0, 690 })); 691 692 // 3. Error breakdown for failed + retry_later (split queries) 693 const errTotals = await getAll( 694 `SELECT ${STATUS_EXPR} AS status, error_message, COUNT(*) AS total 695 FROM messages 696 WHERE ${BASE_WHERE} AND ${STATUS_EXPR} IN ('failed', 'retry_later') 697 GROUP BY status, error_message` 698 ); 699 // delta_24h/delta_1h per error label are not displayed — skip extra queries 700 const errorRows = errTotals.map(r => ({ 701 status: r.status, 702 error_message: r.error_message, 703 total: Number(r.total), 704 })); 705 706 // 4. retry_later: count due now vs still waiting 707 const retryDueRow = await getOne( 708 `SELECT 709 COUNT(*) AS due_now, 710 MIN(retry_at) AS next_retry_at 711 FROM messages 712 WHERE ${BASE_WHERE} AND delivery_status = 'retry_later'` 713 ) || { due_now: 0, next_retry_at: null }; 714 const retryStats = { 715 due_now: Number(retryDueRow.due_now), 716 next_retry_at: retryDueRow.next_retry_at, 717 }; 718 719 // Group channel rows by status 720 const channelsByStatus = {}; 721 for (const row of channelRows) { 722 if (!channelsByStatus[row.status]) channelsByStatus[row.status] = []; 723 channelsByStatus[row.status].push(row); 724 } 725 726 const statusMap = {}; 727 for (const row of topLevel) { 728 statusMap[row.status] = row; 729 } 730 731 const tree = []; 732 const seen = new Set(); 733 734 for (const status of OUTREACH_STATUS_ORDER) { 735 if (statusMap[status]) { 736 seen.add(status); 737 const row = statusMap[status]; 738 const entry = { 739 status, 740 total: row.total, 741 delta_24h: row.delta_24h, 742 delta_1h: row.delta_1h, 743 children: null, 744 }; 745 746 if (channelsByStatus[status]) { 747 // Channel breakdown (sorted by total desc) 748 entry.children = { 749 type: 'channels', 750 rows: channelsByStatus[status] 751 .sort((a, b) => b.total - a.total) 752 .map(r => ({ 753 label: r.contact_method, 754 total: r.total, 755 delta_24h: r.delta_24h, 756 delta_1h: r.delta_1h, 757 })), 758 }; 759 } else if (status === 'failed') { 760 entry.children = buildErrorChildren( 761 errorRows.filter(r => r.status === 'failed'), 762 'outreach' 763 ); 764 } else if (status === 'retry_later') { 765 entry.retryStats = retryStats; 766 entry.children = buildErrorChildren( 767 errorRows.filter(r => r.status === 'retry_later'), 768 'outreach' 769 ); 770 } 771 772 tree.push(entry); 773 } 774 } 775 776 // Any statuses not in our order list 777 for (const row of topLevel) { 778 if (!seen.has(row.status)) { 779 tree.push({ 780 status: row.status, 781 total: row.total, 782 delta_24h: row.delta_24h, 783 delta_1h: row.delta_1h, 784 children: null, 785 }); 786 } 787 } 788 789 return tree; 790 } 791 792 // ────────────────────────────────────────────────────────────────────────────── 793 // Helpers 794 // ────────────────────────────────────────────────────────────────────────────── 795 796 /** 797 * Collapse and categorize error rows into retriable/terminal/unknown groups. 798 * Cross-border duplicates are folded into a single entry. 799 * @param {Array} rows - DB rows with error_message, total, delta_24h, delta_1h 800 * @param {'site'|'outreach'} context 801 * @returns {{ type: 'errors', retriable: Array, terminal: Array, unknown: Array }} 802 */ 803 function buildErrorChildren(rows, context) { 804 const retriable = {}; 805 const terminal = {}; 806 const unknown = {}; 807 808 for (const row of rows) { 809 const { group, label } = categorizeError(row.error_message, context); 810 const bucket = group === 'terminal' ? terminal : group === 'retriable' ? retriable : unknown; 811 812 if (!bucket[label]) { 813 bucket[label] = { label, total: 0, delta_24h: 0, delta_1h: 0 }; 814 } 815 bucket[label].total += row.total; 816 bucket[label].delta_24h += row.delta_24h; 817 bucket[label].delta_1h += row.delta_1h; 818 } 819 820 const toSortedArray = obj => Object.values(obj).sort((a, b) => b.total - a.total); 821 822 return { 823 type: 'errors', 824 retriable: toSortedArray(retriable), 825 terminal: toSortedArray(terminal), 826 unknown: toSortedArray(unknown), 827 }; 828 } 829 830 // ────────────────────────────────────────────────────────────────────────────── 831 // Conversations tree 832 // ────────────────────────────────────────────────────────────────────────────── 833 834 const SENTIMENT_ORDER = ['positive', 'neutral', 'negative', 'objection']; 835 836 /** 837 * Build inbound messages summary grouped by sentiment, with intent breakdown. 838 * @returns {Promise<Array>} sentiment rows with intent children 839 */ 840 export async function buildConversationsTree() { 841 const topLevel = await getAll( 842 `SELECT 843 COALESCE(sentiment, 'unknown') AS sentiment, 844 COUNT(*) AS total, 845 COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h, 846 COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h 847 FROM messages 848 WHERE direction = 'inbound' 849 GROUP BY sentiment` 850 ); 851 852 const intentRows = await getAll( 853 `SELECT 854 COALESCE(sentiment, 'unknown') AS sentiment, 855 COALESCE(intent, 'unknown') AS intent, 856 COUNT(*) AS total, 857 COUNT(CASE WHEN created_at > NOW() - INTERVAL '24 hours' THEN 1 END) AS delta_24h, 858 COUNT(CASE WHEN created_at > NOW() - INTERVAL '1 hour' THEN 1 END) AS delta_1h 859 FROM messages 860 WHERE direction = 'inbound' 861 GROUP BY sentiment, intent` 862 ); 863 864 // PG COUNT() returns bigint strings — coerce to Number throughout 865 const intentsBySentiment = {}; 866 for (const row of intentRows) { 867 if (!intentsBySentiment[row.sentiment]) intentsBySentiment[row.sentiment] = []; 868 intentsBySentiment[row.sentiment].push({ 869 ...row, 870 total: Number(row.total), 871 delta_24h: Number(row.delta_24h), 872 delta_1h: Number(row.delta_1h), 873 }); 874 } 875 876 const statusMap = {}; 877 for (const row of topLevel) { 878 statusMap[row.sentiment] = { 879 ...row, 880 total: Number(row.total), 881 delta_24h: Number(row.delta_24h), 882 delta_1h: Number(row.delta_1h), 883 }; 884 } 885 886 const tree = []; 887 const seen = new Set(); 888 889 for (const sentiment of SENTIMENT_ORDER) { 890 if (statusMap[sentiment]) { 891 seen.add(sentiment); 892 const row = statusMap[sentiment]; 893 const intents = (intentsBySentiment[sentiment] || []) 894 .sort((a, b) => b.total - a.total) 895 .map(r => ({ 896 label: r.intent, 897 total: r.total, 898 delta_24h: r.delta_24h, 899 delta_1h: r.delta_1h, 900 })); 901 tree.push({ 902 sentiment, 903 total: row.total, 904 delta_24h: row.delta_24h, 905 delta_1h: row.delta_1h, 906 intents, 907 }); 908 } 909 } 910 911 // Any sentiments not in our order list 912 for (const row of topLevel) { 913 if (!seen.has(row.sentiment)) { 914 tree.push({ 915 sentiment: row.sentiment, 916 total: row.total, 917 delta_24h: row.delta_24h, 918 delta_1h: row.delta_1h, 919 intents: (intentsBySentiment[row.sentiment] || []) 920 .sort((a, b) => b.total - a.total) 921 .map(r => ({ 922 label: r.intent, 923 total: r.total, 924 delta_24h: r.delta_24h, 925 delta_1h: r.delta_1h, 926 })), 927 }); 928 } 929 } 930 931 return tree; 932 }