form.js
1 #!/usr/bin/env node 2 3 /** 4 * Contact Form Outreach Module 5 * Automates form filling using Playwright with operator panel for uncertain fields 6 */ 7 8 import { 9 launchStealthBrowser, 10 createStealthContext, 11 launchWithExtensions, 12 configureNopeCHA, 13 humanClick, 14 waitForCloudflare, 15 } from '../utils/stealth-browser.js'; 16 import { callLLM } from '../utils/llm-provider.js'; 17 import { logPromptFeedback } from '../utils/prompt-learning.js'; 18 import { readFileSync, writeFileSync } from 'fs'; 19 import { join, dirname } from 'path'; 20 import { fileURLToPath } from 'url'; 21 import Logger from '../utils/logger.js'; 22 import { isOutreachRetriable, computeRetryAt } from '../utils/error-categories.js'; 23 import { run, getOne, getAll } from '../utils/db.js'; 24 import { detectUnderConstruction } from '../utils/error-page-detector.js'; 25 import { createAgentTask } from '../agents/utils/task-manager.js'; 26 import { processBatch } from '../utils/error-handler.js'; 27 import { getAdaptiveConcurrencyFast } from '../utils/adaptive-concurrency.js'; 28 import { sanitizeHtmlForPrompt, wrapUntrusted } from '../utils/llm-sanitizer.js'; 29 import '../utils/load-env.js'; 30 31 const __filename = fileURLToPath(import.meta.url); 32 const __dirname = dirname(__filename); 33 const projectRoot = join(__dirname, '../..'); 34 35 const logger = new Logger('FormOutreach'); 36 37 const FORM_SELECT_BEST_PROMPT = readFileSync( 38 join(projectRoot, 'prompts/FORM-SELECT-BEST.md'), 39 'utf-8' 40 ).trim(); 41 const FORM_CLASSIFY_FIELDS_PROMPT = readFileSync( 42 join(projectRoot, 'prompts/FORM-CLASSIFY-FIELDS.md'), 43 'utf-8' 44 ).trim(); 45 const FORM_GUESS_VALUES_PROMPT = readFileSync( 46 join(projectRoot, 'prompts/FORM-GUESS-VALUES.md'), 47 'utf-8' 48 ).trim(); 49 50 /** 51 * Detect chat widgets / embedded contact forms that hide behind a button. 52 * Returns an array of human-readable strings describing what was found. 
/**
 * Detect chat widgets / embedded contact tools that hide behind a button.
 *
 * Runs inside the page via `page.evaluate()` and looks for known vendor
 * signatures in the `src` of every `<script src>` element and in the
 * serialized document HTML. Matching is case-insensitive so that mixed-case
 * embeds (for example an inline `window.Intercom(...)` snippet) are found too.
 *
 * @param {Object} page - Object exposing `evaluate(fn)` (a Playwright page in production).
 * @returns {Promise<string[]>} Vendor names whose signatures were found, in signature-table order.
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
function detectChatWidgets(page) {
  /* eslint-disable no-undef -- document/window available in browser context */
  return page.evaluate(() => {
    // All patterns are lower-case; both haystacks are lower-cased once below.
    const signatures = [
      { name: 'Tidio', patterns: ['tidiochat', 'tidio.co'] },
      { name: 'Intercom', patterns: ['intercom', 'intercomcdn'] },
      { name: 'Drift', patterns: ['drift.com', 'js.driftt.com'] },
      { name: 'Tawk.to', patterns: ['tawk.to', 'embed.tawk.to'] },
      { name: 'Crisp', patterns: ['crisp.chat', 'client.crisp'] },
      { name: 'Zendesk', patterns: ['zopim', 'zendesk', 'zdassets'] },
      { name: 'HubSpot', patterns: ['hubspot', 'hs-scripts'] },
      { name: 'LiveChat', patterns: ['livechat.com', 'livechatinc'] },
      { name: 'Freshdesk/Freshchat', patterns: ['freshdesk', 'freshchat', 'freshworks'] },
      { name: 'Olark', patterns: ['olark.com'] },
      { name: 'Facebook Messenger', patterns: ['connect.facebook.net'] },
      { name: 'WhatsApp', patterns: ['wa.me', 'api.whatsapp'] },
    ];

    const scriptUrls = Array.from(document.querySelectorAll('script[src]'), script =>
      String(script.src).toLowerCase()
    );
    const html = document.documentElement.innerHTML.toLowerCase();

    // `script.src` is the resolved URL, which can differ from the attribute text
    // present in the HTML, so both sources are checked.
    const appearsOnPage = pattern =>
      html.includes(pattern) || scriptUrls.some(url => url.includes(pattern));

    return signatures
      .filter(({ patterns }) => patterns.some(appearsOnPage))
      .map(({ name }) => name);
  });
  /* eslint-enable no-undef */
}
/* c8 ignore stop */

// --- Autocomplete attribute → field type map (HTML5 standard, highest confidence) ---
// Keys are HTML autofill field names; values are this module's field types.
// NOTE(review): per the HTML autofill spec `additional-name` is a middle name and
// `organization-title` is a job title; the mappings below are kept unchanged for
// backward compatibility — confirm they are intentional.
const AUTOCOMPLETE_MAP = {
  name: 'fullName',
  'full-name': 'fullName',
  'given-name': 'firstName',
  'additional-name': 'firstName',
  'family-name': 'lastName',
  email: 'email',
  tel: 'phone',
  'tel-national': 'phone',
  'tel-local': 'phone',
  organization: 'company',
  'organization-title': 'company',
};
/**
 * Inject the floating operator panel into the page.
 *
 * The panel offers one button per sender value (typed into whichever field has
 * focus), an optional LLM-reasoning/feedback section, "Done" and "Mark as
 * Failed" controls, and a missed-field badge asking the operator to label
 * fields the system did not fill. Results are handed back through
 * `localStorage` keys written by the panel: `operator_type_pending`,
 * `field_corrections`, `rework_feedback`, `operator_panel_done`,
 * `operator_panel_failed` and `missed_field_corrections`.
 *
 * Every interpolated value is HTML-escaped before it is placed in the panel
 * markup, and missing sender values are treated as empty strings.
 *
 * @param {Object} page - Object exposing `evaluate(fn, arg)` (a Playwright page in production).
 * @param {Object} data - Sender/proposal data
 * @param {string} data.subject - Subject line typed by the Subject button
 * @param {string} data.proposal - Message body typed by the proposal button
 * @param {string} data.name - Sender full name (split on whitespace into first/last)
 * @param {string} data.email - Sender email
 * @param {string} [data.phone] - Sender phone
 * @param {string} [data.company] - Sender company
 * @param {string[]} [data.warnings] - Optional warning messages shown at top of panel
 * @param {string} [data.llmThinking] - LLM reasoning for field identification
 * @param {string[]} [data.filledSelectors] - Selectors already filled (to detect missed fields)
 * @param {string} [data.builderType] - Detected form builder type (for template learning)
 * @returns {Promise<*>} Whatever `page.evaluate()` resolves to (the in-page callback returns nothing).
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
// eslint-disable-next-line max-lines-per-function -- operator panel builds complex HTML + handlers in one evaluate()
function injectOperatorPanel(page, data) {
  const {
    subject,
    proposal,
    name,
    email,
    phone,
    company,
    warnings = [],
    llmThinking = '',
    filledSelectors = [],
    builderType = 'generic',
  } = data;

  // Optional sender values may be null/undefined; the panel works on strings only.
  const asText = value => (value == null ? '' : String(value));

  const fullName = asText(name).trim();
  const [firstName, ...lastParts] = fullName.split(/\s+/);
  // Single-word names reuse the first name as last name.
  const lastName = lastParts.join(' ') || firstName;

  /* eslint-disable no-undef */
  return page.evaluate(
    ({
      subject,
      proposal,
      name,
      firstName,
      lastName,
      email,
      phone,
      company,
      warnings,
      llmThinking,
      filledSelectors,
      builderType,
    }) => {
      // Defined inside the callback because only this function body reaches the browser.
      const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
      const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

      const hasWarnings = warnings.length > 0;
      const restingBorder = `3px solid ${hasWarnings ? '#ffc107' : '#0066cc'}`;
      const headerBackground = hasWarnings ? '#856404' : '#0066cc';

      // Only the first sentence of each warning fits in the header.
      const warningSubtitle = hasWarnings
        ? `\u26a0\ufe0f ${escapeHtml(warnings.map(w => String(w).split('.')[0]).join(' \u00b7 '))}`
        : 'Click a button to type its value into the focused field (drag to move)';

      // Build LLM thinking section if available
      const llmSection = llmThinking
        ? `<details style="margin-top:8px;border-top:1px solid #eee;padding-top:8px;">
            <summary style="cursor:pointer;font-size:11px;color:#666;font-weight:600;">
              \ud83e\udde0 LLM Field Identification Reasoning
            </summary>
            <pre style="font-size:10px;background:#f8f9fa;padding:8px;border-radius:4px;min-height:80px;max-height:200px;overflow-y:auto;white-space:pre-wrap;margin:6px 0;">${escapeHtml(llmThinking)}</pre>
            <textarea id="llm-feedback" placeholder="Feedback for LLM rework (e.g. 'the textarea is the message field, not subject')" style="width:100%;height:50px;font-size:11px;border:1px solid #ddd;border-radius:4px;padding:6px;resize:vertical;box-sizing:border-box;margin-top:4px;"></textarea>
            <button id="llm-rework-btn" style="margin-top:4px;padding:6px 12px;background:#ffc107;color:#333;border:none;border-radius:4px;cursor:pointer;font-size:11px;font-weight:600;width:100%;">
              \ud83d\udd04 Rework with Feedback
            </button>
          </details>`
        : '';

      const FIELD_BTN_STYLE =
        'padding: 10px; background: #0066cc; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px; font-weight: 600; text-align: left;';
      // `label` must already be safe HTML; `value` is escaped here and read back
      // unescaped through `dataset.value`.
      const fieldButton = (value, fieldType, label, extraStyle = '') =>
        `<button class="copy-btn" data-value="${escapeHtml(value)}" data-field-type="${fieldType}" style="${extraStyle}${FIELD_BTN_STYLE}">${label}</button>`;

      const TERMINAL_REASONS = [
        'No contact form on page',
        'Opted out',
        'GDPR blocked',
        'Cloudflare blocked',
        'Invalid email',
        'Invalid phone',
        'Gov domain blocked',
        'Franchise / chain site',
        'Duplicate / already contacted',
      ];
      const RETRIABLE_REASONS = [
        'Under construction',
        'Business hours block',
        'Rate limited',
        'Form timeout',
        'DNS failure',
        'CAPTCHA unsolvable',
      ];
      const reasonOptions = reasons =>
        reasons
          .map(reason => `<option value="${escapeHtml(reason)}">${escapeHtml(reason)}</option>`)
          .join('');

      const panelHtml = `
        <div id="operator-panel" style="
          position: fixed;
          top: 20px;
          right: 20px;
          width: 400px;
          max-height: 80vh;
          overflow-y: auto;
          background: white;
          border: ${restingBorder};
          border-radius: 8px;
          box-shadow: 0 10px 40px rgba(0,0,0,0.3);
          z-index: 999999;
          font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
        ">
          <div id="operator-panel-header" style="padding: 16px; background: ${headerBackground}; color: white; border-radius: 5px 5px 0 0; cursor: move;">
            <h2 style="margin: 0; font-size: 16px; font-weight: 600;">\ud83c\udfaf Operator Panel</h2>
            <p style="margin: 4px 0 0; font-size: 12px; opacity: 0.9;">${warningSubtitle}</p>
          </div>
          <div style="padding: 12px;">
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 8px; margin-bottom: 12px;">
              ${fieldButton(firstName, 'firstName', `\ud83d\udcdd First: ${escapeHtml(firstName)}`)}
              ${fieldButton(lastName, 'lastName', `\ud83d\udcdd Last: ${escapeHtml(lastName)}`)}
              ${fieldButton(name, 'fullName', '\ud83d\udcdd Full Name')}
              ${fieldButton(email, 'email', '\ud83d\udce7 Email')}
              ${fieldButton(phone, 'phone', '\ud83d\udcde Phone')}
              ${fieldButton(company, 'company', '\ud83c\udfe2 Company')}
            </div>

            ${fieldButton(subject, 'subject', '\u2709\ufe0f Subject', 'width: 100%; margin-bottom: 8px; ')}

            <button id="copy-proposal-btn" style="width: 100%; padding: 12px; margin-bottom: 12px; background: #0066cc; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 13px; font-weight: 600;">\ud83d\udcc4 Type Proposal into Field</button>

            ${llmSection}

            <div style="padding-top: 8px; border-top: 2px solid #eee;">
              <button id="operator-done-btn" style="width: 100%; padding: 12px; background: #28a745; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 13px; font-weight: 600;">\u2713 Done</button>
            </div>

            <div style="padding-top: 10px; margin-top: 8px; border-top: 2px solid #eee;">
              <p style="margin: 0 0 6px; font-size: 11px; color: #666; font-weight: 600;">MARK AS FAILED</p>
              <select id="fail-reason-select" style="width:100%;padding:7px 8px;font-size:12px;border:1px solid #ccc;border-radius:4px;margin-bottom:6px;box-sizing:border-box;">
                <option value="">— select reason —</option>
                <optgroup label="Terminal (no retry)">${reasonOptions(TERMINAL_REASONS)}</optgroup>
                <optgroup label="Retriable (will retry later)">${reasonOptions(RETRIABLE_REASONS)}</optgroup>
              </select>
              <input id="fail-reason-custom" type="text" placeholder="or type a custom reason..." style="width:100%;padding:7px 8px;font-size:12px;border:1px solid #ccc;border-radius:4px;margin-bottom:6px;box-sizing:border-box;" />
              <button id="mark-failed-btn" style="width:100%;padding:10px;background:#dc3545;color:white;border:none;border-radius:4px;cursor:pointer;font-size:12px;font-weight:600;">\u2717 Mark as Failed</button>
            </div>
          </div>
        </div>
      `;

      document.body.insertAdjacentHTML('beforeend', panelHtml);

      // Read a JSON array from localStorage. The storage belongs to the visited
      // page, so a missing, corrupt or non-array value simply starts a new list.
      function readJsonArray(key) {
        try {
          const parsed = JSON.parse(localStorage.getItem(key) || '[]');
          return Array.isArray(parsed) ? parsed : [];
        } catch {
          return [];
        }
      }

      // Human-readable label for a field: <label for>, wrapping <label>,
      // aria-label, placeholder, then name. Returns '' when nothing is found.
      function getFieldLabel(el) {
        if (el.id) {
          const lbl = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
          if (lbl) return lbl.innerText.trim();
        }
        const wrappingLabel = el.closest('label');
        if (wrappingLabel) return wrappingLabel.innerText.trim();
        return (
          el.getAttribute('aria-label')?.trim() ||
          el.getAttribute('placeholder')?.trim() ||
          el.name ||
          ''
        );
      }

      // Label of the currently focused input/textarea, or null when there is none.
      function getFocusedFieldLabel() {
        const el = document.activeElement;
        if (!el || (el.tagName !== 'INPUT' && el.tagName !== 'TEXTAREA')) return null;
        return getFieldLabel(el) || null;
      }

      // Helper: type text into whatever input/textarea currently has focus.
      // When focus is inside an iframe, document.activeElement is the <iframe>
      // element itself, so the text is stored in localStorage for Node.js to
      // type through the keyboard API instead.
      function typeIntoFocused(text) {
        const el = document.activeElement;
        const inIframe = Boolean(el) && el.tagName === 'IFRAME';
        const isFormControl = Boolean(el) && (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA');
        const inField = isFormControl || (Boolean(el) && el.isContentEditable);

        if (!inField && !inIframe) {
          // Flash the panel red, then return to its resting (blue or warning) border.
          const panelEl = document.getElementById('operator-panel');
          panelEl.style.border = '3px solid #dc3545';
          setTimeout(() => {
            panelEl.style.border = restingBorder;
          }, 600);
          return false;
        }

        if (inIframe) {
          localStorage.setItem('operator_type_pending', JSON.stringify({ text, ts: Date.now() }));
          return true;
        }

        // Same-page field: type directly
        el.focus();
        const supported = document.execCommand('insertText', false, text);
        if (!supported) {
          if (isFormControl) {
            // Use the prototype's native setter so frameworks that wrap `value` see the change.
            const proto =
              el.tagName === 'TEXTAREA'
                ? window.HTMLTextAreaElement.prototype
                : window.HTMLInputElement.prototype;
            const nativeValueSetter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
            if (nativeValueSetter) nativeValueSetter.call(el, text);
          } else {
            // contentEditable elements have no `value`; the input setter would throw on them.
            el.textContent = text;
          }
          el.dispatchEvent(new Event('input', { bubbles: true }));
          el.dispatchEvent(new Event('change', { bubbles: true }));
        }
        return true;
      }

      // Wire up all field buttons
      document.querySelectorAll('.copy-btn').forEach(btn => {
        const { value, fieldType } = btn.dataset;
        const originalText = btn.textContent;
        // Prevent the button from stealing focus from the field being filled.
        btn.addEventListener('mousedown', e => e.preventDefault());
        btn.addEventListener('click', () => {
          // Capture label before typing (focus is still on the field)
          const labelText = getFocusedFieldLabel();
          const ok = typeIntoFocused(value);
          if (ok && labelText && fieldType) {
            // Record correction so Node.js can persist it
            const corrections = readJsonArray('field_corrections');
            corrections.push({ labelText, fieldType });
            localStorage.setItem('field_corrections', JSON.stringify(corrections));
          }
          btn.textContent = ok ? '\u2713 Typed' : '\u26a0 Click a field first';
          btn.style.background = ok ? '#28a745' : '#dc3545';
          setTimeout(() => {
            btn.textContent = originalText;
            btn.style.background = '#0066cc';
          }, 1200);
        });
      });

      // Proposal button
      const copyProposalBtn = document.getElementById('copy-proposal-btn');
      const proposalOriginalText = copyProposalBtn.textContent;
      copyProposalBtn.addEventListener('mousedown', e => e.preventDefault());
      copyProposalBtn.addEventListener('click', () => {
        const ok = typeIntoFocused(proposal);
        copyProposalBtn.textContent = ok
          ? '\u2713 Typed into field'
          : '\u26a0 Click the message field first';
        copyProposalBtn.style.background = ok ? '#28a745' : '#dc3545';
        setTimeout(() => {
          copyProposalBtn.textContent = proposalOriginalText;
          copyProposalBtn.style.background = '#0066cc';
        }, 1800);
      });

      // Done button: wired with addEventListener (not an inline onclick attribute)
      // so it keeps working on pages whose CSP blocks inline event handlers.
      const doneBtn = document.getElementById('operator-done-btn');
      if (doneBtn) {
        doneBtn.addEventListener('click', () => {
          localStorage.setItem('operator_panel_done', 'true');
          document.getElementById('operator-panel')?.remove();
        });
      }

      // Rework button: stores feedback in localStorage for Node.js to pick up
      const reworkBtn = document.getElementById('llm-rework-btn');
      if (reworkBtn) {
        reworkBtn.addEventListener('click', () => {
          const fb = document.getElementById('llm-feedback')?.value?.trim();
          if (!fb) return;
          localStorage.setItem('rework_feedback', fb);
          reworkBtn.textContent = '\ud83d\udd04 Reworking...';
          reworkBtn.style.background = '#6c757d';
          reworkBtn.disabled = true;
        });
      }

      // Mark as Failed button
      const markFailedBtn = document.getElementById('mark-failed-btn');
      if (markFailedBtn) {
        markFailedBtn.addEventListener('click', () => {
          const selectEl = document.getElementById('fail-reason-select');
          const customEl = document.getElementById('fail-reason-custom');
          // A typed custom reason takes precedence over the dropdown.
          const reason = customEl?.value?.trim() || selectEl?.value || '';
          if (!reason) {
            selectEl.style.border = '2px solid #dc3545';
            customEl.style.border = '2px solid #dc3545';
            setTimeout(() => {
              selectEl.style.border = '1px solid #ccc';
              customEl.style.border = '1px solid #ccc';
            }, 1500);
            return;
          }
          localStorage.setItem('operator_panel_failed', reason);
          markFailedBtn.textContent = '\u2717 Marked as Failed';
          markFailedBtn.disabled = true;
          markFailedBtn.style.background = '#6c757d';
          document.getElementById('operator-panel').remove();
        });
      }

      // Make panel draggable
      const panel = document.getElementById('operator-panel');
      const header = document.getElementById('operator-panel-header');
      let isDragging = false;
      let grabOffsetX = 0;
      let grabOffsetY = 0;

      header.addEventListener('mousedown', e => {
        isDragging = true;
        const rect = panel.getBoundingClientRect();
        grabOffsetX = e.clientX - rect.left;
        grabOffsetY = e.clientY - rect.top;
        header.style.cursor = 'grabbing';
      });

      document.addEventListener('mousemove', e => {
        if (!isDragging) return;
        e.preventDefault();
        // Clamp so the panel cannot be dragged outside the viewport.
        const maxX = window.innerWidth - panel.offsetWidth;
        const maxY = window.innerHeight - panel.offsetHeight;
        const nextX = Math.max(0, Math.min(e.clientX - grabOffsetX, maxX));
        const nextY = Math.max(0, Math.min(e.clientY - grabOffsetY, maxY));
        panel.style.left = `${nextX}px`;
        panel.style.top = `${nextY}px`;
        panel.style.right = 'auto';
      });

      document.addEventListener('mouseup', () => {
        if (!isDragging) return;
        isDragging = false;
        header.style.cursor = 'move';
      });

      // ── Missed-field learning ────────────────────────────────────────────
      // Watch for the operator filling fields that the system didn't auto-fill.
      // When such a field gets a value, show a small badge near it for the
      // operator to confirm the type, then store the correction in localStorage
      // for Node.js to persist.

      const filledSet = new Set(filledSelectors);

      // Best-effort guess of the field type: value shape first, then name/id/placeholder.
      function inferFieldType(value, el) {
        if (/^\+?[\d\s\-().]{7,20}$/.test(value)) return 'phone';
        if (/@/.test(value)) return 'email';
        const nameRe = /^[A-Z][a-z]+ [A-Z][a-z]+/;
        if (nameRe.test(value) && value.split(' ').length <= 4) return 'fullName';
        if (value.length > 60) return 'message';
        const attr = `${el.name || ''} ${el.id || ''} ${
          el.getAttribute('placeholder') || ''
        }`.toLowerCase();
        if (/phone|mobile|tel/.test(attr)) return 'phone';
        if (/email/.test(attr)) return 'email';
        if (/company|organisation|organization|business/.test(attr)) return 'company';
        if (/subject|title/.test(attr)) return 'subject';
        if (/message|comment|enquiry|inquiry|body/.test(attr)) return 'message';
        if (/first/.test(attr)) return 'firstName';
        if (/last/.test(attr)) return 'lastName';
        if (/name/.test(attr)) return 'fullName';
        return null;
      }

      // True when `el` is one of the fields listed in filledSelectors.
      function wasFilledBySystem(el) {
        const candidates = [
          el.id ? `#${el.id}` : null,
          el.name ? `[name="${el.name}"]` : null,
          el.className ? `.${el.className.split(' ')[0]}` : null,
        ].filter(Boolean);
        if (candidates.some(s => filledSet.has(s))) return true;
        // The caller's selectors may be written differently from the candidates
        // above (e.g. escaped ids or tag-qualified `input[name="…"]`), so also
        // resolve each one against the DOM.
        return filledSelectors.some(sel => {
          try {
            return document.querySelector(sel) === el;
          } catch {
            return false; // not a valid CSS selector
          }
        });
      }

      function showMissedFieldBadge(el, inferredType) {
        // Remove any existing badge for this element
        if (el._missedBadge) el._missedBadge.remove();

        const TYPE_LABELS = {
          phone: 'Phone',
          email: 'Email',
          fullName: 'Full Name',
          firstName: 'First Name',
          lastName: 'Last Name',
          company: 'Company',
          subject: 'Subject',
          message: 'Message',
        };

        const badge = document.createElement('div');
        badge.style.cssText = [
          'position:fixed',
          'z-index:1000000',
          'background:#343a40',
          'color:#fff',
          'padding:6px 10px',
          'border-radius:6px',
          'font-size:11px',
          'font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif',
          'box-shadow:0 4px 12px rgba(0,0,0,0.4)',
          'max-width:260px',
          'line-height:1.5',
        ].join(';');

        const typeOptions = Object.entries(TYPE_LABELS)
          .map(
            ([type, label]) =>
              `<option value="${type}"${type === inferredType ? ' selected' : ''}>${label}</option>`
          )
          .join('');

        badge.innerHTML = `
          <div style="margin-bottom:4px;font-weight:600;">💡 Missed field — what is this?</div>
          <select id="mf-type-sel" style="width:100%;padding:4px;font-size:11px;border-radius:3px;border:none;margin-bottom:6px;">
            <option value="">— skip —</option>
            ${typeOptions}
          </select>
          <div style="display:flex;gap:6px;">
            <button id="mf-confirm" style="flex:1;padding:4px 8px;background:#28a745;color:#fff;border:none;border-radius:3px;cursor:pointer;font-size:11px;font-weight:600;">Save</button>
            <button id="mf-skip" style="flex:1;padding:4px 8px;background:#6c757d;color:#fff;border:none;border-radius:3px;cursor:pointer;font-size:11px;">Skip</button>
          </div>
        `;

        document.body.appendChild(badge);
        el._missedBadge = badge;

        // Position just below the field, flipping above / shifting left near viewport edges.
        const rect = el.getBoundingClientRect();
        let top = rect.bottom + 6;
        let { left } = rect;
        if (top + 120 > window.innerHeight) top = rect.top - 120;
        if (left + 270 > window.innerWidth) left = window.innerWidth - 275;
        badge.style.top = `${top}px`;
        badge.style.left = `${Math.max(4, left)}px`;

        const labelText = getFieldLabel(el);
        const dismiss = () => {
          badge.remove();
          el._missedBadge = null;
        };

        badge.querySelector('#mf-confirm').addEventListener('click', () => {
          const chosenType = badge.querySelector('#mf-type-sel').value;
          if (chosenType && labelText) {
            const missed = readJsonArray('missed_field_corrections');
            let selector = null;
            if (el.id) selector = `#${CSS.escape(el.id)}`;
            else if (el.name) selector = `[name="${el.name}"]`;
            missed.push({ selector, labelText, fieldType: chosenType, builderType });
            localStorage.setItem('missed_field_corrections', JSON.stringify(missed));
          }
          dismiss();
        });

        badge.querySelector('#mf-skip').addEventListener('click', dismiss);
      }

      // Debounce timer per element to avoid triggering on every keystroke
      const debounceTimers = new WeakMap();

      function onFieldInput(e) {
        const el = e.target;
        if (!el || (el.tagName !== 'INPUT' && el.tagName !== 'TEXTAREA')) return;
        if (el.type === 'hidden' || el.type === 'submit' || el.type === 'button') return;
        if (wasFilledBySystem(el)) return;
        // Debounce: show badge 1.5s after last keystroke
        if (debounceTimers.has(el)) clearTimeout(debounceTimers.get(el));
        debounceTimers.set(
          el,
          setTimeout(() => {
            const val = el.value.trim();
            if (!val || val.length < 3) return;
            showMissedFieldBadge(el, inferFieldType(val, el));
          }, 1500)
        );
      }

      document.addEventListener('input', onFieldInput, true);
      document.addEventListener('change', onFieldInput, true);
      // ────────────────────────────────────────────────────────────────────
    },
    {
      subject: asText(subject),
      proposal: asText(proposal),
      name: fullName,
      firstName,
      lastName,
      email: asText(email),
      phone: asText(phone),
      company: asText(company),
      warnings: warnings ?? [],
      llmThinking: asText(llmThinking),
      filledSelectors: filledSelectors ?? [],
      builderType,
    }
  );
  /* eslint-enable no-undef */
}
/* c8 ignore stop */
/**
 * Navigate to the form URL, wait for Cloudflare, and detect chat widgets.
 *
 * Navigation is tried with `networkidle` first and retried once with
 * `domcontentloaded`; the reason for the first failure is logged before the retry.
 *
 * @param {Object} page - Playwright page
 * @param {string} url - Form page URL
 * @returns {Promise<{panelWarnings: string[], cfResolved: *, chatWidgets: string[]}>}
 *   Warnings for the operator panel, the `waitForCloudflare` result, and detected widgets.
 * @throws {Error} When both navigation attempts fail (the second error is attached as `cause`).
 */
/* c8 ignore start -- browser automation, not testable in Node.js */
async function loadFormPage(page, url) {
  try {
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
  } catch (firstErr) {
    logger.info(
      `networkidle load failed for ${url} (${firstErr.message}); retrying with domcontentloaded`
    );
    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
      logger.info('Form loaded with domcontentloaded fallback');
    } catch (err2) {
      throw new Error(`Form page failed to load after retry: ${err2.message}`, { cause: err2 });
    }
  }

  const panelWarnings = [];

  const cfResolved = await waitForCloudflare(page, { timeout: 30000 });
  if (!cfResolved) {
    logger.warn(`Cloudflare/bot protection may be active on ${url}`);
    panelWarnings.push(
      'Cloudflare/bot protection may be active. Check the page is accessible before filling the form.'
    );
  }

  const chatWidgets = await detectChatWidgets(page);
  if (chatWidgets.length > 0) {
    logger.info(`Chat widget(s) detected: ${chatWidgets.join(', ')}`);
  }

  return { panelWarnings, cfResolved, chatWidgets };
}
/* c8 ignore stop */

/**
 * Score a form field element against a set of keyword signals.
 * Runs inside page.evaluate() (browser context), so it must stay self-contained:
 * it may only reference browser globals, never module-scope helpers.
 *
 * The haystack is built from name, id, placeholder, aria-label, title, several
 * data-* attributes and the associated label text, lower-cased. Each keyword
 * found adds 10 points when longer than 4 characters, otherwise 6.
 *
 * @param {Element} el - Field element to score
 * @param {string[]} keywords - Keywords to look for (compared against the lower-cased haystack)
 * @returns {number} Total score (0 when no keyword matches)
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
function scoreField(el, keywords) {
  /* eslint-disable no-undef -- runs in browser context via page.evaluate */
  let labelText = '';
  if (el.id) {
    // Escape the id: ids containing quotes or brackets would otherwise make the selector throw.
    const lbl = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
    if (lbl) labelText = lbl.innerText;
  }
  if (!labelText) {
    const wrappingLabel = el.closest('label');
    labelText = wrappingLabel ? wrappingLabel.innerText : '';
  }

  const haystack = [
    el.name,
    el.id,
    el.placeholder,
    el.getAttribute('aria-label'),
    el.getAttribute('title'),
    el.getAttribute('data-name'),
    el.getAttribute('data-field'),
    el.getAttribute('data-field-type'),
    labelText,
  ]
    .filter(Boolean)
    .join(' ')
    .toLowerCase();

  return keywords.reduce(
    (score, kw) => (haystack.includes(kw) ? score + (kw.length > 4 ? 10 : 6) : score),
    0
  );
  /* eslint-enable no-undef */
}
/* c8 ignore stop */

/**
 * When a page has multiple <form> elements, ask Haiku to pick the simplest one that
 * contains at least an email and a message/textarea field (i.e. the contact form,
 * not a quote-request or newsletter form).
 *
 * Each form is identified by its `#id` or `form[name="…"]` selector when that
 * selector matches exactly one element; otherwise by a positional `:nth-child`
 * path, so the returned selector always resolves to the chosen form.
 *
 * @param {Object} page - Playwright page
 * @param {Object} siteContext - Site info; `domain` and optional `country_code` are read
 * @returns {Promise<string|null>} CSS selector of the chosen form, or null if there is
 *   only one form (or none) or the model reply is unusable — the caller then uses document scope.
 */
/* c8 ignore start -- browser + LLM function, not testable in Node.js */
async function selectBestForm(page, siteContext) {
  const forms = await page.evaluate(() => {
    /* eslint-disable no-undef */
    const quoteAttr = value => String(value).replace(/["\\]/g, '\\$&');
    const isUnique = selector => document.querySelectorAll(selector).length === 1;

    // Positional path anchored at the nearest ancestor with an id (or the root element).
    const structuralPath = el => {
      const segments = [];
      for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
        const anchorId = node === el ? null : node.getAttribute('id');
        if (anchorId) {
          segments.unshift(`#${CSS.escape(anchorId)}`);
          break;
        }
        const parent = node.parentElement;
        if (!parent) {
          segments.unshift(node.tagName.toLowerCase());
          break;
        }
        const position = Array.prototype.indexOf.call(parent.children, node) + 1;
        segments.unshift(`${node.tagName.toLowerCase()}:nth-child(${position})`);
      }
      return segments.join(' > ');
    };

    return Array.from(document.querySelectorAll('form')).map(form => {
      const clone = form.cloneNode(true);
      for (const el of clone.querySelectorAll('script, style, svg, img, noscript')) el.remove();
      // getAttribute, not form.id / form.name: a child input named "id" or "name"
      // shadows those properties on the form element.
      const formId = form.getAttribute('id');
      const formName = form.getAttribute('name');
      const candidates = [
        formId ? `#${CSS.escape(formId)}` : null,
        formName ? `form[name="${quoteAttr(formName)}"]` : null,
      ].filter(Boolean);
      const selector = candidates.find(isUnique) || structuralPath(form);
      // Collapse whitespace and cap each form at 3000 characters to keep the prompt small.
      return { selector, html: clone.outerHTML.replace(/\s+/g, ' ').substring(0, 3000) };
    });
    /* eslint-enable no-undef */
  });

  if (forms.length <= 1) return null; // nothing to choose

  const formList = forms
    .map((f, i) => `Form ${i + 1} (selector: ${f.selector}):\n${f.html}`)
    .join('\n\n---\n\n');

  const response = await callLLM({
    model: 'anthropic/claude-haiku-4-5',
    stage: 'outreach',
    messages: [
      {
        role: 'system',
        content: FORM_SELECT_BEST_PROMPT,
      },
      {
        role: 'user',
        content: `Website: ${siteContext.domain} (${siteContext.country_code || '?'})

The page has ${forms.length} forms. Pick the best contact form.
Reply with only the form number (e.g. "2") — nothing else.

${wrapUntrusted(sanitizeHtmlForPrompt(formList), 'form_html')}`,
      },
    ],
    temperature: 0,
    max_tokens: 10,
  });

  // The reply should be a bare 1-based form number; anything else means "no choice".
  const numberMatch = String(response?.content || '')
    .trim()
    .match(/\d+/);
  if (!numberMatch) return null;
  const idx = Number.parseInt(numberMatch[0], 10) - 1;
  if (idx < 0 || idx >= forms.length) return null;
  logger.info(`selectBestForm: picked form ${idx + 1} of ${forms.length} (${forms[idx].selector})`);
  return forms[idx].selector;
}
/* c8 ignore stop */

/**
 * Scan all visible, unfilled form fields and return a map of
 * { fieldType -> { selector, score, required, labelText } } for the best match per type.
 *
 * Classification order per input: `autocomplete` attribute (score 25),
 * `type="email"` / `type="tel"` (score 20), operator-taught label corrections
 * (score 50). Everything else is collected under `__unclassified__`; the tallest
 * textarea becomes `message` (score 30); required selects are listed under
 * `__selects__` with their options.
 *
 * @param {Object} page - Playwright page
 * @param {Object<string, string>} [corrections] - Lower-cased label text -> field type
 * @param {string|null} [formSelector] - Restrict the scan to this form; falls back to the
 *   whole document when null or when the selector matches nothing
 * @returns {Promise<Object>} Best match per field type plus `__unclassified__` / `__selects__`
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
function scanFormFields(page, corrections = {}, formSelector = null) {
  /* eslint-disable no-undef -- runs in browser context */
  return page.evaluate(
    ({ autocompleteMap, corrections, formSelector }) => {
      const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
      const quoteAttr = value => String(value).replace(/["\\]/g, '\\$&');
      const taught = corrections || {};
      const scope = (formSelector && document.querySelector(formSelector)) || document;

      function elSelector(el) {
        const tag = el.tagName.toLowerCase();
        if (el.id) return `#${CSS.escape(el.id)}`;
        if (el.name) return `${tag}[name="${quoteAttr(el.name)}"]`;
        // No id/name: a bare `tag:nth-child(n)` is not unique in the document, so
        // build a positional path anchored at the nearest ancestor with an id.
        const segments = [];
        for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
          const anchorId = node === el ? null : node.getAttribute('id');
          if (anchorId) {
            segments.unshift(`#${CSS.escape(anchorId)}`);
            break;
          }
          const parent = node.parentElement;
          if (!parent) {
            segments.unshift(node.tagName.toLowerCase());
            break;
          }
          const position = Array.prototype.indexOf.call(parent.children, node) + 1;
          segments.unshift(`${node.tagName.toLowerCase()}:nth-child(${position})`);
        }
        return segments.join(' > ');
      }

      function elLabel(el) {
        if (el.id) {
          const lbl = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
          if (lbl) return lbl.innerText.trim();
        }
        const parentLabel = el.closest('label');
        if (parentLabel) return parentLabel.innerText.trim();
        return (
          el.getAttribute('aria-label')?.trim() ||
          el.getAttribute('placeholder')?.trim() ||
          el.name ||
          el.id ||
          ''
        );
      }

      // The autocomplete attribute is a space-separated, case-insensitive token
      // list (e.g. "shipping tel-national"); the field name comes last, so scan
      // from the end for the first token the map knows.
      function autocompleteType(el) {
        const raw = el.getAttribute('autocomplete');
        if (!raw) return null;
        const tokens = raw.toLowerCase().split(/\s+/).filter(Boolean);
        for (let i = tokens.length - 1; i >= 0; i -= 1) {
          // eslint-disable-next-line security/detect-object-injection
          if (hasOwn(autocompleteMap, tokens[i])) return autocompleteMap[tokens[i]];
        }
        return null;
      }

      const REQUIRED_WRAPPER = '.gfield_contains_required, .required, [data-required]';

      const inputs = Array.from(
        scope.querySelectorAll(
          'input:not([type="hidden"]):not([type="submit"]):not([type="button"]):not([type="checkbox"]):not([type="radio"]):not([type="file"]), textarea'
        )
      ).filter(el => {
        const style = window.getComputedStyle(el);
        if (
          style.display === 'none' ||
          style.visibility === 'hidden' ||
          parseFloat(style.opacity) < 0.1
        )
          return false;
        if (el.value && el.value.trim().length > 0) return false; // already filled
        if (el.disabled || el.readOnly) return false;
        return true;
      });

      const best = {};
      // Keep the highest-scoring candidate per type; on ties the earlier field wins.
      const claim = (type, candidate) => {
        // eslint-disable-next-line security/detect-object-injection
        const current = best[type];
        // eslint-disable-next-line security/detect-object-injection
        if (!current || current.score < candidate.score) best[type] = candidate;
      };

      // Textareas → message field (tallest wins, last as tiebreak)
      const textareas = inputs.filter(el => el.tagName === 'TEXTAREA');
      if (textareas.length > 0) {
        const sorted = [...textareas].sort((a, b) => {
          const hDiff = b.getBoundingClientRect().height - a.getBoundingClientRect().height;
          if (Math.abs(hDiff) > 5) return hDiff;
          return a.compareDocumentPosition(b) & Node.DOCUMENT_POSITION_FOLLOWING ? 1 : -1;
        });
        const ta = sorted[0];
        best.message = {
          selector: elSelector(ta),
          score: 30,
          required: ta.required || ta.getAttribute('aria-required') === 'true',
          labelText: elLabel(ta),
        };
      }

      const unclassified = [];
      for (const el of inputs) {
        if (el.tagName === 'TEXTAREA') continue;
        const selector = elSelector(el);
        const labelText = elLabel(el);
        const required =
          el.required ||
          el.getAttribute('aria-required') === 'true' ||
          labelText.includes('*') ||
          !!el.closest(REQUIRED_WRAPPER);
        const candidate = score => ({ selector, score, required, labelText });

        // 1. autocomplete attr — unambiguous (score 25)
        const acType = autocompleteType(el);
        if (acType) {
          claim(acType, candidate(25));
          continue;
        }
        // 2. type="email" / type="tel" — unambiguous (score 20)
        if (el.type === 'email') {
          claim('email', candidate(20));
          continue;
        }
        if (el.type === 'tel') {
          claim('phone', candidate(20));
          continue;
        }
        // 3. Operator-taught corrections (score 50)
        const labelKey = labelText.toLowerCase();
        // eslint-disable-next-line security/detect-object-injection
        const correctedType = labelText && hasOwn(taught, labelKey) ? taught[labelKey] : null;
        if (correctedType) {
          claim(correctedType, candidate(50));
          continue;
        }
        // 4. Everything else → Haiku classifies
        unclassified.push({ selector, labelText, required });
      }
      if (unclassified.length > 0) best.__unclassified__ = unclassified;

      // Required selects → Haiku picks value
      // Include HTML-required AND label-asterisk AND GravityForms .gfield_contains_required
      const selects = Array.from(scope.querySelectorAll('select')).filter(el => {
        if (el.disabled) return false;
        if (el.required || el.getAttribute('aria-required') === 'true') return true;
        if (elLabel(el).includes('*')) return true;
        return Boolean(el.closest(REQUIRED_WRAPPER));
      });
      for (const el of selects) {
        let selector = null;
        if (el.id) selector = `#${CSS.escape(el.id)}`;
        else if (el.name) selector = `select[name="${quoteAttr(el.name)}"]`;
        if (!selector) continue;
        // Drop placeholder options (empty or "0" value).
        const options = Array.from(el.options)
          .filter(o => o.value && o.value !== '0')
          .map(o => ({ value: o.value, text: o.text.trim() }));
        if (!best.__selects__) best.__selects__ = [];
        best.__selects__.push({ selector, label: elLabel(el).trim(), options });
      }
      return best;
    },
    { autocompleteMap: AUTOCOMPLETE_MAP, corrections, formSelector }
  );
  /* eslint-enable no-undef */
}
/* c8 ignore stop */
/**
 * Use Haiku to classify unrecognised form input fields by type.
 * The model is given the form HTML for context — e.g. "Name" → firstName when a
 * Last Name field also exists.
 *
 * Only field types from the known list are accepted; anything else the model
 * returns (including "ignore", in any letter case) is dropped, so model output
 * is never used as an arbitrary object key.
 *
 * @param {Object} page - Playwright page; only `page.evaluate` is used
 * @param {Array<{selector: string, labelText: string, required: boolean}>} unclassified
 *   Fields to classify, in the order they are numbered in the prompt
 * @param {{domain: string, country_code?: string}} siteContext
 * @param {string|null} [formSelector=null] - CSS selector of the form to serialise
 * @returns {Promise<{detected: Object, thinking: string}>}
 *   detected: { fieldType: { selector, score, required, labelText } }
 */
/* c8 ignore start -- browser + LLM function, not testable in Node.js */
async function haikuClassifyFields(page, unclassified, siteContext, formSelector = null) {
  if (unclassified.length === 0) return { detected: {}, thinking: '' };

  // Lower-cased reply token → canonical field type ("Email" and "email" both map to email)
  const knownTypes = new Map(
    [
      'firstName',
      'lastName',
      'fullName',
      'email',
      'phone',
      'company',
      'subject',
      'message',
      'city',
      'postcode',
      'state',
      'address',
    ].map(t => [t.toLowerCase(), t])
  );

  const formHTML = await page.evaluate(sel => {
    /* eslint-disable no-undef */
    const root =
      (sel && document.querySelector(sel)) ||
      document.querySelector('form') ||
      document.querySelector('main, [role="main"], .content, #content') ||
      document.body;
    const clone = root.cloneNode(true);
    for (const el of clone.querySelectorAll('script, style, svg, img, noscript')) el.remove();
    // Full outerHTML gives Haiku the complete form structure; cap at 12 KB to stay within context
    return clone.outerHTML.replace(/\s+/g, ' ').substring(0, 12000);
    /* eslint-enable no-undef */
  }, formSelector);

  const fieldList = unclassified
    .map(
      (f, i) =>
        `${i + 1}. selector="${f.selector}" label="${f.labelText || '?'}" required=${f.required}`
    )
    .join('\n');

  const response = await callLLM({
    model: 'anthropic/claude-haiku-4-5',
    stage: 'outreach',
    messages: [
      {
        role: 'system',
        content: FORM_CLASSIFY_FIELDS_PROMPT,
      },
      {
        role: 'user',
        content: `Website: ${siteContext.domain} (${siteContext.country_code || '?'})

Form HTML (for context):
${wrapUntrusted(sanitizeHtmlForPrompt(formHTML), 'form_html')}

Classify each field. Choose from: firstName | lastName | fullName | email | phone | company | subject | city | postcode | state | address | ignore
Reply with one line per field: <number>: <fieldType>
Use "ignore" for honeypots, CAPTCHAs, or irrelevant fields.

Fields to classify:
${fieldList}`,
      },
    ],
    temperature: 0.1,
    max_tokens: 300,
  });

  const detected = {};
  let thinking = '';
  try {
    const rawResp = (response.content || '').trim();
    logger.info(`haikuClassifyFields raw response: ${rawResp.substring(0, 300)}`);
    for (const line of rawResp.split('\n')) {
      // Accept "1: email" and "1. email", with optional indentation
      const m = line.match(/^\s*(\d+)[.:]\s*(\w+)/);
      if (!m) continue;
      const idx = Number.parseInt(m[1], 10) - 1;
      const type = knownTypes.get(m[2].toLowerCase());
      // Unknown tokens and "ignore" are not in knownTypes, so both are skipped here
      if (!type || idx < 0 || idx >= unclassified.length) continue;
      // First field classified as a given type wins
      // eslint-disable-next-line security/detect-object-injection
      if (detected[type]) continue;
      // eslint-disable-next-line security/detect-object-injection
      const field = unclassified[idx];
      // eslint-disable-next-line security/detect-object-injection
      detected[type] = {
        selector: field.selector,
        score: 15,
        required: field.required,
        labelText: field.labelText,
      };
    }
    thinking = `Haiku classified ${Object.keys(detected).length}/${unclassified.length} fields: ${Object.entries(
      detected
    )
      .map(([t, f]) => `${t}=${f.selector}`)
      .join(', ')}`;
    logger.info(thinking);
  } catch {
    logger.warn('haikuClassifyFields: could not parse response');
  }

  return { detected, thinking };
}
/* c8 ignore stop */
/* c8 ignore start -- LLM function, not testable in Node.js */
/**
 * Pick the option of a country dropdown that corresponds to a country code.
 * Match order: option value equals the code → option text equals (or starts with)
 * the English region name for the code → option text starts with the code as a
 * standalone token ("AU", "AU - Australia"). A bare prefix match is deliberately
 * NOT used: "CA" must not select "Cambodia".
 *
 * @param {Array<{value: string, text: string}>} options
 * @param {string} countryCode - e.g. "AU"
 * @returns {{value: string, text: string}|null}
 */
function pickCountryOption(options, countryCode) {
  const code = String(countryCode || '')
    .trim()
    .toUpperCase();
  if (!code) return null;

  const byValue = options.find(o => o.value.toUpperCase() === code);
  if (byValue) return byValue;

  const upperText = o => o.text.trim().toUpperCase();

  let regionName = '';
  try {
    regionName = (new Intl.DisplayNames(['en'], { type: 'region' }).of(code) || '').toUpperCase();
  } catch {
    // Malformed region code or Intl.DisplayNames unavailable — fall through to code-token match
  }
  if (regionName && regionName !== code) {
    const byName =
      options.find(o => upperText(o) === regionName) ||
      // "United States of America", "United States (US)" — name followed by a separator
      options.find(
        o =>
          upperText(o).startsWith(regionName) &&
          /^[\s(,]/.test(upperText(o).slice(regionName.length))
      );
    if (byName) return byName;
  }

  const byCodeToken = options.find(o => {
    const text = upperText(o);
    return text.startsWith(code) && !/^[A-Z0-9]/.test(text.slice(code.length));
  });
  return byCodeToken || null;
}

/**
 * Pick the most natural "how did you hear about us" option for an internet lead.
 * Preference order: internet search terms first, then generic internet, then web/online.
 *
 * @param {Array<{value: string, text: string}>} options
 * @returns {{value: string, text: string}|null}
 */
function pickReferralOption(options) {
  const preference = [
    /google\s*search|google/i,
    /search\s*engine|internet\s*search/i,
    /online\s*search/i,
    /internet/i,
    /web\s*search|website/i,
    /online/i,
    /social\s*media/i,
  ];
  for (const pattern of preference) {
    const hit = options.find(o => pattern.test(o.text) || pattern.test(o.value));
    if (hit) return hit;
  }
  return null;
}

/**
 * Use Haiku to guess appropriate values for unknown mandatory form fields.
 * Selects are resolved deterministically where possible (country by code,
 * referral source by preference list, an "Other" option); the rest, plus all
 * text inputs, are sent to Haiku with label + site context.
 *
 * @param {Array<{selector: string, label?: string, labelText?: string}>} unknownFields
 *   Text inputs needing a value
 * @param {Array<{selector: string, label: string, options: Array<{value: string, text: string}>}>} selectFields
 * @param {{domain: string, country_code?: string, city?: string, state?: string}} siteContext
 * @returns {Promise<{answers: Object, thinking: string}>}
 *   answers maps selector → { type: 'select'|'text', value }; the extra key
 *   `__suburb_for_postcode__` holds a suburb string when Haiku paired one with a postcode.
 */
async function haikuGuessUnknownFields(unknownFields, selectFields, siteContext) {
  if (unknownFields.length === 0 && selectFields.length === 0) return { answers: {}, thinking: '' };

  const answers = {};

  // Selects: handle deterministically when possible, otherwise ask Haiku
  const selectsNeedingHaiku = [];
  for (const sel of selectFields) {
    // Nothing can be chosen from an empty dropdown, and the fallback to the
    // first option further down would throw on it
    if (!Array.isArray(sel.options) || sel.options.length === 0) {
      logger.warn(`Select "${sel.label}": no selectable options — skipped`);
      continue;
    }

    // Country dropdown: select by country_code — avoids sending 200 options to Haiku
    const isCountry = /\bcountr(y|ies)\b/i.test(sel.label);
    if (isCountry && siteContext.country_code) {
      const code = siteContext.country_code.toUpperCase();
      const opt = pickCountryOption(sel.options, code);
      if (opt) {
        answers[sel.selector] = { type: 'select', value: opt.value };
        logger.info(`Select "${sel.label}": auto-picked country "${opt.text}" (${code})`);
        continue;
      }
    }

    // "How did you hear about us?" — pick the most natural internet/search answer
    const isReferralSource =
      /hear\s+about|find\s+us|learn\s+about|how\s+did\s+you|source|referral/i.test(sel.label);
    if (isReferralSource) {
      const picked = pickReferralOption(sel.options);
      if (picked) {
        answers[sel.selector] = { type: 'select', value: picked.value };
        logger.info(`Select "${sel.label}": auto-picked referral source "${picked.text}"`);
        continue;
      }
    }

    const otherOpt = sel.options.find(
      o => /^other$/i.test(o.text.trim()) || /^other$/i.test(o.value)
    );
    if (otherOpt) {
      answers[sel.selector] = { type: 'select', value: otherOpt.value };
      logger.info(`Select "${sel.label}": auto-picked "Other" option`);
    } else {
      selectsNeedingHaiku.push(sel);
    }
  }

  const needsHaiku = [...unknownFields, ...selectsNeedingHaiku];
  if (needsHaiku.length === 0) return { answers, thinking: 'Selected "Other" for all dropdowns.' };

  // Text fields may carry their label as `label` or `labelText`
  const labelOf = f => f.label ?? f.labelText ?? '';

  const fieldDescriptions = needsHaiku
    .map((f, i) => {
      if (f.options) {
        // Cap at 20 options to avoid huge prompts from country/state dropdowns (200+ entries)
        const shown = f.options.slice(0, 20);
        const truncNote = f.options.length > 20 ? ` (+ ${f.options.length - 20} more)` : '';
        return `${i + 1}. SELECT field "${labelOf(f)}" — options: ${shown.map(o => `"${o.text}" (value="${o.value}")`).join(', ')}${truncNote}`;
      }
      return `${i + 1}. TEXT field "${labelOf(f)}"`;
    })
    .join('\n');

  const response = await callLLM({
    model: 'anthropic/claude-haiku-4-5',
    stage: 'outreach',
    messages: [
      {
        role: 'system',
        content: FORM_GUESS_VALUES_PROMPT,
      },
      {
        role: 'user',
        content: `Website: ${siteContext.domain} (${siteContext.country_code || '?'})
City/region: ${siteContext.city || 'unknown'}
State: ${siteContext.state || 'unknown'}

These mandatory form fields need values. For each, give the most appropriate answer.
The form is being submitted by a potential customer inquiring about web design services.

Fields:
${wrapUntrusted(fieldDescriptions, 'form_fields')}

IMPORTANT:
- If a postcode/zip field is present, also fill any suburb/city field with a matching suburb — even if the suburb field wasn't listed above. Add it as an extra entry in your JSON using the key "suburb_for_postcode" with value "SuburbName".
- Postcode and suburb/city values MUST be geographically consistent (real suburb + its real postcode in that country/state).

Respond in this format:
THINKING: [For each field, one line explaining WHY you chose that value — e.g. "Field 2 (postcode): 2026 because Bondi is in NSW 2026"]
JSON: {"1": "answer1", "2": "answer2", ...}
For SELECT fields, your answer must exactly match one of the option values shown.
Keep text answers short and realistic.`,
      },
    ],
    temperature: 0.3,
    max_tokens: 300,
  });

  let thinking = '';
  try {
    const content = response.content || '';
    const thinkMatch = content.match(/THINKING:\s*([\s\S]*?)(?=\nJSON:|$)/i);
    thinking = thinkMatch?.[1]?.trim() || '';
    const jsonMatch = content.match(/JSON:\s*(\{[\s\S]*\})/i) || content.match(/(\{[\s\S]*\})/);
    if (jsonMatch) {
      const parsed = JSON.parse(jsonMatch[1]);
      for (const [idxStr, value] of Object.entries(parsed)) {
        if (idxStr === 'suburb_for_postcode') {
          // Haiku paired a suburb with the postcode — store for caller to apply
          answers.__suburb_for_postcode__ = String(value);
          logger.info(`Haiku paired suburb for postcode: ${value}`);
          continue;
        }
        const idx = Number.parseInt(idxStr, 10) - 1;
        // eslint-disable-next-line security/detect-object-injection
        const field = needsHaiku[idx];
        if (!field) continue;
        if (field.options) {
          // Option values are strings; the model may answer with a JSON number
          const wanted = String(value);
          const opt = field.options.find(o => o.value === wanted || o.text === wanted);
          answers[field.selector] = {
            type: 'select',
            // Not a real option → fall back to the first one
            value: opt ? opt.value : field.options[0].value,
          };
        } else {
          if (value == null) continue; // a null answer would be typed as the text "null"
          answers[field.selector] = { type: 'text', value: String(value) };
        }
        logger.info(`Haiku guessed "${labelOf(field)}": ${value}`);
      }
    }
    if (!thinking) thinking = `Haiku filled ${Object.keys(answers).length} unknown field(s).`;
  } catch {
    logger.warn('Haiku field-guessing response could not be parsed');
  }

  return { answers, thinking };
}
/* c8 ignore stop */
/**
 * Identify which form-builder plugin produced the form on the current page.
 * Each known builder is recognised either by a marker substring in the page
 * markup or by a characteristic CSS class.
 *
 * @param {Object} page - Playwright page
 * @returns {Promise<string>} builder ID, or 'generic' when none of the markers match
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
function detectFormBuilder(page) {
  /* eslint-disable no-undef */
  return page.evaluate(() => {
    const markup = document.documentElement.innerHTML;
    // [builder id, markup substring, CSS selector], probed in order.
    // GravityForms comes before CF7 — 'cf7' substring can false-match on GravityForms pages
    const probes = [
      ['gravityforms', 'gform_wrapper', '.gform_wrapper'],
      ['cf7', 'wpcf7', '.wpcf7-form'],
      ['wpforms', 'wpforms', '.wpforms-form'],
      ['ninjaforms', 'nf-form', '.nf-form-cont'],
      ['forminator', 'forminator', '.forminator-form'],
      ['elementor', 'elementor-form', '.elementor-form'],
      ['formidable', 'frm_form', '.frm_forms'],
      ['hubspot', 'hsForm', '.hs-form'],
    ];
    const hit = probes.find(
      ([, needle, selector]) => markup.includes(needle) || document.querySelector(selector)
    );
    return hit ? hit[0] : 'generic';
  });
  /* eslint-enable no-undef */
}
/* c8 ignore stop */

const BUILDER_TEMPLATES_PATH = join(projectRoot, 'data', 'form-builder-templates.json');
const FIELD_CORRECTIONS_PATH = join(projectRoot, 'data', 'field-label-corrections.json');

/* c8 ignore start -- file I/O helpers called only during browser automation */
/**
 * Load operator-taught field label → type corrections.
 * @returns {Object} parsed contents of the corrections file, or {} when the file
 *   cannot be read or parsed
 */
function loadFieldCorrections() {
  let corrections;
  try {
    const raw = readFileSync(FIELD_CORRECTIONS_PATH, 'utf8');
    corrections = JSON.parse(raw);
  } catch {
    // Missing or unparseable file is treated as "no corrections yet"
    corrections = {};
  }
  return corrections;
}
/**
 * Persist a newly learned correction: labelText (lower-cased, trimmed) → fieldType.
 * Does nothing when either argument is empty or the mapping is already stored.
 *
 * @param {string} labelText
 * @param {string} fieldType
 */
function saveFieldCorrection(labelText, fieldType) {
  if (!labelText || !fieldType) return;

  const known = loadFieldCorrections();
  const normalisedLabel = labelText.toLowerCase().trim();
  // eslint-disable-next-line security/detect-object-injection
  const alreadyKnown = known[normalisedLabel] === fieldType;
  if (alreadyKnown) return;

  // eslint-disable-next-line security/detect-object-injection
  known[normalisedLabel] = fieldType;
  const serialised = JSON.stringify(known, null, 2);
  writeFileSync(FIELD_CORRECTIONS_PATH, serialised);
  logger.info(`Learned: "${labelText}" → ${fieldType}`);
}

/**
 * Read the shared form-builder templates from data/form-builder-templates.json.
 * @returns {Object} map of { builderType: { fields: [{selector, type}] } }, or {}
 *   when the file cannot be read or parsed
 */
function loadBuilderTemplates() {
  let templates;
  try {
    templates = JSON.parse(readFileSync(BUILDER_TEMPLATES_PATH, 'utf8'));
  } catch {
    templates = {};
  }
  return templates;
}

/**
 * Store a generalised builder template after a successful submission.
 * Entries whose selector is a purely numeric id/name are dropped as
 * site-specific; label suffixes such as " (LLM)" are removed to get the type.
 *
 * @param {string} builderType - builder ID; 'generic' is never saved
 * @param {string[]} filledFields - labels of the fields that were filled
 * @param {Array<{selector?: string}>} fillPlanInfo - parallel to filledFields
 */
function saveBuilderTemplate(builderType, filledFields, fillPlanInfo) {
  if (builderType === 'generic' || filledFields.length === 0) return;

  const templates = loadBuilderTemplates();

  // Suffixes appended to labels by the fill routines, stripped in this order
  const labelSuffixes = [/ \(LLM\)$/, / \(Haiku\)$/, / \(select\)$/, / \(paired.*\)$/];

  const fields = [];
  filledFields.forEach((label, position) => {
    // eslint-disable-next-line security/detect-object-injection
    const selector = fillPlanInfo[position]?.selector;
    if (!selector) return;
    // Purely numeric IDs/names (e.g. #17, [name="23"]) are not generalisable
    const isNumericId = /^#\d+$/.test(selector);
    const hasNumericName = /\[name="\d+"\]/.test(selector);
    if (isNumericId || hasNumericName) return;
    const type = labelSuffixes.reduce((text, suffix) => text.replace(suffix, ''), label);
    fields.push({ selector, type });
  });

  if (fields.length === 0) return;

  // Merge with existing — the most recently seen selector per type replaces the old one
  const merged = [...(templates[builderType]?.fields || [])];
  for (const entry of fields) {
    const at = merged.findIndex(existing => existing.type === entry.type);
    if (at >= 0) {
      merged[at] = entry;
    } else {
      merged.push(entry);
    }
  }

  templates[builderType] = { fields: merged, updated_at: new Date().toISOString() };
  writeFileSync(BUILDER_TEMPLATES_PATH, JSON.stringify(templates, null, 2));
  logger.info(`Saved builder template for "${builderType}": ${fields.length} field(s)`);
}
/* c8 ignore stop */
/**
 * Fill a form using a shared builder template.
 * Template entries whose selector is absent from the page, or whose fill
 * throws, are skipped individually.
 *
 * @param {Object} page - Playwright page
 * @param {{fields: Array<{selector: string, type: string}>, builderType?: string}} template
 * @param {{name: string, email: string, phone: string, company: string}} senderInfo
 * @param {{subject_line?: string, proposal_text: string}} outreach
 * @returns {Promise<{filledCount: number, fromTemplate: boolean, llmThinking: string}|null>}
 *   null when no template selector could be filled
 */
/* c8 ignore start -- browser automation, not testable in Node.js */
async function fillFromBuilderTemplate(page, template, senderInfo, outreach) {
  const [givenName, ...remainingNames] = senderInfo.name.split(' ');
  const valueMap = {
    firstName: givenName,
    // Single-word names reuse the first name as last name
    lastName: remainingNames.join(' ') || givenName,
    name: senderInfo.name,
    fullName: senderInfo.name,
    email: senderInfo.email,
    phone: senderInfo.phone,
    company: senderInfo.company,
    subject: outreach.subject_line || '',
    message: outreach.proposal_text,
  };

  let filledCount = 0;
  for (const { type, selector } of template.fields) {
    // eslint-disable-next-line security/detect-object-injection
    const value = valueMap[type];
    if (value) {
      try {
        const target = page.locator(selector).first();
        const present = (await target.count()) !== 0;
        if (present) {
          await target.fill(value);
          filledCount += 1;
          logger.info(`Builder template filled ${type}: ${selector.substring(0, 50)}`);
        }
      } catch {
        // selector stale or incompatible — skip
      }
    }
  }

  if (filledCount === 0) return null;

  logger.success(`Builder template fill: ${filledCount} fields`);
  const source = template.builderType || 'builder';
  return {
    filledCount,
    fromTemplate: true,
    llmThinking: `Filled from shared ${source} template.`,
  };
}
/* c8 ignore stop */
/**
 * Fill form using a saved fingerprint from a previous successful submission.
 * The fingerprint is treated as stale (returns null) as soon as one of its
 * selectors no longer matches or a fill throws. Entries without a selector or
 * without a known value are skipped rather than invalidating the fingerprint.
 *
 * @param {Object} page - Playwright page
 * @param {{fields: Array<{selector: string, type: string}>}} fingerprint
 * @param {{name: string, email: string, phone: string, company: string}} senderInfo
 * @param {{subject_line?: string, proposal_text: string}} outreach
 * @returns {Promise<{filledCount: number, llmThinking: string}|null>}
 */
/* c8 ignore start -- browser automation, not testable in Node.js */
async function fillFromFingerprint(page, fingerprint, senderInfo, outreach) {
  const [firstName, ...restName] = senderInfo.name.split(' ');
  const valueMap = {
    firstName,
    lastName: restName.join(' ') || firstName,
    // 'name' is a label the fill routines record for a single full-name input;
    // fillFromBuilderTemplate maps it the same way
    name: senderInfo.name,
    fullName: senderInfo.name,
    email: senderInfo.email,
    phone: senderInfo.phone,
    company: senderInfo.company,
    subject: outreach.subject_line || '',
    message: outreach.proposal_text,
  };

  let filledCount = 0;
  for (const field of fingerprint?.fields ?? []) {
    // eslint-disable-next-line security/detect-object-injection
    const value = valueMap[field.type];
    // saveFormFingerprint stores '' when no selector was recorded — nothing to target
    if (!value || !field.selector) continue;
    try {
      const el = page.locator(field.selector).first();
      if ((await el.count()) === 0) return null; // Selector no longer matches — fingerprint stale
      await el.fill(value);
      filledCount++;
      logger.info(`Filled ${field.type} from fingerprint: ${field.selector.substring(0, 50)}`);
    } catch {
      return null; // Fingerprint stale
    }
  }
  if (filledCount === 0) return null;
  logger.success(`Fingerprint fill: ${filledCount} fields from previous submission`);
  return { filledCount, llmThinking: '' };
}
/* c8 ignore stop */

/**
 * Save a form fingerprint to the site record after successful submission.
 * Label suffixes " (LLM)" and " (Haiku)" are stripped so the stored type is a
 * plain field type that fillFromFingerprint can look up; both mark the entry's
 * source as 'llm'.
 *
 * @param {number} siteId
 * @param {string} formUrl
 * @param {string[]} filledFields - labels of the fields that were filled
 * @param {Array<{selector?: string}>} fillPlanInfo - parallel to filledFields
 * @returns {Promise<Object>} the fingerprint that was stored
 */
/* c8 ignore start -- DB function called only during browser automation */
async function saveFormFingerprint(siteId, formUrl, filledFields, fillPlanInfo) {
  const fingerprint = {
    url: formUrl,
    fields: filledFields.map((label, i) => ({
      // eslint-disable-next-line security/detect-object-injection
      selector: fillPlanInfo?.[i]?.selector || '',
      type: label.replace(' (LLM)', '').replace(' (Haiku)', ''),
      source: label.includes('(LLM)') || label.includes('(Haiku)') ? 'llm' : 'scan',
    })),
    submitted_at: new Date().toISOString(),
  };
  await run('UPDATE sites SET form_fill_data = $1 WHERE id = $2', [
    JSON.stringify(fingerprint),
    siteId,
  ]);
  logger.info(`Saved form fingerprint for site ${siteId}: ${filledFields.length} fields`);
  return fingerprint;
}
/* c8 ignore stop */

/**
 * Inline equivalent of markOutreachResult() for use while error-categories.js is still
 * being migrated to PostgreSQL. Mirrors the same retry/terminal logic: retriable
 * errors are marked 'retry_later' with a retry time, all others 'failed'.
 * @param {number} messageId
 * @param {string} errorMessage
 */
async function markOutreachResultAsync(messageId, errorMessage) {
  if (isOutreachRetriable(errorMessage)) {
    const retryAt = computeRetryAt(errorMessage);
    await run(
      `UPDATE messages
       SET delivery_status = 'retry_later', error_message = $1, retry_at = $2
       WHERE id = $3`,
      [errorMessage || 'Unknown error', retryAt, messageId]
    );
  } else {
    await run(
      `UPDATE messages
       SET delivery_status = 'failed', error_message = $1
       WHERE id = $2`,
      [errorMessage || 'Unknown error', messageId]
    );
  }
}

// Hosts that are unlikely to contain a fillable contact form (ads, analytics, media, etc.)
const IFRAME_BLOCKLIST_PATTERNS = [
  'google',
  'facebook',
  'twitter',
  'youtube',
  'vimeo',
  'maps',
  'doubleclick',
  'googlesyndication',
  'googletagmanager',
  'analytics',
  'recaptcha',
  'hcaptcha',
  'captcha',
  'turnstile',
  'paypal',
  'stripe',
  'square',
  'disqus',
  'addthis',
  'sharethis',
  'widget',
  'chat',
  'livechat',
  'tawk',
  'intercom',
  'drift',
  'freshchat',
];
/**
 * Detect and fill embedded third-party forms (e.g. GoHighLevel/LeadConnector)
 * that live inside cross-origin iframes and are invisible to normal DOM queries.
 * Takes the first iframe (DOM order) whose host is not on the blocklist and that
 * exposes input/textarea fields, then uses Playwright's frameLocator() to
 * classify its fields via Haiku and fill them.
 *
 * @param {Object} page - Playwright page
 * @param {{name: string, email: string, phone: string, company: string}} senderInfo
 * @param {{subject_line?: string, proposal_text: string}} outreach
 * @param {{domain: string, country_code?: string, city?: string, state?: string}} siteContext
 * @returns {Promise<Object|null>} { filledCount, fromIframe, llmThinking, filledFields,
 *   fillPlanInfo, frameSelector }, or null if no embedded form was found or filled
 */
/* c8 ignore start -- browser + LLM function, not testable in Node.js */
async function fillEmbeddedIframeForm(page, senderInfo, outreach, siteContext) {
  // Quote an arbitrary attribute value for use in a CSS [attr="..."] selector
  const cssQuote = value => `"${String(value).replace(/["\\]/g, '\\$&')}"`;
  // "#id" is only valid CSS for identifier-like ids; ids with ':' or a leading
  // digit need the attribute form
  const idSelector = id => (/^[A-Za-z_][\w-]*$/.test(id) ? `#${id}` : `[id=${cssQuote(id)}]`);
  // Read the text of label[for=id] inside `scope`. count() is checked first because
  // innerText() on a locator that matches nothing waits for the action timeout
  // before rejecting.
  const labelFor = async (scope, id, fallback) => {
    try {
      const label = scope.locator(`label[for=${cssQuote(id)}]`);
      if ((await label.count()) === 0) return fallback;
      return await label.first().innerText();
    } catch {
      return fallback;
    }
  };

  // Find iframes that are not on the blocklist — any of them could contain an embedded form
  const iframes = await page.evaluate(blocklist => {
    /* eslint-disable no-undef */
    return Array.from(document.querySelectorAll('iframe[src]'))
      .filter(f => {
        if (!f.src || f.src === 'about:blank') return false;
        try {
          const host = new URL(f.src).hostname; // check hostname only, not path
          return !blocklist.some(b => host.includes(b));
        } catch {
          return false;
        }
      })
      // srcAttr is the literal attribute text; f.src is the resolved absolute URL and
      // differs from it for relative srcs, so only srcAttr is usable in [src="..."]
      .map((f, i) => ({ src: f.src, srcAttr: f.getAttribute('src'), id: f.id, index: i }));
    /* eslint-enable no-undef */
  }, IFRAME_BLOCKLIST_PATTERNS);

  logger.info(`fillEmbeddedIframeForm: ${iframes.length} candidate iframe(s) found`);
  if (iframes.length === 0) return null;

  // Probe each candidate iframe for form fields and take the first one that has any
  let target = null;
  let frame = null;
  let frameSelector = null;
  for (const candidate of iframes) {
    const sel = candidate.id
      ? `iframe${idSelector(candidate.id)}`
      : `iframe[src=${cssQuote(candidate.srcAttr ?? candidate.src)}]`;
    const f = page.frameLocator(sel);
    try {
      await f.locator('input, textarea').first().waitFor({ timeout: 5000 });
      const count = await f
        .locator('input:not([type=hidden]):not([type=submit]), textarea')
        .count();
      if (count > 0) {
        logger.info(
          `Embedded form iframe found: ${candidate.src.substring(0, 80)} (${count} inputs)`
        );
        target = candidate;
        frame = f;
        frameSelector = sel;
        break; // take first viable iframe (they're already in DOM order)
      }
    } catch {
      // iframe not accessible or no form fields — skip
    }
  }

  if (!target || !frame) return null;

  // Scan inputs inside the iframe
  const fields = await frame
    .locator(
      'input:not([type=hidden]):not([type=submit]):not([type=button]):not([type=checkbox]):not([type=radio]), textarea, select'
    )
    .all();

  const fieldInfos = await Promise.all(
    fields.map(async (el, i) => {
      try {
        const [type, name, id, placeholder, ariaLabel] = await Promise.all(
          ['type', 'name', 'id', 'placeholder', 'aria-label'].map(attr =>
            el.getAttribute(attr).catch(() => null)
          )
        );
        const tag = await el.evaluate(e => e.tagName).catch(() => 'INPUT');
        // Prefer the associated <label>; otherwise fall back to attributes
        let labelText = ariaLabel || placeholder || name || id || '';
        if (id) labelText = await labelFor(frame, id, labelText);
        // NOTE(review): the positional fallback uses the index among all scanned
        // fields, not the element's position among its siblings — it may not match.
        const selector = id
          ? idSelector(id)
          : name
            ? `[name=${cssQuote(name)}]`
            : `${tag.toLowerCase()}:nth-child(${i + 1})`;
        return { selector, tag, type: type || tag.toLowerCase(), labelText, index: i };
      } catch {
        return null;
      }
    })
  );

  const validFields = fieldInfos.filter(Boolean);
  if (validFields.length === 0) return null;

  // Use Haiku to classify all fields (they're all unclassified since we can't get the full HTML)
  const unclassified = validFields.map(f => ({
    selector: f.selector,
    labelText: f.labelText,
    required: false, // can't reliably detect in iframe
  }));

  const fieldList = unclassified
    .map(
      (f, i) =>
        // eslint-disable-next-line security/detect-object-injection
        `${i + 1}. selector="${f.selector}" label="${f.labelText || '?'}" type="${validFields[i]?.type || '?'}"`
    )
    .join('\n');

  const haikuDetected = {};
  try {
    const response = await callLLM({
      model: 'anthropic/claude-haiku-4-5',
      stage: 'outreach',
      messages: [
        { role: 'system', content: FORM_CLASSIFY_FIELDS_PROMPT },
        {
          role: 'user',
          content: `Website: ${siteContext.domain} (${siteContext.country_code || '?'})
Embedded form provider: ${new URL(target.src).hostname}

Classify each field. Choose from: firstName | lastName | fullName | email | phone | company | subject | message | city | postcode | state | address | ignore
Reply with one line per field: <number>: <fieldType>
Use "message" for any general message, inquiry, comments, or notes textarea/input.
Use "ignore" for honeypots, CAPTCHAs, or irrelevant fields. Do NOT ignore dropdowns — classify them or mark as "ignore" only if truly irrelevant.

Fields to classify:
${fieldList}`,
        },
      ],
      temperature: 0.1,
      max_tokens: 200,
    });

    const respLines = (response.content || '').trim().split('\n');
    logger.info(`Embedded iframe Haiku response: ${respLines.join(' | ')}`);
    for (const line of respLines) {
      const m = line.match(/^(\d+)[.:]\s*(\w+)/);
      if (!m) continue;
      const idx = Number.parseInt(m[1], 10) - 1;
      const type = m[2].trim();
      if (type === 'ignore' || idx < 0 || idx >= unclassified.length) continue;
      // eslint-disable-next-line security/detect-object-injection
      const field = unclassified[idx];
      // First field classified as a given type wins
      // eslint-disable-next-line security/detect-object-injection
      if (!haikuDetected[type]) haikuDetected[type] = { selector: field.selector, required: false };
    }
  } catch (err) {
    logger.warn(`Embedded iframe Haiku classification failed: ${err.message}`);
  }

  const [firstName, ...restName] = senderInfo.name.split(' ');
  const lastName = restName.join(' ') || firstName;
  const valueMap = {
    firstName,
    lastName,
    fullName: senderInfo.name,
    email: senderInfo.email,
    phone: senderInfo.phone,
    company: senderInfo.company,
    subject: outreach.subject_line || '',
    message: outreach.proposal_text,
    city: siteContext.city || '',
    state: siteContext.state || '',
  };

  let filledCount = 0;
  const filledFields = [];
  const fillPlanInfo = [];

  // Fill one field inside the iframe and record it; a failed fill is skipped silently
  const fillInFrame = async (selector, value, label) => {
    try {
      await frame.locator(selector).first().fill(value);
      filledCount++;
      filledFields.push(label);
      fillPlanInfo.push({ selector });
    } catch {
      /* stale */
    }
  };

  // Fill name fields context-aware: split only when both first and last name fields exist;
  // a lone firstName or fullName field receives the full name
  if (haikuDetected.firstName && haikuDetected.lastName) {
    await fillInFrame(haikuDetected.firstName.selector, firstName, 'firstName');
    await fillInFrame(haikuDetected.lastName.selector, lastName, 'lastName');
  } else if (haikuDetected.firstName) {
    await fillInFrame(haikuDetected.firstName.selector, senderInfo.name, 'name');
  } else if (haikuDetected.fullName) {
    await fillInFrame(haikuDetected.fullName.selector, senderInfo.name, 'name');
  }

  // In iframes we can't reliably detect required, so always fill phone/email/message/subject.
  // company is never filled here: it should only be filled when required, which
  // can't be detected in an iframe.
  for (const type of ['email', 'phone', 'subject', 'message', 'city', 'state']) {
    // eslint-disable-next-line security/detect-object-injection
    const info = haikuDetected[type];
    // eslint-disable-next-line security/detect-object-injection
    const value = valueMap[type];
    if (!info || !value) continue;
    await fillInFrame(info.selector, value, type);
  }

  // Fill selects — scan for <select> elements in the iframe and
  // use haikuGuessUnknownFields to pick an appropriate value
  const selectEls = await frame.locator('select').all();
  const selectsToGuess = [];
  for (const el of selectEls) {
    try {
      const id = await el.getAttribute('id').catch(() => null);
      const name = await el.getAttribute('name').catch(() => null);
      const sel = id ? idSelector(id) : name ? `select[name=${cssQuote(name)}]` : null;
      if (!sel) continue;
      let label = '';
      if (id) label = await labelFor(frame, id, '');
      if (!label) label = (await el.getAttribute('aria-label').catch(() => '')) || name || id || '';
      const optionEls = await el.locator('option').all();
      const options = (
        await Promise.all(
          optionEls.map(async o => ({
            value: (await o.getAttribute('value').catch(() => '')) || '',
            text: (await o.innerText().catch(() => '')).trim(),
          }))
        )
      ).filter(o => o.value && o.value !== '' && o.value !== '0');
      if (options.length > 0) selectsToGuess.push({ selector: sel, label, options });
    } catch {
      /* skip */
    }
  }
  if (selectsToGuess.length > 0) {
    try {
      const { answers } = await haikuGuessUnknownFields([], selectsToGuess, siteContext);
      for (const [sel, val] of Object.entries(answers)) {
        if (val?.type !== 'select') continue;
        try {
          await frame.locator(sel).selectOption(val.value);
          filledCount++;
          filledFields.push(
            `${selectsToGuess.find(s => s.selector === sel)?.label || 'select'} (select)`
          );
          fillPlanInfo.push({ selector: sel });
        } catch {
          /* stale */
        }
      }
    } catch {
      /* skip */
    }
  }

  if (filledCount === 0) return null;
  logger.success(`Embedded iframe form filled: ${filledCount} fields (${filledFields.join(', ')})`);
  return {
    filledCount,
    fromIframe: true,
    llmThinking: `Filled embedded ${new URL(target.src).hostname} iframe form.`,
    filledFields,
    fillPlanInfo,
    frameSelector, // expose so submitContactForm can target the right submit button
  };
}
/* c8 ignore stop */
1671 * Returns { filledCount, llmThinking, filledFields, fillPlanInfo } 1672 */ 1673 /* c8 ignore start -- browser automation, not testable in Node.js */ 1674 async function autoFillForm(page, senderInfo, outreach, additionalFields = {}, siteData = {}) { 1675 logger.info('Attempting to auto-fill form fields...'); 1676 1677 try { 1678 const siteCtx = { 1679 domain: siteData.domain || outreach.domain, 1680 country_code: siteData.country_code, 1681 city: siteData.city, 1682 state: siteData.state, 1683 }; 1684 1685 // Phase 0: Check for embedded iframe forms (GoHighLevel, JotForm, etc.) 1686 // These are invisible to regular DOM queries — fill them first if found 1687 const iframeResult = await fillEmbeddedIframeForm(page, senderInfo, outreach, siteCtx).catch( 1688 () => null 1689 ); 1690 if (iframeResult) return iframeResult; 1691 1692 // Select best <form> element when multiple are present 1693 const formSelector = await selectBestForm(page, siteCtx).catch(() => null); 1694 1695 // Phase 1: DOM scan — autocomplete/type attrs + operator corrections (high-confidence only) 1696 const corrections = loadFieldCorrections(); 1697 const detected = await scanFormFields(page, corrections, formSelector); 1698 1699 // Phase 2: Haiku classifies all unrecognised fields using full form HTML for context 1700 let llmThinking = ''; 1701 const unclassified = detected.__unclassified__ || []; 1702 if (unclassified.length > 0) { 1703 logger.info(`Phase 2: Haiku classifying ${unclassified.length} unrecognised field(s)`); 1704 try { 1705 const { detected: haikuDetected, thinking } = await haikuClassifyFields( 1706 page, 1707 unclassified, 1708 siteCtx, 1709 formSelector 1710 ); 1711 llmThinking = thinking; 1712 // Merge Haiku classifications into detected (don't overwrite high-confidence fields) 1713 for (const [type, info] of Object.entries(haikuDetected)) { 1714 // eslint-disable-next-line security/detect-object-injection 1715 if (!detected[type]) detected[type] = info; 1716 } 1717 } 
catch (err) { 1718 logger.warn(`Haiku field classification failed: ${err.message}`); 1719 } 1720 } 1721 1722 const [firstName, ...rest] = senderInfo.name.split(' '); 1723 const lastName = rest.join(' '); 1724 1725 // Build value map for all field types 1726 const valueMap = { 1727 firstName, 1728 lastName: lastName || firstName, 1729 fullName: senderInfo.name, 1730 email: senderInfo.email, 1731 phone: senderInfo.phone, 1732 company: senderInfo.company, 1733 subject: outreach.subject_line || '', 1734 message: outreach.proposal_text, 1735 city: siteData.city || '', 1736 state: siteData.state || '', 1737 }; 1738 1739 const filledSelectors = new Set(); 1740 let filledCount = 0; 1741 const filledFields = []; 1742 const fillPlanInfo = []; 1743 1744 const fillField = async (selector, value, label) => { 1745 if (filledSelectors.has(selector)) return false; 1746 try { 1747 const el = await page.locator(selector).first(); 1748 if ((await el.count()) === 0) return false; 1749 await el.fill(value); 1750 filledSelectors.add(selector); 1751 filledCount++; 1752 filledFields.push(label); 1753 fillPlanInfo.push({ selector }); 1754 logger.info(`Filled ${label}: ${selector.substring(0, 60)}`); 1755 return true; 1756 } catch { 1757 return false; 1758 } 1759 }; 1760 1761 // Phone-aware fill: input masks (IMask, jQuery Mask, Cleave.js) listen to keyboard events, 1762 // not programmatic value sets — so el.fill() leaves masked fields empty. 1763 // Strategy: click to focus, type digits only (no +country-code), check result, done. 
1764 const fillPhoneField = async (selector, phone, label) => { 1765 if (filledSelectors.has(selector)) return false; 1766 try { 1767 const el = await page.locator(selector).first(); 1768 if ((await el.count()) === 0) return false; 1769 1770 // Always use national digits only — masks virtually never accept +1 prefix 1771 const national = phone 1772 .replace(/^\+1/, '') 1773 .replace(/^\+\d{1,3}/, '') 1774 .replace(/\D/g, ''); 1775 const digitsToType = national.length >= 7 ? national : phone.replace(/\D/g, ''); 1776 1777 // Clear and type via keyboard so mask event listeners fire 1778 await el.click(); 1779 await page.keyboard.press('Control+A'); 1780 await page.keyboard.press('Delete'); 1781 await page.keyboard.type(digitsToType, { delay: 30 }); 1782 1783 // Verify something stuck; if empty, fall back to el.fill() as last resort 1784 const actual = await el.inputValue().catch(() => null); 1785 if (actual !== null && actual.replace(/\D/g, '').length < 7) { 1786 logger.warn( 1787 `Phone mask keyboard-type produced short value "${actual}" — trying el.fill fallback` 1788 ); 1789 await el.fill(digitsToType); 1790 } 1791 1792 filledSelectors.add(selector); 1793 filledCount++; 1794 filledFields.push(label); 1795 fillPlanInfo.push({ selector }); 1796 logger.info(`Filled ${label} (keyboard): ${selector.substring(0, 60)}`); 1797 return true; 1798 } catch { 1799 return false; 1800 } 1801 }; 1802 1803 const fillSelect = async (selector, value, label) => { 1804 if (filledSelectors.has(selector)) return false; 1805 try { 1806 await page.locator(selector).selectOption(value); 1807 filledSelectors.add(selector); 1808 filledCount++; 1809 filledFields.push(`${label} (select)`); 1810 fillPlanInfo.push({ selector }); 1811 logger.info(`Selected ${label}: "${value}" in ${selector.substring(0, 60)}`); 1812 return true; 1813 } catch { 1814 return false; 1815 } 1816 }; 1817 1818 // Fill name fields — context-aware: only split if both firstName+lastName detected 1819 if (detected.firstName 
&& detected.lastName) { 1820 await fillField(detected.firstName.selector, firstName, 'firstName'); 1821 await fillField(detected.lastName.selector, lastName || firstName, 'lastName'); 1822 } else if (detected.firstName) { 1823 // Only one name field — use full name regardless of how it was labelled 1824 await fillField(detected.firstName.selector, senderInfo.name, 'name'); 1825 } else if (detected.fullName) { 1826 await fillField(detected.fullName.selector, senderInfo.name, 'name'); 1827 } 1828 1829 // Fill remaining detected fields from value map 1830 const fieldOrder = ['email', 'phone', 'company', 'subject', 'message', 'city', 'state']; 1831 for (const type of fieldOrder) { 1832 // eslint-disable-next-line security/detect-object-injection 1833 const info = detected[type]; 1834 // eslint-disable-next-line security/detect-object-injection 1835 const value = valueMap[type]; 1836 if (!info || !value) continue; 1837 // Company: only fill if required (avoids awkward "Company: AuditAndFix" on personal sites) 1838 if (type === 'company' && !info.required) continue; 1839 if (type === 'phone') { 1840 await fillPhoneField(info.selector, value, type); 1841 } else { 1842 await fillField(info.selector, value, type); 1843 } 1844 } 1845 1846 // CSS safety net: email, phone, message are critical — catch them even if Haiku missed them 1847 // or if lazy-loading (e.g. nitro-lazy) hid them during the initial scan. 
1848 if (!filledSelectors.has(detected.email?.selector)) { 1849 for (const sel of ['input[type="email"]', 'input[name*="email" i]', 'input[id*="email" i]']) { 1850 if (await fillField(sel, senderInfo.email, 'email')) break; 1851 } 1852 } 1853 if (!filledSelectors.has(detected.phone?.selector) && senderInfo.phone) { 1854 for (const sel of [ 1855 'input[type="tel"]', 1856 'input[name*="phone" i]', 1857 'input[id*="phone" i]', 1858 'input[name*="mobile" i]', 1859 'input[autocomplete="tel"]', 1860 ]) { 1861 if (await fillPhoneField(sel, senderInfo.phone, 'phone')) break; 1862 } 1863 } 1864 if (!detected.message || !filledSelectors.has(detected.message?.selector)) { 1865 for (const sel of ['textarea[name*="message" i]', 'textarea[id*="message" i]', 'textarea']) { 1866 if (await fillField(sel, outreach.proposal_text, 'message')) break; 1867 } 1868 } 1869 1870 // Phase 3: Haiku fills geo fields without values + selects 1871 const unknownFields = []; 1872 const selectFields = detected.__selects__ || []; 1873 1874 // Geo fields that Haiku classified but have no known value → ask Haiku to guess a value 1875 const geoNeedsValue = [ 1876 detected.postcode && 1877 !filledSelectors.has(detected.postcode.selector) && { 1878 selector: detected.postcode.selector, 1879 label: 'postcode', 1880 }, 1881 detected.address && 1882 !filledSelectors.has(detected.address.selector) && { 1883 selector: detected.address.selector, 1884 label: 'street address', 1885 }, 1886 detected.city && 1887 !filledSelectors.has(detected.city.selector) && { 1888 selector: detected.city.selector, 1889 label: 'city/suburb', 1890 }, 1891 detected.state && 1892 !filledSelectors.has(detected.state.selector) && { 1893 selector: detected.state.selector, 1894 label: 'state/region', 1895 }, 1896 ].filter(Boolean); 1897 1898 const allUnknown = [...unknownFields, ...geoNeedsValue]; 1899 if (allUnknown.length > 0 || selectFields.length > 0) { 1900 logger.info( 1901 `Phase 4: Haiku guessing ${allUnknown.length} text 
field(s) + ${selectFields.length} select(s)` 1902 ); 1903 try { 1904 const { answers, thinking } = await haikuGuessUnknownFields(allUnknown, selectFields, { 1905 domain: siteData.domain || outreach.domain, 1906 country_code: siteData.country_code, 1907 city: siteData.city, 1908 state: siteData.state, 1909 }); 1910 if (thinking) llmThinking = llmThinking ? `${llmThinking}\n${thinking}` : thinking; 1911 for (const [selector, val] of Object.entries(answers)) { 1912 // Special key: Haiku paired a suburb with a postcode it filled 1913 if (selector === '__suburb_for_postcode__') { 1914 if (detected.city && !filledSelectors.has(detected.city.selector)) { 1915 await fillField( 1916 detected.city.selector, 1917 String(val), 1918 'suburb (paired with postcode, Haiku)' 1919 ); 1920 } 1921 continue; 1922 } 1923 const { type, value } = val; 1924 const labelEntry = [...allUnknown, ...selectFields].find(f => f.selector === selector); 1925 const label = labelEntry?.label || 'unknown'; 1926 if (type === 'select') { 1927 await fillSelect(selector, value, label); 1928 } else { 1929 await fillField(selector, value, `${label} (Haiku)`); 1930 } 1931 } 1932 } catch (haikuErr) { 1933 logger.warn(`Haiku field-guessing failed: ${haikuErr.message}`); 1934 } 1935 } 1936 1937 // Additional custom fields (e.g. 
E2E test password) 1938 for (const [fieldName, fieldValue] of Object.entries(additionalFields)) { 1939 if (!fieldValue) continue; 1940 const customSelectors = [ 1941 `input[name="${fieldName}"]`, 1942 `input[name*="${fieldName}" i]`, 1943 `input[id="${fieldName}"]`, 1944 `input[id*="${fieldName}" i]`, 1945 ]; 1946 for (const sel of customSelectors) { 1947 if (await fillField(sel, fieldValue, fieldName)) break; 1948 } 1949 } 1950 1951 if (filledCount > 0) { 1952 logger.success(`Auto-filled ${filledCount} fields: ${filledFields.join(', ')}`); 1953 } else { 1954 logger.warn('Could not auto-fill any fields — form structure may not match known patterns'); 1955 } 1956 return { filledCount, llmThinking, filledFields, fillPlanInfo }; 1957 } catch (error) { 1958 logger.warn(`Auto-fill failed (${error.message}), will show operator panel`); 1959 return { filledCount: 0, llmThinking: '', filledFields: [], fillPlanInfo: [] }; 1960 } 1961 } 1962 /* c8 ignore stop */ 1963 1964 /** 1965 * Detect CAPTCHA widgets on the page and extract their sitekeys. 1966 * Returns array of { type, sitekey } objects. 
1967 */ 1968 /* c8 ignore start -- browser-context function, not testable in Node.js */ 1969 function detectCaptchas(page) { 1970 /* eslint-disable no-undef */ 1971 return page.evaluate(() => { 1972 const found = []; 1973 const html = document.documentElement.innerHTML; 1974 const iframes = Array.from(document.querySelectorAll('iframe')).map(f => f.src || ''); 1975 1976 const rcEl = document.querySelector('.g-recaptcha') || document.querySelector('[data-sitekey]'); 1977 if ( 1978 iframes.some(s => s.includes('recaptcha') || s.includes('google.com/recaptcha')) || 1979 html.includes('g-recaptcha') || 1980 html.includes('grecaptcha') 1981 ) 1982 found.push({ type: 'reCAPTCHA', sitekey: rcEl?.dataset?.sitekey || null }); 1983 1984 const hcEl = 1985 document.querySelector('.h-captcha') || document.querySelector('[data-hcaptcha-sitekey]'); 1986 if ( 1987 iframes.some(s => s.includes('hcaptcha')) || 1988 html.includes('h-captcha') || 1989 html.includes('hcaptcha.com') 1990 ) 1991 found.push({ 1992 type: 'hCaptcha', 1993 sitekey: hcEl?.dataset?.sitekey || hcEl?.dataset?.hcaptchaSitekey || null, 1994 }); 1995 1996 const tsEl = 1997 document.querySelector('.cf-turnstile') || 1998 document.querySelector('[data-cf-turnstile-sitekey]'); 1999 if (iframes.some(s => s.includes('challenges.cloudflare.com')) || html.includes('cf-turnstile')) 2000 found.push({ 2001 type: 'Cloudflare Turnstile', 2002 sitekey: tsEl?.dataset?.sitekey || tsEl?.dataset?.cfTurnstileSitekey || null, 2003 }); 2004 2005 // Image math CAPTCHA — GravityForms style (.gfield--type-captcha) or generic img+input pair 2006 const mathCaptcha = (() => { 2007 // GravityForms: .gfield_captcha_container holds multiple digit images + a text input 2008 const gfContainer = document.querySelector( 2009 '.gfield_captcha_container, .gfield--type-captcha' 2010 ); 2011 if (gfContainer) { 2012 const input = gfContainer.querySelector('input[type="text"]'); 2013 const containerSel = gfContainer.id 2014 ? 
`#${CSS.escape(gfContainer.id)}` 2015 : '.gfield_captcha_container'; 2016 const inputSel = input?.id 2017 ? `#${CSS.escape(input.id)}` 2018 : input?.name 2019 ? `input[name="${input.name}"]` 2020 : null; 2021 if (inputSel) return { inputSel, containerSel }; 2022 } 2023 // Generic fallback: <img class/id contains "captcha"> near a text input 2024 for (const img of Array.from(document.querySelectorAll('img'))) { 2025 const cls = (img.className || '').toLowerCase(); 2026 const id = (img.id || '').toLowerCase(); 2027 if (!cls.includes('captcha') && !id.includes('captcha')) continue; 2028 const container = img.parentElement; 2029 let input = container && container.querySelector('input[type="text"], input:not([type])'); 2030 if (!input && container?.parentElement) { 2031 input = container.parentElement.querySelector('input[type="text"], input:not([type])'); 2032 } 2033 if (input) { 2034 const inputSel = input.id 2035 ? `#${CSS.escape(input.id)}` 2036 : input.name 2037 ? `input[name="${input.name}"]` 2038 : null; 2039 const containerSel = container?.id 2040 ? `#${CSS.escape(container.id)}` 2041 : img.id 2042 ? `#${CSS.escape(img.id)}` 2043 : null; 2044 if (inputSel && containerSel) return { inputSel, containerSel }; 2045 } 2046 } 2047 return null; 2048 })(); 2049 if (mathCaptcha) { 2050 found.push({ type: 'mathImage', ...mathCaptcha }); 2051 } 2052 2053 return found; 2054 }); 2055 /* eslint-enable no-undef */ 2056 } 2057 /* c8 ignore stop */ 2058 2059 const CAPTCHA_BENCHMARK_PATH = join(projectRoot, 'data', 'captcha-provider-benchmark.json'); 2060 2061 /* c8 ignore start -- file I/O + external CAPTCHA API + browser functions, not testable in Node.js */ 2062 /** Load provider benchmark results (solve times in ms, or Infinity if failed). */ 2063 function loadCaptchaBenchmark() { 2064 try { 2065 return JSON.parse(readFileSync(CAPTCHA_BENCHMARK_PATH, 'utf8')); 2066 } catch { 2067 return {}; 2068 } 2069 } 2070 2071 /** Save updated benchmark results. 
*/ 2072 function saveCaptchaBenchmark(data) { 2073 writeFileSync(CAPTCHA_BENCHMARK_PATH, JSON.stringify(data, null, 2)); 2074 } 2075 /** Submit a solve job to NopeCHA. Returns token or null. */ 2076 async function nopechaRequest(apiKey, captcha, pageUrl) { 2077 const typeMap = { 2078 reCAPTCHA: 'recaptcha2', 2079 hCaptcha: 'hcaptcha', 2080 'Cloudflare Turnstile': 'turnstile', 2081 }; 2082 const type = typeMap[captcha.type]; 2083 if (!type) return null; 2084 2085 const submitRes = await fetch('https://api.nopecha.com/token', { 2086 method: 'POST', 2087 headers: { 'Content-Type': 'application/json' }, 2088 body: JSON.stringify({ key: apiKey, type, sitekey: captcha.sitekey, url: pageUrl }), 2089 }); 2090 const submitData = await submitRes.json(); 2091 if (submitData.error) { 2092 logger.warn(`NopeCHA submit error ${submitData.error}`); 2093 return null; 2094 } 2095 2096 const jobId = submitData.data; 2097 for (let i = 0; i < 60; i++) { 2098 await new Promise(r => setTimeout(r, 3000)); 2099 const res = await fetch(`https://api.nopecha.com/token?key=${apiKey}&id=${jobId}`); 2100 const data = await res.json(); 2101 if (data.error === 14) continue; 2102 if (data.error) { 2103 logger.warn(`NopeCHA poll error ${data.error}`); 2104 return null; 2105 } 2106 if (data.data && typeof data.data === 'string' && data.data.length > 20) return data.data; 2107 } 2108 return null; 2109 } 2110 2111 /** Primary NopeCHA account. */ 2112 async function solveWithNopeCHA(captcha, pageUrl) { 2113 const apiKey = process.env.NOPECHA_API_KEY; 2114 if (!apiKey) return null; 2115 return nopechaRequest(apiKey, captcha, pageUrl); 2116 } 2117 2118 /** Secondary NopeCHA account (extra 2000/day until Apr 6). Round-robins with primary. 
*/ 2119 async function solveWithNopeCHA2(captcha, pageUrl) { 2120 const apiKey = process.env.NOPECHA_API_KEY_2; 2121 if (!apiKey) return null; 2122 return nopechaRequest(apiKey, captcha, pageUrl); 2123 } 2124 2125 /** 2126 * Submit a solve job to CapMonster (same API format as Anti-Captcha, different base URL). 2127 * Returns token or null. 2128 */ 2129 async function solveWithCapMonster(captcha, pageUrl) { 2130 const apiKey = process.env.CAPMONSTER_API_KEY; 2131 if (!apiKey) return null; 2132 2133 const typeMap = { 2134 reCAPTCHA: 'RecaptchaV2TaskProxyless', 2135 hCaptcha: 'HCaptchaTaskProxyless', 2136 'Cloudflare Turnstile': 'TurnstileTaskProxyless', 2137 }; 2138 const taskType = typeMap[captcha.type]; 2139 if (!taskType) return null; 2140 2141 const createRes = await fetch('https://api.capmonster.cloud/createTask', { 2142 method: 'POST', 2143 headers: { 'Content-Type': 'application/json' }, 2144 body: JSON.stringify({ 2145 clientKey: apiKey, 2146 task: { type: taskType, websiteURL: pageUrl, websiteKey: captcha.sitekey }, 2147 }), 2148 }); 2149 const createData = await createRes.json(); 2150 if (createData.errorId !== 0) { 2151 logger.warn(`CapMonster submit error ${createData.errorId}: ${createData.errorDescription}`); 2152 return null; 2153 } 2154 2155 const { taskId } = createData; 2156 for (let i = 0; i < 60; i++) { 2157 await new Promise(r => setTimeout(r, 3000)); 2158 const res = await fetch('https://api.capmonster.cloud/getTaskResult', { 2159 method: 'POST', 2160 headers: { 'Content-Type': 'application/json' }, 2161 body: JSON.stringify({ clientKey: apiKey, taskId }), 2162 }); 2163 const data = await res.json(); 2164 if (data.errorId !== 0) { 2165 logger.warn(`CapMonster poll error ${data.errorId}`); 2166 return null; 2167 } 2168 if (data.status === 'ready') return data.solution?.gRecaptchaResponse || null; 2169 } 2170 return null; 2171 } 2172 2173 /** 2174 * Solve a CAPTCHA using the fastest available provider (based on benchmark), 2175 * falling back to 
the other if the winner fails. Injects token into page. 2176 * Returns true if solved and injected, false otherwise. 2177 */ 2178 /** 2179 * Solve an image-based math CAPTCHA using Haiku vision. 2180 * Screenshots the CAPTCHA image, asks Haiku to read the expression, evaluates it, 2181 * and fills the answer into the input field. 2182 */ 2183 async function solveMathImageCaptcha(page, captcha) { 2184 try { 2185 // Screenshot the container (which may hold multiple digit images side by side) 2186 const containerEl = page.locator(captcha.containerSel).first(); 2187 if ((await containerEl.count()) === 0) { 2188 logger.warn('Math CAPTCHA container not found'); 2189 return false; 2190 } 2191 const imgBuffer = await containerEl.screenshot(); 2192 const base64 = imgBuffer.toString('base64'); 2193 2194 const response = await callLLM({ 2195 model: 'anthropic/claude-haiku-4-5', 2196 stage: 'outreach', 2197 messages: [ 2198 { 2199 role: 'user', 2200 content: [ 2201 { 2202 type: 'image', 2203 source: { type: 'base64', media_type: 'image/png', data: base64 }, 2204 }, 2205 { 2206 type: 'text', 2207 text: 'This is a CAPTCHA image showing a math expression. Read the expression and calculate the answer. 
Reply with ONLY the numeric answer, nothing else.', 2208 }, 2209 ], 2210 }, 2211 ], 2212 max_tokens: 10, 2213 temperature: 0, 2214 }); 2215 2216 const answer = (response.content || '').trim().replace(/[^0-9-]/g, ''); 2217 if (!answer) { 2218 logger.warn('Math CAPTCHA: Haiku could not read expression'); 2219 return false; 2220 } 2221 2222 await page.locator(captcha.inputSel).first().fill(answer); 2223 logger.success(`Math CAPTCHA solved: filled "${answer}" into ${captcha.inputSel}`); 2224 return true; 2225 } catch (err) { 2226 logger.warn(`Math CAPTCHA solve failed: ${err.message}`); 2227 return false; 2228 } 2229 } 2230 2231 async function solveCaptcha(page, captcha, pageUrl) { 2232 // Image math CAPTCHA — no sitekey needed, handled entirely via vision LLM 2233 if (captcha.type === 'mathImage') { 2234 return solveMathImageCaptcha(page, captcha); 2235 } 2236 2237 if (!captcha.sitekey) { 2238 logger.warn(`CAPTCHA: no sitekey found for ${captcha.type}`); 2239 return false; 2240 } 2241 2242 const benchmark = loadCaptchaBenchmark(); 2243 const ms = name => 2244 benchmark[name]?.avgMs ?? 2245 (process.env[ 2246 { 2247 nopecha: 'NOPECHA_API_KEY', 2248 nopecha2: 'NOPECHA_API_KEY_2', 2249 capmonster: 'CAPMONSTER_API_KEY', 2250 }[name] 2251 ] 2252 ? 90000 2253 : Infinity); 2254 2255 // Order providers fastest-first; nopecha2 round-robins with nopecha by alternating on odd solve count 2256 const solveCount = Object.values(benchmark).reduce((s, v) => s + (v.count || 0), 0); 2257 const nopechaFn = solveCount % 2 === 0 ? solveWithNopeCHA : solveWithNopeCHA2; 2258 const nopechaName = solveCount % 2 === 0 ? 
'nopecha' : 'nopecha2'; 2259 2260 const providers = [ 2261 { 2262 name: nopechaName, 2263 fn: nopechaFn, 2264 ms: Math.min(ms('nopecha'), ms('nopecha2')), 2265 timeoutMs: 30000, 2266 }, 2267 { name: 'capmonster', fn: solveWithCapMonster, ms: ms('capmonster'), timeoutMs: 120000 }, 2268 ] 2269 .filter(p => p.ms < Infinity) 2270 .sort((a, b) => a.ms - b.ms); 2271 2272 if (providers.length === 0) return false; 2273 2274 logger.info(`CAPTCHA: solving ${captcha.type} via ${providers.map(p => p.name).join(' → ')}`); 2275 2276 for (const provider of providers) { 2277 const start = Date.now(); 2278 try { 2279 const timeout = new Promise(r => setTimeout(() => r(null), provider.timeoutMs)); 2280 const token = await Promise.race([provider.fn(captcha, pageUrl), timeout]); 2281 if (token === null && Date.now() - start >= provider.timeoutMs - 500) { 2282 logger.warn( 2283 `${provider.name}: timed out after ${provider.timeoutMs / 1000}s, trying next provider` 2284 ); 2285 continue; 2286 } 2287 if (!token) { 2288 logger.warn(`${provider.name}: failed, trying next provider`); 2289 continue; 2290 } 2291 // Update benchmark with this solve time 2292 const elapsed = Date.now() - start; 2293 const prev = benchmark[provider.name] || { avgMs: elapsed, count: 0 }; 2294 const count = prev.count + 1; 2295 benchmark[provider.name] = { 2296 avgMs: Math.round((prev.avgMs * prev.count + elapsed) / count), 2297 count, 2298 updated_at: new Date().toISOString(), 2299 }; 2300 saveCaptchaBenchmark(benchmark); 2301 logger.success(`${provider.name}: solved in ${Math.round(elapsed / 1000)}s`); 2302 2303 // Inject token into page — no checkbox click needed, token goes directly into 2304 // the hidden textarea and the form submits with it for server-side verification 2305 await page.evaluate( 2306 ({ type, tok }) => { 2307 /* eslint-disable no-undef */ 2308 if (type === 'reCAPTCHA') { 2309 // 1. 
Fill the hidden textarea reCAPTCHA reads on submit 2310 const textarea = document.getElementById('g-recaptcha-response'); 2311 if (textarea) { 2312 textarea.style.display = 'block'; 2313 textarea.value = tok; 2314 textarea.dispatchEvent(new Event('change', { bubbles: true })); 2315 textarea.style.display = 'none'; 2316 } 2317 // 2. Walk ___grecaptcha_cfg.clients — try every known key path for the callback 2318 // reCAPTCHA obfuscates property names, so we walk all string-keyed leaves 2319 const cfg = window.___grecaptcha_cfg; 2320 if (cfg && cfg.clients) { 2321 for (const client of Object.values(cfg.clients)) { 2322 // Recursively find 'callback' function values up to depth 4 2323 const findCb = (obj, depth) => { 2324 if (!obj || typeof obj !== 'object' || depth > 4) return; 2325 for (const val of Object.values(obj)) { 2326 if (typeof val === 'function' && val.length === 1) { 2327 try { 2328 val(tok); 2329 } catch { 2330 /* ignore */ 2331 } 2332 } else { 2333 findCb(val, depth + 1); 2334 } 2335 } 2336 }; 2337 // Only search the sub-object that contains 'callback' key explicitly 2338 for (const sub of Object.values(client || {})) { 2339 if (sub && typeof sub === 'object' && typeof sub.callback === 'function') { 2340 try { 2341 sub.callback(tok); 2342 } catch { 2343 /* ignore */ 2344 } 2345 } 2346 if (sub && typeof sub === 'object') { 2347 for (const sub2 of Object.values(sub)) { 2348 if (sub2 && typeof sub2 === 'object' && typeof sub2.callback === 'function') { 2349 try { 2350 sub2.callback(tok); 2351 } catch { 2352 /* ignore */ 2353 } 2354 } 2355 } 2356 } 2357 } 2358 } 2359 } 2360 // 3. data-callback attr on .g-recaptcha div (Contact Form 7, WPForms, etc.) 
2361 const div = document.querySelector('.g-recaptcha[data-callback]'); 2362 if (div) { 2363 const fnName = div.getAttribute('data-callback'); 2364 if (fnName && typeof window[fnName] === 'function') { 2365 try { 2366 window[fnName](tok); 2367 } catch { 2368 /* ignore */ 2369 } 2370 } 2371 } 2372 } else if (type === 'hCaptcha') { 2373 const el = 2374 document.querySelector('[name="h-captcha-response"]') || 2375 document.getElementById('h-captcha-response'); 2376 if (el) { 2377 el.value = tok; 2378 el.dispatchEvent(new Event('change', { bubbles: true })); 2379 } 2380 } else if (type === 'Cloudflare Turnstile') { 2381 const el = 2382 document.querySelector('[name="cf-turnstile-response"]') || 2383 document.getElementById('cf-turnstile-response'); 2384 if (el) { 2385 el.value = tok; 2386 el.dispatchEvent(new Event('change', { bubbles: true })); 2387 } 2388 } 2389 /* eslint-enable no-undef */ 2390 }, 2391 { type: captcha.type, tok: token } 2392 ); 2393 return true; 2394 } catch (err) { 2395 logger.warn(`${provider.name}: error — ${err.message}`); 2396 } 2397 } 2398 return false; 2399 } 2400 /* c8 ignore stop */ 2401 2402 /** 2403 * Inject submit-click watcher + success signal detector via addInitScript. 2404 * Also injects a subtle persistent Done button for manual override. 2405 * Survives page navigations (e.g. redirect to /thank-you after submit). 
2406 */ 2407 /* c8 ignore start -- browser-context function, not testable in Node.js */ 2408 async function injectSubmitWatcher(page) { 2409 await page.addInitScript(() => { 2410 /* eslint-disable no-undef */ 2411 const SUCCESS_WORDS = [ 2412 // Gratitude 2413 'thank you', 2414 'thanks for', 2415 'thank you for', 2416 // Explicit success 2417 'successfully sent', 2418 'successfully submitted', 2419 'successfully received', 2420 'submission successful', 2421 'form submitted', 2422 'message sent', 2423 'message has been sent', 2424 'message received', 2425 'message was sent', 2426 'request sent', 2427 'inquiry sent', 2428 'enquiry sent', 2429 // Confirmation / receipt 2430 'we have received', 2431 "we've received", 2432 'we received your', 2433 'your message has been', 2434 'your enquiry has been', 2435 'your inquiry has been', 2436 'your request has been', 2437 'your submission has been', 2438 // Follow-up promises 2439 'we will be in touch', 2440 "we'll be in touch", 2441 'we will get back', 2442 "we'll get back", 2443 'will get back to you', 2444 'get back to you', 2445 'get back to you soon', 2446 'someone will contact', 2447 'we will contact', 2448 "we'll contact", 2449 'will reach out', 2450 'will be in contact', 2451 'be in contact', 2452 // Redirect-URL signals (these appear in the page URL, not body) 2453 'thank-you', 2454 'thankyou', 2455 'success', 2456 'confirmation', 2457 'confirmed', 2458 ]; 2459 2460 // --- Submit click interceptor (capture phase) --- 2461 document.addEventListener( 2462 'click', 2463 e => { 2464 const btn = e.target.closest( 2465 'button[type="submit"], input[type="submit"], button:not([type]), [role="button"]' 2466 ); 2467 if (btn && !btn.closest('#operator-panel') && !btn.closest('#success-confirm-popup')) { 2468 localStorage.setItem('form_submit_clicked', Date.now().toString()); 2469 localStorage.removeItem('form_errors_detected'); 2470 localStorage.removeItem('form_success_signal'); 2471 } 2472 }, 2473 true 2474 ); 2475 2476 // 
--- Error element detector --- 2477 // Scans for inline validation errors injected after submit. 2478 function collectErrors() { 2479 const errorSelectors = [ 2480 '[class*="error"]:not(script)', 2481 '[class*="invalid"]:not(script)', 2482 '[class*="alert-danger"]', 2483 '[class*="alert-error"]', 2484 '[class*="validation"]', 2485 '[role="alert"]', 2486 '[aria-invalid="true"]', 2487 '.help-block', 2488 '.field-error', 2489 '.form-error', 2490 '.wpcf7-not-valid-tip', 2491 '.gfield_description.validation_message', 2492 ]; 2493 const seen = new Set(); 2494 const errors = []; 2495 for (const sel of errorSelectors) { 2496 for (const el of Array.from(document.querySelectorAll(sel))) { 2497 const text = (el.innerText || el.textContent || '').trim(); 2498 if (!text || seen.has(text)) continue; 2499 seen.add(text); 2500 // Build a stable CSS path to the nearest input/select in the same field group 2501 let inputSel = null; 2502 const group = 2503 el.closest('[class*="field"], [class*="form-group"], .gfield, li, div') || 2504 el.parentElement; 2505 if (group) { 2506 const inp = group.querySelector( 2507 'input:not([type=hidden]):not([type=submit]), textarea, select' 2508 ); 2509 if (inp) { 2510 if (inp.id) inputSel = `#${CSS.escape(inp.id)}`; 2511 else if (inp.name) inputSel = `[name="${CSS.escape(inp.name)}"]`; 2512 } 2513 } 2514 // Highlight the error element 2515 const prev = el.style.outline; 2516 el.style.outline = '3px solid #dc3545'; 2517 el.style.backgroundColor = 'rgba(220,53,69,0.08)'; 2518 el.scrollIntoView({ behavior: 'smooth', block: 'center' }); 2519 setTimeout(() => { 2520 el.style.outline = prev; 2521 }, 8000); 2522 errors.push({ text, inputSel, elTag: el.tagName }); 2523 } 2524 } 2525 return errors; 2526 } 2527 2528 // --- Success signal detector --- 2529 // Called after submit clicks (with delay) and on page load. 2530 // Scans full DOM including dynamically-injected toasts/popups. 
2531 function detectSuccess() { 2532 const url = window.location.href.toLowerCase(); 2533 if (SUCCESS_WORDS.some(p => url.includes(p))) { 2534 localStorage.setItem('form_success_signal', `url:${url}`); 2535 return true; 2536 } 2537 if (localStorage.getItem('form_submit_clicked')) { 2538 const bodyText = (document.body?.innerText || '').substring(0, 8000).toLowerCase(); 2539 const hit = SUCCESS_WORDS.find(p => bodyText.includes(p)); 2540 if (hit) { 2541 // Highlight and scroll to the success element 2542 try { 2543 const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); 2544 let node; 2545 while ((node = walker.nextNode())) { 2546 if (node.textContent.toLowerCase().includes(hit)) { 2547 const el = node.parentElement; 2548 if (el && el.id !== 'operator-panel') { 2549 el.style.outline = '3px solid #28a745'; 2550 el.style.backgroundColor = 'rgba(40,167,69,0.1)'; 2551 el.scrollIntoView({ behavior: 'smooth', block: 'center' }); 2552 break; 2553 } 2554 } 2555 } 2556 } catch { 2557 /* ignore */ 2558 } 2559 localStorage.setItem('form_success_signal', `dom:${hit}`); 2560 return true; 2561 } 2562 } 2563 return false; 2564 } 2565 2566 // After each submit click: check for errors first, then success 2567 document.addEventListener( 2568 'click', 2569 e => { 2570 const btn = e.target.closest( 2571 'button[type="submit"], input[type="submit"], button:not([type]), [role="button"]' 2572 ); 2573 if (!btn || btn.closest('#operator-panel') || btn.closest('#success-confirm-popup')) return; 2574 // Poll for response: errors get priority, then success 2575 const delays = [800, 1500, 2500, 4000, 6000, 9000]; 2576 delays.forEach(ms => 2577 setTimeout(() => { 2578 if (localStorage.getItem('form_success_signal')) return; // already done 2579 const errs = collectErrors(); 2580 if (errs.length > 0) { 2581 localStorage.setItem('form_errors_detected', JSON.stringify(errs)); 2582 } else { 2583 detectSuccess(); 2584 } 2585 }, ms) 2586 ); 2587 }, 2588 true 2589 ); 2590 2591 // 
Check on page load (forms that redirect to /thank-you) 2592 if (document.readyState === 'loading') { 2593 document.addEventListener('DOMContentLoaded', () => detectSuccess()); 2594 } else { 2595 detectSuccess(); 2596 } 2597 2598 // MutationObserver: catch dynamically-injected toasts/popups/errors 2599 // On each mutation after submit: scan new nodes for success/error text, 2600 // then fall back to the full-page error/success scan. 2601 const observer = new MutationObserver(mutations => { 2602 if (!localStorage.getItem('form_submit_clicked')) return; 2603 if (localStorage.getItem('form_success_signal')) return; 2604 2605 // Inspect newly added nodes first (fastest signal) 2606 for (const mut of mutations) { 2607 for (const node of Array.from(mut.addedNodes)) { 2608 if (node.nodeType !== 1) continue; // elements only 2609 // Skip large layout blocks (sliders, sections) — success toasts/divs are small 2610 if ((node.querySelectorAll?.('*')?.length ?? 0) > 20) continue; 2611 const text = (node.innerText || node.textContent || '').trim().toLowerCase(); 2612 if (!text) continue; 2613 // Check for success in new element 2614 if (SUCCESS_WORDS.some(w => text.includes(w))) { 2615 node.style.outline = '3px solid #28a745'; 2616 node.style.backgroundColor = 'rgba(40,167,69,0.1)'; 2617 node.scrollIntoView({ behavior: 'smooth', block: 'center' }); 2618 localStorage.setItem( 2619 'form_success_signal', 2620 `dom:${SUCCESS_WORDS.find(w => text.includes(w))}` 2621 ); 2622 return; 2623 } 2624 // Check for error keywords in new element 2625 const isErrorNode = 2626 /error|invalid|required|please|must|cannot|problem|fail/i.test(text) || 2627 /error|invalid|alert|danger|warning/i.test(node.className || ''); 2628 if (isErrorNode) { 2629 node.style.outline = '3px solid #dc3545'; 2630 node.style.backgroundColor = 'rgba(220,53,69,0.08)'; 2631 node.scrollIntoView({ behavior: 'smooth', block: 'center' }); 2632 } 2633 } 2634 } 2635 2636 // Full-page sweep for errors/success (covers 
CSS-class-based validation) 2637 const errs = collectErrors(); 2638 if (errs.length > 0) { 2639 localStorage.setItem('form_errors_detected', JSON.stringify(errs)); 2640 } else { 2641 detectSuccess(); 2642 } 2643 }); 2644 observer.observe(document.body || document.documentElement, { 2645 childList: true, 2646 subtree: true, 2647 characterData: true, 2648 }); 2649 2650 // --- Persistent Done button (subtle grey, manual override) --- 2651 function addDoneButton() { 2652 if (document.getElementById('form-done-persistent-btn')) return; 2653 if (localStorage.getItem('operator_panel_done') === 'true') return; 2654 const btn = document.createElement('button'); 2655 btn.id = 'form-done-persistent-btn'; 2656 btn.textContent = '\u2713 Done'; 2657 btn.style.cssText = [ 2658 'position:fixed', 2659 'bottom:20px', 2660 'right:20px', 2661 'z-index:2147483647', 2662 'padding:10px 18px', 2663 'background:#6c757d', 2664 'color:#fff', 2665 'border:none', 2666 'border-radius:6px', 2667 'font-size:13px', 2668 'font-weight:600', 2669 'cursor:pointer', 2670 'box-shadow:0 2px 8px rgba(0,0,0,0.25)', 2671 'opacity:0.7', 2672 'font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif', 2673 ].join(';'); 2674 btn.addEventListener('mouseenter', () => { 2675 btn.style.opacity = '1'; 2676 }); 2677 btn.addEventListener('mouseleave', () => { 2678 btn.style.opacity = '0.7'; 2679 }); 2680 btn.addEventListener('click', () => { 2681 localStorage.setItem('operator_panel_done', 'true'); 2682 btn.textContent = '\u2713 Recorded'; 2683 btn.style.background = '#28a745'; 2684 setTimeout(() => btn.remove(), 1000); 2685 }); 2686 document.body.appendChild(btn); 2687 } 2688 2689 if (document.readyState === 'loading') { 2690 document.addEventListener('DOMContentLoaded', addDoneButton); 2691 } else { 2692 addDoneButton(); 2693 } 2694 /* eslint-enable no-undef */ 2695 }); 2696 } 2697 /* c8 ignore stop */ 2698 2699 /** 2700 * Capture evidence of why the form was considered successfully submitted. 
/**
 * Capture evidence of why the form was considered successfully submitted.
 * Used for training the success detection system.
 *
 * Runs inside the page and collects the `form_success_signal` localStorage
 * entry, the current URL, the document title and the first 500 characters of
 * the visible body text.
 *
 * @param {Object} page - Page handle exposing `evaluate()`
 * @returns {Promise<{url: string, title: string, bodySnippet: string, signal: string, timestamp: string}>}
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
function captureSuccessEvidence(page) {
  /* eslint-disable no-undef */
  return page.evaluate(() => {
    let signal = '';
    try {
      signal = localStorage.getItem('form_success_signal') || '';
    } catch {
      // Storage access can be denied by the browser; the remaining evidence
      // is still worth recording without the signal.
    }
    return {
      url: window.location.href,
      title: document.title,
      bodySnippet: (document.body?.innerText || '').substring(0, 500),
      signal,
      timestamp: new Date().toISOString(),
    };
  });
  /* eslint-enable no-undef */
}
/* c8 ignore stop */

/**
 * Returns true if the error is caused by a page navigation destroying the JS context.
 *
 * Only context-destruction errors are treated as navigation-after-submit.
 * "Target closed" / "Session closed" mean the browser was closed by the operator —
 * those must bubble up so the caller can decide whether to mark sent or not.
 *
 * @param {unknown} err - Value caught from a page call (Error, string, or anything else)
 * @returns {boolean} true only when the message mentions a destroyed execution context
 */
function isNavigationError(err) {
  const message = typeof err === 'string' ? err : err?.message;
  // Non-string messages (or no error at all) can never be a navigation error.
  if (typeof message !== 'string') return false;
  // One case-insensitive check covers every capitalisation of
  // "Execution context was destroyed".
  return message.toLowerCase().includes('context was destroyed');
}

/**
 * Show a brief overlay banner (non-blocking) to inform the operator.
 * Replaces the old blocking Yes/No confirmation popup.
 */
/**
 * Show a brief overlay banner (non-blocking) to inform the operator.
 * Replaces the old blocking Yes/No confirmation popup.
 *
 * Any previous banner is removed first; the new one removes itself after 6 s.
 * The call is best-effort: failures are swallowed and the returned promise
 * always resolves.
 *
 * @param {Object} page - Page handle exposing `evaluate()`
 * @param {string} message - Text shown in the banner
 * @param {string} [color='#28a745'] - CSS background colour of the banner
 * @returns {Promise<void>}
 */
/* c8 ignore start -- browser-context function, not testable in Node.js */
async function showStatusBanner(page, message, color = '#28a745') {
  try {
    // Playwright's evaluate() accepts exactly ONE argument after the function;
    // passing message and colour separately is rejected with "Too many
    // arguments", which the old trailing catch silently hid. Wrap both in a
    // single object instead.
    await page.evaluate(
      ({ msg, clr }) => {
        /* eslint-disable no-undef */
        document.getElementById('form-status-banner')?.remove();
        const banner = document.createElement('div');
        banner.id = 'form-status-banner';
        banner.textContent = msg;
        banner.style.cssText = [
          'position:fixed',
          'top:12px',
          'left:50%',
          'transform:translateX(-50%)',
          `background:${clr}`,
          'color:#fff',
          'padding:10px 20px',
          'border-radius:8px',
          'font-size:14px',
          'font-weight:600',
          'z-index:2147483647',
          'box-shadow:0 4px 16px rgba(0,0,0,0.3)',
          'font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif',
          'pointer-events:none',
          'max-width:80vw',
          'text-align:center',
        ].join(';');
        // body can be missing very early in a page load; fall back to <html>.
        (document.body || document.documentElement).appendChild(banner);
        setTimeout(() => banner.remove(), 6000);
        /* eslint-enable no-undef */
      },
      { msg: message, clr: color }
    );
  } catch {
    // Deliberately best-effort: the banner is informational only, and the page
    // may be navigating or already closed.
  }
}
/* c8 ignore stop */

/**
 * Ask Haiku to fix form validation errors.
 * Sends the form's outerHTML (with current values) + error descriptions.
 * Returns an array of {selector, value, action} fixes.
 */
/**
 * Ask Haiku to fix form validation errors.
 * Sends the form's HTML (with current field values) + error descriptions.
 *
 * `outerHTML` only serializes attributes, so values typed or filled at runtime
 * would be invisible to the model. The form is therefore cloned and the live
 * values/checked/selected state are written onto the clone before it is
 * serialized. Password and file inputs are left untouched.
 *
 * @param {Object} page - Page handle exposing `evaluate()`
 * @param {Array<{text: string, inputSel?: string}>} errors - Validation errors found on the page
 * @returns {Promise<Array<{selector: string, value: string, action: 'fill'|'select'|'check'}>>}
 *   Validated fixes; `[]` when there is nothing to fix, the page is navigating,
 *   or the model response cannot be used.
 */
/* c8 ignore start -- browser + LLM function, not testable in Node.js */
async function fixFormErrors(page, errors) {
  const reported = (Array.isArray(errors) ? errors : []).filter(
    e => e && typeof e.text === 'string' && e.text.trim() !== ''
  );
  // Nothing actionable — skip the browser round-trip and the LLM call.
  if (reported.length === 0) return [];

  // Collect the main form's HTML (capped at 30KB to stay within token limit)
  let formHtml = '';
  try {
    formHtml = await page.evaluate(() => {
      /* eslint-disable no-undef */
      const form =
        document.querySelector('form') || document.querySelector('[role="form"]') || document.body;
      if (!form) return '';

      // Work on a clone so the live page is never modified.
      const snapshot = form.cloneNode(true);
      const liveFields = form.querySelectorAll('input, textarea, select');
      const copyFields = snapshot.querySelectorAll('input, textarea, select');
      liveFields.forEach((field, i) => {
        const copy = copyFields[i];
        if (!copy) return;
        const tag = field.tagName.toLowerCase();
        if (tag === 'select') {
          Array.from(copy.options).forEach((opt, j) => {
            if (field.options[j]?.selected) opt.setAttribute('selected', '');
            else opt.removeAttribute('selected');
          });
        } else if (tag === 'textarea') {
          copy.textContent = field.value;
        } else if (field.type === 'checkbox' || field.type === 'radio') {
          if (field.checked) copy.setAttribute('checked', '');
          else copy.removeAttribute('checked');
        } else if (field.type !== 'password' && field.type !== 'file') {
          copy.setAttribute('value', field.value);
        }
      });
      return (snapshot.outerHTML || '').substring(0, 30000);
      /* eslint-enable no-undef */
    });
  } catch {
    /* page may be navigating */
    return [];
  }

  const errorList = reported
    .map((e, i) => `${i + 1}. "${e.text}"${e.inputSel ? ` (near field: ${e.inputSel})` : ''}`)
    .join('\n');

  // NOTE(review): formHtml is untrusted third-party markup placed straight into
  // the prompt. The output validation below limits what a hostile page can
  // trigger, but consider routing it through the project's prompt sanitizer.
  const prompt = `You are filling out a contact/enquiry form on a business website.
The form was submitted but returned validation errors. Fix each error.

ERRORS:
${errorList}

FORM HTML (with current field values):
\`\`\`html
${formHtml}
\`\`\`

Return ONLY a JSON array of fixes. Each fix must have:
- "selector": CSS selector for the input/select/textarea to change
- "value": the new value to set (string)
- "action": "fill" for text/email/phone/textarea, "select" for <select>, "check" for checkbox

Example: [{"selector":"#email","value":"correct@email.com","action":"fill"}]

Rules:
- Only fix fields that are causing the listed errors
- Do not change fields that are working correctly
- For required empty fields, provide a plausible value based on field label
- Return [] if you cannot determine what to fix`;

  try {
    const response = await callLLM({
      model: 'anthropic/claude-haiku-4-5',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 500,
      temperature: 0,
    });
    const raw = (response.content || '').trim();
    // Extract JSON array from response
    const match = raw.match(/\[[\s\S]*\]/);
    if (!match) return [];
    const parsed = JSON.parse(match[0]);
    if (!Array.isArray(parsed)) return [];

    // Never hand unvalidated model output to the browser: keep only entries
    // with a usable selector, coerce the value to a string, and map unknown
    // actions to "fill" (the default branch used when fixes are applied).
    const knownActions = new Set(['fill', 'select', 'check']);
    return parsed
      .filter(
        fix =>
          fix !== null &&
          typeof fix === 'object' &&
          typeof fix.selector === 'string' &&
          fix.selector.trim() !== ''
      )
      .map(fix => ({
        selector: fix.selector,
        value: fix.value == null ? '' : String(fix.value),
        action: knownActions.has(fix.action) ? fix.action : 'fill',
      }));
  } catch (e) {
    logger.warn(`Haiku error-fix failed: ${e.message}`);
    return [];
  }
}
/* c8 ignore stop */

/**
 * Apply a list of fixes returned by fixFormErrors() to the page.
 */
/**
 * Apply a list of fixes returned by fixFormErrors() to the page.
 *
 * Each fix is attempted independently: a failing or malformed fix is logged
 * and skipped, it never aborts the remaining ones.
 *
 * @param {Object} page - Page handle (`selectOption`, `$eval`, `click`, `fill`)
 * @param {Array<{selector: string, value: string, action: string}>} fixes
 * @returns {Promise<number>} Number of fixes that were applied without error
 */
/* c8 ignore start -- browser automation, not testable in Node.js */
async function applyFixes(page, fixes) {
  let applied = 0;
  for (const fix of Array.isArray(fixes) ? fixes : []) {
    const selector = fix?.selector;
    if (typeof selector !== 'string' || selector === '') {
      // Without this guard a null entry would throw inside the catch handler
      // below and reject the whole call.
      logger.warn(' Skipping malformed fix (no selector)');
      continue;
    }
    try {
      if (fix.action === 'select') {
        await page.selectOption(selector, fix.value);
      } else if (fix.action === 'check') {
        // Only click when not already ticked, so a second pass never un-ticks.
        const checked = await page.$eval(selector, el => el.checked).catch(() => false);
        if (!checked) await page.click(selector);
      } else {
        await page.fill(selector, fix.value);
      }
      logger.info(` Fixed field ${selector} → "${fix.value}" (${fix.action})`);
      applied++;
    } catch (e) {
      logger.warn(` Could not apply fix for ${selector}: ${e.message}`);
    }
  }
  return applied;
}

/**
 * Wait for form submission outcome — automatically detects success or errors,
 * attempts Haiku-assisted fixes and retry on errors (up to MAX_FIX_RETRIES).
 * Falls back to manual operator confirmation only when ambiguous.
 * Browser close is NOT caught — outreach stays pending for retry.
 *
 * Signals are exchanged with the page through localStorage keys:
 * `form_success_signal`, `form_errors_detected`, `operator_panel_done`,
 * `operator_panel_failed`, and `operator_type_pending` (text to be typed from
 * Node.js into a focused iframe field).
 *
 * @param {Object} page - Page handle
 * @param {string} formUrl - Form URL, used for logging only
 * @returns {Promise<void>} Resolves once the submission is considered complete
 * @throws {Error} With `operatorMarkedFailed: true` when the operator marks the
 *   form as failed; any non-navigation page error (timeout, browser closed) is
 *   rethrown unchanged.
 */
// eslint-disable-next-line complexity
async function waitForOperatorDone(page, formUrl) {
  const MAX_FIX_RETRIES = 3;

  // Poll for operator_type_pending alongside the main wait loop.
  // When a panel button is clicked while focus is in an iframe, the browser can't
  // type directly into the cross-origin frame — it stores the text in localStorage
  // instead, and Node.js uses page.keyboard.type() which works at the OS level.
  let keyboardPollActive = true;
  let keyboardPollBusy = false; // prevents overlapping ticks from typing the same text twice
  const keyboardPollInterval = setInterval(async () => {
    if (!keyboardPollActive || keyboardPollBusy) return;
    keyboardPollBusy = true;
    try {
      const pending = await page.evaluate(
        () => localStorage.getItem('operator_type_pending') // eslint-disable-line no-undef
      );
      if (pending) {
        // Remove before parsing so a malformed entry is not retried every tick.
        await page.evaluate(() => localStorage.removeItem('operator_type_pending')); // eslint-disable-line no-undef
        const { text } = JSON.parse(pending);
        if (typeof text === 'string' && text.length > 0) {
          await page.keyboard.type(text, { delay: 20 });
          logger.info(`Keyboard-typed ${text.length} chars into focused iframe field`);
        }
      }
    } catch {
      /* page may be navigating */
    } finally {
      keyboardPollBusy = false;
    }
  }, 300);

  const stopKeyboardPoll = () => {
    keyboardPollActive = false;
    clearInterval(keyboardPollInterval);
  };

  const finish = async successMsg => {
    stopKeyboardPoll();
    logger.success(`${successMsg} (${formUrl})`);
    try {
      const evidence = await captureSuccessEvidence(page);
      logger.info(`Success evidence: ${JSON.stringify(evidence)}`);
    } catch (err) {
      // Evidence is collected for training only. The page is often still
      // navigating here, and that must not turn a detected success into a
      // failure. Anything else (e.g. browser closed) still bubbles up.
      if (!isNavigationError(err)) throw err;
      logger.info('Success evidence unavailable — page was navigating');
    }
  };

  // Reset the per-submit signals so the loop waits for the next submit outcome.
  const clearSubmitSignals = async () => {
    try {
      await page.evaluate(() => {
        localStorage.removeItem('form_errors_detected'); // eslint-disable-line no-undef
        localStorage.removeItem('form_submit_clicked'); // eslint-disable-line no-undef
      });
    } catch {
      /* ignore */
    }
  };

  let fixAttempts = 0;

  try {
    while (true) {
      // Phase 1: Wait for any of: submit click, success signal, errors detected, manual Done, or mark-failed
      logger.info('Waiting for form submission outcome...');
      try {
        // Playwright's signature is waitForFunction(fn, arg, options). The
        // previous `{ timeout }` sat in the `arg` slot and never took effect,
        // so the wait has always been bounded by the page's default timeout.
        // That effective behaviour is kept; no explicit timeout is passed.
        await page.waitForFunction(
          () =>
            localStorage.getItem('form_success_signal') !== null || // eslint-disable-line no-undef
            localStorage.getItem('form_errors_detected') !== null || // eslint-disable-line no-undef
            localStorage.getItem('operator_panel_done') === 'true' || // eslint-disable-line no-undef
            localStorage.getItem('operator_panel_failed') !== null // eslint-disable-line no-undef
        );
      } catch (err) {
        if (isNavigationError(err)) {
          await finish('Form submitted — page navigated away');
          return;
        }
        throw err;
      }

      // Read all signals (navigation may destroy context, treat as success)
      let signals;
      try {
        signals = await page.evaluate(() => [
          localStorage.getItem('form_success_signal'), // eslint-disable-line no-undef
          localStorage.getItem('form_errors_detected'), // eslint-disable-line no-undef
          localStorage.getItem('operator_panel_done') === 'true', // eslint-disable-line no-undef
          localStorage.getItem('operator_panel_failed'), // eslint-disable-line no-undef
        ]);
      } catch (err) {
        if (isNavigationError(err)) {
          await finish('Form submitted — page navigated away');
          return;
        }
        throw err;
      }
      const [successSignal, errorsJson, manualDone, failedReason] = signals;

      // Operator manually marked as failed
      if (failedReason) {
        logger.warn(`Operator marked form as failed: "${failedReason}"`);
        throw Object.assign(new Error(failedReason), { operatorMarkedFailed: true });
      }

      // Manual Done button takes priority
      if (manualDone) {
        await finish('Operator marked form as complete via Done button');
        return;
      }

      // Success detected automatically
      if (successSignal) {
        const reason = successSignal.startsWith('url:')
          ? `redirected to ${successSignal.slice(4)}`
          : `success message detected ("${successSignal.slice(4)}")`;
        await showStatusBanner(page, `\u2705 Form sent! (${reason})`);
        await finish(`Form submitted successfully — ${reason}`);
        // Mark done in localStorage so the Done button disappears
        await page
          .evaluate(() => localStorage.setItem('operator_panel_done', 'true')) // eslint-disable-line no-undef
          .catch(() => {});
        return;
      }

      // Signal vanished between the wait and the read — just wait again.
      if (!errorsJson) continue;

      // Errors detected — try Haiku fix
      let errors = [];
      try {
        const parsed = JSON.parse(errorsJson);
        if (Array.isArray(parsed)) errors = parsed;
      } catch {
        /* ignore */
      }
      logger.warn(
        `Form returned ${errors.length} validation error(s): ${errors.map(e => e?.text).join(' | ')}`
      );
      await showStatusBanner(
        page,
        `\u26a0\ufe0f ${errors.length} error(s) detected — attempting auto-fix...`,
        '#e67e22'
      );

      if (fixAttempts < MAX_FIX_RETRIES && errors.length > 0) {
        fixAttempts++;
        logger.info(`Auto-fix attempt ${fixAttempts}/${MAX_FIX_RETRIES}...`);

        const fixes = await fixFormErrors(page, errors);
        const applied = fixes.length > 0 ? await applyFixes(page, fixes) : 0;

        if (applied > 0) {
          logger.info(`Applied ${applied}/${fixes.length} fixes — re-submitting form...`);
          await showStatusBanner(
            page,
            `\ud83d\udd27 Applied ${applied} fix(es) — re-submitting...`,
            '#3498db'
          );

          // Clear error signal before re-submitting
          await clearSubmitSignals();

          // Re-click submit
          await autoClickSubmit(page);
          // Loop continues to wait for next outcome
        } else {
          // Either the model proposed nothing, or nothing could be applied.
          // Re-submitting an unchanged form would only reproduce the errors.
          logger.warn(
            fixes.length > 0
              ? 'None of the proposed fixes could be applied — waiting for operator to resolve manually'
              : 'Haiku could not determine fixes — waiting for operator to resolve manually'
          );
          await showStatusBanner(
            page,
            `\u26a0\ufe0f Could not auto-fix — please correct errors manually`,
            '#dc3545'
          );
          // Clear errors so we loop back to waiting for next submit
          await clearSubmitSignals();
        }
      } else {
        // Exhausted retries or no errors parsed — let operator handle it
        const exhausted = fixAttempts >= MAX_FIX_RETRIES;
        const msg = exhausted
          ? `\u274c ${MAX_FIX_RETRIES} auto-fix attempts failed — please fix manually`
          : `\u26a0\ufe0f Errors detected — please fix manually`;
        await showStatusBanner(page, msg, '#dc3545');
        logger.warn(
          exhausted
            ? `Auto-fix exhausted (${fixAttempts} attempts) — waiting for operator`
            : 'Error details could not be parsed — waiting for operator'
        );
        await clearSubmitSignals();
      }
      // Loop back — wait for next submit click outcome
    }
  } finally {
    // Always release the poll timer. Previously a timeout or a closed browser
    // left it running, polling a dead page and keeping the process alive.
    stopKeyboardPoll();
  }
}

/**
 * Try to find and click a submit button on the page (for automated E2E mode).
 */
3066 */ 3067 async function autoClickSubmit(page) { 3068 const submitSelectors = [ 3069 'button[type="submit"]', 3070 'input[type="submit"]', 3071 'button:has-text("Submit")', 3072 'button:has-text("Send")', 3073 'input[value*="Submit" i]', 3074 ]; 3075 for (const selector of submitSelectors) { 3076 try { 3077 const button = await page.locator(selector).first(); 3078 if ((await button.count()) > 0) { 3079 await humanClick(page, selector); 3080 logger.success('Clicked submit button'); 3081 return; 3082 } 3083 } catch { 3084 continue; 3085 } 3086 } 3087 logger.warn('Could not find submit button'); 3088 } 3089 3090 /** 3091 * Send form submission 3092 * @param {number} outreachId - Outreach record ID 3093 * @param {boolean} headless - Run browser in headless mode 3094 * @param {Object} options - Additional options 3095 */ 3096 // eslint-disable-next-line complexity -- Form submission requires multi-step validation and fallbacks 3097 export async function submitContactForm(outreachId, headless = false, options = {}) { 3098 // If a shared context is provided, we open a new tab and close only the tab on exit. 3099 // If not, we launch a dedicated browser and close the whole browser on exit. 
3100 const sharedContext = options.sharedContext || null; 3101 let ownBrowser = null; // only set when we launched our own browser 3102 let page = null; 3103 3104 try { 3105 const outreach = await getOne( 3106 `SELECT o.*, o.message_body AS proposal_text, s.domain, s.country_code, s.city, s.state 3107 FROM messages o 3108 JOIN sites s ON o.site_id = s.id 3109 WHERE o.id = $1 AND o.direction = 'outbound'`, 3110 [outreachId] 3111 ); 3112 3113 if (!outreach) throw new Error(`Outreach #${outreachId} not found`); 3114 if (outreach.contact_method !== 'form') { 3115 throw new Error( 3116 `Outreach #${outreachId} is for ${outreach.contact_method}, not Contact Form` 3117 ); 3118 } 3119 if (outreach.contact_uri === 'PENDING_CONTACT_EXTRACTION') { 3120 throw new Error(`Outreach #${outreachId} has no form URL (${outreach.contact_uri})`); 3121 } 3122 3123 // Look up local Twilio number for this site's country (preferred over env default) 3124 const countryRow = await getOne( 3125 'SELECT twilio_phone_number, sms_enabled FROM countries WHERE country_code = $1', 3126 [outreach.country_code] 3127 ); 3128 const localPhone = 3129 countryRow?.twilio_phone_number || process.env.SENDER_PHONE || '+1-555-123-4567'; 3130 3131 const senderInfo = { 3132 name: process.env.SENDER_NAME || 'John Smith', 3133 email: process.env.SENDER_EMAIL || 'john@333method.com', 3134 phone: localPhone, 3135 company: process.env.SENDER_COMPANY || '333 Method', 3136 }; 3137 3138 // If we don't have a local number for this country, we can't fill required phone fields. 3139 // Park as retry_later (no retry_at = manual trigger) so the form is skipped on every 3140 // subsequent run until a local number is purchased and the status is reset to 'approved'. 
3141 if (!countryRow?.twilio_phone_number && !process.env.SENDER_PHONE) { 3142 const msg = `No local phone number for country ${outreach.country_code} — parked until number purchased`; 3143 logger.warn(msg); 3144 await run( 3145 `UPDATE messages SET delivery_status = 'retry_later', error_message = $1 WHERE id = $2`, 3146 [msg, outreachId] 3147 ); 3148 return { success: false, outreachId, skipped: true, reason: 'no_local_phone' }; 3149 } 3150 3151 logger.info( 3152 `Opening contact form at ${outreach.contact_uri} for ${outreach.domain} (outreach #${outreachId})` 3153 ); 3154 3155 let context; 3156 if (sharedContext) { 3157 context = sharedContext; 3158 } else { 3159 ownBrowser = await launchStealthBrowser({ 3160 headless, 3161 stealthLevel: 'minimal', 3162 devtools: !headless, 3163 }); 3164 context = await createStealthContext(ownBrowser); 3165 // Configure NopeCHA CAPTCHA solver if API key is set 3166 await configureNopeCHA(context); 3167 } 3168 3169 // Reuse an already-open page if supplied (avoids extra blank tab from launchPersistentContext) 3170 page = options.sharedPage || (await context.newPage()); 3171 // Set tab title so operator can identify tabs in the browser 3172 await page 3173 .evaluate(d => { 3174 document.title = `⏳ ${d} — loading`; // eslint-disable-line no-undef 3175 }, outreach.domain) 3176 .catch(() => {}); 3177 3178 // Inject submit watcher (replaces persistent Done button) 3179 await injectSubmitWatcher(page); 3180 3181 const { panelWarnings, cfResolved, chatWidgets } = await loadFormPage( 3182 page, 3183 outreach.contact_uri 3184 ); 3185 3186 // Check for under-construction / coming-soon / parked pages before doing any work 3187 { 3188 const pageHtml = await page.content().catch(() => ''); 3189 const pageTitle = await page.title().catch(() => ''); 3190 const ucResult = detectUnderConstruction(pageHtml, pageTitle); 3191 if (ucResult.isUnderConstruction) { 3192 const MAX_UC_RETRIES = 5; 3193 const current = await getOne('SELECT retry_count FROM 
messages WHERE id = $1', [outreachId]); 3194 const retryCount = (current?.retry_count || 0) + 1; 3195 if (retryCount >= MAX_UC_RETRIES) { 3196 await run( 3197 `UPDATE messages SET delivery_status = 'failed', error_message = $1, retry_count = $2 WHERE id = $3`, 3198 [ 3199 `Under-construction after ${retryCount} weekly retries: ${ucResult.reason}`, 3200 retryCount, 3201 outreachId, 3202 ] 3203 ); 3204 logger.warn( 3205 `Under-construction page exhausted ${MAX_UC_RETRIES} retries — marking failed (#${outreachId})` 3206 ); 3207 } else { 3208 await run( 3209 `UPDATE messages SET delivery_status = 'retry_later', error_message = $1, retry_at = NOW() + INTERVAL '7 days', retry_count = $2 WHERE id = $3`, 3210 [ 3211 `Under construction (retry ${retryCount}/${MAX_UC_RETRIES} in 7d): ${ucResult.reason}`, 3212 retryCount, 3213 outreachId, 3214 ] 3215 ); 3216 logger.warn( 3217 `Under-construction page — scheduled retry ${retryCount}/${MAX_UC_RETRIES} in 7 days (#${outreachId}): ${ucResult.phrase}` 3218 ); 3219 } 3220 await page 3221 .evaluate(d => { 3222 document.title = `🚧 ${d} — under construction`; // eslint-disable-line no-undef 3223 }, outreach.domain) 3224 .catch(() => {}); 3225 await page.close().catch(() => {}); 3226 if (ownBrowser) await ownBrowser.close().catch(() => {}); 3227 return { success: false, outreachId, skipped: true, reason: 'under_construction' }; 3228 } 3229 } 3230 3231 const isE2ETest = outreach.contact_uri === process.env.TEST_E2E_URL; 3232 const additionalFields = options.additionalFields || {}; 3233 if (isE2ETest && process.env.TEST_E2E_PASSWORD) { 3234 additionalFields.password = process.env.TEST_E2E_PASSWORD; 3235 logger.info('Detected E2E test mode, using TEST_E2E_PASSWORD'); 3236 } 3237 3238 // Detect form builder and try shared template first 3239 let fillResult = null; 3240 const builderType = cfResolved ? 
await detectFormBuilder(page) : 'generic'; 3241 logger.info(`Form builder detected: ${builderType}`); 3242 3243 if (cfResolved && builderType !== 'generic') { 3244 const templates = loadBuilderTemplates(); 3245 const template = templates[builderType]; 3246 if (template?.fields?.length > 0) { 3247 logger.info(`Trying shared template for builder: ${builderType}`); 3248 fillResult = await fillFromBuilderTemplate( 3249 page, 3250 { ...template, builderType }, 3251 senderInfo, 3252 outreach 3253 ); 3254 if (fillResult) logger.success(`Builder template matched (${builderType})`); 3255 else logger.info(`Builder template selectors stale — falling back to full scan`); 3256 } 3257 } 3258 3259 const autoSubmit = options.autoSubmit === true || isE2ETest; 3260 3261 // Start CAPTCHA solve in background immediately — runs in parallel with form filling. 3262 // We wait for the captcha iframe to appear (up to 15s) then submit the solve job. 3263 const hasCaptchaProvider = !!( 3264 process.env.NOPECHA_API_KEY || 3265 process.env.NOPECHA_API_KEY_2 || 3266 process.env.CAPMONSTER_API_KEY 3267 ); 3268 const captchaSolvePromise = 3269 !autoSubmit && hasCaptchaProvider 3270 ? (async () => { 3271 try { 3272 // Wait for captcha widget to appear before detecting 3273 await page 3274 .waitForSelector( 3275 'iframe[src*="recaptcha"], iframe[src*="hcaptcha"], .cf-turnstile', 3276 { timeout: 15000 } 3277 ) 3278 .catch(() => null); 3279 const captchas = await detectCaptchas(page); 3280 const results = []; 3281 for (const captcha of captchas) { 3282 const solved = await solveCaptcha(page, captcha, outreach.contact_uri); 3283 results.push({ captcha, solved }); 3284 } 3285 return results; 3286 } catch { 3287 return []; 3288 } 3289 })() 3290 : Promise.resolve([]); 3291 3292 // Always run autoFillForm to catch fields the builder template missed (e.g. selects, 3293 // geo fields, required fields not in the template). 
autoFillForm skips already-filled 3294 // selectors via its filledSelectors set, so there's no risk of double-filling. 3295 if (cfResolved || chatWidgets.length > 0) { 3296 const autoResult = await autoFillForm(page, senderInfo, outreach, additionalFields, { 3297 domain: outreach.domain, 3298 country_code: outreach.country_code, 3299 city: outreach.city, 3300 state: outreach.state, 3301 }); 3302 if (!fillResult) { 3303 fillResult = autoResult; 3304 } else { 3305 // Merge: combine counts and field lists from both passes 3306 fillResult = { 3307 filledCount: fillResult.filledCount + autoResult.filledCount, 3308 fromTemplate: fillResult.fromTemplate, 3309 llmThinking: [fillResult.llmThinking, autoResult.llmThinking].filter(Boolean).join('\n'), 3310 filledFields: [...(fillResult.filledFields || []), ...(autoResult.filledFields || [])], 3311 fillPlanInfo: [...(fillResult.fillPlanInfo || []), ...(autoResult.fillPlanInfo || [])], 3312 }; 3313 } 3314 } else if (!fillResult) { 3315 fillResult = { filledCount: 0, llmThinking: '', filledFields: [], fillPlanInfo: [] }; 3316 } 3317 3318 const { filledCount, llmThinking, filledFields = [], fillPlanInfo = [] } = fillResult; 3319 3320 // Auto-tick consent/T&C checkboxes — these are mandatory but not personal, safe to tick. 3321 // Only tick checkboxes whose label contains consent/terms/privacy keywords. 
3322 try { 3323 const tickedLabels = await page.evaluate(() => { 3324 /* eslint-disable no-undef */ 3325 const CONSENT_PATTERNS = 3326 /terms|conditions|privacy|consent|agree|gdpr|policy|accept|acknowledge/i; 3327 const ticked = []; 3328 for (const cb of Array.from(document.querySelectorAll('input[type="checkbox"]'))) { 3329 if (cb.checked || cb.disabled) continue; 3330 // Find label text 3331 let labelText = ''; 3332 if (cb.id) { 3333 const lbl = document.querySelector(`label[for="${CSS.escape(cb.id)}"]`); 3334 if (lbl) labelText = lbl.innerText; 3335 } 3336 if (!labelText) { 3337 const parent = cb.closest('label') || cb.parentElement; 3338 if (parent) labelText = parent.innerText; 3339 } 3340 if (CONSENT_PATTERNS.test(labelText)) { 3341 cb.click(); 3342 ticked.push(labelText.trim().substring(0, 60)); 3343 } 3344 } 3345 return ticked; 3346 /* eslint-enable no-undef */ 3347 }); 3348 if (tickedLabels.length > 0) { 3349 logger.info( 3350 `Auto-ticked ${tickedLabels.length} consent checkbox(es): ${tickedLabels.join('; ')}` 3351 ); 3352 } 3353 } catch { 3354 /* ignore — page may not support evaluate */ 3355 } 3356 3357 if (autoSubmit) { 3358 logger.info(`Auto-filled ${filledCount} fields in automated mode`); 3359 await autoClickSubmit(page); 3360 await page.waitForTimeout(2000); 3361 } else { 3362 // Await background captcha solve (NopeCHA API / CapMonster) and log results 3363 const nopechaExtensionActive = !!( 3364 process.env.NOPECHA_API_KEY || process.env.NOPECHA_API_KEY_2 3365 ); 3366 const captchaResults = await captchaSolvePromise; 3367 for (const { captcha, solved } of captchaResults) { 3368 if (!solved) { 3369 // If NopeCHA extension is loaded it will handle this — don't warn 3370 if (!nopechaExtensionActive) { 3371 const msg = `CAPTCHA detected (${captcha.type}) — solve it manually before submitting.`; 3372 logger.warn(msg); 3373 panelWarnings.push(msg); 3374 } else { 3375 logger.info(`CAPTCHA detected (${captcha.type}) — NopeCHA extension will auto-solve`); 
3376 } 3377 } else { 3378 logger.success(`CAPTCHA auto-solved (${captcha.type})`); 3379 } 3380 } 3381 // Always scan for math image CAPTCHAs (uses Haiku vision, no external API needed) 3382 // For other CAPTCHA types, only warn if no solver is active 3383 { 3384 const captchas = await detectCaptchas(page); 3385 for (const captcha of captchas) { 3386 if (captcha.type === 'mathImage') { 3387 const solved = await solveMathImageCaptcha(page, captcha); 3388 if (!solved && !nopechaExtensionActive) { 3389 panelWarnings.push( 3390 'Math CAPTCHA detected — could not auto-solve, solve it manually.' 3391 ); 3392 } 3393 } else if (!nopechaExtensionActive && !process.env.CAPMONSTER_API_KEY) { 3394 panelWarnings.push( 3395 `CAPTCHA detected (${captcha.type}) — solve it manually before submitting.` 3396 ); 3397 } else if (nopechaExtensionActive) { 3398 logger.info(`CAPTCHA detected (${captcha.type}) — NopeCHA extension will auto-solve`); 3399 } 3400 } 3401 } 3402 3403 // Panel data is stored so it can be re-injected after operator navigates to a 3404 // different page (e.g. /contact-us instead of /contact). 3405 const panelData = { 3406 subject: outreach.subject_line || '', 3407 proposal: outreach.proposal_text, 3408 name: senderInfo.name, 3409 email: senderInfo.email, 3410 phone: senderInfo.phone, 3411 company: senderInfo.company, 3412 warnings: panelWarnings, 3413 llmThinking, 3414 filledSelectors: fillPlanInfo.map(f => f.selector).filter(Boolean), 3415 builderType, 3416 }; 3417 3418 const thankYouPatterns = ['thank', 'success', 'confirm', 'submitted', 'complete']; 3419 3420 /** 3421 * Re-inject panel + attempt auto-fill on a new URL the operator navigated to. 3422 * Updates contact_uri in DB and re-runs all fill logic. 
3423 */ 3424 const reinjectOnNewPage = async newUrl => { 3425 logger.info(`Operator navigated to ${newUrl} — re-injecting panel & attempting fill`); 3426 3427 // Update DB contact_uri so future runs go directly to the right page 3428 await run('UPDATE messages SET contact_uri = $1 WHERE id = $2', [newUrl, outreachId]); 3429 logger.info(`Updated contact_uri to ${newUrl} for outreach #${outreachId}`); 3430 3431 // Wait for the page to settle 3432 try { 3433 await page.waitForLoadState('domcontentloaded', { timeout: 15000 }); 3434 } catch { 3435 /* timeout ok — inject anyway */ 3436 } 3437 3438 // Re-inject submit watcher (was wiped by navigation) 3439 await injectSubmitWatcher(page).catch(() => {}); 3440 3441 // Clear old signals from previous page 3442 await page 3443 .evaluate(() => { 3444 /* eslint-disable no-undef */ 3445 localStorage.removeItem('form_submit_clicked'); 3446 localStorage.removeItem('form_errors_detected'); 3447 localStorage.removeItem('form_success_signal'); 3448 /* eslint-enable no-undef */ 3449 }) 3450 .catch(() => {}); 3451 3452 // Attempt auto-fill on the new page 3453 let newFillResult = null; 3454 try { 3455 const newBuilderType = await detectFormBuilder(page).catch(() => 'generic'); 3456 if (newBuilderType !== 'generic') { 3457 const templates = loadBuilderTemplates(); 3458 const tmpl = templates[newBuilderType]; 3459 if (tmpl?.fields?.length > 0) { 3460 newFillResult = await fillFromBuilderTemplate( 3461 page, 3462 { ...tmpl, builderType: newBuilderType }, 3463 senderInfo, 3464 outreach 3465 ).catch(() => null); 3466 } 3467 } 3468 if (!newFillResult) { 3469 newFillResult = await autoFillForm(page, senderInfo, outreach).catch(() => null); 3470 } 3471 } catch { 3472 /* fill failures are non-fatal */ 3473 } 3474 3475 const newFilled = newFillResult?.filledCount || 0; 3476 const newSelectors = (newFillResult?.fillPlanInfo || []) 3477 .map(f => f.selector) 3478 .filter(Boolean); 3479 3480 // Show a brief banner so operator knows we re-filled 
3481 await showStatusBanner( 3482 page, 3483 newFilled > 0 3484 ? `\ud83d\udcdd Re-filled ${newFilled} field(s) on new page` 3485 : '\u26a0\ufe0f Could not auto-fill — use the panel to fill manually', 3486 newFilled > 0 ? '#3498db' : '#e67e22' 3487 ).catch(() => {}); 3488 3489 // Re-inject operator panel with updated fill info 3490 await injectOperatorPanel(page, { 3491 ...panelData, 3492 warnings: [], 3493 filledSelectors: newSelectors, 3494 }).catch(() => {}); 3495 3496 await page 3497 .evaluate(t => { 3498 document.title = t; // eslint-disable-line no-undef 3499 }, `\ud83d\udc40 ${outreach.domain} — review & submit`) 3500 .catch(() => {}); 3501 }; 3502 3503 // Listen for main-frame navigations while the operator panel is shown. 3504 // Fires AFTER page.goto completes, so we only see operator-initiated navigations. 3505 let navigationHandlerActive = true; 3506 const onNavigation = async frame => { 3507 if (!navigationHandlerActive) return; 3508 if (frame !== page.mainFrame()) return; // ignore iframe loads 3509 const newUrl = frame.url(); 3510 if (!newUrl || newUrl === 'about:blank') return; 3511 3512 // Check if a submit was clicked recently (within 10s) — if so, this navigation 3513 // is a post-submit redirect. We check for thank-you patterns; if found, mark 3514 // success. If not a thank-you page, fall through to re-inject (form may have 3515 // redirected to a different form page). 3516 let submitClickedAt = null; 3517 try { 3518 const raw = await page.evaluate( 3519 () => localStorage.getItem('form_submit_clicked') // eslint-disable-line no-undef 3520 ); 3521 submitClickedAt = raw ? 
parseInt(raw, 10) : null; 3522 } catch { 3523 /* context destroyed during navigation — treat as success */ 3524 return; 3525 } 3526 3527 const isPostSubmit = submitClickedAt && Date.now() - submitClickedAt < 10_000; 3528 3529 if (isPostSubmit) { 3530 // Check if it's a thank-you page 3531 const lowerUrl = newUrl.toLowerCase(); 3532 if (thankYouPatterns.some(p => lowerUrl.includes(p))) { 3533 // Success — let waitForOperatorDone pick it up via the signal 3534 try { 3535 await page.evaluate(u => { 3536 localStorage.setItem('form_success_signal', `url:${u}`); // eslint-disable-line no-undef 3537 }, newUrl); 3538 } catch { 3539 /* already navigated */ 3540 } 3541 return; 3542 } 3543 // Post-submit but not a thank-you URL — could be a different form page or error page 3544 // Fall through to re-inject so operator can handle it 3545 } 3546 3547 // Pre-submit navigation (operator browsing) OR ambiguous post-submit non-thank-you 3548 await reinjectOnNewPage(newUrl); 3549 }; 3550 3551 page.on('framenavigated', onNavigation); 3552 3553 await injectOperatorPanel(page, panelData); 3554 3555 // Update tab title so operator can identify which tabs need attention in multi-tab mode 3556 const tabTitle = panelWarnings.length 3557 ? `\u26a0\ufe0f ${outreach.domain} — needs review` 3558 : `\ud83d\udc40 ${outreach.domain} — review & submit`; 3559 await page 3560 .evaluate(t => { 3561 document.title = t; // eslint-disable-line no-undef 3562 }, tabTitle) 3563 .catch(() => {}); 3564 3565 if (panelWarnings.length > 0) { 3566 logger.warn(`Operator panel shown with ${panelWarnings.length} warning(s).`); 3567 } 3568 logger.info( 3569 filledCount > 0 3570 ? `Auto-filled ${filledCount} fields. Review, submit form, then confirm in popup.` 3571 : 'Could not auto-fill. Manually fill form using operator panel, then submit.' 
3572 ); 3573 3574 page.setDefaultTimeout(600_000); // 10 min — operator needs time to fill the form 3575 try { 3576 await waitForOperatorDone(page, outreach.contact_uri); 3577 } catch (waitErr) { 3578 navigationHandlerActive = false; 3579 page.off('framenavigated', onNavigation); 3580 // Operator clicked "Mark as Failed" — record it and queue a learning task 3581 if (waitErr.operatorMarkedFailed) { 3582 const failReason = waitErr.message; 3583 await markOutreachResultAsync(outreachId, failReason); 3584 logger.warn(`Outreach #${outreachId} marked as failed by operator: "${failReason}"`); 3585 3586 // Queue agent learning task — accumulate operator-marked failures so the 3587 // Developer agent can propose auto-detection rules (e.g. detect "no form" 3588 // before visiting, auto-mark CAPTCHA-unsolvable, etc.) 3589 try { 3590 // Count how many operator-marked failures share this reason pattern 3591 const recentRow = await getOne( 3592 `SELECT COUNT(*) AS n FROM messages 3593 WHERE direction = 'outbound' 3594 AND delivery_status IN ('failed', 'retry_later') 3595 AND error_message LIKE $1 3596 AND updated_at > NOW() - INTERVAL '30 days'`, 3597 [`%${failReason.substring(0, 40)}%`] 3598 ); 3599 const recentCount = recentRow?.n || 0; 3600 3601 await createAgentTask({ 3602 taskType: 'form_failure_learning', 3603 assignedTo: 'developer', 3604 createdBy: 'form_operator', 3605 priority: recentCount >= 5 ? 3 : 6, // escalate if pattern is recurring 3606 contextJson: { 3607 trigger: 'operator_marked_failed', 3608 outreachId, 3609 domain: outreach.domain, 3610 contactUri: outreach.contact_uri, 3611 failReason, 3612 recentSimilarCount: recentCount, 3613 description: 3614 `Operator marked form outreach #${outreachId} (${outreach.domain}) as failed: "${failReason}". ` + 3615 `${recentCount} similar failures in the last 30 days. ` + 3616 `Investigate whether this failure can be auto-detected and handled in form.js or prioritize.js ` + 3617 `(e.g. 
detect no-form pages before visiting, skip CAPTCHA-only pages, add new error pattern to error-categories.js).`, 3618 }, 3619 }); 3620 logger.info(`Queued form_failure_learning agent task for "${failReason}"`); 3621 } catch (taskErr) { 3622 logger.warn(`Could not queue learning task: ${taskErr.message}`); 3623 } 3624 3625 await page.close().catch(() => {}); 3626 if (ownBrowser) await ownBrowser.close().catch(() => {}); 3627 return { success: false, outreachId, failed: true, reason: failReason }; 3628 } 3629 // If the browser was closed, only treat as sent if we landed on a thank-you URL. 3630 // A bare browser close (no thank-you page) means the operator closed without 3631 // submitting — leave the outreach as approved so it can be retried. 3632 if ( 3633 waitErr.message?.includes('Target closed') || 3634 waitErr.message?.includes('has been closed') 3635 ) { 3636 let lastUrl = ''; 3637 try { 3638 lastUrl = page.url().toLowerCase(); 3639 } catch { 3640 /* page gone */ 3641 } 3642 const thankYouPatterns = ['thank', 'success', 'confirm', 'submitted', 'complete']; 3643 if (thankYouPatterns.some(p => lastUrl.includes(p))) { 3644 logger.success(`Browser closed on thank-you page — treating as success (${lastUrl})`); 3645 // fall through to mark as sent below 3646 } else { 3647 navigationHandlerActive = false; 3648 page.off('framenavigated', onNavigation); 3649 logger.warn( 3650 `Browser closed without thank-you confirmation — leaving as approved for retry (#${outreachId})` 3651 ); 3652 // Do NOT mark as sent — just return without updating delivery_status 3653 return { 3654 success: false, 3655 outreachId, 3656 skipped: true, 3657 reason: 'browser_closed_no_confirm', 3658 }; 3659 } 3660 } else { 3661 throw waitErr; 3662 } 3663 } 3664 navigationHandlerActive = false; 3665 page.off('framenavigated', onNavigation); 3666 page.setDefaultTimeout(30_000); // restore 3667 3668 // Persist any field label corrections the operator made during this session 3669 const correctionsJson = 
await page 3670 .evaluate(() => localStorage.getItem('field_corrections')) // eslint-disable-line no-undef 3671 .catch(() => null); 3672 if (correctionsJson) { 3673 try { 3674 const corrections = JSON.parse(correctionsJson); 3675 for (const { labelText, fieldType } of corrections) { 3676 saveFieldCorrection(labelText, fieldType); 3677 } 3678 } catch { 3679 /* ignore parse errors */ 3680 } 3681 } 3682 3683 // Persist missed-field corrections the operator confirmed via the learning badges 3684 const missedCorrectionsJson = await page 3685 .evaluate(() => localStorage.getItem('missed_field_corrections')) // eslint-disable-line no-undef 3686 .catch(() => null); 3687 if (missedCorrectionsJson) { 3688 try { 3689 const missedCorrections = JSON.parse(missedCorrectionsJson); 3690 const missedFillPlan = []; 3691 for (const { selector, labelText, fieldType } of missedCorrections) { 3692 if (labelText && fieldType) { 3693 saveFieldCorrection(labelText, fieldType); 3694 if (selector) missedFillPlan.push({ selector }); 3695 } 3696 } 3697 // Also update builder template with the newly learned selectors 3698 const learnedBuilderType = missedCorrections[0]?.builderType; 3699 if (learnedBuilderType && learnedBuilderType !== 'generic' && missedFillPlan.length > 0) { 3700 const learnedFields = missedCorrections.filter(c => c.selector).map(c => c.fieldType); 3701 saveBuilderTemplate(learnedBuilderType, learnedFields, missedFillPlan); 3702 } 3703 if (missedCorrections.length > 0) 3704 logger.info( 3705 `Persisted ${missedCorrections.length} missed-field correction(s) from operator` 3706 ); 3707 } catch { 3708 /* ignore parse errors */ 3709 } 3710 } 3711 3712 // Read operator LLM feedback (from the "Rework with Feedback" button in operator panel) 3713 const reworkFeedback = await page 3714 .evaluate(() => localStorage.getItem('rework_feedback')) // eslint-disable-line no-undef 3715 .catch(() => null); 3716 if (reworkFeedback) { 3717 logPromptFeedback({ 3718 outreachId, 3719 siteId: 
outreach.site_id, 3720 promptFile: 'FORM-FIELD-DETECTION', 3721 feedbackType: 'rework', 3722 feedbackText: reworkFeedback, 3723 }); 3724 logger.info(`Logged operator field-detection feedback: "${reworkFeedback}"`); 3725 } 3726 } 3727 3728 await run( 3729 `UPDATE messages 3730 SET delivery_status = 'sent', 3731 delivered_at = CURRENT_TIMESTAMP, 3732 sent_at = CURRENT_TIMESTAMP 3733 WHERE id = $1`, 3734 [outreachId] 3735 ); 3736 3737 // Save generalised builder template only when autoFillForm ran (not when an existing 3738 // template was reused — otherwise we'd just overwrite the template with itself) 3739 if (filledFields.length > 0 && builderType !== 'generic' && !fillResult?.fromTemplate) { 3740 saveBuilderTemplate(builderType, filledFields, fillPlanInfo); 3741 } 3742 3743 let screenshotPath = null; 3744 try { 3745 screenshotPath = join( 3746 projectRoot, 3747 'screenshots', 3748 `form-submit-${outreachId}-${Date.now()}.png` // nosemgrep: path-join-resolve-traversal 3749 ); 3750 await page.screenshot({ path: screenshotPath, fullPage: false }); 3751 logger.info(`Saved screenshot: ${screenshotPath}`); 3752 } catch { 3753 logger.info('Could not capture screenshot (browser already closed)'); 3754 } 3755 await page.close().catch(() => {}); 3756 if (ownBrowser) await ownBrowser.close().catch(() => {}); 3757 3758 return { success: true, outreachId, formUrl: outreach.contact_uri, screenshotPath }; 3759 } catch (error) { 3760 await page?.close().catch(() => {}); 3761 if (ownBrowser) await ownBrowser.close().catch(() => {}); 3762 // retry_later for transient errors (timeout, rate limit, browser crash), failed for terminal 3763 await markOutreachResultAsync(outreachId, error.message); 3764 logger.error(`Failed to submit form for outreach #${outreachId}`, error); 3765 throw error; 3766 } 3767 } 3768 3769 // CLI functionality 3770 if (import.meta.url === `file://${process.argv[1]}`) { 3771 const command = process.argv[2]; 3772 const headless = 
process.argv.includes('--headless'); 3773 3774 if (command === 'send' || command === 'submit') { 3775 // Single form: node src/outreach/form.js send <id> 3776 const outreachId = parseInt(process.argv[3], 10); 3777 if (!outreachId) { 3778 console.error('Usage: node src/outreach/form.js send <outreach_id> [--headless]'); 3779 process.exit(1); 3780 } 3781 submitContactForm(outreachId, headless) 3782 .then(result => { 3783 console.log('\n\u2705 Form submitted!\n'); 3784 console.log(`Outreach ID: ${result.outreachId}`); 3785 console.log(`Form URL: ${result.formUrl}`); 3786 console.log(`Screenshot: ${result.screenshotPath}\n`); 3787 process.exit(0); 3788 }) 3789 .catch(error => { 3790 console.error(`\n\u274c Failed: ${error.message}\n`); 3791 process.exit(1); 3792 }); 3793 } else if (command === 'run') { 3794 // Bulk run: node src/outreach/form.js run [--limit N] [--headless] 3795 const limitIdx = process.argv.indexOf('--limit'); 3796 const limit = limitIdx !== -1 ? parseInt(process.argv[limitIdx + 1], 10) : 50; 3797 getAll( 3798 `SELECT o.id FROM messages o 3799 JOIN sites s ON o.site_id = s.id 3800 WHERE o.contact_method = 'form' 3801 AND o.direction = 'outbound' 3802 AND o.approval_status = 'approved' 3803 AND (o.delivery_status IS NULL 3804 OR (o.delivery_status = 'retry_later' 3805 AND (o.error_message NOT LIKE '%No local phone number%' OR o.error_message IS NULL) 3806 AND (o.retry_at IS NULL OR o.retry_at <= NOW()))) 3807 ORDER BY o.created_at ASC 3808 LIMIT ${limit}`, 3809 [] 3810 ) 3811 .then(rows => { 3812 const ids = rows.map(r => r.id); 3813 if (ids.length === 0) { 3814 console.log('No approved form outreaches pending.'); 3815 process.exit(0); 3816 } 3817 console.log( 3818 `Running ${ids.length} form outreaches (ceiling: ${process.env.FORM_OUTREACH_CONCURRENCY || 3} tabs)...` 3819 ); 3820 return bulkSubmitContactForms(ids, { headless }); 3821 }) 3822 .then(stats => { 3823 console.log( 3824 `\n\u2705 Done: ${stats.succeeded} sent, ${stats.failed} failed, 
${stats.skipped} skipped\n` 3825 ); 3826 process.exit(0); 3827 }) 3828 .catch(error => { 3829 console.error(`\n\u274c Failed: ${error.message}\n`); 3830 process.exit(1); 3831 }); 3832 } else { 3833 console.log('Usage:'); 3834 console.log( 3835 ' run [--limit N] [--headless] - Run all pending form outreaches (adaptive concurrency)' 3836 ); 3837 console.log(' send <outreach_id> [--headless] - Submit a single form outreach'); 3838 console.log(''); 3839 console.log('Examples:'); 3840 console.log(' node src/outreach/form.js run'); 3841 console.log(' node src/outreach/form.js run --limit 20'); 3842 console.log(' node src/outreach/form.js send 42'); 3843 console.log(' node src/outreach/form.js send 42 --headless'); 3844 console.log(''); 3845 console.log('Environment:'); 3846 console.log(' FORM_OUTREACH_CONCURRENCY=3 max tabs (autoscales down under CPU load)'); 3847 console.log(''); 3848 process.exit(1); 3849 } 3850 } 3851 3852 /** 3853 * Submit multiple contact forms concurrently using a shared Chromium instance. 3854 * 3855 * Concurrency autoscales from 1 up to FORM_OUTREACH_CONCURRENCY (default 3) 3856 * based on real-time CPU load — same adaptive logic as BROWSER_CONCURRENCY / 3857 * ENRICHMENT_CONCURRENCY. Re-evaluated after every tab completes so the pool 3858 * expands/contracts without restarting. 3859 * 3860 * Tabs awaiting operator input stay open and hold their slot; they don't block 3861 * other tabs from running up to the current concurrency ceiling. 3862 * 3863 * @param {number[]} outreachIds - Outreach IDs to process (approved form outreaches) 3864 * @param {Object} options 3865 * @param {boolean} [options.headless=false] 3866 * @returns {Promise<{succeeded: number, failed: number, skipped: number}>} 3867 */ 3868 export async function bulkSubmitContactForms(outreachIds, options = {}) { 3869 const headless = options.headless ?? 
false; 3870 3871 if (outreachIds.length === 0) return { succeeded: 0, failed: 0, skipped: 0 }; 3872 3873 // FORM_OUTREACH_CONCURRENCY is the ceiling — always open that many tabs from the start. 3874 // getDynamicConcurrency throttles down under CPU load but never below 1. 3875 // This is different from headless pipeline stages: here the user is present watching 3876 // tabs, so we want all requested tabs open immediately. 3877 const ceiling = parseInt(process.env.FORM_OUTREACH_CONCURRENCY || '3', 10); 3878 logger.info(`Bulk form outreach: ${outreachIds.length} forms, ceiling=${ceiling} tabs`); 3879 3880 // Use extension-capable launch when NOPECHA_API_KEY is set — solves CAPTCHAs automatically. 3881 // Falls back to plain launch (no extensions) if key absent. 3882 const hasNopeCHA = !!(process.env.NOPECHA_API_KEY || process.env.NOPECHA_API_KEY_2); 3883 let context, closeBrowser; 3884 3885 if (hasNopeCHA) { 3886 const ext = await launchWithExtensions({ headless, devtools: !headless }); 3887 context = ext.context; // eslint-disable-line prefer-destructuring 3888 closeBrowser = ext.close; 3889 if (ext.hasNopeCHA) { 3890 logger.info('NopeCHA CAPTCHA solver active — CAPTCHAs will be solved automatically'); 3891 } 3892 } else { 3893 const browser = await launchStealthBrowser({ 3894 headless, 3895 stealthLevel: 'minimal', 3896 devtools: !headless, 3897 }); 3898 context = await createStealthContext(browser); 3899 closeBrowser = () => browser.close().catch(() => {}); 3900 } 3901 3902 // launchPersistentContext always opens one initial blank page. 3903 // Pass it to the first form as a reusable page so --limit N shows exactly N 3904 // contact form tabs rather than blank + N-1 forms. 
3905 const [initialBlankPage = null] = context.pages(); 3906 let initialPageHandedOff = false; 3907 3908 const stats = { succeeded: 0, failed: 0, skipped: 0 }; 3909 3910 await processBatch( 3911 outreachIds, 3912 async outreachId => { 3913 try { 3914 let sharedPage; 3915 if (!initialPageHandedOff && initialBlankPage) { 3916 initialPageHandedOff = true; 3917 sharedPage = initialBlankPage; 3918 } 3919 const result = await submitContactForm(outreachId, headless, { 3920 ...options, 3921 sharedContext: context, 3922 sharedPage, 3923 }); 3924 if (result.success) stats.succeeded++; 3925 else if (result.failed) stats.failed++; 3926 else stats.skipped++; 3927 } catch { 3928 stats.failed++; 3929 } 3930 }, 3931 { 3932 concurrency: ceiling, 3933 getDynamicConcurrency: () => 3934 getAdaptiveConcurrencyFast(1, ceiling, 'FORM_OUTREACH_CONCURRENCY'), 3935 onProgress: (done, total) => logger.info(`Form outreach: ${done}/${total} tabs processed`), 3936 } 3937 ); 3938 3939 await closeBrowser(); 3940 logger.info( 3941 `Bulk form outreach complete: ${stats.succeeded} sent, ${stats.failed} failed, ${stats.skipped} skipped` 3942 ); 3943 return stats; 3944 } 3945 3946 // Exported for testing 3947 export { isNavigationError, AUTOCOMPLETE_MAP, IFRAME_BLOCKLIST_PATTERNS }; 3948 3949 export default { 3950 submitContactForm, 3951 bulkSubmitContactForms, 3952 };