#!/usr/bin/env node

/**
 * Contact Prioritization Module (prioritize.js)
 * Matches proposal variants to appropriate contact channels based on priority:
 * SMS (Variant 1) > Form (Variant 2) > Email (Variant 3) > Social
 */

// Safe: all object keys come from our own DB data and hardcoded channel maps, not user input
/* eslint-disable security/detect-object-injection */

import { join, dirname } from 'path';
import { fileURLToPath, pathToFileURL } from 'url';
import Logger from '../utils/logger.js';
import { getCountryByCode, isMobileNumber } from '../config/countries.js';
import { extractFirstname } from '../utils/name-extractor.js';
import { getContactsDataWithFallback } from '../utils/contacts-storage.js';
import { run, getOne, getAll } from '../utils/db.js';
import '../utils/load-env.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const logger = new Logger('ContactPrioritizer');

/**
 * Channel priority order
 * SMS is highest priority (best conversion), email is lowest
 */
const CHANNEL_PRIORITY = {
  sms: 1,
  form: 2,
  email: 3,
  x: 4,
  linkedin: 5,
};

/**
 * Variant to preferred channel mapping
 * Based on Architecture.md design
 */
const VARIANT_CHANNEL_PREFERENCE = {
  1: 'sms', // Short variant for SMS
  2: 'form', // Medium variant for forms
  3: 'email', // Detailed variant for email
};

/** Government / military domain suffixes, tested against the part after "@". */
const GOVERNMENT_DOMAIN_PATTERNS = [
  /\.gov$/i, // US federal (.gov)
  /\.gov\.[a-z]{2}$/i, // Country-specific (.gov.au, .gov.uk, .gov.in, etc.)
  /\.gc\.ca$/i, // Canada (Government of Canada)
  /\.govt\.nz$/i, // New Zealand
  /\.gob\.[a-z]{2}$/i, // Spanish-speaking countries (.gob.mx, .gob.es, etc.)
  /\.gouv\.[a-z]{2}$/i, // French-speaking countries (.gouv.fr, .gouv.be, etc.)
  /\.go\.[a-z]{2}$/i, // Japan, Korea (.go.jp, .go.kr)
  /\.gov\.br$/i, // Brazil (already covered by the generic .gov.xx pattern)
  /\.mil$/i, // US military
  /\.mil\.[a-z]{2}$/i, // Country military domains
];

// Host matchers are applied to LOWERCASED input. The leading boundary stops
// "x.com" from matching inside unrelated hosts such as "netflix.com" or
// "wix.com"; the trailing lookahead rejects "x.community" / "x.com.au".
const LINKEDIN_HOST_PATTERN = /(?:^|[^a-z0-9-])linkedin\.com(?![a-z0-9-]|\.[a-z0-9])/;
const X_HOST_PATTERN = /(?:^|[^a-z0-9-])(?:twitter|x)\.com(?![a-z0-9-]|\.[a-z0-9])/;
const X_USERNAME_PATTERN = /(?:^|[^a-z0-9-])(?:twitter|x)\.com\/([^/?#]+)/;

/** First path segments on twitter.com / x.com that are app routes, not usernames. */
const X_NON_USERNAME_PATHS = new Set([
  'home',
  'explore',
  'notifications',
  'messages',
  'settings',
  'compose',
  'i',
  'intent',
  'share',
]);

// Basic format guard — catch LLM hallucinations like "mail7", "0120-857-962", "your-email"
const EMAIL_FORMAT_PATTERN = /^[^\s@]+@[^\s@]+\.[^\s@]{2,}$/;

// E.164: "+", a non-zero leading digit, 8-15 digits in total
const E164_PATTERN = /^\+[1-9]\d{7,14}$/;

/** Matches URLs that look like a contact page. */
const CONTACT_PAGE_PATTERN = /contact|support|get-in-touch/i;

// Australian mobile patterns used by parseAvailableChannels
const AU_MOBILE_PATTERN = /^(\+61|04|614)/i;

/**
 * Phone label priority: Mobile > Sales > null/other > Office/Landline.
 * A Map (rather than an object) so labels such as "constructor" cannot
 * resolve to inherited Object.prototype members.
 */
const PHONE_LABEL_PRIORITY = new Map([
  ['mobile', 1],
  ['cell', 1],
  ['sales', 2],
  ['jim', 3], // Named contacts often personal mobile
  ['null', 4],
  ['office', 5],
  ['landline', 5],
  ['fax', 99], // Avoid fax numbers
]);
const DEFAULT_PHONE_LABEL_PRIORITY = 4;

/**
 * True for non-null objects. Needed because `typeof null === 'object'`.
 * @param {*} value
 * @returns {boolean}
 */
function isRecord(value) {
  return value !== null && typeof value === 'object';
}

/**
 * Detect if an email address is a government email
 * @param {string} email - Email address to check
 * @returns {boolean} True if government email
 */
function isGovernmentEmail(email) {
  if (!email || typeof email !== 'string') return false;

  const lower = email.toLowerCase().trim();

  // Use the LAST "@" so the real domain is inspected even for odd local parts
  const atIndex = lower.lastIndexOf('@');
  if (atIndex === -1) return false;

  const domain = lower.slice(atIndex + 1);
  if (!domain) return false;

  return GOVERNMENT_DOMAIN_PATTERNS.some(pattern => pattern.test(domain));
}

/**
 * Detect social platform from URL
 * @param {string} url - Social profile URL
 * @returns {string|null} Platform name ('linkedin', 'x') or null
 */
function detectSocialPlatform(url) {
  if (!url || typeof url !== 'string') return null;
  const lower = url.toLowerCase();
  if (LINKEDIN_HOST_PATTERN.test(lower)) return 'linkedin';
  if (X_HOST_PATTERN.test(lower)) return 'x';
  return null;
}

/**
 * Check if a Twitter/X.com URL has a valid username
 * @param {string} url - URL to check
 * @returns {boolean} True if the URL is not a Twitter/X URL at all, or is one
 *   with a username segment; false for bare/app-route Twitter/X URLs and for
 *   non-string input
 */
function hasTwitterUsername(url) {
  if (!url || typeof url !== 'string') return false;

  const lower = url.toLowerCase().trim();

  // Not a Twitter/X URL, so there is nothing to validate here
  if (!X_HOST_PATTERN.test(lower)) {
    return true;
  }

  // First path segment after the domain
  const match = lower.match(X_USERNAME_PATTERN);
  if (!match || !match[1]) {
    return false; // No path after domain
  }

  // Reject app routes that are not usernames
  return !X_NON_USERNAME_PATHS.has(match[1]);
}

/**
 * Clean up invalid Twitter/X.com links from contacts_json
 * Removes links that don't have a username (e.g., just "https://x.com" or "https://twitter.com")
 * The input object (including its nested `social` object) is not mutated.
 * @param {Object} contactsJson - Parsed contacts JSON
 * @returns {Object} Cleaned contacts JSON
 */
export function cleanInvalidSocialLinks(contactsJson) {
  if (!contactsJson || typeof contactsJson !== 'object') {
    return contactsJson;
  }

  const cleaned = { ...contactsJson };

  // Clean social_profiles array
  if (Array.isArray(cleaned.social_profiles)) {
    cleaned.social_profiles = cleaned.social_profiles.filter(item => {
      if (typeof item === 'string') {
        return hasTwitterUsername(item);
      }
      if (item && typeof item === 'object' && item.url) {
        return hasTwitterUsername(item.url);
      }
      return true; // Keep other formats
    });
  }

  // Clean old format: social.x and social.twitter arrays
  if (cleaned.social && typeof cleaned.social === 'object') {
    // The spread above is shallow: copy `social` before replacing its arrays,
    // otherwise the caller's object would be modified in place.
    cleaned.social = { ...cleaned.social };
    if (Array.isArray(cleaned.social.x)) {
      cleaned.social.x = cleaned.social.x.filter(url => hasTwitterUsername(url));
    }
    if (Array.isArray(cleaned.social.twitter)) {
      cleaned.social.twitter = cleaned.social.twitter.filter(url => hasTwitterUsername(url));
    }
  }

  return cleaned;
}

/**
 * Resolve contact-form URLs from the supported contacts_json layouts, in order:
 *   1. primary_contact_form.form_url (new format)
 *   2. primary_contact_form.form_action_url (older field)
 *   3. form_url array (old format)
 *   4. key_pages entry that looks like a contact page — only when the
 *      primary_contact_form key is ABSENT; if it's null, enrich already visited
 *      and confirmed there is no form there
 *   5. contact_pages array (old format)
 * Empty arrays fall through to the next layout.
 * @param {Object} contactsJson - Parsed contacts JSON
 * @returns {Array<string>} Candidate form URLs (possibly empty)
 */
function findFormUrls(contactsJson) {
  const primaryForm = contactsJson.primary_contact_form;
  if (primaryForm?.form_url) return [primaryForm.form_url];
  if (primaryForm?.form_action_url) return [primaryForm.form_action_url];

  if (Array.isArray(contactsJson.form_url) && contactsJson.form_url.length > 0) {
    return contactsJson.form_url.filter(Boolean);
  }

  if (
    !('primary_contact_form' in contactsJson) &&
    Array.isArray(contactsJson.key_pages) &&
    contactsJson.key_pages.length > 0
  ) {
    // Find first URL that looks like a contact page (non-string entries are ignored)
    const contactPage = contactsJson.key_pages.find(
      url => typeof url === 'string' && CONTACT_PAGE_PATTERN.test(url)
    );
    return contactPage ? [contactPage] : [];
  }

  if (Array.isArray(contactsJson.contact_pages) && contactsJson.contact_pages.length > 0) {
    return contactsJson.contact_pages.filter(Boolean);
  }

  return [];
}

/**
 * Normalize one social_profiles entry (URL string, or {url, label, platform} object).
 * An explicit `platform` wins over URL detection; "twitter" is mapped to "x".
 * NOTE(review): an explicit platform other than x/linkedin is passed through
 * unchanged — confirm whether such platforms should be dropped here.
 * @param {string|Object} item - Raw social_profiles entry
 * @returns {{url: string, label: (string|null), platform: string}|null} Normalized
 *   profile, or null when no URL or platform can be determined
 */
function normalizeSocialProfile(item) {
  if (typeof item === 'string') {
    const platform = detectSocialPlatform(item);
    return platform ? { url: item, label: null, platform } : null;
  }
  if (!isRecord(item)) return null;

  const { url, label = null } = item;
  const raw = typeof item.platform === 'string' ? item.platform.toLowerCase() : undefined;
  const platform = (raw === 'twitter' ? 'x' : raw) || detectSocialPlatform(url);
  return url && platform ? { url, label, platform } : null;
}

/**
 * Extract ALL contacts from contacts_json with deduplication
 * @param {Object} contactsJson - Parsed contacts JSON from database
 * @param {string} countryCode - ISO country code for phone validation (e.g., 'AU', 'US', 'UK')
 * @returns {Array} Array of {type, uri, name, channel} objects
 */
// eslint-disable-next-line complexity -- Contact extraction requires checking multiple channel types and formats
export function getAllContacts(contactsJson, countryCode = 'AU') {
  if (!contactsJson) {
    return [];
  }

  const country = getCountryByCode(countryCode);
  const contacts = [];
  const seen = new Set(); // For deduplication (shared across all channel types)

  // Helper to add contact with deduplication
  const addContact = (type, uri, name = null, channel = null) => {
    if (!uri) return;

    // Normalize URI for deduplication (lowercase, trim)
    const normalizedUri = String(uri).toLowerCase().trim();
    if (seen.has(normalizedUri)) {
      return; // Skip duplicates
    }

    seen.add(normalizedUri);
    contacts.push({
      type,
      uri,
      name,
      channel: channel || type,
    });
  };

  // Extract emails (with government email filtering and basic format validation)
  const emailArray = contactsJson.email_addresses || contactsJson.email;
  if (Array.isArray(emailArray)) {
    for (const item of emailArray) {
      const record = isRecord(item) ? item : null;
      const rawEmail = record ? record.email : item;
      const emailAddress = rawEmail == null ? '' : String(rawEmail).trim();

      if (!EMAIL_FORMAT_PATTERN.test(emailAddress)) {
        logger.warn(`Skipping invalid email address: ${rawEmail}`);
        continue;
      }

      // Skip government emails
      if (isGovernmentEmail(emailAddress)) {
        logger.info(`Skipping government email: ${emailAddress}`);
        continue;
      }

      // Store the trimmed address — the same value that passed validation
      addContact('email', emailAddress, record?.label || null, 'email');
    }
  }

  // Extract phone numbers (for SMS)
  // Only mobile numbers are added — landlines are excluded from SMS (will fail for AU/GB/NZ/etc.)
  const phoneArray = contactsJson.phone_numbers || contactsJson.phone;
  if (Array.isArray(phoneArray)) {
    for (const item of phoneArray) {
      const record = isRecord(item) ? item : null;
      const phoneNumber = record ? record.number : item;
      const label = record ? (record.label ?? null) : null;

      // Hard-exclude fax numbers — they cannot receive SMS and produce "Hey Fax" greetings
      if (typeof label === 'string' && label.trim().toLowerCase() === 'fax') {
        logger.info(`Skipping fax number: ${phoneNumber}`);
        continue;
      }

      if (!phoneNumber) continue;

      // E.164 format validation: must start with +[1-9] and have 8-15 digits total
      // Catches LLM hallucinations like "+0401040", "+01520153" (repeated digit patterns)
      const cleanedPhone = String(phoneNumber).replace(/[\s\-().]/g, '');
      if (!E164_PATTERN.test(cleanedPhone)) {
        logger.warn(`Skipping invalid phone number (not E.164): ${phoneNumber}`);
        continue;
      }

      // Country calling code validation: phone prefix must match site's country
      // Catches LLM hallucinations like "+6036884738" (Malaysia +60) for NZ sites
      if (country?.phoneFormat && !cleanedPhone.startsWith(country.phoneFormat)) {
        logger.warn(
          `Skipping phone ${cleanedPhone}: country code mismatch (expected ${country.phoneFormat} for ${countryCode})`
        );
        continue;
      }

      if (isMobileNumber(phoneNumber, country)) {
        addContact('sms', phoneNumber, label, 'sms');
      }
    }
  }

  // Extract contact form(s)
  for (const formUrl of findFormUrls(contactsJson)) {
    addContact('form', formUrl, null, 'form');
  }

  // Extract social profiles
  // New format: social_profiles array of objects with {url, label} or URLs (platform detected from URL)
  if (Array.isArray(contactsJson.social_profiles)) {
    for (const item of contactsJson.social_profiles) {
      const profile = normalizeSocialProfile(item);
      if (profile) {
        addContact(profile.platform, profile.url, profile.label, profile.platform);
      }
    }
  }
  // Old format: social object
  else if (contactsJson.social) {
    const { linkedin, x, twitter } = contactsJson.social;
    if (Array.isArray(linkedin)) {
      linkedin.forEach(url => addContact('linkedin', url, null, 'linkedin'));
    }
    if (Array.isArray(x)) {
      x.forEach(url => addContact('x', url, null, 'x'));
    }
    // Twitter is now X
    if (Array.isArray(twitter)) {
      twitter.forEach(url => addContact('x', url, null, 'x'));
    }
  }

  logger.info(`Extracted ${contacts.length} unique contacts`);

  return contacts;
}

/**
 * Parse contacts_json and extract available contact methods
 * Only channels with a usable (truthy) URI are included in the result.
 * NOTE(review): unlike getAllContacts, the email picked here is not format-checked
 * or filtered for government domains — confirm whether that is intentional.
 * @param {Object} contactsJson - Parsed contacts JSON from database
 * @returns {Object} Available channels with URIs
 */
// eslint-disable-next-line complexity -- Contact parsing requires checking multiple channel types
export function parseAvailableChannels(contactsJson) {
  if (!contactsJson) {
    return {};
  }

  const channels = {};

  // Email addresses - handle both formats
  const emailArray = contactsJson.email_addresses || contactsJson.email;
  if (Array.isArray(emailArray) && emailArray.length > 0) {
    // Extract email string from object format {email: "...", label: "..."} or plain string
    const [firstEmail] = emailArray;
    const email = isRecord(firstEmail) ? firstEmail.email : firstEmail;
    if (email) {
      channels.email = email;
    }
  }

  // Phone numbers (for SMS) - handle both formats
  const phoneArray = contactsJson.phone_numbers || contactsJson.phone;
  if (Array.isArray(phoneArray) && phoneArray.length > 0) {
    // Normalize to {number, label}; entries without a number are dropped
    const normalizedPhones = phoneArray
      .map(phone =>
        isRecord(phone)
          ? { number: phone.number, label: phone.label ?? null } // New format: {number, label}
          : { number: phone, label: null } // Old format: plain string
      )
      .filter(phone => phone.number != null && phone.number !== '');

    const priorityOf = phone =>
      PHONE_LABEL_PRIORITY.get(String(phone.label || '').toLowerCase()) ??
      DEFAULT_PHONE_LABEL_PRIORITY;

    // Prioritize by label (Array.prototype.sort is stable, so ties keep input order)
    normalizedPhones.sort((a, b) => priorityOf(a) - priorityOf(b));

    // Prefer numbers matching mobile patterns (assumed to support SMS)
    const mobilePhone = normalizedPhones.find(phone =>
      AU_MOBILE_PATTERN.test(String(phone.number))
    );

    if (mobilePhone) {
      channels.sms = mobilePhone.number;
    } else if (normalizedPhones.length > 0) {
      // If no mobile detected, use first phone anyway (after label-based sorting)
      channels.sms = normalizedPhones[0].number;
    }
  }

  // Contact forms - first candidate from whichever layout is present
  const [formUrl] = findFormUrls(contactsJson);
  if (formUrl) {
    channels.form = formUrl;
  }

  // Social media platforms - handle both formats
  // Only X.com and LinkedIn allow cold outreach
  // New format: social_profiles array of objects {url, label} or strings
  if (Array.isArray(contactsJson.social_profiles)) {
    for (const item of contactsJson.social_profiles) {
      const profile = normalizeSocialProfile(item);
      // First profile per platform wins
      if (profile && !channels[profile.platform]) {
        channels[profile.platform] = profile.url;
      }
    }
  }
  // Old format: social object
  else if (contactsJson.social) {
    const { linkedin, x, twitter } = contactsJson.social;
    if (Array.isArray(linkedin) && linkedin.length > 0) {
      [channels.linkedin] = linkedin;
    }
    if (Array.isArray(x) && x.length > 0) {
      [channels.x] = x;
    }
    // Twitter is now X (takes precedence over social.x when both exist)
    if (Array.isArray(twitter) && twitter.length > 0) {
      [channels.x] = twitter;
    }
  }

  return channels;
}

/**
 * Prioritize contact methods for outreach
 * Match variants to channels based on preference and availability
 * @param {Object} contactsJson - Parsed contacts JSON
 * @param {Array} variants - Array of proposal variants (1, 2, 3)
 * @returns {Array} Array of {variant, channel, uri} mappings, sorted by variant
 */
export function prioritizeContacts(contactsJson, variants = [1, 2, 3]) {
  const availableChannels = parseAvailableChannels(contactsJson);

  if (Object.keys(availableChannels).length === 0) {
    logger.warn('No contact channels found');
    return [];
  }

  const assignments = [];
  const usedChannels = new Set(); // Track which channels we've already assigned
  const assignedVariants = new Set();

  // Sort variants to process them in order (1, 2, 3)
  const sortedVariants = [...variants].sort((a, b) => a - b);

  // First pass: Try to match each variant to its preferred channel
  for (const variantNumber of sortedVariants) {
    const preferredChannel = VARIANT_CHANNEL_PREFERENCE[variantNumber];

    if (availableChannels[preferredChannel] && !usedChannels.has(preferredChannel)) {
      assignments.push({
        variant: variantNumber,
        channel: preferredChannel,
        uri: availableChannels[preferredChannel],
      });
      usedChannels.add(preferredChannel);
      assignedVariants.add(variantNumber);
    }
  }

  // Second pass: For unassigned variants, assign to highest priority available channel
  const unassignedVariants = sortedVariants.filter(v => !assignedVariants.has(v));

  // Channels without a configured priority sort last instead of yielding NaN
  const priorityOf = channel => CHANNEL_PRIORITY[channel] ?? Number.MAX_SAFE_INTEGER;
  const remainingChannels = Object.keys(availableChannels)
    .filter(channel => !usedChannels.has(channel))
    .sort((a, b) => priorityOf(a) - priorityOf(b));

  const pairCount = Math.min(unassignedVariants.length, remainingChannels.length);
  for (let i = 0; i < pairCount; i++) {
    const channel = remainingChannels[i];

    assignments.push({
      variant: unassignedVariants[i],
      channel,
      uri: availableChannels[channel],
    });
    usedChannels.add(channel);
  }

  // Sort assignments by variant number for clarity
  assignments.sort((a, b) => a.variant - b.variant);

  logger.info(
    `Assigned ${assignments.length} variants to channels: ${assignments.map(a => `V${a.variant}→${a.channel}`).join(', ')}`
  );

  return assignments;
}

/**
 * Update outbound message records with proper contact URIs
 * @param {number} siteId - Site ID to update messages for
 * @returns {Promise<number>} Number of messages updated
 * @throws {Error} When the site is missing, has no contacts data, or has no
 *   usable contact channel; database errors are logged and rethrown
 */
export async function updateOutreachContacts(siteId) {
  try {
    const site = await getOne('SELECT id, domain FROM sites WHERE id = $1', [siteId]);

    if (!site) {
      throw new Error(`Site not found: ${siteId}`);
    }

    const contactsJson = getContactsDataWithFallback(siteId, site);
    if (!contactsJson) {
      throw new Error(`Site ${site.domain} has no contacts_json data`);
    }

    // Get pending outbound messages for this site.
    // ORDER BY id keeps the index → variant mapping below stable between runs.
    const outreaches = await getAll(
      `SELECT id, contact_method, contact_uri
       FROM messages
       WHERE site_id = $1
         AND direction = 'outbound'
         AND approval_status = 'pending'
       ORDER BY id`,
      [siteId]
    );

    if (outreaches.length === 0) {
      logger.warn(`No pending outbound messages found for site #${siteId}`);
      return 0;
    }

    // Get prioritized contact assignments (use sequential indices)
    const variants = outreaches.map((_, i) => i + 1);
    const assignments = prioritizeContacts(contactsJson, variants);

    if (assignments.length === 0) {
      throw new Error(`No contact channels available for site ${site.domain}`);
    }

    const assignmentByVariant = new Map(assignments.map(a => [a.variant, a]));

    // Update each message with proper contact info
    let updatedCount = 0;

    for (const [index, outreach] of outreaches.entries()) {
      const assignment = assignmentByVariant.get(index + 1);

      if (!assignment) {
        logger.warn(`No contact assignment found for outreach #${outreach.id} index ${index + 1}`);
        continue;
      }

      await run(
        `UPDATE messages
         SET contact_method = $1, contact_uri = $2
         WHERE id = $3`,
        [assignment.channel, assignment.uri, outreach.id]
      );
      updatedCount++;
      logger.success(
        `Updated outreach #${outreach.id}: V${assignment.variant} → ${assignment.channel} (${assignment.uri})`
      );
    }

    logger.success(`Updated ${updatedCount}/${outreaches.length} outreaches for ${site.domain}`);

    return updatedCount;
  } catch (error) {
    logger.error(`Failed to update outreach contacts for site ${siteId}`, error);
    throw error;
  }
}

/**
 * Bulk update all pending outreaches with contact URIs
 * @param {number} limit - Maximum number of sites to process; null, or any value
 *   that is not a positive integer, means no limit
 * @returns {Promise<Object>} Summary: {total, succeeded, failed, totalOutreachesUpdated}
 */
export async function bulkUpdateOutreachContacts(limit = null) {
  // Validate the limit and bind it as a query parameter rather than
  // interpolating caller-supplied text into the SQL string.
  const parsedLimit = limit == null ? Number.NaN : Number.parseInt(limit, 10);
  const hasLimit = Number.isInteger(parsedLimit) && parsedLimit > 0;
  if (limit != null && !hasLimit) {
    logger.warn(`Ignoring invalid limit "${limit}"; processing all sites`);
  }

  // Get all sites with pending outbound messages that need contact URIs updated
  const sites = await getAll(
    `SELECT DISTINCT s.id, s.domain
     FROM sites s
     JOIN messages m ON s.id = m.site_id
     WHERE m.direction = 'outbound'
       AND m.approval_status = 'pending'
       AND (m.contact_uri = 'PENDING_CONTACT_EXTRACTION' OR m.contact_uri IS NULL)
     ${hasLimit ? 'LIMIT $1' : ''}`,
    hasLimit ? [parsedLimit] : []
  );

  logger.info(`Updating contact URIs for ${sites.length} sites...`);

  const results = {
    total: sites.length,
    succeeded: 0,
    failed: 0,
    totalOutreachesUpdated: 0,
  };

  // Sites are processed one at a time; a failure on one site is logged and counted
  for (const site of sites) {
    try {
      const count = await updateOutreachContacts(site.id);
      results.succeeded++;
      results.totalOutreachesUpdated += count;
    } catch (error) {
      logger.error(`Failed for site ${site.domain}:`, error);
      results.failed++;
    }
  }

  logger.success(
    `Bulk update complete: ${results.succeeded} sites, ${results.totalOutreachesUpdated} outreaches updated, ${results.failed} failures`
  );

  return results;
}

/**
 * Get outreach readiness report
 * Shows which sites have proposals ready to send
 * @returns {Promise<Array>} One row per site with pending outbound messages:
 *   {id, domain, variant_count, ready_count, channels}
 */
export async function getOutreachReadinessReport() {
  return getAll(
    `SELECT
       s.id,
       s.domain,
       COUNT(m.id) AS variant_count,
       SUM(CASE WHEN m.contact_uri != 'PENDING_CONTACT_EXTRACTION' THEN 1 ELSE 0 END) AS ready_count,
       STRING_AGG(m.contact_method, ', ') AS channels
     FROM sites s
     JOIN messages m ON s.id = m.site_id
     WHERE m.direction = 'outbound'
       AND m.approval_status = 'pending'
     GROUP BY s.id, s.domain
     ORDER BY ready_count DESC, s.domain`,
    []
  );
}

// CLI functionality
// Compare file URLs built by Node so the check also holds on Windows and for
// paths containing characters that are percent-encoded in import.meta.url.
const isCliEntryPoint =
  Boolean(process.argv[1]) && import.meta.url === pathToFileURL(process.argv[1]).href;

if (isCliEntryPoint) {
  const command = process.argv[2];

  if (command === 'update') {
    const siteId = parseInt(process.argv[3], 10);

    if (!siteId) {
      console.error('Usage: node src/contacts/prioritize.js update <site_id>');
      process.exit(1);
    }

    updateOutreachContacts(siteId)
      .then(count => {
        console.log(`\n\u2705 Updated ${count} outreach(es)\n`);
        process.exit(0);
      })
      .catch(error => {
        console.error(`\n\u274c Failed: ${error.message}\n`);
        process.exit(1);
      });
  } else if (command === 'bulk') {
    const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null;

    bulkUpdateOutreachContacts(limit)
      .then(results => {
        console.log('\n\u2705 Bulk update complete!\n');
        console.log(`Sites processed: ${results.succeeded}/${results.total}`);
        console.log(`Outreaches updated: ${results.totalOutreachesUpdated}`);
        console.log(`Failures: ${results.failed}\n`);
        process.exit(0);
      })
      .catch(error => {
        console.error(`\n\u274c Failed: ${error.message}\n`);
        process.exit(1);
      });
  } else if (command === 'report') {
    getOutreachReadinessReport()
      .then(report => {
        console.log(`\n\ud83d\udcca Outreach Readiness Report (${report.length} sites)\n`);

        if (report.length === 0) {
          console.log('No pending outreaches found.\n');
          process.exit(0);
        }

        report.forEach(r => {
          // Aggregates may come back as strings or numbers depending on the driver
          const status = Number(r.ready_count) === Number(r.variant_count) ? '\u2705' : '\u23f3';
          console.log(
            `${status} ${r.domain} - ${r.ready_count}/${r.variant_count} ready - ${r.channels}`
          );
        });

        console.log('');
        process.exit(0);
      })
      .catch(error => {
        console.error(`\n\u274c Failed: ${error.message}\n`);
        process.exit(1);
      });
  } else {
    console.log('Contact Prioritization Module');
    console.log('');
    console.log('Usage:');
    console.log('  update <site_id>  - Update contact URIs for a specific site');
    console.log('  bulk [limit]      - Update all pending outreaches');
    console.log('  report            - Show which outreaches are ready to send');
    console.log('');
    console.log('Examples:');
    console.log('  node src/contacts/prioritize.js update 42');
    console.log('  node src/contacts/prioritize.js bulk 10');
    console.log('  node src/contacts/prioritize.js report');
    console.log('');
    process.exit(1);
  }
}

/**
 * Strip TLD from a domain to get the brand stem for name extraction.
 * e.g. "nickelectrical.com.au" → "nickelectrical", "jims-plumbing.co.nz" → "jims-plumbing"
 * @param {string|null} domain - Site domain
 * @returns {string|null} Domain without "www." prefix and TLD, or null if no domain
 */
function _domainStem(domain) {
  if (!domain) return null;
  return domain
    .replace(/^www\./i, '')
    .replace(/\.(com|net|org|biz|info|co)\.[a-z]{2}$/i, '') // compound TLDs (.com.au, .co.nz)
    .replace(/\.[a-z]{2,}$/i, ''); // simple TLD (.com, .ca, .de)
}

/**
 * Async version of getAllContacts that resolves contact labels to real first names
 * using a Haiku LLM call. Non-person labels (Office, Sales, Büro, etc.) become null.
 * Use this in proposal generation where the contact name affects the greeting.
 *
 * Per contact, the name is resolved from the first source that yields one:
 * the contact label, then the email local part, then the domain stem.
 *
 * @param {Object} contactsJson - Parsed contacts JSON
 * @param {string} countryCode - ISO country code
 * @param {string|null} domain - Site domain used as the final name fallback
 * @returns {Promise<Array>} Contacts with resolved .name field
 */
export async function getAllContactsWithNames(contactsJson, countryCode = 'AU', domain = null) {
  const contacts = getAllContacts(contactsJson, countryCode);
  const domainStem = _domainStem(domain);

  // The domain stem is identical for every contact, so resolve it at most once
  // and share the pending result instead of issuing one lookup per contact.
  let domainNamePromise = null;
  const resolveDomainName = () => {
    if (!domainNamePromise) {
      domainNamePromise = Promise.resolve(extractFirstname(domainStem));
    }
    return domainNamePromise;
  };

  return Promise.all(
    contacts.map(async contact => {
      // Try label first
      if (contact.name) {
        const firstname = await extractFirstname(contact.name);
        if (firstname) return { ...contact, name: firstname };
      }
      // Fallback: try email address prefix (e.g. nick@domain.com → "Nick")
      if (
        contact.channel === 'email' &&
        typeof contact.uri === 'string' &&
        contact.uri.includes('@')
      ) {
        const [prefix] = contact.uri.split('@');
        const firstname = await extractFirstname(prefix);
        if (firstname) return { ...contact, name: firstname };
      }
      // Final fallback: try domain stem (e.g. "nickelectrical.com.au" → "nickelectrical" → "Nick")
      if (domainStem) {
        const firstname = await resolveDomainName();
        if (firstname) return { ...contact, name: firstname };
      }
      return { ...contact, name: null };
    })
  );
}

export default {
  getAllContacts,
  getAllContactsWithNames,
  parseAvailableChannels,
  prioritizeContacts,
  updateOutreachContacts,
  bulkUpdateOutreachContacts,
  getOutreachReadinessReport,
};