/ src / contacts / prioritize.js
prioritize.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Contact Prioritization Module
  5   * Matches proposal variants to appropriate contact channels based on priority:
  6   * SMS (Variant 1) > Form (Variant 2) > Email (Variant 3) > Social
  7   */
  8  
  9  // Safe: all object keys come from our own DB data and hardcoded channel maps, not user input
 10  /* eslint-disable security/detect-object-injection */
 11  
 12  import { join, dirname } from 'path';
 13  import { fileURLToPath } from 'url';
 14  import Logger from '../utils/logger.js';
 15  import { getCountryByCode, isMobileNumber } from '../config/countries.js';
 16  import { extractFirstname } from '../utils/name-extractor.js';
 17  import { getContactsDataWithFallback } from '../utils/contacts-storage.js';
 18  import { run, getOne, getAll } from '../utils/db.js';
 19  import '../utils/load-env.js';
 20  
 21  const __filename = fileURLToPath(import.meta.url);
 22  const __dirname = dirname(__filename);
 23  
 24  const logger = new Logger('ContactPrioritizer');
 25  
 26  /**
 27   * Channel priority order
 28   * SMS is highest priority (best conversion), email is lowest
 29   */
 30  const CHANNEL_PRIORITY = {
 31    sms: 1,
 32    form: 2,
 33    email: 3,
 34    x: 4,
 35    linkedin: 5,
 36  };
 37  
 38  /**
 39   * Variant to preferred channel mapping
 40   * Based on Architecture.md design
 41   */
 42  const VARIANT_CHANNEL_PREFERENCE = {
 43    1: 'sms', // Short variant for SMS
 44    2: 'form', // Medium variant for forms
 45    3: 'email', // Detailed variant for email
 46  };
 47  
 48  /**
 49   * Detect if an email address is a government email
 50   * @param {string} email - Email address to check
 51   * @returns {boolean} True if government email
 52   */
 53  function isGovernmentEmail(email) {
 54    if (!email || typeof email !== 'string') return false;
 55  
 56    const lower = email.toLowerCase().trim();
 57  
 58    // Government TLD patterns by country
 59    const govPatterns = [
 60      /\.gov$/i, // US federal (.gov)
 61      /\.gov\.[a-z]{2}$/i, // Country-specific (.gov.au, .gov.uk, .gov.in, etc.)
 62      /\.gc\.ca$/i, // Canada (Government of Canada)
 63      /\.govt\.nz$/i, // New Zealand
 64      /\.gob\.[a-z]{2}$/i, // Spanish-speaking countries (.gob.mx, .gob.es, etc.)
 65      /\.gouv\.[a-z]{2}$/i, // French-speaking countries (.gouv.fr, .gouv.be, etc.)
 66      /\.go\.[a-z]{2}$/i, // Japan, Korea (.go.jp, .go.kr)
 67      /\.gov\.br$/i, // Brazil
 68      /\.mil$/i, // US military
 69      /\.mil\.[a-z]{2}$/i, // Country military domains
 70    ];
 71  
 72    // Check if email matches any government pattern
 73    const domain = lower.split('@')[1];
 74    if (!domain) return false;
 75  
 76    return govPatterns.some(pattern => pattern.test(domain));
 77  }
 78  
 79  /**
 80   * Detect social platform from URL
 81   * @param {string} url - Social profile URL
 82   * @returns {string|null} Platform name ('linkedin', 'x') or null
 83   */
 84  function detectSocialPlatform(url) {
 85    if (!url) return null;
 86    const lower = url.toLowerCase();
 87    if (lower.includes('linkedin.com')) return 'linkedin';
 88    if (lower.includes('x.com') || lower.includes('twitter.com')) return 'x';
 89    return null;
 90  }
 91  
 92  /**
 93   * Check if a Twitter/X.com URL has a valid username
 94   * @param {string} url - URL to check
 95   * @returns {boolean} True if URL has a username, false otherwise
 96   */
 97  function hasTwitterUsername(url) {
 98    if (!url || typeof url !== 'string') return false;
 99  
100    const lower = url.toLowerCase().trim();
101  
102    // Check if it's a Twitter/X.com URL
103    if (!lower.includes('twitter.com') && !lower.includes('x.com')) {
104      return true; // Not a Twitter/X URL, so it's valid
105    }
106  
107    // Extract path after domain
108    const match = lower.match(/(?:twitter\.com|x\.com)\/([^/?#]+)/i);
109  
110    if (!match || !match[1]) {
111      return false; // No path after domain
112    }
113  
114    const username = match[1];
115  
116    // Invalid if it's just the root domain or common non-username paths
117    const invalidPaths = [
118      'home',
119      'explore',
120      'notifications',
121      'messages',
122      'settings',
123      'compose',
124      'i',
125      'intent',
126      'share',
127    ];
128  
129    return !invalidPaths.includes(username);
130  }
131  
/**
 * Clean up invalid Twitter/X.com links from contacts_json
 * Removes links that don't have a username (e.g., just "https://x.com" or "https://twitter.com").
 *
 * The input is never modified: the result is a shallow copy in which
 * `social_profiles`, `social`, `social.x` and `social.twitter` are replaced by
 * fresh filtered copies. All other properties are shared with the input.
 *
 * @param {Object} contactsJson - Parsed contacts JSON
 * @returns {Object} Cleaned contacts JSON (non-object input is returned unchanged)
 */
export function cleanInvalidSocialLinks(contactsJson) {
  if (!contactsJson || typeof contactsJson !== 'object') {
    return contactsJson;
  }

  // social_profiles entries are URL strings or objects carrying a `url`; anything else is kept
  const keepProfile = item => {
    if (typeof item === 'string') {
      return hasTwitterUsername(item);
    }
    if (item && typeof item === 'object' && item.url) {
      return hasTwitterUsername(item.url);
    }
    return true;
  };

  const cleaned = { ...contactsJson };

  if (Array.isArray(cleaned.social_profiles)) {
    cleaned.social_profiles = cleaned.social_profiles.filter(keepProfile);
  }

  // Old format: social.x and social.twitter arrays
  const legacySocial = cleaned.social;
  if (legacySocial && typeof legacySocial === 'object' && !Array.isArray(legacySocial)) {
    // The spread above is shallow, so `social` is still the caller's object.
    // Copy it before reassigning its arrays to avoid mutating the caller's data.
    const socialCopy = { ...legacySocial };
    for (const key of ['x', 'twitter']) {
      if (Array.isArray(socialCopy[key])) {
        socialCopy[key] = socialCopy[key].filter(url => hasTwitterUsername(url));
      }
    }
    cleaned.social = socialCopy;
  }

  return cleaned;
}
170  
/**
 * Extract ALL contacts from contacts_json with deduplication.
 *
 * Reads, in this order: emails, phone numbers (as SMS targets), a contact form
 * and social profiles. Both the current field names and the older ones are
 * understood. A URI that has already been emitted (compared lowercase and
 * trimmed) is skipped, whatever its type. Malformed entries (null items,
 * non-string labels or URLs) are skipped instead of throwing.
 *
 * @param {Object} contactsJson - Parsed contacts JSON from database
 * @param {string} countryCode - ISO country code for phone validation (e.g., 'AU', 'US', 'UK')
 * @returns {Array} Array of {type, uri, name, channel} objects
 */
// eslint-disable-next-line complexity -- Contact extraction requires checking multiple channel types and formats
export function getAllContacts(contactsJson, countryCode = 'AU') {
  if (!contactsJson) {
    return [];
  }

  const country = getCountryByCode(countryCode);
  const contacts = [];
  const seen = new Set(); // Normalized URIs already emitted

  // typeof null === 'object', so null entries must be excluded explicitly
  const isRecord = value => value !== null && typeof value === 'object';

  // Helper to add contact with deduplication
  const addContact = (type, uri, name = null, channel = null) => {
    if (!uri) return;

    // Normalize URI for deduplication (lowercase, trim)
    const normalizedUri = String(uri).toLowerCase().trim();
    if (seen.has(normalizedUri)) {
      return; // Skip duplicates
    }

    seen.add(normalizedUri);
    contacts.push({
      type,
      uri,
      name,
      channel: channel || type,
    });
  };

  // Extract emails (with government email filtering and basic format validation)
  const emailArray = contactsJson.email_addresses || contactsJson.email;
  if (Array.isArray(emailArray)) {
    for (const item of emailArray) {
      const rawEmail = isRecord(item) ? item.email : item;
      const emailAddress = rawEmail ? String(rawEmail).trim() : '';

      // Basic format guard — catch LLM hallucinations like "mail7", "0120-857-962", "your-email"
      if (!/^[^\s@]+@[^\s@]+\.[^\s@]{2,}$/.test(emailAddress)) {
        logger.warn(`Skipping invalid email address: ${rawEmail}`);
        continue;
      }

      // Skip government emails
      if (isGovernmentEmail(emailAddress)) {
        logger.info(`Skipping government email: ${emailAddress}`);
        continue;
      }

      // Store the trimmed address: that is the form that was validated above
      addContact('email', emailAddress, isRecord(item) ? item.label || null : null, 'email');
    }
  }

  // Extract phone numbers (for SMS)
  const phoneArray = contactsJson.phone_numbers || contactsJson.phone;
  if (Array.isArray(phoneArray)) {
    for (const item of phoneArray) {
      const phoneNumber = isRecord(item) ? item.number : item;
      const label = isRecord(item) ? item.label : null;

      // Hard-exclude fax numbers — they cannot receive SMS and produce "Hey Fax" greetings
      if (typeof label === 'string' && label.trim().toLowerCase() === 'fax') {
        logger.info(`Skipping fax number: ${phoneNumber}`);
        continue;
      }

      if (!phoneNumber) continue;

      // E.164 format validation: "+", a non-zero leading digit, 8-15 digits in total
      // Catches LLM hallucinations like "+0401040", "+01520153" (repeated digit patterns)
      const cleanedPhone = String(phoneNumber).replace(/[\s\-().]/g, '');
      if (!/^\+[1-9]\d{7,14}$/.test(cleanedPhone)) {
        logger.warn(`Skipping invalid phone number (not E.164): ${phoneNumber}`);
        continue;
      }

      // Country calling code validation: phone prefix must match site's country
      // Catches LLM hallucinations like "+6036884738" (Malaysia +60) for NZ sites
      if (country?.phoneFormat && !cleanedPhone.startsWith(country.phoneFormat)) {
        logger.warn(
          `Skipping phone ${cleanedPhone}: country code mismatch (expected ${country.phoneFormat} for ${countryCode})`
        );
        continue;
      }

      // Mobile numbers only — landlines are excluded from SMS (will fail for AU/GB/NZ/etc.)
      if (isMobileNumber(phoneNumber, country)) {
        addContact('sms', phoneNumber, label, 'sms');
      }
    }
  }

  // Extract contact form: the first source below that applies wins
  // New format: primary_contact_form.form_url
  if (contactsJson.primary_contact_form?.form_url) {
    addContact('form', contactsJson.primary_contact_form.form_url, null, 'form');
  }
  // Fallback: old form_action_url
  else if (contactsJson.primary_contact_form?.form_action_url) {
    addContact('form', contactsJson.primary_contact_form.form_action_url, null, 'form');
  }
  // Old format: form_url array
  else if (Array.isArray(contactsJson.form_url)) {
    contactsJson.form_url.forEach(url => addContact('form', url, null, 'form'));
  }
  // Alternative: key_pages array (look for contact page URLs)
  // Only use this fallback if primary_contact_form key is ABSENT — if it's null,
  // enrich already visited and confirmed there is no form there.
  else if (!('primary_contact_form' in contactsJson) && Array.isArray(contactsJson.key_pages)) {
    const contactPage = contactsJson.key_pages.find(
      url => typeof url === 'string' && /contact|support|get-in-touch/i.test(url)
    );
    if (contactPage) {
      addContact('form', contactPage, null, 'form');
    }
  }
  // Fallback: old contact_pages array
  else if (Array.isArray(contactsJson.contact_pages)) {
    contactsJson.contact_pages.forEach(url => addContact('form', url, null, 'form'));
  }

  // Extract social profiles
  // New format: social_profiles array of {url, label, platform} objects or plain URL strings
  if (Array.isArray(contactsJson.social_profiles)) {
    for (const item of contactsJson.social_profiles) {
      if (typeof item === 'string') {
        const platform = detectSocialPlatform(item);
        if (platform) addContact(platform, item, null, platform);
      } else if (isRecord(item)) {
        const { url, label = null, platform: explicitPlatform } = item;
        const raw = typeof explicitPlatform === 'string' ? explicitPlatform.toLowerCase() : undefined;
        // An explicit platform wins over URL detection; "twitter" is reported as "x"
        const platform = (raw === 'twitter' ? 'x' : raw) || detectSocialPlatform(url);
        if (url && platform) addContact(platform, url, label, platform);
      }
    }
  }
  // Old format: social object with one URL array per platform
  else if (contactsJson.social) {
    const { linkedin, x, twitter } = contactsJson.social;
    if (Array.isArray(linkedin)) {
      linkedin.forEach(url => addContact('linkedin', url, null, 'linkedin'));
    }
    if (Array.isArray(x)) {
      x.forEach(url => addContact('x', url, null, 'x'));
    }
    if (Array.isArray(twitter)) {
      twitter.forEach(url => addContact('x', url, null, 'x'));
    }
  }

  logger.info(`Extracted ${contacts.length} unique contacts`);

  return contacts;
}
347  
/**
 * Parse contacts_json and extract available contact methods.
 *
 * Produces at most one URI per channel. A channel key is only present when a
 * usable value was found, so callers can treat every key as available.
 * Malformed entries (null items, entries without an address/number) are
 * skipped instead of throwing or yielding `undefined` values.
 *
 * @param {Object} contactsJson - Parsed contacts JSON from database
 * @returns {Object} Available channels with URIs, keyed by channel name
 *   (email, sms, form, plus one key per social platform)
 */
// eslint-disable-next-line complexity -- Contact parsing requires checking multiple channel types
export function parseAvailableChannels(contactsJson) {
  if (!contactsJson) {
    return {};
  }

  const channels = {};

  // typeof null === 'object', so null entries must be excluded explicitly
  const isRecord = value => value !== null && typeof value === 'object';

  // Email addresses - entries are {email, label} objects or plain strings.
  // Take the first entry that actually carries an address.
  const emailArray = contactsJson.email_addresses || contactsJson.email;
  if (Array.isArray(emailArray)) {
    const firstEmail = emailArray
      .map(entry => (isRecord(entry) ? entry.email : entry))
      .find(Boolean);
    if (firstEmail) {
      channels.email = firstEmail;
    }
  }

  // Phone numbers (for SMS) - entries are {number, label} objects or plain strings
  const phoneArray = contactsJson.phone_numbers || contactsJson.phone;
  if (Array.isArray(phoneArray)) {
    const normalizedPhones = phoneArray
      .map(phone =>
        isRecord(phone)
          ? { number: phone.number, label: phone.label }
          : { number: phone, label: null }
      )
      .filter(phone => typeof phone.number === 'string' && phone.number.trim() !== '');

    // Prioritize by label: Mobile > Sales > null/other > Office/Landline
    // A Map (rather than a plain object) keeps labels such as "constructor"
    // from resolving to inherited Object.prototype members.
    const labelPriority = new Map([
      ['mobile', 1],
      ['cell', 1],
      ['sales', 2],
      ['jim', 3], // Named contacts often personal mobile
      ['null', 4],
      ['office', 5],
      ['landline', 5],
      ['fax', 99], // Avoid fax numbers
    ]);
    const priorityOf = ({ label }) =>
      labelPriority.get(typeof label === 'string' ? label.toLowerCase() : '') ?? 4;

    normalizedPhones.sort((a, b) => priorityOf(a) - priorityOf(b));

    // Australian mobile patterns, tested with separators removed:
    // 04xx..., +61 4xx..., 61 4xx..., +61 (0) 4xx...
    // "+61" alone is not enough: it is shared by every Australian number, landlines included.
    const isAustralianMobile = ({ number }) =>
      /^(?:\+?610?|0)4/.test(number.replace(/[\s\-().]/g, ''));

    const mobileNumbers = normalizedPhones.filter(isAustralianMobile);

    if (mobileNumbers.length > 0) {
      channels.sms = mobileNumbers[0].number;
    } else if (normalizedPhones.length > 0) {
      // If no mobile detected, use first phone anyway (after label-based sorting)
      channels.sms = normalizedPhones[0].number;
    }
  }

  // Contact forms: the first source below that applies wins
  // New format: primary_contact_form.form_url
  if (contactsJson.primary_contact_form?.form_url) {
    channels.form = contactsJson.primary_contact_form.form_url;
  }
  // Fallback: old form_action_url (for backwards compatibility)
  else if (contactsJson.primary_contact_form?.form_action_url) {
    channels.form = contactsJson.primary_contact_form.form_action_url;
  }
  // Old format: form_url array
  else if (Array.isArray(contactsJson.form_url) && contactsJson.form_url.length > 0) {
    channels.form = contactsJson.form_url[0];
  }
  // Alternative: key_pages array (look for contact page URLs)
  // Only use this fallback if primary_contact_form key is ABSENT — if it's null,
  // enrich already visited and confirmed there is no form there.
  else if (
    !('primary_contact_form' in contactsJson) &&
    Array.isArray(contactsJson.key_pages) &&
    contactsJson.key_pages.length > 0
  ) {
    // Find first URL that looks like a contact page
    const contactPage = contactsJson.key_pages.find(
      url => typeof url === 'string' && /contact|support|get-in-touch/i.test(url)
    );
    if (contactPage) {
      channels.form = contactPage;
    }
  }
  // Fallback: old contact_pages array (for backwards compatibility)
  else if (Array.isArray(contactsJson.contact_pages) && contactsJson.contact_pages.length > 0) {
    channels.form = contactsJson.contact_pages[0];
  }

  // Social media platforms - handle both formats
  // Only X.com and LinkedIn allow cold outreach
  // New format: social_profiles array of objects {url, label, platform} or strings
  if (Array.isArray(contactsJson.social_profiles)) {
    for (const profile of contactsJson.social_profiles) {
      let url;
      let platform;
      if (typeof profile === 'string') {
        url = profile;
        platform = detectSocialPlatform(profile);
      } else if (isRecord(profile)) {
        ({ url } = profile);
        const raw = typeof profile.platform === 'string' ? profile.platform.toLowerCase() : undefined;
        // An explicit platform wins over URL detection; "twitter" is reported as "x"
        platform = (raw === 'twitter' ? 'x' : raw) || detectSocialPlatform(url);
      }
      // First profile seen for a platform wins
      if (url && platform && !channels[platform]) {
        channels[platform] = url;
      }
    }
  }
  // Old format: social object with one URL array per platform
  else if (isRecord(contactsJson.social)) {
    const { linkedin, x, twitter } = contactsJson.social;
    if (Array.isArray(linkedin) && linkedin.length > 0) {
      [channels.linkedin] = linkedin;
    }
    if (Array.isArray(x) && x.length > 0) {
      [channels.x] = x;
    }
    // Twitter is now X
    if (Array.isArray(twitter) && twitter.length > 0) {
      [channels.x] = twitter;
    }
  }

  return channels;
}
493  
/**
 * Pair proposal variants with the contact channels available for a site.
 *
 * Each variant is first offered its preferred channel. Variants still without a
 * channel are then paired, in ascending variant order, with the channels left
 * over, best-ranked channel first. A channel is used at most once.
 *
 * @param {Object} contactsJson - Parsed contacts JSON
 * @param {Array} variants - Array of proposal variants (1, 2, 3)
 * @returns {Array} Array of {variant, channel, uri} mappings, sorted by variant
 */
export function prioritizeContacts(contactsJson, variants = [1, 2, 3]) {
  const uriByChannel = parseAvailableChannels(contactsJson);
  const channelNames = Object.keys(uriByChannel);

  if (channelNames.length === 0) {
    logger.warn('No contact channels found');
    return [];
  }

  // Work on a sorted copy so the caller's array is left untouched
  const orderedVariants = [...variants].sort((a, b) => a - b);
  const takenChannels = new Set();
  const assignments = [];

  // Pass 1: give each variant its preferred channel when that channel exists and is still free
  orderedVariants.forEach(variant => {
    const wanted = VARIANT_CHANNEL_PREFERENCE[variant];
    if (!uriByChannel[wanted] || takenChannels.has(wanted)) {
      return;
    }
    takenChannels.add(wanted);
    assignments.push({ variant, channel: wanted, uri: uriByChannel[wanted] });
  });

  // Pass 2: pair the remaining variants with the remaining channels, by channel rank
  const matchedVariants = new Set(assignments.map(entry => entry.variant));
  const leftoverVariants = orderedVariants.filter(variant => !matchedVariants.has(variant));
  const leftoverChannels = channelNames
    .filter(name => !takenChannels.has(name))
    .sort((a, b) => CHANNEL_PRIORITY[a] - CHANNEL_PRIORITY[b]);

  const pairCount = Math.min(leftoverVariants.length, leftoverChannels.length);
  for (let idx = 0; idx < pairCount; idx += 1) {
    const channel = leftoverChannels[idx];
    assignments.push({
      variant: leftoverVariants[idx],
      channel,
      uri: uriByChannel[channel],
    });
    takenChannels.add(channel);
  }

  // Present the result in variant order
  assignments.sort((a, b) => a.variant - b.variant);

  const summary = assignments.map(a => `V${a.variant}→${a.channel}`).join(', ');
  logger.info(`Assigned ${assignments.length} variants to channels: ${summary}`);

  return assignments;
}
558  
/**
 * Update outbound message records with proper contact URIs.
 *
 * Loads the site's contact data, numbers its pending outbound messages 1..n in
 * id order, asks prioritizeContacts for a channel per number, and writes the
 * chosen channel and URI back to each message that received one.
 *
 * @param {number} siteId - Site ID to update messages for
 * @returns {Promise<number>} Number of messages updated (0 when the site has no pending outbound messages)
 * @throws {Error} When the site does not exist, has no contact data, or no
 *   contact channel is available; database errors are logged and rethrown
 */
export async function updateOutreachContacts(siteId) {
  try {
    const site = await getOne('SELECT id, domain FROM sites WHERE id = $1', [siteId]);

    if (!site) {
      throw new Error(`Site not found: ${siteId}`);
    }

    const contactsJson = getContactsDataWithFallback(siteId, site);
    if (!contactsJson) {
      throw new Error(`Site ${site.domain} has no contacts_json data`);
    }

    // Get pending outbound messages for this site.
    // ORDER BY makes the numbering below repeatable: without it the row order,
    // and therefore which message counts as variant 1, 2, 3, is unspecified.
    const outreaches = await getAll(
      `SELECT id, contact_method, contact_uri
       FROM messages
       WHERE site_id = $1
       AND direction = 'outbound'
       AND approval_status = 'pending'
       ORDER BY id`,
      [siteId]
    );

    if (outreaches.length === 0) {
      logger.warn(`No pending outbound messages found for site #${siteId}`);
      return 0;
    }

    // Get prioritized contact assignments (use sequential indices)
    const variants = outreaches.map((_, i) => i + 1);
    const assignments = prioritizeContacts(contactsJson, variants);

    if (assignments.length === 0) {
      throw new Error(`No contact channels available for site ${site.domain}`);
    }

    const assignmentByVariant = new Map(assignments.map(a => [a.variant, a]));

    // Update each message with proper contact info
    let updatedCount = 0;

    for (const [i, outreach] of outreaches.entries()) {
      const assignment = assignmentByVariant.get(i + 1);

      if (!assignment) {
        // More messages than available channels: this one keeps its current contact info
        logger.warn(`No contact assignment found for outreach #${outreach.id} index ${i + 1}`);
        continue;
      }

      await run(
        `UPDATE messages
         SET contact_method = $1, contact_uri = $2
         WHERE id = $3`,
        [assignment.channel, assignment.uri, outreach.id]
      );
      updatedCount++;
      logger.success(
        `Updated outreach #${outreach.id}: V${assignment.variant} → ${assignment.channel} (${assignment.uri})`
      );
    }

    logger.success(`Updated ${updatedCount}/${outreaches.length} outreaches for ${site.domain}`);

    return updatedCount;
  } catch (error) {
    logger.error(`Failed to update outreach contacts for site ${siteId}`, error);
    throw error;
  }
}
632  
/**
 * Bulk update all pending outreaches with contact URIs.
 *
 * Selects the sites that have pending outbound messages whose contact_uri is
 * NULL or the 'PENDING_CONTACT_EXTRACTION' placeholder, then runs
 * updateOutreachContacts for each site in turn. A failure for one site is
 * logged and counted; it does not stop the remaining sites.
 *
 * @param {number} limit - Maximum number of sites to process; any falsy value
 *   (null, 0, NaN) means no limit
 * @returns {Promise<Object>} Summary of updates: {total, succeeded, failed, totalOutreachesUpdated}
 * @throws {RangeError} When limit is given but is not a positive integer
 */
export async function bulkUpdateOutreachContacts(limit = null) {
  // Validate the limit and bind it as a query parameter instead of splicing it into the SQL text
  let rowLimit = null;
  if (limit) {
    rowLimit = Number(limit);
    if (!Number.isInteger(rowLimit) || rowLimit <= 0) {
      throw new RangeError(`limit must be a positive integer, got: ${limit}`);
    }
  }

  // Get all sites with pending outbound messages that need contact URIs updated
  const sites = await getAll(
    `SELECT DISTINCT s.id, s.domain
     FROM sites s
     JOIN messages m ON s.id = m.site_id
     WHERE m.direction = 'outbound'
     AND m.approval_status = 'pending'
     AND (m.contact_uri = 'PENDING_CONTACT_EXTRACTION' OR m.contact_uri IS NULL)
     ${rowLimit === null ? '' : 'LIMIT $1'}`,
    rowLimit === null ? [] : [rowLimit]
  );

  logger.info(`Updating contact URIs for ${sites.length} sites...`);

  const results = {
    total: sites.length,
    succeeded: 0,
    failed: 0,
    totalOutreachesUpdated: 0,
  };

  // Sites are processed one at a time
  for (const site of sites) {
    try {
      const count = await updateOutreachContacts(site.id);
      results.succeeded++;
      results.totalOutreachesUpdated += count;
    } catch (error) {
      logger.error(`Failed for site ${site.domain}:`, error);
      results.failed++;
    }
  }

  logger.success(
    `Bulk update complete: ${results.succeeded} sites, ${results.totalOutreachesUpdated} outreaches updated, ${results.failed} failures`
  );

  return results;
}
677  
/**
 * Build the outreach readiness report.
 *
 * One row per site that has pending outbound messages, giving the number of
 * such messages (variant_count), how many of them have a contact_uri other than
 * the 'PENDING_CONTACT_EXTRACTION' placeholder (ready_count), and the
 * comma-separated contact methods (channels). Rows are ordered by ready_count
 * descending, then by domain.
 *
 * @returns {Promise<Array>} Rows as returned by the database helper
 */
export async function getOutreachReadinessReport() {
  const readinessQuery = `SELECT
      s.id,
      s.domain,
      COUNT(m.id) AS variant_count,
      SUM(CASE WHEN m.contact_uri != 'PENDING_CONTACT_EXTRACTION' THEN 1 ELSE 0 END) AS ready_count,
      STRING_AGG(m.contact_method, ', ') AS channels
    FROM sites s
    JOIN messages m ON s.id = m.site_id
    WHERE m.direction = 'outbound'
    AND m.approval_status = 'pending'
    GROUP BY s.id, s.domain
    ORDER BY ready_count DESC, s.domain`;

  return getAll(readinessQuery, []);
}
701  
// CLI functionality
// Runs only when this file is the script Node was started with. The check
// compares filesystem paths: import.meta.url is a percent-encoded file:// URL,
// so comparing it with a hand-built `file://${path}` string fails for paths
// containing spaces or non-ASCII characters, and for Windows paths.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  const [, , command, argument] = process.argv;

  // Shared rejection handler: print the error message and exit with a failure status
  const exitWithFailure = error => {
    console.error(`\n\u274c Failed: ${error.message}\n`);
    process.exit(1);
  };

  if (command === 'update') {
    const siteId = Number.parseInt(argument, 10);

    // A missing, non-numeric or zero id is rejected
    if (!siteId) {
      console.error('Usage: node src/contacts/prioritize.js update <site_id>');
      process.exit(1);
    }

    updateOutreachContacts(siteId)
      .then(count => {
        console.log(`\n\u2705 Updated ${count} outreach(es)\n`);
        process.exit(0);
      })
      .catch(exitWithFailure);
  } else if (command === 'bulk') {
    const limit = argument ? Number.parseInt(argument, 10) : null;

    bulkUpdateOutreachContacts(limit)
      .then(results => {
        console.log('\n\u2705 Bulk update complete!\n');
        console.log(`Sites processed: ${results.succeeded}/${results.total}`);
        console.log(`Outreaches updated: ${results.totalOutreachesUpdated}`);
        console.log(`Failures: ${results.failed}\n`);
        process.exit(0);
      })
      .catch(exitWithFailure);
  } else if (command === 'report') {
    getOutreachReadinessReport()
      .then(report => {
        console.log(`\n\ud83d\udcca Outreach Readiness Report (${report.length} sites)\n`);

        if (report.length === 0) {
          console.log('No pending outreaches found.\n');
          process.exit(0);
        }

        report.forEach(r => {
          // A site is marked ready when every pending message has a contact URI
          const status = r.ready_count === r.variant_count ? '\u2705' : '\u23f3';
          console.log(
            `${status} ${r.domain} - ${r.ready_count}/${r.variant_count} ready - ${r.channels}`
          );
        });

        console.log('');
        process.exit(0);
      })
      .catch(exitWithFailure);
  } else {
    // Unknown or missing command: print usage and exit with a failure status
    console.log('Contact Prioritization Module');
    console.log('');
    console.log('Usage:');
    console.log('  update <site_id>  - Update contact URIs for a specific site');
    console.log('  bulk [limit]      - Update all pending outreaches');
    console.log('  report            - Show which outreaches are ready to send');
    console.log('');
    console.log('Examples:');
    console.log('  node src/contacts/prioritize.js update 42');
    console.log('  node src/contacts/prioritize.js bulk 10');
    console.log('  node src/contacts/prioritize.js report');
    console.log('');
    process.exit(1);
  }
}
778  
/**
 * Strip TLD from a domain to get the brand stem for name extraction.
 * e.g. "nickelectrical.com.au" → "nickelectrical", "jims-plumbing.co.nz" → "jims-plumbing"
 *
 * The domain is trimmed and lowercased first, so "WWW.Example.COM" is handled
 * the same way as "www.example.com".
 *
 * @param {string|null} domain - Domain name
 * @returns {string|null} Stem without a leading "www." and without the TLD, or
 *   null when domain is empty or not a string
 */
function _domainStem(domain) {
  if (!domain || typeof domain !== 'string') return null;
  return domain
    .trim()
    .toLowerCase()
    .replace(/^www\./, '')
    .replace(/\.(com|net|org|biz|info|co)\.[a-z]{2}$/, '') // compound TLDs (.com.au, .co.nz)
    .replace(/\.[a-z]{2,}$/, ''); // simple TLD (.com, .ca, .de)
}

/**
 * Async version of getAllContacts that resolves contact labels to real first names
 * via extractFirstname. Non-person labels (Office, Sales, Büro, etc.) become null.
 * Use this in proposal generation where the contact name affects the greeting.
 *
 * For each contact the name is taken from the first source that yields one:
 * the contact's label, then the local part of an email address, then the
 * domain stem.
 *
 * @param {Object} contactsJson - Parsed contacts JSON
 * @param {string} countryCode - ISO country code
 * @param {string|null} domain - Site domain used for the last-resort name lookup (optional)
 * @returns {Promise<Array>} Contacts with resolved .name field (null when no name was found)
 */
export async function getAllContactsWithNames(contactsJson, countryCode = 'AU', domain = null) {
  const contacts = getAllContacts(contactsJson, countryCode);
  const domainStem = _domainStem(domain);

  // The domain-stem lookup has the same input for every contact, so it is
  // started at most once and its result is shared by all contacts that need it.
  let domainNamePromise = null;
  const nameFromDomain = () => {
    if (!domainStem) return null;
    if (domainNamePromise === null) {
      domainNamePromise = Promise.resolve(extractFirstname(domainStem));
    }
    return domainNamePromise;
  };

  return Promise.all(
    contacts.map(async contact => {
      // Try label first
      if (contact.name) {
        const firstname = await extractFirstname(contact.name);
        if (firstname) return { ...contact, name: firstname };
      }
      // Fallback: try email address prefix (e.g. nick@domain.com → "Nick")
      if (contact.channel === 'email' && contact.uri && contact.uri.includes('@')) {
        const [prefix] = contact.uri.split('@');
        const firstname = await extractFirstname(prefix);
        if (firstname) return { ...contact, name: firstname };
      }
      // Final fallback: try domain stem (e.g. "nickelectrical.com.au" → "nickelectrical" → "Nick")
      const firstname = await nameFromDomain();
      return { ...contact, name: firstname || null };
    })
  );
}
826  
// Default export: every function this module also exports by name.
export default {
  cleanInvalidSocialLinks,
  getAllContacts,
  getAllContactsWithNames,
  parseAvailableChannels,
  prioritizeContacts,
  updateOutreachContacts,
  bulkUpdateOutreachContacts,
  getOutreachReadinessReport,
};