Cradicle Explorer

/ src / utils / template-proposals.js
template-proposals.js
   1  /**
   2   * Template-based Proposal Generation
   3   * Cost-free alternative to LLM-based proposals ($0.18/site → $0/site)
   4   *
   5   * Extracts scoring weaknesses from score_json and populates pre-written templates
   6   * Templates are tested, culturally optimized, and channel-specific
   7   */
   8  
   9  import { readFileSync, readdirSync } from 'fs';
  10  import { join, dirname } from 'path';
  11  import { fileURLToPath } from 'url';
  12  import Logger from './logger.js';
  13  import { spin } from './spintax.js';
  14  import { callLLM } from './llm-provider.js';
  15  import { safeJsonParse } from './error-handler.js';
  16  import { computeGrade } from '../score.js';
  17  import { wrapUntrusted } from './llm-sanitizer.js';
  18  
  19  const __filename = fileURLToPath(import.meta.url);
  20  const __dirname = dirname(__filename);
  21  const projectRoot = join(__dirname, '../..');
  22  
  23  const logger = new Logger('TemplateProposals');
  24  
  25  const POLISH_MODEL = process.env.POLISH_MODEL || 'google/gemini-2.0-flash-001';
  26  
  27  // Load prompts
  28  const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8');
  29  const HAIKU_ANALYZE_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-ANALYZE.md'), 'utf-8');
  30  
  31  /**
  32   * In-process circuit breaker for Haiku polish failures.
  33   *
  34   * When >50 JSON parse failures accumulate in a rolling 10-min window, the polish
  35   * step is bypassed entirely (proposals use the unpolished template text) until the
  36   * window resets.  This prevents thousands of wasted LLM calls during burst failures.
  37   *
  38   * Set env var HAIKU_POLISH_DEBUG=1 to force the polish pass even when the breaker
  39   * is open — useful for troubleshooting from the CLI without restarting the service.
  40   */
  41  const _polishBreaker = {
  42    failures: [],
  43    WINDOW_MS: 10 * 60 * 1000, // 10 minutes
  44    THRESHOLD: 50,
  45    isOpen() {
  46      const cutoff = Date.now() - this.WINDOW_MS;
  47      this.failures = this.failures.filter(t => t > cutoff);
  48      return this.failures.length >= this.THRESHOLD;
  49    },
  50    record() {
  51      this.failures.push(Date.now());
  52      if (this.failures.length === this.THRESHOLD) {
  53        logger.warn(
  54          `Haiku polish circuit breaker OPEN — ${this.THRESHOLD} JSON failures in 10 min. Polish bypassed until failures drop below threshold. Set HAIKU_POLISH_DEBUG=1 to force.`
  55        );
  56      }
  57    },
  58  };
  59  
  60  /**
  61   * Extract scoring weaknesses from score_json to populate templates
  62   * @param {Object} scoreData - Parsed score_json from database
  63   * @returns {Object} Template fields {primaryWeakness, secondaryWeakness, evidence, reasoning, industry, score, grade}
  64   */
  65  // Human-readable labels for factor_scores keys (new scoring format)
  66  const FACTOR_LABELS = {
  67    headline_quality: "site doesn't tell visitors what you do in the first few seconds",
  68    value_proposition: "nothing tells visitors why they should pick you over the next business in Google",
  69    unique_selling_proposition: "your site doesn't explain what makes you different",
  70    call_to_action: "no clear way to call or book — visitors don't know how to contact you",
  71    urgency_messaging: 'no reason for visitors to act now',
  72    hook_engagement: "visitors can't figure out your services in a quick glance",
  73    trust_signals: "no reviews or licences visible on your site — nothing to prove you're legit",
  74    imagery_design: 'weak visual design — stock photos or outdated look',
  75    offer_clarity: "visitors can't see what you're actually offering",
  76    contextual_appropriateness: 'poor relevance to your industry',
  77  };
  78  
  79  export function extractTemplateFields(scoreData) {
  80    if (!scoreData || (!scoreData.sections && !scoreData.factor_scores)) {
  81      return {
  82        primaryWeakness: 'weak call-to-action',
  83        secondaryWeakness: 'unclear value proposition',
  84        quickImprovementOpportunity:
  85          'add a clear call-to-action and highlight your key offer above the fold',
  86        evidence: 'Several key conversion elements are missing',
  87        reasoning: 'Missing trust signals and clear calls-to-action reduce visitor confidence',
  88        industry: 'local service',
  89        score: 0,
  90        grade: 'F',
  91        impact: 30,
  92      };
  93    }
  94  
  95    // Extract all factor scores — support both scoring formats:
  96    // New flat format: scoreData.factor_scores[name] = {score, reasoning, evidence}
  97    // Old nested format: scoreData.sections[section].criteria[name] = {score, explanation, reasoning}
  98    const factors = [];
  99    if (scoreData.factor_scores) {
 100      Object.entries(scoreData.factor_scores).forEach(([name, criteria]) => {
 101        if (criteria && typeof criteria.score === 'number') {
 102          factors.push({
 103            name: FACTOR_LABELS[name] || name.replace(/_/g, ' '), // eslint-disable-line security/detect-object-injection
 104            score: criteria.score,
 105            explanation: criteria.evidence || criteria.reasoning || '',
 106            reasoning: criteria.reasoning || criteria.evidence || '',
 107            section: 'general',
 108          });
 109        }
 110      });
 111    } else {
 112      Object.entries(scoreData.sections || {}).forEach(([sectionName, section]) => {
 113        if (section && section.criteria) {
 114          Object.entries(section.criteria).forEach(([criteriaName, criteria]) => {
 115            if (criteria && typeof criteria.score === 'number') {
 116              factors.push({
 117                name: criteriaName,
 118                score: criteria.score,
 119                explanation: criteria.explanation || '',
 120                reasoning: criteria.reasoning || criteria.explanation || '',
 121                section: sectionName,
 122              });
 123            }
 124          });
 125        }
 126      });
 127    }
 128  
 129    // Sort by score ascending (lowest = worst = primary weakness)
 130    factors.sort((a, b) => a.score - b.score);
 131  
 132    const primaryWeakness = factors[0] || {
 133      name: 'weak call-to-action',
 134      score: 5,
 135      explanation: 'Multiple conversion elements need improvement',
 136      reasoning: 'Weak conversion elements reduce lead generation potential',
 137    };
 138  
 139    // Prefer LLM-generated critical_weaknesses for secondary weakness name —
 140    // the scoring LLM already prioritised these; use [1] if two exist, [0] if only one.
 141    const cw = Array.isArray(scoreData.critical_weaknesses) ? scoreData.critical_weaknesses : [];
 142    const cwSecondary = (cw[1] ?? cw[0] ?? '').replace(/\.$/, '');
 143    const secondaryWeaknessName = cwSecondary
 144      ? cwSecondary.charAt(0).toLowerCase() + cwSecondary.slice(1)
 145      : null;
 146  
 147    const secondaryWeakness = secondaryWeaknessName
 148      ? {
 149          name: secondaryWeaknessName,
 150          score: 6,
 151          explanation: factors[1]?.explanation || '',
 152          reasoning: factors[1]?.reasoning || '',
 153        }
 154      : factors[1] || {
 155          name: 'unclear value proposition',
 156          score: 6,
 157          explanation: 'Value proposition could be made clearer',
 158          reasoning: 'Unclear value proposition reduces visitor confidence',
 159        };
 160  
 161    // Extract industry from factor_scores if available
 162    let industry = 'local service';
 163    if (
 164      scoreData.factor_scores &&
 165      scoreData.factor_scores.contextual_appropriateness &&
 166      scoreData.factor_scores.contextual_appropriateness.industry_context
 167    ) {
 168      industry = scoreData.factor_scores.contextual_appropriateness.industry_context;
 169    }
 170  
 171    // Calculate estimated impact (conversion loss percentage)
 172    // Lower scores = higher impact
 173    const avgScore =
 174      factors.slice(0, 3).reduce((sum, f) => sum + f.score, 0) / Math.min(3, factors.length);
 175    const impact = Math.round((10 - avgScore) * 5); // Score 0-10 maps to 50-0% impact
 176  
 177    // Filter out LLM non-answers for evidence (e.g. "None found", "N/A", empty)
 178    const NON_ANSWER =
 179      /^(none\s*found\.?|n\/a\.?|none\.?|not\s*applicable\.?|not\s*available\.?|\s*)$/i;
 180    const pickEvidence = f => {
 181      const e = f?.explanation || '';
 182      return NON_ANSWER.test(e.trim()) ? '' : e;
 183    };
 184    const evidence =
 185      pickEvidence(primaryWeakness) ||
 186      pickEvidence(secondaryWeakness) ||
 187      'Several key conversion elements need improvement';
 188    const reasoning =
 189      (NON_ANSWER.test((primaryWeakness.reasoning || '').trim())
 190        ? secondaryWeakness.reasoning
 191        : primaryWeakness.reasoning) || 'Improving these elements will increase lead generation';
 192  
 193    // Use LLM-generated quick_improvement_opportunities from score_json —
 194    // prefer [1] (second entry) to avoid repeating the primary weakness, fall back to [0].
 195    const qio = Array.isArray(scoreData.quick_improvement_opportunities)
 196      ? scoreData.quick_improvement_opportunities
 197      : [];
 198    const qioRaw = (qio[1] ?? qio[0] ?? '').replace(/\.$/, '');
 199    const quickImprovementOpportunity = qioRaw
 200      ? qioRaw.charAt(0).toLowerCase() + qioRaw.slice(1)
 201      : 'add a clear call-to-action and highlight your key offer above the fold';
 202  
 203    return {
 204      primaryWeakness: primaryWeakness.name,
 205      secondaryWeakness: secondaryWeakness.name,
 206      quickImprovementOpportunity,
 207      evidence,
 208      reasoning,
 209      industry,
 210      score: Math.round(scoreData.overall_calculation?.conversion_score || 0),
 211      grade: computeGrade(scoreData.overall_calculation?.conversion_score || 0),
 212      impact: Math.max(20, Math.min(50, impact)), // Clamp 20-50%
 213    };
 214  }
 215  
 216  /**
 217   * Haiku Pass 1: Analyze score_json to extract one concrete recommendation sentence.
 218   * Returns { industry, recommendation } ready for programmatic injection.
 219   *
 220   * Falls back to a deterministic recommendation from extractTemplateFields() on any failure.
 221   *
 222   * @param {Object} scoreData - Parsed score_json from database
 223   * @param {string|null} keyword - Raw search keyword (e.g. "heat pump tauranga")
 224   * @param {string} languageCode - ISO 639-1 language code
 225   * @param {string|null} countryCode - ISO 3166-1 alpha-2 country code
 226   * @returns {Promise<{industry: string, recommendation: string}>}
 227   */
 228  export async function analyzeScoreJson(
 229    scoreData,
 230    keyword,
 231    languageCode = 'en',
 232    countryCode = null
 233  ) {
 234    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 235    const season = getCurrentSeason(countryCode);
 236    const keywordSection = keyword ? `\nKEYWORD: ${keyword}` : '';
 237    const seasonSection = season ? `\nSEASON: ${season}` : '';
 238  
 239    const userMessage = `LANGUAGE: ${langName}${keywordSection}${seasonSection}
 240  SCORE_JSON:
 241  ${wrapUntrusted(JSON.stringify(scoreData), 'score_json')}`;
 242  
 243    const ANALYZE_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';
 244  
 245    for (let attempt = 1; attempt <= 2; attempt++) {
 246      try {
 247        const { content } = await callLLM({
 248          model: ANALYZE_MODEL,
 249          temperature: 0,
 250          max_tokens: 300,
 251          json_mode: true,
 252          messages: [
 253            { role: 'system', content: HAIKU_ANALYZE_PROMPT },
 254            { role: 'user', content: userMessage },
 255          ],
 256          stage: 'proposals',
 257        });
 258  
 259        const result = safeJsonParse(content);
 260        if (!result || typeof result.recommendation !== 'string' || !result.recommendation.trim()) {
 261          logger.warn(`analyzeScoreJson attempt ${attempt}/2: invalid/empty JSON response`);
 262          continue;
 263        }
 264  
 265        let industry = (result.industry || '').trim() || _extractIndustry(keyword);
 266        // If Haiku returned the raw keyword unchanged (failed to categorise), apply heuristic
 267        if (keyword && industry.toLowerCase() === keyword.trim().toLowerCase()) {
 268          industry = _extractIndustry(keyword);
 269        }
 270        const recommendation = result.recommendation.trim();
 271        // Use Sonnet-provided SMS fragment if valid, else derive from recommendation
 272        const smsFrag = (result.recommendation_sms || '').trim();
 273        const recommendation_sms =
 274          smsFrag.length > 0 && smsFrag.length <= 50 ? smsFrag : _smsFragment(recommendation);
 275  
 276        if (!recommendation_sms) {
 277          logger.warn(
 278            `analyzeScoreJson attempt ${attempt}/2: recommendation_sms blank for keyword="${keyword}"`
 279          );
 280          continue;
 281        }
 282  
 283        return { industry, recommendation, recommendation_sms };
 284      } catch (err) {
 285        logger.warn(`analyzeScoreJson attempt ${attempt}/2 error: ${err.message}`);
 286        if (attempt < 2) continue;
 287        throw new Error(`analyzeScoreJson failed after 2 attempts: ${err.message}`);
 288      }
 289    }
 290  
 291    throw new Error('analyzeScoreJson: recommendation_sms blank after 2 LLM attempts');
 292  }
 293  
 294  /**
 295   * Fallback: extract a bare industry label from a keyword when Haiku is unavailable.
 296   * For 2-word keywords: strip the last word unless the keyword is a known compound service.
 297   * For 3-word keywords: keep intact (likely a compound service like "hot water repairs").
 298   * For 4+ word keywords: strip last word (city/suburb suffix).
 299   */
 300  function _extractIndustry(keyword) {
 301    if (!keyword) return 'local service';
 302    const parts = keyword.trim().split(/\s+/);
 303    if (parts.length === 1) return keyword.trim();
 304    if (parts.length >= 4) return parts.slice(0, -1).join(' ');
 305    if (parts.length === 3) return keyword.trim();
 306    // 2-word keyword: strip last word unless it's a known compound service name
 307    const COMPOUND_SERVICES = new Set([
 308      'pressure washing',
 309      'power washing',
 310      'heat pump',
 311      'heat pumps',
 312      'hot water',
 313      'metal roofing',
 314      'solar panels',
 315      'solar power',
 316      'lawn mowing',
 317      'lawn care',
 318      'window cleaning',
 319      'gutter cleaning',
 320      'air conditioning',
 321      'pest control',
 322      'tree removal',
 323      'tree service',
 324      'concrete cutting',
 325      'carpet cleaning',
 326      'tile laying',
 327      'brick laying',
 328      'pool cleaning',
 329      'pool repair',
 330      'roof repair',
 331      'roof restoration',
 332      'damp proofing',
 333      'water damage',
 334      'fire damage',
 335      'storm damage',
 336    ]);
 337    const lower = keyword.trim().toLowerCase();
 338    if (COMPOUND_SERVICES.has(lower)) return lower;
 339    return parts[0]; // strip location (last word)
 340  }
 341  
 342  /**
 343   * Compress a full recommendation sentence into a ≤50 char SMS fragment.
 344   * Takes first clause (before em-dash or ", which/so"), strips period, truncates at word boundary.
 345   */
 346  function _smsFragment(rec) {
 347    if (!rec) return '';
 348    const noTrailingPunct = rec.replace(/[.!?]$/, '').trim();
 349    // Take the first clause (before em-dash or ", which…" style continuation)
 350    const firstClause = noTrailingPunct
 351      .split(/\s*[—–]\s*|\s*,\s*(?:which|so|meaning|this)\s/i)[0]
 352      .trim();
 353    if (firstClause.length <= 50) return firstClause;
 354    // Truncate at word boundary
 355    const truncated = firstClause.slice(0, 50);
 356    const lastSpace = truncated.lastIndexOf(' ');
 357    return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
 358  }
 359  
 360  /**
 361   * Build a deterministic fallback recommendation from extractTemplateFields() data.
 362   * Used when the Haiku analysis LLM call fails.
 363   */
 364  function buildFallbackAnalysis(scoreData, keyword) {
 365    const fields = extractTemplateFields(scoreData);
 366    const industry = _extractIndustry(keyword);
 367    const evidence = fields.evidence || 'several conversion elements need improvement';
 368    const reasoning = fields.reasoning || 'improving these will increase lead generation';
 369    // Build a lowercase clause: "evidence — reasoning"
 370    const evidenceLower = evidence.charAt(0).toLowerCase() + evidence.slice(1).replace(/\.$/, '');
 371    const reasoningLower = reasoning.charAt(0).toLowerCase() + reasoning.slice(1).replace(/\.$/, '');
 372    const recommendation = `${evidenceLower} — ${reasoningLower}`;
 373    return {
 374      industry,
 375      recommendation,
 376      recommendation_sms: _smsFragment(evidenceLower),
 377    };
 378  }
 379  
 380  /**
 381   * Load templates for a specific country, language, and channel.
 382   * Lookup order:
 383   *   1. data/templates/{countryCode}/{languageCode}/{channel}.json  (new per-language path)
 384   *   2. data/templates/{countryCode}/{channel}.json  (legacy flat path, English only)
 385   * Throws if no template file found — no fallback to other countries.
 386   *
 387   * @param {string} countryCode - Two-letter country code (AU, US, etc.)
 388   * @param {string} languageCode - ISO 639-1 language code (en, hi, ja, etc.)
 389   * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 390   * @returns {Array<Object>} Template objects
 391   * @throws {Error} If no templates found for this country/language/channel
 392   */
 393  export function loadTemplates(countryCode, languageCode, channel) {
 394    const validChannels = ['sms', 'email'];
 395    if (!validChannels.includes(channel)) {
 396      logger.warn(`Channel ${channel} not supported for templates, using email`);
 397      channel = 'email';
 398    }
 399  
 400    // Normalize ISO 639-2 three-letter codes to ISO 639-1 two-letter codes
 401    const ISO639_2_TO_1 = {
 402      eng: 'en',
 403      fra: 'fr',
 404      deu: 'de',
 405      spa: 'es',
 406      ita: 'it',
 407      por: 'pt',
 408      nld: 'nl',
 409      jpn: 'ja',
 410      kor: 'ko',
 411      zho: 'zh',
 412      nor: 'no',
 413      nob: 'no',
 414      nno: 'no',
 415      ind: 'id',
 416      hin: 'hi',
 417      pol: 'pl',
 418      swe: 'sv',
 419      dan: 'da',
 420    };
 421    const rawLang = (languageCode || 'en').toLowerCase();
 422    const lang = ISO639_2_TO_1[rawLang] ?? rawLang; // eslint-disable-line security/detect-object-injection
 423  
 424    // 1. Try language-specific subdirectory path
 425    const langPath = join(projectRoot, `data/templates/${countryCode}/${lang}/${channel}.json`);
 426    try {
 427      const content = readFileSync(langPath, 'utf-8');
 428      const data = JSON.parse(content);
 429      if (data.templates?.length) return data.templates;
 430    } catch (_) {
 431      // not found at language-specific path — try legacy flat path for English
 432    }
 433  
 434    // 2. Legacy flat path (English) — also used as fallback for unrecognized/unsupported langs
 435    // e.g. 'zxx' (no linguistic content), 'fr_ca' (malformed BCP 47), etc.
 436    const flatPath = join(projectRoot, `data/templates/${countryCode}/${channel}.json`);
 437    try {
 438      const content = readFileSync(flatPath, 'utf-8');
 439      const data = JSON.parse(content);
 440      if (data.templates?.length) return data.templates;
 441    } catch (_) {
 442      // not found
 443    }
 444  
 445    // 3. Native language fallback — for countries without English flat templates (DE, FR, IT, etc.)
 446    // If detected language doesn't match the country's native language, try the first available subdir.
 447    const countryDir = join(projectRoot, `data/templates/${countryCode}`);
 448    try {
 449      const entries = readdirSync(countryDir, { withFileTypes: true });
 450      for (const entry of entries) {
 451        if (!entry.isDirectory() || entry.name === lang) continue; // skip already-tried lang
 452        const nativePath = join(countryDir, entry.name, `${channel}.json`);
 453        try {
 454          const content = readFileSync(nativePath, 'utf-8');
 455          const data = JSON.parse(content);
 456          if (data.templates?.length) return data.templates;
 457        } catch (_) {
 458          // not found in this subdir
 459        }
 460      }
 461    } catch (_) {
 462      // country dir not readable
 463    }
 464  
 465    // No templates found — throw, no fallback
 466    throw new Error(`No templates for ${countryCode}/${lang}/${channel}`);
 467  }
 468  
 469  /**
 470   * Select best template for this outreach
 471   * Selection criteria:
 472   * 1. Match primary weakness type (CTA template for CTA problems, trust template for trust problems)
 473   * 2. Rotate templates for testing (LRU)
 474   * 3. Weight toward better-performing templates after 1000+ sends
 475   *
 476   * @param {Array<Object>} templates - Available templates
 477   * @param {Object} fields - Template fields from extractTemplateFields()
 478   * @param {string} channel - Contact channel
 479   * @returns {Object} Selected template
 480   */
 481  export function selectTemplate(templates, fields, channel, hasFirstname = false) {
 482    if (!templates || templates.length === 0) {
 483      throw new Error(`No templates available for channel: ${channel}`);
 484    }
 485  
 486    // When a firstname is available, prefer templates that actually use it.
 487    // Fall back to the full pool only if all named templates are exhausted.
 488    let pool = templates;
 489    if (hasFirstname) {
 490      const named = templates.filter(
 491        t =>
 492          (t.body_spintax || '').includes('[firstname') ||
 493          (t.subject_spintax || '').includes('[firstname')
 494      );
 495      if (named.length > 0) pool = named;
 496    }
 497  
 498    // Rotation strategy: prefer template with lowest sends count.
 499    // After 1000+ sends, weight by conversion rate.
 500    // Tiebreak with Math.random() so equal-sends templates rotate rather than
 501    // always picking the same one (e.g. always the first in file order).
 502    const shuffled = [...pool].sort(() => Math.random() - 0.5);
 503    shuffled.sort((a, b) => {
 504      const sendsA = a.sends || 0;
 505      const sendsB = b.sends || 0;
 506  
 507      // After 1000+ sends, weight by conversion rate
 508      if (sendsA >= 1000 && sendsB >= 1000) {
 509        const convRateA = sendsA > 0 ? (a.conversions || 0) / sendsA : 0;
 510        const convRateB = sendsB > 0 ? (b.conversions || 0) / sendsB : 0;
 511        return convRateB - convRateA; // Higher conversion rate first
 512      }
 513  
 514      // Otherwise, prefer template with fewer sends (rotation testing)
 515      return sendsA - sendsB;
 516    });
 517  
 518    return shuffled[0];
 519  }
 520  
 521  // Non-person label patterns — safety net after LLM name extraction
 522  const NON_PERSON_WORDS = new Set([
 523    'office',
 524    'info',
 525    'sales',
 526    'admin',
 527    'support',
 528    'contact',
 529    'hello',
 530    'team',
 531    'reception',
 532    'enquiries',
 533    'enquiry',
 534    'general',
 535    'main',
 536    'emergency',
 537    'after',
 538    'hours',
 539    'mobile',
 540    'phone',
 541    'fax',
 542    'sms',
 543    'toll',
 544    'free',
 545    'tollfree',
 546    'hotline',
 547    'helpdesk',
 548    'helpline',
 549    'service',
 550    'services',
 551    'department',
 552    'head',
 553    'headquarters',
 554    'hq',
 555    'branch',
 556    'accounts',
 557    'billing',
 558    'booking',
 559    'bookings',
 560    'reservations',
 561    'store',
 562    'shop',
 563    'centre',
 564    'center',
 565    'clinic',
 566    'group',
 567    'media',
 568    'marketing',
 569    'hr',
 570    'recruiter',
 571    'recruitment',
 572    'website',
 573    // Generic directional/scale words that appear in email local-parts but are never firstnames
 574    'north',
 575    'south',
 576    'east',
 577    'west',
 578    'central',
 579    'national',
 580    'regional',
 581    'local',
 582  ]);
 583  
 584  /**
 585   * Deterministic check: does this string look like a real human first name?
 586   * Primary filter is the Haiku LLM call in getAllContactsWithNames; this is a
 587   * safety net for edge cases (city names, phone-type labels, etc.).
 588   */
 589  function isPersonFirstname(name) {
 590    if (!name || typeof name !== 'string') return false;
 591    const trimmed = name.trim();
 592    if (!trimmed || trimmed.length < 2 || trimmed.length > 25) return false;
 593    if (/\d/.test(trimmed)) return false; // contains digits
 594    if (/-/.test(trimmed) && trimmed.split('-').length > 2) return false; // triple-hyphen
 595    // Reject if any word in the label is a known non-person word
 596    const words = trimmed.toLowerCase().split(/[\s-]+/);
 597    if (words.some(w => NON_PERSON_WORDS.has(w))) return false;
 598    // Reject labels with 3+ words (e.g. "Head Of Marketing")
 599    if (words.length >= 3) return false;
 600    return true;
 601  }
 602  
 603  /**
 604   * Populate template with extracted fields
 605   * @param {string} template - Template string with {placeholders}
 606   * @param {Object} fields - Template fields
 607   * @param {Object} siteData - Site data (domain, keyword, etc.)
 608   * @param {Object} contact - Contact info (name, channel, uri)
 609   * @returns {string} Populated proposal text
 610   */
 611  export function populateTemplate(template, fields, siteData, contact = null, analysisData = null) {
 612    // Extract business name from domain (simple approach)
 613    const businessName = siteData.domain.split('.')[0].replace(/-/g, ' ');
 614  
 615    // Determine greeting — contact.name has already been filtered by getAllContactsWithNames
 616    // (Haiku LLM call), but keep a deterministic safety net for edge cases.
 617    const firstname = contact?.name || '';
 618    // Use empty string when no real name — templates control their own locale fallback:
 619    //   [firstname|there]  → English fallback (EN templates: "Hi there")
 620    //   [firstname|Hallo]  → German fallback (DE sms_002: "Hallo,")
 621    //   [firstname|]       → empty; punctuation cleanup handles "Guten Tag ," → "Guten Tag,"
 622    const greetingRaw = isPersonFirstname(firstname) ? firstname.split(/\s+/)[0] : '';
 623    const greeting = greetingRaw ? greetingRaw.charAt(0).toUpperCase() + greetingRaw.slice(1) : '';
 624  
 625    // Build replacement map — all tokens filled programmatically.
 626    // analysis-derived fields (recommendation, industry) come from analyzeScoreJson() (Pass 1).
 627    const industry = analysisData?.industry || _extractIndustry(siteData.keyword);
 628    const replacements = {
 629      business_name: businessName,
 630      firstname: greeting,
 631      grade: fields.grade,
 632      score: fields.score,
 633      industry,
 634      impact: fields.impact,
 635      domain: siteData.domain,
 636      recommendation: (() => {
 637        const r = (analysisData?.recommendation || '').trim();
 638        return r && !/[.!?]$/.test(r) ? `${r}.` : r;
 639      })(),
 640      recommendation_sms: (analysisData?.recommendation_sms || '').trim(),
 641      sites_scored: fields.sites_scored || null,
 642    };
 643  
 644    // Resolve [key|fallback] and [key] variables BEFORE spinning.
 645    // Critical: [firstname|there] inside a {…} spintax group has a | that the
 646    // spintax engine would split on, producing garbled output like "Hey [firstname"
 647    // or "there]!" as separate spin options.
 648    const resolved = template.replace(/\[(\w+)(?:\|([^\]]*))?\]/g, (_match, key, fallback) => {
 649      const val = replacements[key]; // eslint-disable-line security/detect-object-injection
 650      if (val !== null && val !== undefined && val !== '') return String(val);
 651      return fallback !== undefined ? fallback : '';
 652    });
 653  
 654    // Spin after variable resolution so {option1|option2} has clean, unambiguous input
 655    let populated = spin(resolved);
 656  
 657    // Clean up spacing artifacts (e.g. empty value leaves "Hi ,")
 658    // Collapse multiple spaces only — preserve \n\n paragraph breaks
 659    populated = populated
 660      .replace(/ ([,!?.;:])/g, '$1')
 661      .replace(/ {2,}/g, ' ')
 662      .replace(/\n{3,}/g, '\n\n')
 663      .trim();
 664  
 665    return populated;
 666  }
 667  
 668  /**
 669   * Throw if any [field] tokens remain unfilled in the text.
 670   * Called after populateTemplate() and after polishProposal() to catch
 671   * any missed tokens early — the outreach stage will catch the error and mark
 672   * the site as failed with the token name as the reason.
 673   *
 674   * @param {string} text - Text to check
 675   * @param {string} label - Descriptive label for the error message (e.g. 'body', 'subject')
 676   */
 677  export function checkForUnfilledTokens(text, label) {
 678    if (!text) return;
 679    const match = text.match(/\[[a-z_]+\]/);
 680    if (match) {
 681      throw new Error(`Unfilled token ${match[0]} in ${label} — template population incomplete`);
 682    }
 683  }
 684  
 685  // Countries in the Southern hemisphere (seasons are flipped relative to Northern)
 686  const SOUTHERN_COUNTRIES = new Set([
 687    'AU',
 688    'NZ',
 689    'ZA',
 690    'AR',
 691    'CL',
 692    'BR',
 693    'PE',
 694    'UY',
 695    'PY',
 696    'BO',
 697    'EC',
 698    'MZ',
 699    'ZW',
 700    'BW',
 701    'NA',
 702    'LS',
 703    'SZ',
 704    'MG',
 705  ]);
 706  
 707  // Tropical/equatorial countries — no meaningful seasons
 708  const TROPICAL_COUNTRIES = new Set([
 709    'SG',
 710    'MY',
 711    'ID',
 712    'TH',
 713    'PH',
 714    'VN',
 715    'KH',
 716    'MM',
 717    'BN',
 718    'PG',
 719    'FJ',
 720    'TL',
 721  ]);
 722  
 723  /**
 724   * Return the current meteorological season for a country, or null if seasonal
 725   * references don't apply (tropical countries or unknown country code).
 726   *
 727   * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (e.g. 'AU', 'US')
 728   * @param {Date} [date=new Date()] - Date to use (defaults to today)
 729   * @returns {'Spring'|'Summer'|'Autumn'|'Winter'|null}
 730   */
 731  export function getCurrentSeason(countryCode, date = new Date()) {
 732    if (!countryCode || TROPICAL_COUNTRIES.has(countryCode)) return null;
 733  
 734    const month = date.getMonth(); // 0=Jan … 11=Dec
 735    const southern = SOUTHERN_COUNTRIES.has(countryCode);
 736  
 737    // Northern meteorological seasons; flip for Southern
 738    if (month >= 2 && month <= 4) return southern ? 'Autumn' : 'Spring';
 739    if (month >= 5 && month <= 7) return southern ? 'Winter' : 'Summer';
 740    if (month >= 8 && month <= 10) return southern ? 'Spring' : 'Autumn';
 741    return southern ? 'Summer' : 'Winter'; // Dec, Jan, Feb
 742  }
 743  
 744  // ISO 639-1 code → full language name for polish prompt
 745  const POLISH_LANG_NAMES = {
 746    en: 'English',
 747    de: 'German',
 748    fr: 'French',
 749    it: 'Italian',
 750    ja: 'Japanese',
 751    ko: 'Korean',
 752    zh: 'Chinese',
 753    es: 'Spanish',
 754    nl: 'Dutch',
 755    pl: 'Polish',
 756    sv: 'Swedish',
 757    da: 'Danish',
 758    no: 'Norwegian',
 759    id: 'Indonesian',
 760    hi: 'Hindi',
 761  };
 762  
 763  /**
 764   * Haiku Pass 2: Polish a fully-populated proposal.
 765   * All [field] tokens must already be filled before calling this.
 766   * Tasks: fix grammar, capitalisation, punctuation; compress SMS to ≤160 chars;
 767   *        polish subject line. Does NOT fill tokens or analyse score_json.
 768   *
 769   * @param {string} text - Proposal body (all tokens pre-filled)
 770   * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 771   * @param {string} languageCode - ISO 639-1 language code (en, de, fr, etc.)
 772   * @param {string|null} subjectLine - Email subject line (email/form only)
 773   * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (for season constraint)
 774   * @returns {Promise<{text: string, subjectLine: string|null}>} Polished text and subject
 775   */
 776  export async function polishProposal(
 777    text,
 778    channel,
 779    languageCode = 'en',
 780    subjectLine = null,
 781    countryCode = null
 782  ) {
 783    // Circuit breaker: skip LLM call when JSON failure rate is high.
 784    // Bypass with HAIKU_POLISH_DEBUG=1 for troubleshooting.
 785    if (_polishBreaker.isOpen() && process.env.HAIKU_POLISH_DEBUG !== '1') {
 786      return { text, subjectLine };
 787    }
 788  
 789    const isEmailChannel = channel === 'email' || channel === 'form';
 790  
 791    // SMS-specific fast path: skip polish when text is already within the 160-char limit.
 792    // Haiku's only SMS task is compression — if already short, it adds nothing and
 793    // sometimes garbles short texts (merged words, structural JSON errors).
 794    // 155 chars = 5-char buffer for sender IDs that compliance may append after this call.
 795    if (channel === 'sms' && text.length <= 155 && process.env.HAIKU_POLISH_DEBUG !== '1') {
 796      return { text, subjectLine: null };
 797    }
 798  
 799    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 800  
 801    const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
 802    const season = getCurrentSeason(countryCode);
 803    const seasonSection = season ? `\nSEASON: ${season}` : '';
 804  
 805    const userMessage = `CHANNEL: ${channel}
 806  LANGUAGE: ${langName}${seasonSection}
 807  ---
 808  PROPOSAL BODY:
 809  ${text}${subjectSection}`;
 810  
 811    try {
 812      const { content } = await callLLM({
 813        model: POLISH_MODEL,
 814        temperature: 0,
 815        max_tokens: 3000,
 816        json_mode: true,
 817        messages: [
 818          { role: 'system', content: HAIKU_POLISH_PROMPT },
 819          { role: 'user', content: userMessage },
 820        ],
 821        stage: 'proposals',
 822      });
 823  
 824      const result = safeJsonParse(content);
 825      if (!result || typeof result.body !== 'string' || result.body.trim() === '') {
 826        _polishBreaker.record();
 827        logger.warn(`Polish (${POLISH_MODEL}) returned invalid response — using original`, {
 828          hasResult: !!result,
 829          bodyType: result ? typeof result.body : 'n/a',
 830          bodyEmpty: result?.body?.trim() === '',
 831          keys: result ? Object.keys(result) : [],
 832          contentSnippet: content?.substring(0, 100),
 833          breakerFailures: _polishBreaker.failures.length,
 834        });
 835        return { text, subjectLine };
 836      }
 837  
 838      // Guard: if Haiku collapsed ANY paragraph breaks, reject the body and use the original.
 839      // Paragraph breaks (double newlines) are structural — losing them breaks email formatting.
 840      // Zero-tolerance: if input had N breaks, output must also have N breaks.
 841      const inputBreaks = (text.match(/\n\n/g) || []).length;
 842      const outputBreaks = (result.body.match(/\n\n/g) || []).length;
 843      const polishedBody =
 844        inputBreaks >= 1 && outputBreaks < inputBreaks
 845          ? (logger.warn('Polish collapsed paragraph breaks — keeping original body'), text)
 846          : result.body.trim();
 847  
 848      return {
 849        text: polishedBody,
 850        subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null,
 851      };
 852    } catch (err) {
 853      logger.warn(`Polish (${POLISH_MODEL}) failed: ${err.message} — using original`);
 854      return { text, subjectLine };
 855    }
 856  }
 857  
 858  /**
 859   * @deprecated Use polishProposal() instead.
 860   * Kept for backwards compatibility — delegates to the new polish function.
 861   */
 862  export async function shortenSmsWithHaiku(text) {
 863    const polished = await polishProposal(text, 'sms', 'en', null, null);
 864    return polished.text;
 865  }
 866  
/**
 * Legacy export name for polishProposal, kept so that imports using the old
 * identifier continue to resolve. Both names refer to the same function.
 *
 * @deprecated Renamed to polishProposal
 */
export const polishProposalWithHaiku = polishProposal;
 869  
 870  /**
 871   * Generate proposal using templates (replaces LLM generation)
 872   * @param {Object} siteData - Site data from database (must include country_code, language_code)
 873   * @param {Object} scoreData - Parsed score_json
 874   * @param {Object} contact - Contact object {name, channel, uri}
 875   * @returns {Promise<Object>} Generated proposal {proposalText, templateId, subjectLine}
 876   * @throws {Error} If no templates found for this country/language/channel
 877   */
 878  export async function generateTemplateProposal(
 879    siteData,
 880    scoreData,
 881    contact,
 882    cachedAnalysis = null
 883  ) {
 884    const fields = extractTemplateFields(scoreData);
 885    const countryCode = siteData.country_code || 'AU';
 886    const languageCode = siteData.language_code || null;
 887    const channel = contact.channel || 'email';
 888  
 889    // Pass 1: Haiku analyzes score_json → concrete recommendation sentence + industry.
 890    // Use cachedAnalysis if provided (same result for all contacts on a site).
 891    const analysisData =
 892      cachedAnalysis ||
 893      (await analyzeScoreJson(
 894        scoreData,
 895        siteData.keyword || null,
 896        languageCode || 'en',
 897        countryCode
 898      ));
 899  
 900    // Throws if no template found — caller handles the error
 901    const templates = loadTemplates(countryCode, languageCode, channel);
 902    const template = selectTemplate(templates, fields, channel, !!contact?.name);
 903  
 904    // Populate template — all tokens including [recommendation]/[industry] filled here
 905    let proposalText = populateTemplate(
 906      template.body_spintax,
 907      fields,
 908      siteData,
 909      contact,
 910      analysisData
 911    );
 912  
 913    // If a firstname is known but didn't land in this spin (optional greeting branch missed),
 914    // re-spin up to 9 more times — (0.5)^10 ≈ 0.1% chance of still missing after 10 tries.
 915    if (contact?.name) {
 916      const { name } = contact;
 917      for (let i = 0; i < 9 && !proposalText.includes(name); i++) {
 918        proposalText = populateTemplate(
 919          template.body_spintax,
 920          fields,
 921          siteData,
 922          contact,
 923          analysisData
 924        );
 925      }
 926    }
 927  
 928    // SMS: re-spin up to 3 times to try to reach ≤160 chars before the polish pass
 929    if (channel === 'sms' && proposalText.length > 160) {
 930      for (let i = 0; i < 3; i++) {
 931        const candidate = populateTemplate(
 932          template.body_spintax,
 933          fields,
 934          siteData,
 935          contact,
 936          analysisData
 937        );
 938        if (candidate.length < proposalText.length) proposalText = candidate;
 939        if (proposalText.length <= 160) break;
 940      }
 941    }
 942  
 943    // Email (and form-as-email) use subject lines; SMS/linkedin/x do not
 944    const usesEmailTemplate = channel === 'email' || channel === 'form';
 945    let rawSubjectLine = null;
 946  
 947    if (usesEmailTemplate) {
 948      // Prefer selected template's subject_spintax, fall back to others in the same file.
 949      // Throws if no usable subject_spintax exists — prevents silent blank subjects.
 950      const subjectCandidates = [template, ...templates.filter(t => t.id !== template.id)];
 951      let subjectSourceId = null;
 952  
 953      for (const t of subjectCandidates) {
 954        if (!t.subject_spintax?.trim()) continue;
 955        const candidate = populateTemplate(
 956          t.subject_spintax,
 957          fields,
 958          siteData,
 959          contact,
 960          analysisData
 961        );
 962        if (candidate) {
 963          rawSubjectLine = candidate;
 964          subjectSourceId = t.id;
 965          break;
 966        }
 967      }
 968  
 969      if (!rawSubjectLine) {
 970        throw new Error(
 971          `No usable subject_spintax found in any template for ${countryCode}/${languageCode}/${channel}`
 972        );
 973      }
 974  
 975      if (subjectSourceId !== template.id) {
 976        logger.warn(
 977          `Template ${template.id} has no usable subject_spintax — using subject from ${subjectSourceId}`
 978        );
 979      }
 980    }
 981  
 982    // Bracket check after population — any remaining [token] means a template bug
 983    checkForUnfilledTokens(proposalText, 'body');
 984    if (rawSubjectLine) checkForUnfilledTokens(rawSubjectLine, 'subject');
 985  
 986    // Pass 2: polish grammar, capitalisation, punctuation; compress SMS to ≤160 chars.
 987    // Does NOT fill tokens — all tokens are already resolved above.
 988    const polished = await polishProposal(
 989      proposalText,
 990      channel,
 991      languageCode || 'en',
 992      rawSubjectLine,
 993      countryCode || null
 994    );
 995  
 996    // Bracket check after polish — catches any token LLM may have reintroduced (shouldn't happen)
 997    checkForUnfilledTokens(polished.text, 'polished body');
 998    if (polished.subjectLine) checkForUnfilledTokens(polished.subjectLine, 'polished subject');
 999  
1000    logger.info(
1001      `Generated ${channel} proposal using template ${template.id} for ${siteData.domain} (${countryCode}/${languageCode})`
1002    );
1003  
1004    return {
1005      proposalText: polished.text,
1006      templateId: template.id,
1007      subjectLine: polished.subjectLine ?? rawSubjectLine,
1008    };
1009  }
1010  
// Default export: the template pipeline helpers bundled as a single object.
// All names except generateTemplateProposal are defined elsewhere in this module.
// getCurrentSeason, polishProposal, shortenSmsWithHaiku and polishProposalWithHaiku
// are NOT part of this object — they are available as named exports only.
export default {
  extractTemplateFields,
  analyzeScoreJson,
  loadTemplates,
  selectTemplate,
  populateTemplate,
  checkForUnfilledTokens,
  generateTemplateProposal,
};