/ src / utils / template-proposals.js
template-proposals.js
   1  /**
   2   * Template-based Proposal Generation
   3   * Cost-free alternative to LLM-based proposals ($0.18/site → $0/site)
   4   *
   5   * Extracts scoring weaknesses from score_json and populates pre-written templates
   6   * Templates are tested, culturally optimized, and channel-specific
   7   */
   8  
   9  import { readFileSync, readdirSync } from 'fs';
  10  import { join, dirname } from 'path';
  11  import { fileURLToPath } from 'url';
  12  import Logger from './logger.js';
  13  import { spin } from './spintax.js';
  14  import { callLLM } from './llm-provider.js';
  15  import { safeJsonParse } from './error-handler.js';
  16  import { computeGrade } from '../score.js';
  17  import { wrapUntrusted } from './llm-sanitizer.js';
  18  
  19  const __filename = fileURLToPath(import.meta.url);
  20  const __dirname = dirname(__filename);
  21  const projectRoot = join(__dirname, '../..');
  22  
  23  const logger = new Logger('TemplateProposals');
  24  
  25  const POLISH_MODEL = process.env.POLISH_MODEL || 'google/gemini-2.0-flash-001';
  26  
  27  // Load prompts
  28  const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8');
  29  const HAIKU_ANALYZE_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-ANALYZE.md'), 'utf-8');
  30  
  31  /**
  32   * In-process circuit breaker for Haiku polish failures.
  33   *
  34   * When >50 JSON parse failures accumulate in a rolling 10-min window, the polish
  35   * step is bypassed entirely (proposals use the unpolished template text) until the
  36   * window resets.  This prevents thousands of wasted LLM calls during burst failures.
  37   *
  38   * Set env var HAIKU_POLISH_DEBUG=1 to force the polish pass even when the breaker
  39   * is open — useful for troubleshooting from the CLI without restarting the service.
  40   */
  41  const _polishBreaker = {
  42    failures: [],
  43    WINDOW_MS: 10 * 60 * 1000, // 10 minutes
  44    THRESHOLD: 50,
  45    isOpen() {
  46      const cutoff = Date.now() - this.WINDOW_MS;
  47      this.failures = this.failures.filter(t => t > cutoff);
  48      return this.failures.length >= this.THRESHOLD;
  49    },
  50    record() {
  51      this.failures.push(Date.now());
  52      if (this.failures.length === this.THRESHOLD) {
  53        logger.warn(
  54          `Haiku polish circuit breaker OPEN — ${this.THRESHOLD} JSON failures in 10 min. Polish bypassed until failures drop below threshold. Set HAIKU_POLISH_DEBUG=1 to force.`
  55        );
  56      }
  57    },
  58  };
  59  
  60  /**
  61   * Extract scoring weaknesses from score_json to populate templates
  62   * @param {Object} scoreData - Parsed score_json from database
  63   * @returns {Object} Template fields {primaryWeakness, secondaryWeakness, evidence, reasoning, industry, score, grade}
  64   */
  65  // Human-readable labels for factor_scores keys (new scoring format)
  66  const FACTOR_LABELS = {
  67    headline_quality: "site doesn't tell visitors what you do in the first few seconds",
  68    value_proposition: "nothing tells visitors why they should pick you over the next business in Google",
  69    unique_selling_proposition: "your site doesn't explain what makes you different",
  70    call_to_action: "no clear way to call or book — visitors don't know how to contact you",
  71    urgency_messaging: 'no reason for visitors to act now',
  72    hook_engagement: "visitors can't figure out your services in a quick glance",
  73    trust_signals: "no reviews or licences visible on your site — nothing to prove you're legit",
  74    imagery_design: 'weak visual design — stock photos or outdated look',
  75    offer_clarity: "visitors can't see what you're actually offering",
  76    contextual_appropriateness: 'poor relevance to your industry',
  77  };
  78  
  79  export function extractTemplateFields(scoreData) {
  80    if (!scoreData || (!scoreData.sections && !scoreData.factor_scores)) {
  81      return {
  82        primaryWeakness: 'weak call-to-action',
  83        secondaryWeakness: 'unclear value proposition',
  84        quickImprovementOpportunity:
  85          'add a clear call-to-action and highlight your key offer above the fold',
  86        evidence: 'Several key conversion elements are missing',
  87        reasoning: 'Missing trust signals and clear calls-to-action reduce visitor confidence',
  88        industry: 'local service',
  89        score: 0,
  90        grade: 'F',
  91        impact: 30,
  92      };
  93    }
  94  
  95    // Extract all factor scores — support both scoring formats:
  96    // New flat format: scoreData.factor_scores[name] = {score, reasoning, evidence}
  97    // Old nested format: scoreData.sections[section].criteria[name] = {score, explanation, reasoning}
  98    const factors = [];
  99    if (scoreData.factor_scores) {
 100      Object.entries(scoreData.factor_scores).forEach(([name, criteria]) => {
 101        if (criteria && typeof criteria.score === 'number') {
 102          factors.push({
 103            name: FACTOR_LABELS[name] || name.replace(/_/g, ' '), // eslint-disable-line security/detect-object-injection
 104            score: criteria.score,
 105            explanation: criteria.evidence || criteria.reasoning || '',
 106            reasoning: criteria.reasoning || criteria.evidence || '',
 107            section: 'general',
 108          });
 109        }
 110      });
 111    } else {
 112      Object.entries(scoreData.sections || {}).forEach(([sectionName, section]) => {
 113        if (section && section.criteria) {
 114          Object.entries(section.criteria).forEach(([criteriaName, criteria]) => {
 115            if (criteria && typeof criteria.score === 'number') {
 116              factors.push({
 117                name: criteriaName,
 118                score: criteria.score,
 119                explanation: criteria.explanation || '',
 120                reasoning: criteria.reasoning || criteria.explanation || '',
 121                section: sectionName,
 122              });
 123            }
 124          });
 125        }
 126      });
 127    }
 128  
 129    // Sort by score ascending (lowest = worst = primary weakness)
 130    factors.sort((a, b) => a.score - b.score);
 131  
 132    const primaryWeakness = factors[0] || {
 133      name: 'weak call-to-action',
 134      score: 5,
 135      explanation: 'Multiple conversion elements need improvement',
 136      reasoning: 'Weak conversion elements reduce lead generation potential',
 137    };
 138  
 139    // Prefer LLM-generated critical_weaknesses for secondary weakness name —
 140    // the scoring LLM already prioritised these; use [1] if two exist, [0] if only one.
 141    const cw = Array.isArray(scoreData.critical_weaknesses) ? scoreData.critical_weaknesses : [];
 142    const cwSecondary = (cw[1] ?? cw[0] ?? '').replace(/\.$/, '');
 143    const secondaryWeaknessName = cwSecondary
 144      ? cwSecondary.charAt(0).toLowerCase() + cwSecondary.slice(1)
 145      : null;
 146  
 147    const secondaryWeakness = secondaryWeaknessName
 148      ? {
 149          name: secondaryWeaknessName,
 150          score: 6,
 151          explanation: factors[1]?.explanation || '',
 152          reasoning: factors[1]?.reasoning || '',
 153        }
 154      : factors[1] || {
 155          name: 'unclear value proposition',
 156          score: 6,
 157          explanation: 'Value proposition could be made clearer',
 158          reasoning: 'Unclear value proposition reduces visitor confidence',
 159        };
 160  
 161    // Extract industry from factor_scores if available
 162    let industry = 'local service';
 163    if (
 164      scoreData.factor_scores &&
 165      scoreData.factor_scores.contextual_appropriateness &&
 166      scoreData.factor_scores.contextual_appropriateness.industry_context
 167    ) {
 168      industry = scoreData.factor_scores.contextual_appropriateness.industry_context;
 169    }
 170  
 171    // Calculate estimated impact (conversion loss percentage)
 172    // Lower scores = higher impact
 173    const avgScore =
 174      factors.slice(0, 3).reduce((sum, f) => sum + f.score, 0) / Math.min(3, factors.length);
 175    const impact = Math.round((10 - avgScore) * 5); // Score 0-10 maps to 50-0% impact
 176  
 177    // Filter out LLM non-answers for evidence (e.g. "None found", "N/A", empty)
 178    const NON_ANSWER =
 179      /^(none\s*found\.?|n\/a\.?|none\.?|not\s*applicable\.?|not\s*available\.?|\s*)$/i;
 180    const pickEvidence = f => {
 181      const e = f?.explanation || '';
 182      return NON_ANSWER.test(e.trim()) ? '' : e;
 183    };
 184    const evidence =
 185      pickEvidence(primaryWeakness) ||
 186      pickEvidence(secondaryWeakness) ||
 187      'Several key conversion elements need improvement';
 188    const reasoning =
 189      (NON_ANSWER.test((primaryWeakness.reasoning || '').trim())
 190        ? secondaryWeakness.reasoning
 191        : primaryWeakness.reasoning) || 'Improving these elements will increase lead generation';
 192  
 193    // Use LLM-generated quick_improvement_opportunities from score_json —
 194    // prefer [1] (second entry) to avoid repeating the primary weakness, fall back to [0].
 195    const qio = Array.isArray(scoreData.quick_improvement_opportunities)
 196      ? scoreData.quick_improvement_opportunities
 197      : [];
 198    const qioRaw = (qio[1] ?? qio[0] ?? '').replace(/\.$/, '');
 199    const quickImprovementOpportunity = qioRaw
 200      ? qioRaw.charAt(0).toLowerCase() + qioRaw.slice(1)
 201      : 'add a clear call-to-action and highlight your key offer above the fold';
 202  
 203    return {
 204      primaryWeakness: primaryWeakness.name,
 205      secondaryWeakness: secondaryWeakness.name,
 206      quickImprovementOpportunity,
 207      evidence,
 208      reasoning,
 209      industry,
 210      score: Math.round(scoreData.overall_calculation?.conversion_score || 0),
 211      grade: computeGrade(scoreData.overall_calculation?.conversion_score || 0),
 212      impact: Math.max(20, Math.min(50, impact)), // Clamp 20-50%
 213    };
 214  }
 215  
 216  /**
 217   * Haiku Pass 1: Analyze score_json to extract one concrete recommendation sentence.
 218   * Returns { industry, recommendation } ready for programmatic injection.
 219   *
 220   * Falls back to a deterministic recommendation from extractTemplateFields() on any failure.
 221   *
 222   * @param {Object} scoreData - Parsed score_json from database
 223   * @param {string|null} keyword - Raw search keyword (e.g. "heat pump tauranga")
 224   * @param {string} languageCode - ISO 639-1 language code
 225   * @param {string|null} countryCode - ISO 3166-1 alpha-2 country code
 226   * @returns {Promise<{industry: string, recommendation: string}>}
 227   */
 228  export async function analyzeScoreJson(
 229    scoreData,
 230    keyword,
 231    languageCode = 'en',
 232    countryCode = null
 233  ) {
 234    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 235    const season = getCurrentSeason(countryCode);
 236    const keywordSection = keyword ? `\nKEYWORD: ${keyword}` : '';
 237    const seasonSection = season ? `\nSEASON: ${season}` : '';
 238  
 239    const userMessage = `LANGUAGE: ${langName}${keywordSection}${seasonSection}
 240  SCORE_JSON:
 241  ${wrapUntrusted(JSON.stringify(scoreData), 'score_json')}`;
 242  
 243    const ANALYZE_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';
 244  
 245    for (let attempt = 1; attempt <= 2; attempt++) {
 246      try {
 247        const { content } = await callLLM({
 248          model: ANALYZE_MODEL,
 249          temperature: 0,
 250          max_tokens: 300,
 251          json_mode: true,
 252          messages: [
 253            { role: 'system', content: HAIKU_ANALYZE_PROMPT },
 254            { role: 'user', content: userMessage },
 255          ],
 256          stage: 'proposals',
 257        });
 258  
 259        const result = safeJsonParse(content);
 260        if (!result || typeof result.recommendation !== 'string' || !result.recommendation.trim()) {
 261          logger.warn(`analyzeScoreJson attempt ${attempt}/2: invalid/empty JSON response`);
 262          continue;
 263        }
 264  
 265        let industry = (result.industry || '').trim() || _extractIndustry(keyword);
 266        // If Haiku returned the raw keyword unchanged (failed to categorise), apply heuristic
 267        if (keyword && industry.toLowerCase() === keyword.trim().toLowerCase()) {
 268          industry = _extractIndustry(keyword);
 269        }
 270        const recommendation = result.recommendation.trim();
 271        // Use Sonnet-provided SMS fragment if valid, else derive from recommendation
 272        const smsFrag = (result.recommendation_sms || '').trim();
 273        const recommendation_sms =
 274          smsFrag.length > 0 && smsFrag.length <= 50 ? smsFrag : _smsFragment(recommendation);
 275  
 276        if (!recommendation_sms) {
 277          logger.warn(
 278            `analyzeScoreJson attempt ${attempt}/2: recommendation_sms blank for keyword="${keyword}"`
 279          );
 280          continue;
 281        }
 282  
 283        return { industry, recommendation, recommendation_sms };
 284      } catch (err) {
 285        logger.warn(`analyzeScoreJson attempt ${attempt}/2 error: ${err.message}`);
 286        if (attempt < 2) continue;
 287        throw new Error(`analyzeScoreJson failed after 2 attempts: ${err.message}`);
 288      }
 289    }
 290  
 291    throw new Error('analyzeScoreJson: recommendation_sms blank after 2 LLM attempts');
 292  }
 293  
 294  /**
 295   * Fallback: extract a bare industry label from a keyword when Haiku is unavailable.
 296   * For 2-word keywords: strip the last word unless the keyword is a known compound service.
 297   * For 3-word keywords: keep intact (likely a compound service like "hot water repairs").
 298   * For 4+ word keywords: strip last word (city/suburb suffix).
 299   */
 300  function _extractIndustry(keyword) {
 301    if (!keyword) return 'local service';
 302    const parts = keyword.trim().split(/\s+/);
 303    if (parts.length === 1) return keyword.trim();
 304    if (parts.length >= 4) return parts.slice(0, -1).join(' ');
 305    if (parts.length === 3) return keyword.trim();
 306    // 2-word keyword: strip last word unless it's a known compound service name
 307    const COMPOUND_SERVICES = new Set([
 308      'pressure washing',
 309      'power washing',
 310      'heat pump',
 311      'heat pumps',
 312      'hot water',
 313      'metal roofing',
 314      'solar panels',
 315      'solar power',
 316      'lawn mowing',
 317      'lawn care',
 318      'window cleaning',
 319      'gutter cleaning',
 320      'air conditioning',
 321      'pest control',
 322      'tree removal',
 323      'tree service',
 324      'concrete cutting',
 325      'carpet cleaning',
 326      'tile laying',
 327      'brick laying',
 328      'pool cleaning',
 329      'pool repair',
 330      'roof repair',
 331      'roof restoration',
 332      'damp proofing',
 333      'water damage',
 334      'fire damage',
 335      'storm damage',
 336    ]);
 337    const lower = keyword.trim().toLowerCase();
 338    if (COMPOUND_SERVICES.has(lower)) return lower;
 339    return parts[0]; // strip location (last word)
 340  }
 341  
 342  /**
 343   * Compress a full recommendation sentence into a ≤50 char SMS fragment.
 344   * Takes first clause (before em-dash or ", which/so"), strips period, truncates at word boundary.
 345   */
 346  function _smsFragment(rec) {
 347    if (!rec) return '';
 348    const noTrailingPunct = rec.replace(/[.!?]$/, '').trim();
 349    // Take the first clause (before em-dash or ", which…" style continuation)
 350    const firstClause = noTrailingPunct
 351      .split(/\s*[—–]\s*|\s*,\s*(?:which|so|meaning|this)\s/i)[0]
 352      .trim();
 353    if (firstClause.length <= 50) return firstClause;
 354    // Truncate at word boundary
 355    const truncated = firstClause.slice(0, 50);
 356    const lastSpace = truncated.lastIndexOf(' ');
 357    return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
 358  }
 359  
 360  /**
 361   * Build a deterministic fallback recommendation from extractTemplateFields() data.
 362   * Used when the Haiku analysis LLM call fails.
 363   */
 364  function buildFallbackAnalysis(scoreData, keyword) {
 365    const fields = extractTemplateFields(scoreData);
 366    const industry = _extractIndustry(keyword);
 367    const evidence = fields.evidence || 'several conversion elements need improvement';
 368    const reasoning = fields.reasoning || 'improving these will increase lead generation';
 369    // Build a lowercase clause: "evidence — reasoning"
 370    const evidenceLower = evidence.charAt(0).toLowerCase() + evidence.slice(1).replace(/\.$/, '');
 371    const reasoningLower = reasoning.charAt(0).toLowerCase() + reasoning.slice(1).replace(/\.$/, '');
 372    const recommendation = `${evidenceLower} — ${reasoningLower}`;
 373    return {
 374      industry,
 375      recommendation,
 376      recommendation_sms: _smsFragment(evidenceLower),
 377    };
 378  }
 379  
 380  /**
 381   * Load templates for a specific country, language, and channel.
 382   * Lookup order:
 383   *   1. data/templates/{countryCode}/{languageCode}/{prefix}{channel}.json  (new per-language path)
 384   *   2. data/templates/{countryCode}/{prefix}{channel}.json  (legacy flat path, English only)
 385   * Where {prefix} is determined by campaignTag:
 386   *   - 'freefix' → 'freefix-'
 387   *   - 'review_acquisition' → 'review-campaign-'
 388   *   - 'standard' or null → '' (no prefix, original templates)
 389   * Throws if no template file found — no fallback to other countries.
 390   *
 391   * @param {string} countryCode - Two-letter country code (AU, US, etc.)
 392   * @param {string} languageCode - ISO 639-1 language code (en, hi, ja, etc.)
 393   * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 394   * @param {string|null} campaignTag - Campaign identifier ('freefix', 'review_acquisition', 'standard')
 395   * @returns {Array<Object>} Template objects
 396   * @throws {Error} If no templates found for this country/language/channel
 397   */
 398  export function loadTemplates(countryCode, languageCode, channel, campaignTag = null) {
 399    const validChannels = ['sms', 'email'];
 400    if (!validChannels.includes(channel)) {
 401      logger.warn(`Channel ${channel} not supported for templates, using email`);
 402      channel = 'email';
 403    }
 404  
 405    // Normalize ISO 639-2 three-letter codes to ISO 639-1 two-letter codes
 406    const ISO639_2_TO_1 = {
 407      eng: 'en',
 408      fra: 'fr',
 409      deu: 'de',
 410      spa: 'es',
 411      ita: 'it',
 412      por: 'pt',
 413      nld: 'nl',
 414      jpn: 'ja',
 415      kor: 'ko',
 416      zho: 'zh',
 417      nor: 'no',
 418      nob: 'no',
 419      nno: 'no',
 420      ind: 'id',
 421      hin: 'hi',
 422      pol: 'pl',
 423      swe: 'sv',
 424      dan: 'da',
 425    };
 426    const rawLang = (languageCode || 'en').toLowerCase();
 427    const lang = ISO639_2_TO_1[rawLang] ?? rawLang; // eslint-disable-line security/detect-object-injection
 428  
 429    // Campaign-specific template prefix (DR-128)
 430    const CAMPAIGN_PREFIXES = { freefix: 'freefix-', review_acquisition: 'review-campaign-' };
 431    const prefix = CAMPAIGN_PREFIXES[campaignTag] || ''; // eslint-disable-line security/detect-object-injection
 432    const templateFile = `${prefix}${channel}.json`;
 433  
 434    // 1. Try language-specific subdirectory path
 435    const langPath = join(projectRoot, `data/templates/${countryCode}/${lang}/${templateFile}`);
 436    try {
 437      const content = readFileSync(langPath, 'utf-8');
 438      const data = JSON.parse(content);
 439      if (data.templates?.length) return data.templates;
 440    } catch (_) {
 441      // not found at language-specific path — try legacy flat path for English
 442    }
 443  
 444    // 2. Legacy flat path (English) — also used as fallback for unrecognized/unsupported langs
 445    // e.g. 'zxx' (no linguistic content), 'fr_ca' (malformed BCP 47), etc.
 446    const flatPath = join(projectRoot, `data/templates/${countryCode}/${templateFile}`);
 447    try {
 448      const content = readFileSync(flatPath, 'utf-8');
 449      const data = JSON.parse(content);
 450      if (data.templates?.length) return data.templates;
 451    } catch (_) {
 452      // not found
 453    }
 454  
 455    // 3. Native language fallback — for countries without English flat templates (DE, FR, IT, etc.)
 456    // If detected language doesn't match the country's native language, try the first available subdir.
 457    const countryDir = join(projectRoot, `data/templates/${countryCode}`);
 458    try {
 459      const entries = readdirSync(countryDir, { withFileTypes: true });
 460      for (const entry of entries) {
 461        if (!entry.isDirectory() || entry.name === lang) continue; // skip already-tried lang
 462        const nativePath = join(countryDir, entry.name, templateFile);
 463        try {
 464          const content = readFileSync(nativePath, 'utf-8');
 465          const data = JSON.parse(content);
 466          if (data.templates?.length) return data.templates;
 467        } catch (_) {
 468          // not found in this subdir
 469        }
 470      }
 471    } catch (_) {
 472      // country dir not readable
 473    }
 474  
 475    // No templates found — throw, no fallback
 476    // Campaign-specific templates not found — fall back to standard templates for non-standard campaigns
 477    if (prefix) {
 478      logger.info(`No ${campaignTag} templates for ${countryCode}/${lang}/${channel}, falling back to standard`);
 479      return loadTemplates(countryCode, languageCode, channel, null);
 480    }
 481  
 482    throw new Error(`No templates for ${countryCode}/${lang}/${channel}`);
 483  }
 484  
 485  /**
 486   * Select best template for this outreach
 487   * Selection criteria:
 488   * 1. Match primary weakness type (CTA template for CTA problems, trust template for trust problems)
 489   * 2. Rotate templates for testing (LRU)
 490   * 3. Weight toward better-performing templates after 1000+ sends
 491   *
 492   * @param {Array<Object>} templates - Available templates
 493   * @param {Object} fields - Template fields from extractTemplateFields()
 494   * @param {string} channel - Contact channel
 495   * @returns {Object} Selected template
 496   */
 497  export function selectTemplate(templates, fields, channel, hasFirstname = false) {
 498    if (!templates || templates.length === 0) {
 499      throw new Error(`No templates available for channel: ${channel}`);
 500    }
 501  
 502    // When a firstname is available, prefer templates that actually use it.
 503    // Fall back to the full pool only if all named templates are exhausted.
 504    let pool = templates;
 505    if (hasFirstname) {
 506      const named = templates.filter(
 507        t =>
 508          (t.body_spintax || '').includes('[firstname') ||
 509          (t.subject_spintax || '').includes('[firstname')
 510      );
 511      if (named.length > 0) pool = named;
 512    }
 513  
 514    // Rotation strategy: prefer template with lowest sends count.
 515    // After 1000+ sends, weight by conversion rate.
 516    // Tiebreak with Math.random() so equal-sends templates rotate rather than
 517    // always picking the same one (e.g. always the first in file order).
 518    const shuffled = [...pool].sort(() => Math.random() - 0.5);
 519    shuffled.sort((a, b) => {
 520      const sendsA = a.sends || 0;
 521      const sendsB = b.sends || 0;
 522  
 523      // After 1000+ sends, weight by conversion rate
 524      if (sendsA >= 1000 && sendsB >= 1000) {
 525        const convRateA = sendsA > 0 ? (a.conversions || 0) / sendsA : 0;
 526        const convRateB = sendsB > 0 ? (b.conversions || 0) / sendsB : 0;
 527        return convRateB - convRateA; // Higher conversion rate first
 528      }
 529  
 530      // Otherwise, prefer template with fewer sends (rotation testing)
 531      return sendsA - sendsB;
 532    });
 533  
 534    return shuffled[0];
 535  }
 536  
 537  // Non-person label patterns — safety net after LLM name extraction
 538  const NON_PERSON_WORDS = new Set([
 539    'office',
 540    'info',
 541    'sales',
 542    'admin',
 543    'support',
 544    'contact',
 545    'hello',
 546    'team',
 547    'reception',
 548    'enquiries',
 549    'enquiry',
 550    'general',
 551    'main',
 552    'emergency',
 553    'after',
 554    'hours',
 555    'mobile',
 556    'phone',
 557    'fax',
 558    'sms',
 559    'toll',
 560    'free',
 561    'tollfree',
 562    'hotline',
 563    'helpdesk',
 564    'helpline',
 565    'service',
 566    'services',
 567    'department',
 568    'head',
 569    'headquarters',
 570    'hq',
 571    'branch',
 572    'accounts',
 573    'billing',
 574    'booking',
 575    'bookings',
 576    'reservations',
 577    'store',
 578    'shop',
 579    'centre',
 580    'center',
 581    'clinic',
 582    'group',
 583    'media',
 584    'marketing',
 585    'hr',
 586    'recruiter',
 587    'recruitment',
 588    'website',
 589    // Generic directional/scale words that appear in email local-parts but are never firstnames
 590    'north',
 591    'south',
 592    'east',
 593    'west',
 594    'central',
 595    'national',
 596    'regional',
 597    'local',
 598  ]);
 599  
 600  /**
 601   * Deterministic check: does this string look like a real human first name?
 602   * Primary filter is the Haiku LLM call in getAllContactsWithNames; this is a
 603   * safety net for edge cases (city names, phone-type labels, etc.).
 604   */
 605  function isPersonFirstname(name) {
 606    if (!name || typeof name !== 'string') return false;
 607    const trimmed = name.trim();
 608    if (!trimmed || trimmed.length < 2 || trimmed.length > 25) return false;
 609    if (/\d/.test(trimmed)) return false; // contains digits
 610    if (/-/.test(trimmed) && trimmed.split('-').length > 2) return false; // triple-hyphen
 611    // Reject if any word in the label is a known non-person word
 612    const words = trimmed.toLowerCase().split(/[\s-]+/);
 613    if (words.some(w => NON_PERSON_WORDS.has(w))) return false;
 614    // Reject labels with 3+ words (e.g. "Head Of Marketing")
 615    if (words.length >= 3) return false;
 616    return true;
 617  }
 618  
 619  /**
 620   * Populate template with extracted fields
 621   * @param {string} template - Template string with {placeholders}
 622   * @param {Object} fields - Template fields
 623   * @param {Object} siteData - Site data (domain, keyword, etc.)
 624   * @param {Object} contact - Contact info (name, channel, uri)
 625   * @returns {string} Populated proposal text
 626   */
 627  export function populateTemplate(template, fields, siteData, contact = null, analysisData = null) {
 628    // Extract business name from domain (simple approach)
 629    const businessName = siteData.domain.split('.')[0].replace(/-/g, ' ');
 630  
 631    // Determine greeting — contact.name has already been filtered by getAllContactsWithNames
 632    // (Haiku LLM call), but keep a deterministic safety net for edge cases.
 633    const firstname = contact?.name || '';
 634    // Use empty string when no real name — templates control their own locale fallback:
 635    //   [firstname|there]  → English fallback (EN templates: "Hi there")
 636    //   [firstname|Hallo]  → German fallback (DE sms_002: "Hallo,")
 637    //   [firstname|]       → empty; punctuation cleanup handles "Guten Tag ," → "Guten Tag,"
 638    const greetingRaw = isPersonFirstname(firstname) ? firstname.split(/\s+/)[0] : '';
 639    const greeting = greetingRaw ? greetingRaw.charAt(0).toUpperCase() + greetingRaw.slice(1) : '';
 640  
 641    // Build replacement map — all tokens filled programmatically.
 642    // analysis-derived fields (recommendation, industry) come from analyzeScoreJson() (Pass 1).
 643    const industry = analysisData?.industry || _extractIndustry(siteData.keyword);
 644    const replacements = {
 645      business_name: businessName,
 646      firstname: greeting,
 647      grade: fields.grade,
 648      score: fields.score,
 649      industry,
 650      impact: fields.impact,
 651      domain: siteData.domain,
 652      recommendation: (() => {
 653        const r = (analysisData?.recommendation || '').trim();
 654        return r && !/[.!?]$/.test(r) ? `${r}.` : r;
 655      })(),
 656      recommendation_sms: (analysisData?.recommendation_sms || '').trim(),
 657      sites_scored: fields.sites_scored || null,
 658      brand_url_short: process.env.BRAND_DOMAIN,
 659      brand_url: process.env.BRAND_URL,
 660      persona_name: process.env.PERSONA_NAME || '',
 661      persona_first_name: process.env.PERSONA_FIRST_NAME || '',
 662      brand_name: process.env.BRAND_NAME || '',
 663    };
 664  
 665    // Resolve [key|fallback] and [key] variables BEFORE spinning.
 666    // Critical: [firstname|there] inside a {…} spintax group has a | that the
 667    // spintax engine would split on, producing garbled output like "Hey [firstname"
 668    // or "there]!" as separate spin options.
 669    const resolved = template.replace(/\[(\w+)(?:\|([^\]]*))?\]/g, (_match, key, fallback) => {
 670      const val = replacements[key]; // eslint-disable-line security/detect-object-injection
 671      if (val !== null && val !== undefined && val !== '') return String(val);
 672      return fallback !== undefined ? fallback : '';
 673    });
 674  
 675    // Spin after variable resolution so {option1|option2} has clean, unambiguous input
 676    let populated = spin(resolved);
 677  
 678    // Clean up spacing artifacts (e.g. empty value leaves "Hi ,")
 679    // Collapse multiple spaces only — preserve \n\n paragraph breaks
 680    populated = populated
 681      .replace(/ ([,!?.;:])/g, '$1')
 682      .replace(/ {2,}/g, ' ')
 683      .replace(/\n{3,}/g, '\n\n')
 684      .trim();
 685  
 686    return populated;
 687  }
 688  
 689  /**
 690   * Throw if any [field] tokens remain unfilled in the text.
 691   * Called after populateTemplate() and after polishProposal() to catch
 692   * any missed tokens early — the outreach stage will catch the error and mark
 693   * the site as failed with the token name as the reason.
 694   *
 695   * @param {string} text - Text to check
 696   * @param {string} label - Descriptive label for the error message (e.g. 'body', 'subject')
 697   */
 698  export function checkForUnfilledTokens(text, label) {
 699    if (!text) return;
 700    const match = text.match(/\[[a-z_]+\]/);
 701    if (match) {
 702      throw new Error(`Unfilled token ${match[0]} in ${label} — template population incomplete`);
 703    }
 704  }
 705  
 706  // Countries in the Southern hemisphere (seasons are flipped relative to Northern)
 707  const SOUTHERN_COUNTRIES = new Set([
 708    'AU',
 709    'NZ',
 710    'ZA',
 711    'AR',
 712    'CL',
 713    'BR',
 714    'PE',
 715    'UY',
 716    'PY',
 717    'BO',
 718    'EC',
 719    'MZ',
 720    'ZW',
 721    'BW',
 722    'NA',
 723    'LS',
 724    'SZ',
 725    'MG',
 726  ]);
 727  
 728  // Tropical/equatorial countries — no meaningful seasons
 729  const TROPICAL_COUNTRIES = new Set([
 730    'SG',
 731    'MY',
 732    'ID',
 733    'TH',
 734    'PH',
 735    'VN',
 736    'KH',
 737    'MM',
 738    'BN',
 739    'PG',
 740    'FJ',
 741    'TL',
 742  ]);
 743  
 744  /**
 745   * Return the current meteorological season for a country, or null if seasonal
 746   * references don't apply (tropical countries or unknown country code).
 747   *
 748   * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (e.g. 'AU', 'US')
 749   * @param {Date} [date=new Date()] - Date to use (defaults to today)
 750   * @returns {'Spring'|'Summer'|'Autumn'|'Winter'|null}
 751   */
 752  export function getCurrentSeason(countryCode, date = new Date()) {
 753    if (!countryCode || TROPICAL_COUNTRIES.has(countryCode)) return null;
 754  
 755    const month = date.getMonth(); // 0=Jan … 11=Dec
 756    const southern = SOUTHERN_COUNTRIES.has(countryCode);
 757  
 758    // Northern meteorological seasons; flip for Southern
 759    if (month >= 2 && month <= 4) return southern ? 'Autumn' : 'Spring';
 760    if (month >= 5 && month <= 7) return southern ? 'Winter' : 'Summer';
 761    if (month >= 8 && month <= 10) return southern ? 'Spring' : 'Autumn';
 762    return southern ? 'Summer' : 'Winter'; // Dec, Jan, Feb
 763  }
 764  
 765  // ISO 639-1 code → full language name for polish prompt
 766  const POLISH_LANG_NAMES = {
 767    en: 'English',
 768    de: 'German',
 769    fr: 'French',
 770    it: 'Italian',
 771    ja: 'Japanese',
 772    ko: 'Korean',
 773    zh: 'Chinese',
 774    es: 'Spanish',
 775    nl: 'Dutch',
 776    pl: 'Polish',
 777    sv: 'Swedish',
 778    da: 'Danish',
 779    no: 'Norwegian',
 780    id: 'Indonesian',
 781    hi: 'Hindi',
 782  };
 783  
 784  /**
 785   * Haiku Pass 2: Polish a fully-populated proposal.
 786   * All [field] tokens must already be filled before calling this.
 787   * Tasks: fix grammar, capitalisation, punctuation; compress SMS to ≤160 chars;
 788   *        polish subject line. Does NOT fill tokens or analyse score_json.
 789   *
 790   * @param {string} text - Proposal body (all tokens pre-filled)
 791   * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 792   * @param {string} languageCode - ISO 639-1 language code (en, de, fr, etc.)
 793   * @param {string|null} subjectLine - Email subject line (email/form only)
 794   * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (for season constraint)
 795   * @returns {Promise<{text: string, subjectLine: string|null}>} Polished text and subject
 796   */
 797  export async function polishProposal(
 798    text,
 799    channel,
 800    languageCode = 'en',
 801    subjectLine = null,
 802    countryCode = null
 803  ) {
 804    // Circuit breaker: skip LLM call when JSON failure rate is high.
 805    // Bypass with HAIKU_POLISH_DEBUG=1 for troubleshooting.
 806    if (_polishBreaker.isOpen() && process.env.HAIKU_POLISH_DEBUG !== '1') {
 807      return { text, subjectLine };
 808    }
 809  
 810    const isEmailChannel = channel === 'email' || channel === 'form';
 811  
 812    // SMS-specific fast path: skip polish when text is already within the 160-char limit.
 813    // Haiku's only SMS task is compression — if already short, it adds nothing and
 814    // sometimes garbles short texts (merged words, structural JSON errors).
 815    // 155 chars = 5-char buffer for sender IDs that compliance may append after this call.
 816    if (channel === 'sms' && text.length <= 155 && process.env.HAIKU_POLISH_DEBUG !== '1') {
 817      return { text, subjectLine: null };
 818    }
 819  
 820    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 821  
 822    const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
 823    const season = getCurrentSeason(countryCode);
 824    const seasonSection = season ? `\nSEASON: ${season}` : '';
 825  
 826    const userMessage = `CHANNEL: ${channel}
 827  LANGUAGE: ${langName}${seasonSection}
 828  ---
 829  PROPOSAL BODY:
 830  ${text}${subjectSection}`;
 831  
 832    try {
 833      const { content } = await callLLM({
 834        model: POLISH_MODEL,
 835        temperature: 0,
 836        max_tokens: 3000,
 837        json_mode: true,
 838        messages: [
 839          { role: 'system', content: HAIKU_POLISH_PROMPT },
 840          { role: 'user', content: userMessage },
 841        ],
 842        stage: 'proposals',
 843      });
 844  
 845      const result = safeJsonParse(content);
 846      if (!result || typeof result.body !== 'string' || result.body.trim() === '') {
 847        _polishBreaker.record();
 848        logger.warn(`Polish (${POLISH_MODEL}) returned invalid response — using original`, {
 849          hasResult: !!result,
 850          bodyType: result ? typeof result.body : 'n/a',
 851          bodyEmpty: result?.body?.trim() === '',
 852          keys: result ? Object.keys(result) : [],
 853          contentSnippet: content?.substring(0, 100),
 854          breakerFailures: _polishBreaker.failures.length,
 855        });
 856        return { text, subjectLine };
 857      }
 858  
 859      // Guard: if Haiku collapsed ANY paragraph breaks, reject the body and use the original.
 860      // Paragraph breaks (double newlines) are structural — losing them breaks email formatting.
 861      // Zero-tolerance: if input had N breaks, output must also have N breaks.
 862      const inputBreaks = (text.match(/\n\n/g) || []).length;
 863      const outputBreaks = (result.body.match(/\n\n/g) || []).length;
 864      const polishedBody =
 865        inputBreaks >= 1 && outputBreaks < inputBreaks
 866          ? (logger.warn('Polish collapsed paragraph breaks — keeping original body'), text)
 867          : result.body.trim();
 868  
 869      return {
 870        text: polishedBody,
 871        subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null,
 872      };
 873    } catch (err) {
 874      logger.warn(`Polish (${POLISH_MODEL}) failed: ${err.message} — using original`);
 875      return { text, subjectLine };
 876    }
 877  }
 878  
 879  /**
 880   * @deprecated Use polishProposal() instead.
 881   * Kept for backwards compatibility — delegates to the new polish function.
 882   */
 883  export async function shortenSmsWithHaiku(text) {
 884    const polished = await polishProposal(text, 'sms', 'en', null, null);
 885    return polished.text;
 886  }
 887  
 888  /** @deprecated Renamed to polishProposal */
 889  export const polishProposalWithHaiku = polishProposal;
 890  
 891  /**
 892   * Generate proposal using templates (replaces LLM generation)
 893   * @param {Object} siteData - Site data from database (must include country_code, language_code)
 894   * @param {Object} scoreData - Parsed score_json
 895   * @param {Object} contact - Contact object {name, channel, uri}
 896   * @returns {Promise<Object>} Generated proposal {proposalText, templateId, subjectLine}
 897   * @throws {Error} If no templates found for this country/language/channel
 898   */
 899  export async function generateTemplateProposal(
 900    siteData,
 901    scoreData,
 902    contact,
 903    cachedAnalysis = null
 904  ) {
 905    const fields = extractTemplateFields(scoreData);
 906    if (!siteData.country_code) throw new Error(`country_code is required for template proposal (site ${siteData.id || siteData.domain})`);
 907    const countryCode = siteData.country_code;
 908    const languageCode = siteData.language_code || null;
 909    const channel = contact.channel || 'email';
 910  
 911    // Pass 1: Haiku analyzes score_json → concrete recommendation sentence + industry.
 912    // Use cachedAnalysis if provided (same result for all contacts on a site).
 913    const analysisData =
 914      cachedAnalysis ||
 915      (await analyzeScoreJson(
 916        scoreData,
 917        siteData.keyword || null,
 918        languageCode || 'en',
 919        countryCode
 920      ));
 921  
 922    // Throws if no template found — caller handles the error
 923    const campaignTag = siteData.campaign_tag || null;
 924    const templates = loadTemplates(countryCode, languageCode, channel, campaignTag);
 925    const template = selectTemplate(templates, fields, channel, !!contact?.name);
 926  
 927    // Populate template — all tokens including [recommendation]/[industry] filled here
 928    let proposalText = populateTemplate(
 929      template.body_spintax,
 930      fields,
 931      siteData,
 932      contact,
 933      analysisData
 934    );
 935  
 936    // If a firstname is known but didn't land in this spin (optional greeting branch missed),
 937    // re-spin up to 9 more times — (0.5)^10 ≈ 0.1% chance of still missing after 10 tries.
 938    if (contact?.name) {
 939      const { name } = contact;
 940      for (let i = 0; i < 9 && !proposalText.includes(name); i++) {
 941        proposalText = populateTemplate(
 942          template.body_spintax,
 943          fields,
 944          siteData,
 945          contact,
 946          analysisData
 947        );
 948      }
 949    }
 950  
 951    // SMS: re-spin up to 3 times to try to reach ≤160 chars before the polish pass
 952    if (channel === 'sms' && proposalText.length > 160) {
 953      for (let i = 0; i < 3; i++) {
 954        const candidate = populateTemplate(
 955          template.body_spintax,
 956          fields,
 957          siteData,
 958          contact,
 959          analysisData
 960        );
 961        if (candidate.length < proposalText.length) proposalText = candidate;
 962        if (proposalText.length <= 160) break;
 963      }
 964    }
 965  
 966    // Email (and form-as-email) use subject lines; SMS/linkedin/x do not
 967    const usesEmailTemplate = channel === 'email' || channel === 'form';
 968    let rawSubjectLine = null;
 969  
 970    if (usesEmailTemplate) {
 971      // Prefer selected template's subject_spintax, fall back to others in the same file.
 972      // Throws if no usable subject_spintax exists — prevents silent blank subjects.
 973      const subjectCandidates = [template, ...templates.filter(t => t.id !== template.id)];
 974      let subjectSourceId = null;
 975  
 976      for (const t of subjectCandidates) {
 977        if (!t.subject_spintax?.trim()) continue;
 978        const candidate = populateTemplate(
 979          t.subject_spintax,
 980          fields,
 981          siteData,
 982          contact,
 983          analysisData
 984        );
 985        if (candidate) {
 986          rawSubjectLine = candidate;
 987          subjectSourceId = t.id;
 988          break;
 989        }
 990      }
 991  
 992      if (!rawSubjectLine) {
 993        throw new Error(
 994          `No usable subject_spintax found in any template for ${countryCode}/${languageCode}/${channel}`
 995        );
 996      }
 997  
 998      if (subjectSourceId !== template.id) {
 999        logger.warn(
1000          `Template ${template.id} has no usable subject_spintax — using subject from ${subjectSourceId}`
1001        );
1002      }
1003    }
1004  
1005    // Bracket check after population — any remaining [token] means a template bug
1006    checkForUnfilledTokens(proposalText, 'body');
1007    if (rawSubjectLine) checkForUnfilledTokens(rawSubjectLine, 'subject');
1008  
1009    // Pass 2: polish grammar, capitalisation, punctuation; compress SMS to ≤160 chars.
1010    // Does NOT fill tokens — all tokens are already resolved above.
1011    const polished = await polishProposal(
1012      proposalText,
1013      channel,
1014      languageCode || 'en',
1015      rawSubjectLine,
1016      countryCode || null
1017    );
1018  
1019    // Bracket check after polish — catches any token LLM may have reintroduced (shouldn't happen)
1020    checkForUnfilledTokens(polished.text, 'polished body');
1021    if (polished.subjectLine) checkForUnfilledTokens(polished.subjectLine, 'polished subject');
1022  
1023    logger.info(
1024      `Generated ${channel} proposal using template ${template.id} for ${siteData.domain} (${countryCode}/${languageCode})`
1025    );
1026  
1027    return {
1028      proposalText: polished.text,
1029      templateId: template.id,
1030      subjectLine: polished.subjectLine ?? rawSubjectLine,
1031    };
1032  }
1033  
// Default export: the template pipeline helpers gathered into one object so
// they can be reached through a single default import.
const templateProposalsApi = {
  extractTemplateFields,
  analyzeScoreJson,
  loadTemplates,
  selectTemplate,
  populateTemplate,
  checkForUnfilledTokens,
  generateTemplateProposal,
};

export default templateProposalsApi;