template-proposals.js
1 /** 2 * Template-based Proposal Generation 3 * Cost-free alternative to LLM-based proposals ($0.18/site → $0/site) 4 * 5 * Extracts scoring weaknesses from score_json and populates pre-written templates 6 * Templates are tested, culturally optimized, and channel-specific 7 */ 8 9 import { readFileSync, readdirSync } from 'fs'; 10 import { join, dirname } from 'path'; 11 import { fileURLToPath } from 'url'; 12 import Logger from './logger.js'; 13 import { spin } from './spintax.js'; 14 import { callLLM } from './llm-provider.js'; 15 import { safeJsonParse } from './error-handler.js'; 16 import { computeGrade } from '../score.js'; 17 import { wrapUntrusted } from './llm-sanitizer.js'; 18 19 const __filename = fileURLToPath(import.meta.url); 20 const __dirname = dirname(__filename); 21 const projectRoot = join(__dirname, '../..'); 22 23 const logger = new Logger('TemplateProposals'); 24 25 const POLISH_MODEL = process.env.POLISH_MODEL || 'google/gemini-2.0-flash-001'; 26 27 // Load prompts 28 const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8'); 29 const HAIKU_ANALYZE_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-ANALYZE.md'), 'utf-8'); 30 31 /** 32 * In-process circuit breaker for Haiku polish failures. 33 * 34 * When >50 JSON parse failures accumulate in a rolling 10-min window, the polish 35 * step is bypassed entirely (proposals use the unpolished template text) until the 36 * window resets. This prevents thousands of wasted LLM calls during burst failures. 37 * 38 * Set env var HAIKU_POLISH_DEBUG=1 to force the polish pass even when the breaker 39 * is open — useful for troubleshooting from the CLI without restarting the service. 
/**
 * In-process circuit breaker for Haiku polish failures.
 *
 * Once THRESHOLD JSON failures have been recorded inside the rolling WINDOW_MS
 * window, isOpen() reports true and polishProposal() skips the LLM call
 * (proposals keep the unpolished template text) until older failures age out
 * of the window.
 *
 * Setting HAIKU_POLISH_DEBUG=1 makes polishProposal() run the polish pass even
 * while the breaker is open.
 */
const _polishBreaker = {
  failures: [],
  WINDOW_MS: 10 * 60 * 1000, // 10 minutes
  THRESHOLD: 50,
  /** Drop timestamps older than the window, then report whether the breaker is tripped. */
  isOpen() {
    const cutoff = Date.now() - this.WINDOW_MS;
    this.failures = this.failures.filter(t => t > cutoff);
    return this.failures.length >= this.THRESHOLD;
  },
  /** Record one failure; warn when the stored count lands exactly on the threshold. */
  record() {
    this.failures.push(Date.now());
    if (this.failures.length === this.THRESHOLD) {
      logger.warn(
        `Haiku polish circuit breaker OPEN — ${this.THRESHOLD} JSON failures in 10 min. Polish bypassed until failures drop below threshold. Set HAIKU_POLISH_DEBUG=1 to force.`
      );
    }
  },
};

// Human-readable labels for factor_scores keys (new scoring format)
const FACTOR_LABELS = {
  headline_quality: "site doesn't tell visitors what you do in the first few seconds",
  value_proposition:
    "nothing tells visitors why they should pick you over the next business in Google",
  unique_selling_proposition: "your site doesn't explain what makes you different",
  call_to_action: "no clear way to call or book — visitors don't know how to contact you",
  urgency_messaging: 'no reason for visitors to act now',
  hook_engagement: "visitors can't figure out your services in a quick glance",
  trust_signals: "no reviews or licences visible on your site — nothing to prove you're legit",
  imagery_design: 'weak visual design — stock photos or outdated look',
  offer_clarity: "visitors can't see what you're actually offering",
  contextual_appropriateness: 'poor relevance to your industry',
};

/**
 * Extract scoring weaknesses from score_json to populate templates.
 *
 * Supports both scoring formats:
 *   - flat:   scoreData.factor_scores[name] = {score, reasoning, evidence}
 *   - nested: scoreData.sections[section].criteria[name] = {score, explanation, reasoning}
 *
 * @param {Object|null} scoreData - Parsed score_json from database
 * @returns {Object} Template fields {primaryWeakness, secondaryWeakness,
 *   quickImprovementOpportunity, evidence, reasoning, industry, score, grade, impact}
 */
export function extractTemplateFields(scoreData) {
  // Impact reported when there is no numeric factor score to derive one from.
  const DEFAULT_IMPACT = 30;

  if (!scoreData || (!scoreData.sections && !scoreData.factor_scores)) {
    return {
      primaryWeakness: 'weak call-to-action',
      secondaryWeakness: 'unclear value proposition',
      quickImprovementOpportunity:
        'add a clear call-to-action and highlight your key offer above the fold',
      evidence: 'Several key conversion elements are missing',
      reasoning: 'Missing trust signals and clear calls-to-action reduce visitor confidence',
      industry: 'local service',
      score: 0,
      grade: 'F',
      impact: DEFAULT_IMPACT,
    };
  }

  // Only finite numbers count as scores; NaN would poison the sort and the average below.
  const hasScore = criteria => Boolean(criteria) && Number.isFinite(criteria.score);

  const factors = [];
  if (scoreData.factor_scores) {
    for (const [name, criteria] of Object.entries(scoreData.factor_scores)) {
      if (!hasScore(criteria)) continue;
      factors.push({
        name: FACTOR_LABELS[name] || name.replace(/_/g, ' '), // eslint-disable-line security/detect-object-injection
        score: criteria.score,
        explanation: criteria.evidence || criteria.reasoning || '',
        reasoning: criteria.reasoning || criteria.evidence || '',
        section: 'general',
      });
    }
  } else {
    for (const [sectionName, section] of Object.entries(scoreData.sections || {})) {
      if (!section || !section.criteria) continue;
      for (const [criteriaName, criteria] of Object.entries(section.criteria)) {
        if (!hasScore(criteria)) continue;
        factors.push({
          name: criteriaName,
          score: criteria.score,
          explanation: criteria.explanation || '',
          reasoning: criteria.reasoning || criteria.explanation || '',
          section: sectionName,
        });
      }
    }
  }

  // Sort by score ascending (lowest = worst = primary weakness)
  factors.sort((a, b) => a.score - b.score);

  const primaryWeakness = factors[0] || {
    name: 'weak call-to-action',
    score: 5,
    explanation: 'Multiple conversion elements need improvement',
    reasoning: 'Weak conversion elements reduce lead generation potential',
  };

  // Pick entry [1] (falling back to [0]) from an LLM-produced list, strip one trailing
  // period and lowercase the first letter. Non-array lists and non-string entries are
  // treated as absent instead of throwing on .replace().
  const pickListPhrase = list => {
    if (!Array.isArray(list)) return '';
    const entry = list[1] ?? list[0];
    if (typeof entry !== 'string') return '';
    const phrase = entry.replace(/\.$/, '');
    return phrase ? phrase.charAt(0).toLowerCase() + phrase.slice(1) : '';
  };

  // Prefer LLM-generated critical_weaknesses for the secondary weakness name —
  // the scoring LLM already prioritised these.
  const secondaryWeaknessName = pickListPhrase(scoreData.critical_weaknesses);
  const secondaryWeakness = secondaryWeaknessName
    ? {
        name: secondaryWeaknessName,
        score: 6,
        explanation: factors[1]?.explanation || '',
        reasoning: factors[1]?.reasoning || '',
      }
    : factors[1] || {
        name: 'unclear value proposition',
        score: 6,
        explanation: 'Value proposition could be made clearer',
        reasoning: 'Unclear value proposition reduces visitor confidence',
      };

  // Extract industry from factor_scores if available
  const industryContext = scoreData.factor_scores?.contextual_appropriateness?.industry_context;
  const industry =
    typeof industryContext === 'string' && industryContext ? industryContext : 'local service';

  // Estimated impact (conversion loss percentage) from the three worst factors:
  // lower scores = higher impact. With no factors the average would be 0/0 (NaN),
  // so the default is kept instead.
  let impact = DEFAULT_IMPACT;
  if (factors.length > 0) {
    const worst = factors.slice(0, 3);
    const avgScore = worst.reduce((sum, f) => sum + f.score, 0) / worst.length;
    impact = Math.round((10 - avgScore) * 5); // Score 0-10 maps to 50-0% impact
  }

  // Filter out LLM non-answers for evidence (e.g. "None found", "N/A", empty)
  const NON_ANSWER =
    /^(none\s*found\.?|n\/a\.?|none\.?|not\s*applicable\.?|not\s*available\.?|\s*)$/i;
  const pickEvidence = f => {
    const e = f?.explanation || '';
    return NON_ANSWER.test(e.trim()) ? '' : e;
  };
  const evidence =
    pickEvidence(primaryWeakness) ||
    pickEvidence(secondaryWeakness) ||
    'Several key conversion elements need improvement';
  const reasoning =
    (NON_ANSWER.test((primaryWeakness.reasoning || '').trim())
      ? secondaryWeakness.reasoning
      : primaryWeakness.reasoning) || 'Improving these elements will increase lead generation';

  // Use LLM-generated quick_improvement_opportunities from score_json —
  // prefer [1] (second entry) to avoid repeating the primary weakness, fall back to [0].
  const quickImprovementOpportunity =
    pickListPhrase(scoreData.quick_improvement_opportunities) ||
    'add a clear call-to-action and highlight your key offer above the fold';

  const conversionScore = scoreData.overall_calculation?.conversion_score || 0;

  return {
    primaryWeakness: primaryWeakness.name,
    secondaryWeakness: secondaryWeakness.name,
    quickImprovementOpportunity,
    evidence,
    reasoning,
    industry,
    score: Math.round(conversionScore),
    grade: computeGrade(conversionScore),
    impact: Math.max(20, Math.min(50, impact)), // Clamp 20-50%
  };
}

/**
 * Haiku Pass 1: ask the analysis model for one concrete recommendation sentence
 * derived from score_json.
 *
 * Makes up to two LLM attempts. A response is accepted when it parses as JSON,
 * has a non-empty string `recommendation`, and an SMS fragment can be obtained
 * (either the model's `recommendation_sms` of 1–50 chars, or one derived via
 * _smsFragment()). This function does not fall back to buildFallbackAnalysis();
 * it throws when both attempts fail.
 *
 * @param {Object} scoreData - Parsed score_json from database
 * @param {string|null} keyword - Raw search keyword (e.g. "heat pump tauranga")
 * @param {string} languageCode - ISO 639-1 language code
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 country code
 * @returns {Promise<{industry: string, recommendation: string, recommendation_sms: string}>}
 * @throws {Error} If no usable response is obtained after two attempts
 */
export async function analyzeScoreJson(
  scoreData,
  keyword,
  languageCode = 'en',
  countryCode = null
) {
  const MAX_ATTEMPTS = 2;
  const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
  const season = getCurrentSeason(countryCode);
  const keywordSection = keyword ? `\nKEYWORD: ${keyword}` : '';
  const seasonSection = season ? `\nSEASON: ${season}` : '';

  const userMessage = `LANGUAGE: ${langName}${keywordSection}${seasonSection}
SCORE_JSON:
${wrapUntrusted(JSON.stringify(scoreData), 'score_json')}`;

  const ANALYZE_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';

  // Model output is untrusted: only strings are usable, anything else counts as blank.
  const asTrimmedString = value => (typeof value === 'string' ? value.trim() : '');

  // Why the most recent attempt was rejected — used for the final error message.
  let lastRejection = 'recommendation_sms blank';

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    try {
      const { content } = await callLLM({
        model: ANALYZE_MODEL,
        temperature: 0,
        max_tokens: 300,
        json_mode: true,
        messages: [
          { role: 'system', content: HAIKU_ANALYZE_PROMPT },
          { role: 'user', content: userMessage },
        ],
        stage: 'proposals',
      });

      const result = safeJsonParse(content);
      const recommendation = asTrimmedString(result?.recommendation);
      if (!recommendation) {
        lastRejection = 'invalid/empty JSON response';
        logger.warn(
          `analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS}: invalid/empty JSON response`
        );
        continue;
      }

      let industry = asTrimmedString(result.industry) || _extractIndustry(keyword);
      // If the model returned the raw keyword unchanged (failed to categorise), apply heuristic
      if (keyword && industry.toLowerCase() === keyword.trim().toLowerCase()) {
        industry = _extractIndustry(keyword);
      }

      // Use the model-provided SMS fragment if valid, else derive from recommendation
      const smsFrag = asTrimmedString(result.recommendation_sms);
      const recommendation_sms =
        smsFrag.length > 0 && smsFrag.length <= 50 ? smsFrag : _smsFragment(recommendation);

      if (!recommendation_sms) {
        lastRejection = 'recommendation_sms blank';
        logger.warn(
          `analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS}: recommendation_sms blank for keyword="${keyword}"`
        );
        continue;
      }

      return { industry, recommendation, recommendation_sms };
    } catch (err) {
      logger.warn(`analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS} error: ${err.message}`);
      if (attempt < MAX_ATTEMPTS) continue;
      // Keep the underlying error reachable for callers that log err.cause.
      throw new Error(
        `analyzeScoreJson failed after ${MAX_ATTEMPTS} attempts: ${err.message}`,
        { cause: err }
      );
    }
  }

  throw new Error(`analyzeScoreJson: ${lastRejection} after ${MAX_ATTEMPTS} LLM attempts`);
}
/**
 * Fallback: extract a bare industry label from a keyword when Haiku is unavailable.
 *   - missing / blank keyword: 'local service'
 *   - 1 word: returned as-is
 *   - 2 words: strip the last word unless the pair is a known compound service
 *   - 3 words: keep intact (likely a compound service like "hot water repairs")
 *   - 4+ words: strip last word (city/suburb suffix)
 */
const _COMPOUND_SERVICES = new Set([
  'pressure washing',
  'power washing',
  'heat pump',
  'heat pumps',
  'hot water',
  'metal roofing',
  'solar panels',
  'solar power',
  'lawn mowing',
  'lawn care',
  'window cleaning',
  'gutter cleaning',
  'air conditioning',
  'pest control',
  'tree removal',
  'tree service',
  'concrete cutting',
  'carpet cleaning',
  'tile laying',
  'brick laying',
  'pool cleaning',
  'pool repair',
  'roof repair',
  'roof restoration',
  'damp proofing',
  'water damage',
  'fire damage',
  'storm damage',
]);

/**
 * @param {string|null} keyword - Raw search keyword
 * @returns {string} Industry label (see rules above)
 */
function _extractIndustry(keyword) {
  const cleaned = typeof keyword === 'string' ? keyword.trim() : '';
  // Blank keywords previously produced an empty industry label.
  if (!cleaned) return 'local service';

  const parts = cleaned.split(/\s+/);
  if (parts.length === 1 || parts.length === 3) return cleaned;
  if (parts.length >= 4) return parts.slice(0, -1).join(' ');

  // 2-word keyword: compare with single spaces so "heat  pump" still matches.
  const pair = parts.join(' ').toLowerCase();
  return _COMPOUND_SERVICES.has(pair) ? pair : parts[0]; // else strip location (last word)
}

/**
 * Compress a full recommendation sentence into a ≤50 char SMS fragment.
 * Takes first clause (before em-dash/en-dash or ", which/so/meaning/this"), strips one
 * trailing . ! or ?, and truncates at a word boundary when the clause is still too long.
 *
 * @param {string} rec - Recommendation sentence
 * @returns {string} Fragment of at most 50 characters ('' for blank/non-string input)
 */
function _smsFragment(rec) {
  if (!rec || typeof rec !== 'string') return '';
  // Trim first: otherwise trailing whitespace hides the final punctuation from the regex.
  const noTrailingPunct = rec.trim().replace(/[.!?]$/, '').trim();
  const firstClause = noTrailingPunct
    .split(/\s*[—–]\s*|\s*,\s*(?:which|so|meaning|this)\s/i)[0]
    .trim();
  if (firstClause.length <= 50) return firstClause;

  const truncated = firstClause.slice(0, 50);
  const lastSpace = truncated.lastIndexOf(' ');
  // Only cut at the last space when that keeps a reasonably long fragment.
  return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
}

/**
 * Build a deterministic fallback recommendation from extractTemplateFields() data.
 * Intended for use when the Haiku analysis LLM call fails.
 *
 * @param {Object} scoreData - Parsed score_json
 * @param {string|null} keyword - Raw search keyword
 * @returns {{industry: string, recommendation: string, recommendation_sms: string}}
 */
function buildFallbackAnalysis(scoreData, keyword) {
  const fields = extractTemplateFields(scoreData);
  const evidence = fields.evidence || 'several conversion elements need improvement';
  const reasoning = fields.reasoning || 'improving these will increase lead generation';

  // Lowercase the first letter and drop one trailing period so two sentences can be
  // joined into a single "evidence — reasoning" clause.
  const toClause = text => text.charAt(0).toLowerCase() + text.slice(1).replace(/\.$/, '');
  const evidenceLower = toClause(evidence);

  return {
    industry: _extractIndustry(keyword),
    recommendation: `${evidenceLower} — ${toClause(reasoning)}`,
    recommendation_sms: _smsFragment(evidenceLower),
  };
}
/**
 * Load templates for a specific country, language, and channel.
 * Lookup order:
 *   1. data/templates/{countryCode}/{languageCode}/{prefix}{channel}.json (per-language path)
 *   2. data/templates/{countryCode}/{prefix}{channel}.json (legacy flat path)
 *   3. data/templates/{countryCode}/{otherLang}/{prefix}{channel}.json — the first other
 *      language subdirectory of the country that has the file
 *   4. Campaign-prefixed lookups only: repeat 1–3 with the standard (unprefixed) templates
 * Where {prefix} is determined by campaignTag:
 *   - 'freefix'            → 'freefix-'
 *   - 'review_acquisition' → 'review-campaign-'
 *   - anything else / null → '' (no prefix, original templates)
 * Channels other than 'sms' and 'email' are served from the email templates.
 * There is no fallback to other countries.
 *
 * @param {string} countryCode - Two-letter country code (AU, US, etc.)
 * @param {string} languageCode - ISO 639-1 language code (en, hi, ja, etc.); common
 *   ISO 639-2 three-letter codes are normalised
 * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 * @param {string|null} campaignTag - Campaign identifier ('freefix', 'review_acquisition', 'standard')
 * @returns {Array<Object>} Template objects
 * @throws {Error} If no templates found for this country/language/channel
 */
export function loadTemplates(countryCode, languageCode, channel, campaignTag = null) {
  const validChannels = ['sms', 'email'];
  let templateChannel = channel;
  if (!validChannels.includes(channel)) {
    logger.warn(`Channel ${channel} not supported for templates, using email`);
    templateChannel = 'email';
  }

  // Normalize ISO 639-2 three-letter codes to ISO 639-1 two-letter codes.
  // Maps (not plain objects) so inputs like 'constructor' cannot hit prototype keys.
  const ISO639_2_TO_1 = new Map([
    ['eng', 'en'],
    ['fra', 'fr'],
    ['deu', 'de'],
    ['spa', 'es'],
    ['ita', 'it'],
    ['por', 'pt'],
    ['nld', 'nl'],
    ['jpn', 'ja'],
    ['kor', 'ko'],
    ['zho', 'zh'],
    ['nor', 'no'],
    ['nob', 'no'],
    ['nno', 'no'],
    ['ind', 'id'],
    ['hin', 'hi'],
    ['pol', 'pl'],
    ['swe', 'sv'],
    ['dan', 'da'],
  ]);
  const rawLang = String(languageCode || 'en').toLowerCase();
  const lang = ISO639_2_TO_1.get(rawLang) ?? rawLang;

  // Campaign-specific template prefix (DR-128)
  const CAMPAIGN_PREFIXES = new Map([
    ['freefix', 'freefix-'],
    ['review_acquisition', 'review-campaign-'],
  ]);
  const prefix = CAMPAIGN_PREFIXES.get(campaignTag) ?? '';
  const templateFile = `${prefix}${templateChannel}.json`;

  // Country and language end up in a filesystem path; anything that is not a plain
  // code-like token is never used as a path segment.
  const isSafeSegment = value => typeof value === 'string' && /^[A-Za-z0-9_-]+$/.test(value);

  // A missing file/directory is the expected "try the next location" case.
  const isMissing = err => err?.code === 'ENOENT' || err?.code === 'ENOTDIR';

  // Return the non-empty templates array from a file, or null. Unexpected read errors
  // and malformed JSON are logged so a broken file is not mistaken for an absent one.
  const readTemplates = filePath => {
    let raw;
    try {
      raw = readFileSync(filePath, 'utf-8');
    } catch (err) {
      if (!isMissing(err)) {
        logger.warn(`Could not read template file ${filePath}: ${err.message}`);
      }
      return null;
    }
    try {
      const data = JSON.parse(raw);
      return Array.isArray(data?.templates) && data.templates.length ? data.templates : null;
    } catch (err) {
      logger.warn(`Template file ${filePath} is not valid JSON: ${err.message}`);
      return null;
    }
  };

  if (isSafeSegment(countryCode)) {
    const countryDir = join(projectRoot, `data/templates/${countryCode}`);

    // 1. Language-specific subdirectory
    if (isSafeSegment(lang)) {
      const found = readTemplates(join(countryDir, lang, templateFile));
      if (found) return found;
    }

    // 2. Legacy flat path (English) — also the fallback for unrecognized/unsupported langs
    //    e.g. 'zxx' (no linguistic content), 'fr_ca' (malformed BCP 47), etc.
    const flat = readTemplates(join(countryDir, templateFile));
    if (flat) return flat;

    // 3. Native language fallback — for countries without English flat templates
    //    (DE, FR, IT, etc.): try every other language subdirectory in directory order.
    let entries = [];
    try {
      entries = readdirSync(countryDir, { withFileTypes: true });
    } catch (err) {
      if (!isMissing(err)) {
        logger.warn(`Could not list template directory ${countryDir}: ${err.message}`);
      }
    }
    for (const entry of entries) {
      if (!entry.isDirectory() || entry.name === lang) continue; // skip already-tried lang
      const native = readTemplates(join(countryDir, entry.name, templateFile));
      if (native) return native;
    }
  }

  // 4. Campaign-specific templates not found — fall back to the standard templates
  if (prefix) {
    logger.info(
      `No ${campaignTag} templates for ${countryCode}/${lang}/${templateChannel}, falling back to standard`
    );
    return loadTemplates(countryCode, languageCode, templateChannel, null);
  }

  throw new Error(`No templates for ${countryCode}/${lang}/${templateChannel}`);
}
/**
 * Select the template to use for this outreach.
 *
 * Selection rules, as implemented:
 *   1. When hasFirstname is true and at least one template mentions "[firstname" in
 *      its body or subject spintax, only those templates are considered.
 *   2. Templates with fewer sends win (rotation testing).
 *   3. Between two templates that both have 1000+ sends, the higher conversion rate wins.
 *   4. Ties are broken uniformly at random.
 * The `fields` argument is accepted for interface compatibility; it is not consulted.
 *
 * @param {Array<Object>} templates - Available templates
 * @param {Object} fields - Template fields from extractTemplateFields() (currently unused)
 * @param {string} channel - Contact channel (used in the error message only)
 * @param {boolean} [hasFirstname=false] - Whether a contact first name is available
 * @returns {Object} Selected template
 * @throws {Error} If templates is empty or missing
 */
export function selectTemplate(templates, fields, channel, hasFirstname = false) {
  if (!templates || templates.length === 0) {
    throw new Error(`No templates available for channel: ${channel}`);
  }

  // When a firstname is available, prefer templates that actually use it.
  // The full pool is used only when no template references the firstname token.
  let pool = templates;
  if (hasFirstname) {
    const named = templates.filter(
      t =>
        (t.body_spintax || '').includes('[firstname') ||
        (t.subject_spintax || '').includes('[firstname')
    );
    if (named.length > 0) pool = named;
  }

  // Fisher–Yates shuffle of a copy. A random sort comparator is biased and violates
  // the comparator contract; this gives every tied template an equal chance.
  const candidates = [...pool];
  for (let i = candidates.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [candidates[i], candidates[j]] = [candidates[j], candidates[i]];
  }

  // Array.prototype.sort is stable, so equally-ranked templates keep their shuffled order.
  candidates.sort((a, b) => {
    const sendsA = a.sends || 0;
    const sendsB = b.sends || 0;

    // After 1000+ sends on both sides, weight by conversion rate (higher first)
    if (sendsA >= 1000 && sendsB >= 1000) {
      return (b.conversions || 0) / sendsB - (a.conversions || 0) / sendsA;
    }

    // Otherwise, prefer template with fewer sends (rotation testing)
    return sendsA - sendsB;
  });

  return candidates[0];
}

// Non-person label patterns — safety net after LLM name extraction
const NON_PERSON_WORDS = new Set([
  'office',
  'info',
  'sales',
  'admin',
  'support',
  'contact',
  'hello',
  'team',
  'reception',
  'enquiries',
  'enquiry',
  'general',
  'main',
  'emergency',
  'after',
  'hours',
  'mobile',
  'phone',
  'fax',
  'sms',
  'toll',
  'free',
  'tollfree',
  'hotline',
  'helpdesk',
  'helpline',
  'service',
  'services',
  'department',
  'head',
  'headquarters',
  'hq',
  'branch',
  'accounts',
  'billing',
  'booking',
  'bookings',
  'reservations',
  'store',
  'shop',
  'centre',
  'center',
  'clinic',
  'group',
  'media',
  'marketing',
  'hr',
  'recruiter',
  'recruitment',
  'website',
  // Generic directional/scale words that appear in email local-parts but are never firstnames
  'north',
  'south',
  'east',
  'west',
  'central',
  'national',
  'regional',
  'local',
]);
/**
 * Deterministic check: does this string look like a real human first name?
 * Acts as a safety net behind the upstream name filtering for edge cases
 * (city names, phone-type labels, etc.).
 *
 * @param {*} name - Candidate name
 * @returns {boolean} false for non-strings, labels shorter than 2 or longer than 25
 *   characters, anything containing digits, more than one hyphen, a NON_PERSON_WORDS
 *   entry, or three or more words
 */
function isPersonFirstname(name) {
  if (!name || typeof name !== 'string') return false;
  const trimmed = name.trim();
  if (!trimmed || trimmed.length < 2 || trimmed.length > 25) return false;
  if (/\d/.test(trimmed)) return false; // contains digits
  if (trimmed.split('-').length > 2) return false; // two or more hyphens
  const words = trimmed.toLowerCase().split(/[\s-]+/);
  // Reject labels with 3+ words (e.g. "Head Of Marketing")
  if (words.length >= 3) return false;
  // Reject if any word in the label is a known non-person word
  return !words.some(w => NON_PERSON_WORDS.has(w));
}

/**
 * Populate a spintax template with extracted fields.
 *
 * [key] and [key|fallback] tokens are resolved first, then the result is passed
 * through spin(), then spacing artifacts are cleaned up.
 *
 * @param {string} template - Template string with [tokens] and {spintax|groups}
 * @param {Object} fields - Template fields (grade, score, impact, optional sites_scored)
 * @param {Object} siteData - Site data; `domain` must be a string, `keyword` is optional
 * @param {Object|null} contact - Contact info (name, channel, uri)
 * @param {Object|null} analysisData - Result of analyzeScoreJson():
 *   {industry, recommendation, recommendation_sms}
 * @returns {string} Populated proposal text
 */
export function populateTemplate(template, fields, siteData, contact = null, analysisData = null) {
  // Business name from the first domain label (simple approach). A leading "www." is
  // dropped first so "www.joes-plumbing.com" yields "joes plumbing" rather than "www".
  const businessName = siteData.domain
    .replace(/^www\./i, '')
    .split('.')[0]
    .replace(/-/g, ' ');

  // Greeting: first word of the contact name, capitalised, when it passes the
  // deterministic person-name check. Empty otherwise — templates control their own
  // locale fallback:
  //   [firstname|there] → English fallback (EN templates: "Hi there")
  //   [firstname|Hallo] → German fallback (DE sms_002: "Hallo,")
  //   [firstname|]      → empty; punctuation cleanup handles "Guten Tag ," → "Guten Tag,"
  const firstname = contact?.name || '';
  const greetingRaw = isPersonFirstname(firstname) ? firstname.split(/\s+/)[0] : '';
  const greeting = greetingRaw ? greetingRaw.charAt(0).toUpperCase() + greetingRaw.slice(1) : '';

  const rawRecommendation = (analysisData?.recommendation || '').trim();
  // Make sure a non-empty recommendation ends with sentence punctuation.
  const recommendation =
    rawRecommendation && !/[.!?]$/.test(rawRecommendation)
      ? `${rawRecommendation}.`
      : rawRecommendation;

  // Replacement map — all tokens filled programmatically. A Map is used so template
  // tokens such as [constructor] cannot resolve to inherited object properties.
  const replacements = new Map([
    ['business_name', businessName],
    ['firstname', greeting],
    ['grade', fields.grade],
    ['score', fields.score],
    ['industry', analysisData?.industry || _extractIndustry(siteData.keyword)],
    ['impact', fields.impact],
    ['domain', siteData.domain],
    ['recommendation', recommendation],
    ['recommendation_sms', (analysisData?.recommendation_sms || '').trim()],
    ['sites_scored', fields.sites_scored || null],
    ['brand_url_short', process.env.BRAND_DOMAIN],
    ['brand_url', process.env.BRAND_URL],
    ['persona_name', process.env.PERSONA_NAME || ''],
    ['persona_first_name', process.env.PERSONA_FIRST_NAME || ''],
    ['brand_name', process.env.BRAND_NAME || ''],
  ]);

  // Resolve [key|fallback] and [key] variables BEFORE spinning.
  // Critical: [firstname|there] inside a {…} spintax group has a | that the
  // spintax engine would split on, producing garbled output like "Hey [firstname"
  // or "there]!" as separate spin options.
  const resolved = template.replace(/\[(\w+)(?:\|([^\]]*))?\]/g, (_match, key, fallback) => {
    const val = replacements.get(key);
    if (val !== null && val !== undefined && val !== '') return String(val);
    return fallback !== undefined ? fallback : '';
  });

  // Spin after variable resolution so {option1|option2} has clean, unambiguous input.
  // Then clean up spacing artifacts (e.g. empty value leaves "Hi ,"); only runs of
  // spaces are collapsed — \n\n paragraph breaks are preserved.
  return spin(resolved)
    .replace(/ ([,!?.;:])/g, '$1')
    .replace(/ {2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Throw if a lowercase [field] token remains in the text.
 * Meant to be called after populateTemplate() and after polishProposal() so a
 * leftover token surfaces as an error naming the token.
 *
 * @param {string} text - Text to check (falsy text is accepted)
 * @param {string} label - Descriptive label for the error message (e.g. 'body', 'subject')
 * @throws {Error} If text contains a token matching /\[[a-z_]+\]/
 */
export function checkForUnfilledTokens(text, label) {
  if (!text) return;
  const match = text.match(/\[[a-z_]+\]/);
  if (match) {
    throw new Error(`Unfilled token ${match[0]} in ${label} — template population incomplete`);
  }
}

// Countries in the Southern hemisphere (seasons are flipped relative to Northern)
const SOUTHERN_COUNTRIES = new Set([
  'AU',
  'NZ',
  'ZA',
  'AR',
  'CL',
  'BR',
  'PE',
  'UY',
  'PY',
  'BO',
  'EC',
  'MZ',
  'ZW',
  'BW',
  'NA',
  'LS',
  'SZ',
  'MG',
]);

// Tropical/equatorial countries — no meaningful seasons
const TROPICAL_COUNTRIES = new Set([
  'SG',
  'MY',
  'ID',
  'TH',
  'PH',
  'VN',
  'KH',
  'MM',
  'BN',
  'PG',
  'FJ',
  'TL',
]);
/**
 * Return the current meteorological season for a country.
 *
 * Returns null when no country code is given or the country is listed in
 * TROPICAL_COUNTRIES. Countries listed in SOUTHERN_COUNTRIES get flipped seasons;
 * every other code (including unrecognised ones) gets Northern-hemisphere seasons.
 * String codes are matched case-insensitively.
 *
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (e.g. 'AU', 'US')
 * @param {Date} [date=new Date()] - Date to use (defaults to today; local-time month)
 * @returns {'Spring'|'Summer'|'Autumn'|'Winter'|null}
 */
export function getCurrentSeason(countryCode, date = new Date()) {
  // The lookup sets hold upper-case codes, so 'au' must be normalised to match.
  const code = typeof countryCode === 'string' ? countryCode.trim().toUpperCase() : countryCode;
  if (!code || TROPICAL_COUNTRIES.has(code)) return null;

  // Northern meteorological season for each month, 0=Jan … 11=Dec
  const NORTHERN_BY_MONTH = [
    'Winter',
    'Winter',
    'Spring',
    'Spring',
    'Spring',
    'Summer',
    'Summer',
    'Summer',
    'Autumn',
    'Autumn',
    'Autumn',
    'Winter',
  ];
  const OPPOSITE = { Spring: 'Autumn', Summer: 'Winter', Autumn: 'Spring', Winter: 'Summer' };

  const northern = NORTHERN_BY_MONTH[date.getMonth()];
  return SOUTHERN_COUNTRIES.has(code) ? OPPOSITE[northern] : northern;
}

// ISO 639-1 code → full language name for the LLM prompts
const POLISH_LANG_NAMES = {
  en: 'English',
  de: 'German',
  fr: 'French',
  it: 'Italian',
  ja: 'Japanese',
  ko: 'Korean',
  zh: 'Chinese',
  es: 'Spanish',
  pt: 'Portuguese',
  nl: 'Dutch',
  pl: 'Polish',
  sv: 'Swedish',
  da: 'Danish',
  no: 'Norwegian',
  id: 'Indonesian',
  hi: 'Hindi',
};
/**
 * Haiku Pass 2: Polish a fully-populated proposal.
 * All [field] tokens must already be filled before calling this.
 * Tasks delegated to the model: fix grammar, capitalisation, punctuation; compress
 * SMS to ≤160 chars; polish subject line. Does NOT fill tokens or analyse score_json.
 *
 * The original text/subject are returned unchanged when the circuit breaker is open,
 * when the model response is unusable, or when the LLM call throws. SMS text of at
 * most 155 characters skips the LLM call entirely.
 *
 * @param {string} text - Proposal body (all tokens pre-filled)
 * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 * @param {string} languageCode - ISO 639-1 language code (en, de, fr, etc.)
 * @param {string|null} subjectLine - Email subject line (email/form only)
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (for season constraint)
 * @returns {Promise<{text: string, subjectLine: string|null}>} Polished text and subject
 */
export async function polishProposal(
  text,
  channel,
  languageCode = 'en',
  subjectLine = null,
  countryCode = null
) {
  const forcePolish = process.env.HAIKU_POLISH_DEBUG === '1';

  // Circuit breaker: skip LLM call when JSON failure rate is high.
  // Bypass with HAIKU_POLISH_DEBUG=1 for troubleshooting.
  if (_polishBreaker.isOpen() && !forcePolish) {
    return { text, subjectLine };
  }

  const isEmailChannel = channel === 'email' || channel === 'form';

  // SMS-specific fast path: skip polish when text is already within the 160-char limit.
  // The only SMS task is compression — if already short, polishing adds nothing and
  // sometimes garbles short texts (merged words, structural JSON errors).
  // 155 chars = 5-char buffer for sender IDs that compliance may append after this call.
  if (channel === 'sms' && text.length <= 155 && !forcePolish) {
    return { text, subjectLine: null };
  }

  const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection

  const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
  const season = getCurrentSeason(countryCode);
  const seasonSection = season ? `\nSEASON: ${season}` : '';

  const userMessage = `CHANNEL: ${channel}
LANGUAGE: ${langName}${seasonSection}
---
PROPOSAL BODY:
${text}${subjectSection}`;

  try {
    const { content } = await callLLM({
      model: POLISH_MODEL,
      temperature: 0,
      max_tokens: 3000,
      json_mode: true,
      messages: [
        { role: 'system', content: HAIKU_POLISH_PROMPT },
        { role: 'user', content: userMessage },
      ],
      stage: 'proposals',
    });

    const result = safeJsonParse(content);
    const bodyIsString = typeof result?.body === 'string';
    if (!bodyIsString || result.body.trim() === '') {
      _polishBreaker.record();
      logger.warn(`Polish (${POLISH_MODEL}) returned invalid response — using original`, {
        hasResult: !!result,
        bodyType: result ? typeof result.body : 'n/a',
        bodyEmpty: bodyIsString && result.body.trim() === '',
        keys: result ? Object.keys(result) : [],
        contentSnippet: content?.substring(0, 100),
        breakerFailures: _polishBreaker.failures.length,
      });
      return { text, subjectLine };
    }

    // Guard: paragraph breaks (double newlines) are structural — losing them breaks
    // email formatting. If the output has fewer breaks than the input, reject the
    // polished body and keep the original (extra breaks are tolerated).
    const countBreaks = value => (value.match(/\n\n/g) || []).length;
    const inputBreaks = countBreaks(text);
    let polishedBody = result.body.trim();
    if (inputBreaks >= 1 && countBreaks(result.body) < inputBreaks) {
      logger.warn('Polish collapsed paragraph breaks — keeping original body');
      polishedBody = text;
    }

    // A non-string subject must not throw and discard an otherwise valid polished body.
    const polishedSubject = typeof result.subject === 'string' ? result.subject.trim() : '';

    return {
      text: polishedBody,
      subjectLine: isEmailChannel ? polishedSubject || subjectLine : null,
    };
  } catch (err) {
    logger.warn(`Polish (${POLISH_MODEL}) failed: ${err.message} — using original`);
    return { text, subjectLine };
  }
}

/**
 * @deprecated Use polishProposal() instead.
 */
881 * Kept for backwards compatibility — delegates to the new polish function. 882 */ 883 export async function shortenSmsWithHaiku(text) { 884 const polished = await polishProposal(text, 'sms', 'en', null, null); 885 return polished.text; 886 } 887 888 /** @deprecated Renamed to polishProposal */ 889 export const polishProposalWithHaiku = polishProposal; 890 891 /** 892 * Generate proposal using templates (replaces LLM generation) 893 * @param {Object} siteData - Site data from database (must include country_code, language_code) 894 * @param {Object} scoreData - Parsed score_json 895 * @param {Object} contact - Contact object {name, channel, uri} 896 * @returns {Promise<Object>} Generated proposal {proposalText, templateId, subjectLine} 897 * @throws {Error} If no templates found for this country/language/channel 898 */ 899 export async function generateTemplateProposal( 900 siteData, 901 scoreData, 902 contact, 903 cachedAnalysis = null 904 ) { 905 const fields = extractTemplateFields(scoreData); 906 if (!siteData.country_code) throw new Error(`country_code is required for template proposal (site ${siteData.id || siteData.domain})`); 907 const countryCode = siteData.country_code; 908 const languageCode = siteData.language_code || null; 909 const channel = contact.channel || 'email'; 910 911 // Pass 1: Haiku analyzes score_json → concrete recommendation sentence + industry. 912 // Use cachedAnalysis if provided (same result for all contacts on a site). 
913 const analysisData = 914 cachedAnalysis || 915 (await analyzeScoreJson( 916 scoreData, 917 siteData.keyword || null, 918 languageCode || 'en', 919 countryCode 920 )); 921 922 // Throws if no template found — caller handles the error 923 const campaignTag = siteData.campaign_tag || null; 924 const templates = loadTemplates(countryCode, languageCode, channel, campaignTag); 925 const template = selectTemplate(templates, fields, channel, !!contact?.name); 926 927 // Populate template — all tokens including [recommendation]/[industry] filled here 928 let proposalText = populateTemplate( 929 template.body_spintax, 930 fields, 931 siteData, 932 contact, 933 analysisData 934 ); 935 936 // If a firstname is known but didn't land in this spin (optional greeting branch missed), 937 // re-spin up to 9 more times — (0.5)^10 ≈ 0.1% chance of still missing after 10 tries. 938 if (contact?.name) { 939 const { name } = contact; 940 for (let i = 0; i < 9 && !proposalText.includes(name); i++) { 941 proposalText = populateTemplate( 942 template.body_spintax, 943 fields, 944 siteData, 945 contact, 946 analysisData 947 ); 948 } 949 } 950 951 // SMS: re-spin up to 3 times to try to reach ≤160 chars before the polish pass 952 if (channel === 'sms' && proposalText.length > 160) { 953 for (let i = 0; i < 3; i++) { 954 const candidate = populateTemplate( 955 template.body_spintax, 956 fields, 957 siteData, 958 contact, 959 analysisData 960 ); 961 if (candidate.length < proposalText.length) proposalText = candidate; 962 if (proposalText.length <= 160) break; 963 } 964 } 965 966 // Email (and form-as-email) use subject lines; SMS/linkedin/x do not 967 const usesEmailTemplate = channel === 'email' || channel === 'form'; 968 let rawSubjectLine = null; 969 970 if (usesEmailTemplate) { 971 // Prefer selected template's subject_spintax, fall back to others in the same file. 972 // Throws if no usable subject_spintax exists — prevents silent blank subjects. 
973 const subjectCandidates = [template, ...templates.filter(t => t.id !== template.id)]; 974 let subjectSourceId = null; 975 976 for (const t of subjectCandidates) { 977 if (!t.subject_spintax?.trim()) continue; 978 const candidate = populateTemplate( 979 t.subject_spintax, 980 fields, 981 siteData, 982 contact, 983 analysisData 984 ); 985 if (candidate) { 986 rawSubjectLine = candidate; 987 subjectSourceId = t.id; 988 break; 989 } 990 } 991 992 if (!rawSubjectLine) { 993 throw new Error( 994 `No usable subject_spintax found in any template for ${countryCode}/${languageCode}/${channel}` 995 ); 996 } 997 998 if (subjectSourceId !== template.id) { 999 logger.warn( 1000 `Template ${template.id} has no usable subject_spintax — using subject from ${subjectSourceId}` 1001 ); 1002 } 1003 } 1004 1005 // Bracket check after population — any remaining [token] means a template bug 1006 checkForUnfilledTokens(proposalText, 'body'); 1007 if (rawSubjectLine) checkForUnfilledTokens(rawSubjectLine, 'subject'); 1008 1009 // Pass 2: polish grammar, capitalisation, punctuation; compress SMS to ≤160 chars. 1010 // Does NOT fill tokens — all tokens are already resolved above. 1011 const polished = await polishProposal( 1012 proposalText, 1013 channel, 1014 languageCode || 'en', 1015 rawSubjectLine, 1016 countryCode || null 1017 ); 1018 1019 // Bracket check after polish — catches any token LLM may have reintroduced (shouldn't happen) 1020 checkForUnfilledTokens(polished.text, 'polished body'); 1021 if (polished.subjectLine) checkForUnfilledTokens(polished.subjectLine, 'polished subject'); 1022 1023 logger.info( 1024 `Generated ${channel} proposal using template ${template.id} for ${siteData.domain} (${countryCode}/${languageCode})` 1025 ); 1026 1027 return { 1028 proposalText: polished.text, 1029 templateId: template.id, 1030 subjectLine: polished.subjectLine ?? 
rawSubjectLine, 1031 }; 1032 } 1033 1034 export default { 1035 extractTemplateFields, 1036 analyzeScoreJson, 1037 loadTemplates, 1038 selectTemplate, 1039 populateTemplate, 1040 checkForUnfilledTokens, 1041 generateTemplateProposal, 1042 };