template-proposals.js
1 /** 2 * Template-based Proposal Generation 3 * Cost-free alternative to LLM-based proposals ($0.18/site → $0/site) 4 * 5 * Extracts scoring weaknesses from score_json and populates pre-written templates 6 * Templates are tested, culturally optimized, and channel-specific 7 */ 8 9 import { readFileSync, readdirSync } from 'fs'; 10 import { join, dirname } from 'path'; 11 import { fileURLToPath } from 'url'; 12 import Logger from './logger.js'; 13 import { spin } from './spintax.js'; 14 import { callLLM } from './llm-provider.js'; 15 import { safeJsonParse } from './error-handler.js'; 16 import { computeGrade } from '../score.js'; 17 import { wrapUntrusted } from './llm-sanitizer.js'; 18 19 const __filename = fileURLToPath(import.meta.url); 20 const __dirname = dirname(__filename); 21 const projectRoot = join(__dirname, '../..'); 22 23 const logger = new Logger('TemplateProposals'); 24 25 const POLISH_MODEL = process.env.POLISH_MODEL || 'google/gemini-2.0-flash-001'; 26 27 // Load prompts 28 const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8'); 29 const HAIKU_ANALYZE_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-ANALYZE.md'), 'utf-8'); 30 31 /** 32 * In-process circuit breaker for Haiku polish failures. 33 * 34 * When >50 JSON parse failures accumulate in a rolling 10-min window, the polish 35 * step is bypassed entirely (proposals use the unpolished template text) until the 36 * window resets. This prevents thousands of wasted LLM calls during burst failures. 37 * 38 * Set env var HAIKU_POLISH_DEBUG=1 to force the polish pass even when the breaker 39 * is open — useful for troubleshooting from the CLI without restarting the service. 
/**
 * In-process circuit breaker for Haiku polish failures.
 *
 * `record()` timestamps one failure; `isOpen()` discards timestamps older than
 * WINDOW_MS and reports whether at least THRESHOLD failures remain. While the
 * breaker is open, polishProposal() skips the LLM call and returns the
 * unpolished text, which avoids wasting calls during burst failures.
 *
 * Set env var HAIKU_POLISH_DEBUG=1 to force the polish pass even when the
 * breaker is open — useful for troubleshooting from the CLI without restarting
 * the service.
 */
const _polishBreaker = {
  failures: [],
  WINDOW_MS: 10 * 60 * 1000, // 10 minutes
  THRESHOLD: 50,
  isOpen() {
    const cutoff = Date.now() - this.WINDOW_MS;
    this.failures = this.failures.filter(t => t > cutoff);
    return this.failures.length >= this.THRESHOLD;
  },
  record() {
    this.failures.push(Date.now());
    // Warn exactly once, at the moment the threshold is reached
    if (this.failures.length === this.THRESHOLD) {
      logger.warn(
        `Haiku polish circuit breaker OPEN — ${this.THRESHOLD} JSON failures in 10 min. Polish bypassed until failures drop below threshold. Set HAIKU_POLISH_DEBUG=1 to force.`
      );
    }
  },
};

// Human-readable labels for factor_scores keys (new scoring format)
const FACTOR_LABELS = {
  headline_quality: "site doesn't tell visitors what you do in the first few seconds",
  value_proposition:
    'nothing tells visitors why they should pick you over the next business in Google',
  unique_selling_proposition: "your site doesn't explain what makes you different",
  call_to_action: "no clear way to call or book — visitors don't know how to contact you",
  urgency_messaging: 'no reason for visitors to act now',
  hook_engagement: "visitors can't figure out your services in a quick glance",
  trust_signals: "no reviews or licences visible on your site — nothing to prove you're legit",
  imagery_design: 'weak visual design — stock photos or outdated look',
  offer_clarity: "visitors can't see what you're actually offering",
  contextual_appropriateness: 'poor relevance to your industry',
};

// LLM non-answers that must not be used as evidence (e.g. "None found", "N/A", empty)
const _NON_ANSWER =
  /^(none\s*found\.?|n\/a\.?|none\.?|not\s*applicable\.?|not\s*available\.?|\s*)$/i;

// Impact reported when there is no numeric factor to average (same value as the no-data default)
const _DEFAULT_IMPACT = 30;

/** Return `value` when it is a string, otherwise ''. Guards against non-string LLM output. */
function _asText(value) {
  return typeof value === 'string' ? value : '';
}

/**
 * Pick the second entry of an LLM-produced list (first entry if there is no second),
 * strip one trailing period and lowercase the first character.
 * Returns '' when the list is missing/empty or the picked entry is not a string.
 */
function _preferSecondEntry(list) {
  const items = Array.isArray(list) ? list : [];
  const picked = _asText(items[1] ?? items[0] ?? '').replace(/\.$/, '');
  return picked ? picked.charAt(0).toLowerCase() + picked.slice(1) : '';
}

/**
 * Flatten score_json into a list of {name, score, explanation, reasoning, section}.
 * Supports both scoring formats:
 *   New flat format:   scoreData.factor_scores[name] = {score, reasoning, evidence}
 *   Old nested format: scoreData.sections[section].criteria[name] = {score, explanation, reasoning}
 * Entries without a finite numeric score are skipped.
 */
function _collectFactors(scoreData) {
  const factors = [];

  if (scoreData.factor_scores) {
    for (const [name, criteria] of Object.entries(scoreData.factor_scores)) {
      if (!criteria || !Number.isFinite(criteria.score)) continue;
      const evidence = _asText(criteria.evidence);
      const reasoning = _asText(criteria.reasoning);
      factors.push({
        // Own-property check so keys like "constructor" never resolve to prototype members
        name: Object.prototype.hasOwnProperty.call(FACTOR_LABELS, name)
          ? FACTOR_LABELS[name] // eslint-disable-line security/detect-object-injection
          : name.replace(/_/g, ' '),
        score: criteria.score,
        explanation: evidence || reasoning,
        reasoning: reasoning || evidence,
        section: 'general',
      });
    }
    return factors;
  }

  for (const [sectionName, section] of Object.entries(scoreData.sections || {})) {
    if (!section || !section.criteria) continue;
    for (const [criteriaName, criteria] of Object.entries(section.criteria)) {
      if (!criteria || !Number.isFinite(criteria.score)) continue;
      const explanation = _asText(criteria.explanation);
      factors.push({
        name: criteriaName,
        score: criteria.score,
        explanation,
        reasoning: _asText(criteria.reasoning) || explanation,
        section: sectionName,
      });
    }
  }
  return factors;
}

/**
 * Extract scoring weaknesses from score_json to populate templates.
 *
 * @param {Object} scoreData - Parsed score_json from database
 * @returns {Object} Template fields {primaryWeakness, secondaryWeakness,
 *   quickImprovementOpportunity, evidence, reasoning, industry, score, grade, impact}.
 *   `impact` is always a number clamped to 20-50.
 */
export function extractTemplateFields(scoreData) {
  if (!scoreData || (!scoreData.sections && !scoreData.factor_scores)) {
    return {
      primaryWeakness: 'weak call-to-action',
      secondaryWeakness: 'unclear value proposition',
      quickImprovementOpportunity:
        'add a clear call-to-action and highlight your key offer above the fold',
      evidence: 'Several key conversion elements are missing',
      reasoning: 'Missing trust signals and clear calls-to-action reduce visitor confidence',
      industry: 'local service',
      score: 0,
      grade: 'F',
      impact: 30,
    };
  }

  // Sort by score ascending (lowest = worst = primary weakness)
  const factors = _collectFactors(scoreData);
  factors.sort((a, b) => a.score - b.score);

  const primaryWeakness = factors[0] || {
    name: 'weak call-to-action',
    score: 5,
    explanation: 'Multiple conversion elements need improvement',
    reasoning: 'Weak conversion elements reduce lead generation potential',
  };

  // Prefer LLM-generated critical_weaknesses for the secondary weakness name —
  // the scoring LLM already prioritised these; use [1] if two exist, [0] if only one.
  const secondaryWeaknessName = _preferSecondEntry(scoreData.critical_weaknesses);
  const secondaryWeakness = secondaryWeaknessName
    ? {
        name: secondaryWeaknessName,
        score: 6,
        explanation: factors[1]?.explanation || '',
        reasoning: factors[1]?.reasoning || '',
      }
    : factors[1] || {
        name: 'unclear value proposition',
        score: 6,
        explanation: 'Value proposition could be made clearer',
        reasoning: 'Unclear value proposition reduces visitor confidence',
      };

  // Industry comes from the contextual_appropriateness factor when the scorer supplied it
  const industry =
    scoreData.factor_scores?.contextual_appropriateness?.industry_context || 'local service';

  // Estimated impact (conversion loss percentage) from the three worst factors:
  // score 0-10 maps to 50-0%. With no numeric factors the average would be 0/0 = NaN,
  // so fall back to the default instead.
  const worst = factors.slice(0, 3);
  const impact =
    worst.length > 0
      ? Math.round((10 - worst.reduce((sum, f) => sum + f.score, 0) / worst.length) * 5)
      : _DEFAULT_IMPACT;

  const pickEvidence = f => {
    const e = _asText(f?.explanation);
    return _NON_ANSWER.test(e.trim()) ? '' : e;
  };
  const evidence =
    pickEvidence(primaryWeakness) ||
    pickEvidence(secondaryWeakness) ||
    'Several key conversion elements need improvement';
  const reasoning =
    (_NON_ANSWER.test(_asText(primaryWeakness.reasoning).trim())
      ? secondaryWeakness.reasoning
      : primaryWeakness.reasoning) || 'Improving these elements will increase lead generation';

  // Use LLM-generated quick_improvement_opportunities from score_json —
  // prefer [1] (second entry) to avoid repeating the primary weakness, fall back to [0].
  const quickImprovementOpportunity =
    _preferSecondEntry(scoreData.quick_improvement_opportunities) ||
    'add a clear call-to-action and highlight your key offer above the fold';

  const conversionScore = scoreData.overall_calculation?.conversion_score || 0;

  return {
    primaryWeakness: primaryWeakness.name,
    secondaryWeakness: secondaryWeakness.name,
    quickImprovementOpportunity,
    evidence,
    reasoning,
    industry,
    score: Math.round(conversionScore),
    grade: computeGrade(conversionScore),
    impact: Math.max(20, Math.min(50, impact)), // Clamp 20-50%
  };
}

/**
 * Haiku Pass 1: Analyze score_json to extract one concrete recommendation sentence.
 * Returns { industry, recommendation, recommendation_sms } ready for programmatic injection.
 *
 * Makes up to two LLM attempts. This function does not fall back on its own: it
 * throws when both attempts fail.
 * NOTE(review): callers presumably catch and use buildFallbackAnalysis() — confirm.
 *
 * @param {Object} scoreData - Parsed score_json from database
 * @param {string|null} keyword - Raw search keyword (e.g. "heat pump tauranga")
 * @param {string} languageCode - ISO 639-1 language code
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 country code
 * @returns {Promise<{industry: string, recommendation: string, recommendation_sms: string}>}
 * @throws {Error} When the LLM call errors on the final attempt (original error in `cause`),
 *   or when no attempt yields a usable recommendation / SMS fragment.
 */
export async function analyzeScoreJson(
  scoreData,
  keyword,
  languageCode = 'en',
  countryCode = null
) {
  const MAX_ATTEMPTS = 2;
  const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
  const season = getCurrentSeason(countryCode);
  const keywordSection = keyword ? `\nKEYWORD: ${keyword}` : '';
  const seasonSection = season ? `\nSEASON: ${season}` : '';

  const userMessage = `LANGUAGE: ${langName}${keywordSection}${seasonSection}\nSCORE_JSON:\n${wrapUntrusted(
    JSON.stringify(scoreData),
    'score_json'
  )}`;

  const ANALYZE_MODEL = process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';

  // Remember why the last attempt was rejected so the final error names the real cause
  let lastFailure = 'recommendation_sms blank';

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    try {
      const { content } = await callLLM({
        model: ANALYZE_MODEL,
        temperature: 0,
        max_tokens: 300,
        json_mode: true,
        messages: [
          { role: 'system', content: HAIKU_ANALYZE_PROMPT },
          { role: 'user', content: userMessage },
        ],
        stage: 'proposals',
      });

      const result = safeJsonParse(content);
      if (!result || typeof result.recommendation !== 'string' || !result.recommendation.trim()) {
        logger.warn(
          `analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS}: invalid/empty JSON response`
        );
        lastFailure = 'invalid/empty JSON response';
        continue;
      }

      let industry = _asText(result.industry).trim() || _extractIndustry(keyword);
      // If the model returned the raw keyword unchanged (failed to categorise), apply heuristic
      if (keyword && industry.toLowerCase() === keyword.trim().toLowerCase()) {
        industry = _extractIndustry(keyword);
      }
      const recommendation = result.recommendation.trim();
      // Use the model-provided SMS fragment if valid (1-50 chars), else derive from recommendation
      const smsFrag = _asText(result.recommendation_sms).trim();
      const recommendation_sms =
        smsFrag.length > 0 && smsFrag.length <= 50 ? smsFrag : _smsFragment(recommendation);

      if (!recommendation_sms) {
        logger.warn(
          `analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS}: recommendation_sms blank for keyword="${keyword}"`
        );
        lastFailure = 'recommendation_sms blank';
        continue;
      }

      return { industry, recommendation, recommendation_sms };
    } catch (err) {
      logger.warn(`analyzeScoreJson attempt ${attempt}/${MAX_ATTEMPTS} error: ${err.message}`);
      if (attempt < MAX_ATTEMPTS) continue;
      throw new Error(`analyzeScoreJson failed after ${MAX_ATTEMPTS} attempts: ${err.message}`, {
        cause: err,
      });
    }
  }

  throw new Error(`analyzeScoreJson: ${lastFailure} after ${MAX_ATTEMPTS} LLM attempts`);
}
// Two-word keywords that are service names in their own right (no location suffix to strip).
// Built once at module load instead of on every _extractIndustry() call.
const _COMPOUND_SERVICES = new Set([
  'pressure washing',
  'power washing',
  'heat pump',
  'heat pumps',
  'hot water',
  'metal roofing',
  'solar panels',
  'solar power',
  'lawn mowing',
  'lawn care',
  'window cleaning',
  'gutter cleaning',
  'air conditioning',
  'pest control',
  'tree removal',
  'tree service',
  'concrete cutting',
  'carpet cleaning',
  'tile laying',
  'brick laying',
  'pool cleaning',
  'pool repair',
  'roof repair',
  'roof restoration',
  'damp proofing',
  'water damage',
  'fire damage',
  'storm damage',
]);

/**
 * Fallback: extract a bare industry label from a keyword when Haiku is unavailable.
 *   - missing / blank / non-string keyword → 'local service'
 *   - 1 word: returned as-is
 *   - 2 words: strip the last word unless the pair is a known compound service
 *     (compound matches are returned lowercased)
 *   - 3 words: keep intact (likely a compound service like "hot water repairs")
 *   - 4+ words: strip the last word (city/suburb suffix)
 *
 * @param {string|null} keyword - Raw search keyword
 * @returns {string} Industry label
 */
function _extractIndustry(keyword) {
  if (typeof keyword !== 'string') return 'local service';
  const trimmed = keyword.trim();
  if (!trimmed) return 'local service';

  const parts = trimmed.split(/\s+/);
  if (parts.length === 1) return trimmed;
  if (parts.length >= 4) return parts.slice(0, -1).join(' ');
  if (parts.length === 3) return trimmed;

  // 2-word keyword. Join on a single space so "heat   pump" still matches the set.
  const lower = parts.join(' ').toLowerCase();
  if (_COMPOUND_SERVICES.has(lower)) return lower;
  return parts[0]; // strip location (last word)
}

/**
 * Compress a full recommendation sentence into a ≤50 char SMS fragment.
 * Takes the first clause (before an em/en dash or a ", which/so/meaning/this"
 * continuation), strips one trailing ./!/? and truncates at a word boundary.
 *
 * @param {string} rec - Full recommendation sentence
 * @returns {string} Fragment of at most 50 characters ('' when `rec` is empty or not a string)
 */
function _smsFragment(rec) {
  if (!rec || typeof rec !== 'string') return '';
  // Trim BEFORE stripping punctuation — otherwise trailing whitespace hides the final period
  const noTrailingPunct = rec.trim().replace(/[.!?]$/, '').trim();
  const firstClause = noTrailingPunct
    .split(/\s*[—–]\s*|\s*,\s*(?:which|so|meaning|this)\s/i)[0]
    .trim();
  if (firstClause.length <= 50) return firstClause;

  // Truncate at a word boundary, unless that would leave a very short stub (≤20 chars)
  const truncated = firstClause.slice(0, 50);
  const lastSpace = truncated.lastIndexOf(' ');
  return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
}

/**
 * Build a deterministic fallback recommendation from extractTemplateFields() data.
 * Intended for use when the Haiku analysis LLM call fails.
 *
 * @param {Object} scoreData - Parsed score_json
 * @param {string|null} keyword - Raw search keyword
 * @returns {{industry: string, recommendation: string, recommendation_sms: string}}
 */
function buildFallbackAnalysis(scoreData, keyword) {
  // Lowercase the first character and drop one trailing period so clauses can be joined
  const toClause = sentence =>
    sentence.charAt(0).toLowerCase() + sentence.slice(1).replace(/\.$/, '');

  const fields = extractTemplateFields(scoreData);
  const evidenceLower = toClause(fields.evidence || 'several conversion elements need improvement');
  const reasoningLower = toClause(
    fields.reasoning || 'improving these will increase lead generation'
  );

  return {
    industry: _extractIndustry(keyword),
    // "evidence — reasoning"
    recommendation: `${evidenceLower} — ${reasoningLower}`,
    recommendation_sms: _smsFragment(evidenceLower),
  };
}
// Channels that have their own template files; every other channel uses the email templates
const _TEMPLATE_CHANNELS = ['sms', 'email'];

// ISO 639-2 three-letter codes → ISO 639-1 two-letter codes
const _ISO639_2_TO_1 = {
  eng: 'en',
  fra: 'fr',
  deu: 'de',
  spa: 'es',
  ita: 'it',
  por: 'pt',
  nld: 'nl',
  jpn: 'ja',
  kor: 'ko',
  zho: 'zh',
  nor: 'no',
  nob: 'no',
  nno: 'no',
  ind: 'id',
  hin: 'hi',
  pol: 'pl',
  swe: 'sv',
  dan: 'da',
};

/**
 * Read one template file and return its non-empty `templates` array, or null.
 * A missing file is the normal "try the next location" case and stays silent;
 * unreadable or malformed files are logged so a broken template file is not
 * mistaken for a missing one.
 */
function _readTemplateFile(filePath) {
  let raw;
  try {
    raw = readFileSync(filePath, 'utf-8');
  } catch (err) {
    if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') {
      logger.warn(`Template file ${filePath} could not be read — skipping (${err.message})`);
    }
    return null;
  }
  try {
    const data = JSON.parse(raw);
    return Array.isArray(data?.templates) && data.templates.length > 0 ? data.templates : null;
  } catch (err) {
    logger.warn(`Template file ${filePath} is not valid JSON — skipping (${err.message})`);
    return null;
  }
}

/**
 * Load templates for a specific country, language, and channel.
 * Lookup order:
 *   1. data/templates/{countryCode}/{languageCode}/{channel}.json (per-language path)
 *   2. data/templates/{countryCode}/{channel}.json               (legacy flat path)
 *   3. data/templates/{countryCode}/{otherLanguage}/{channel}.json — first language
 *      subdirectory of the same country that has templates
 * Never falls back to another country.
 *
 * @param {string} countryCode - Two-letter country code (AU, US, etc.)
 * @param {string} languageCode - ISO 639-1 code (en, hi, ja, …); common ISO 639-2
 *   three-letter codes are normalised; missing → 'en'
 * @param {string} channel - Contact channel; only 'sms' and 'email' have templates,
 *   any other value is logged and treated as 'email'
 * @returns {Array<Object>} Template objects
 * @throws {Error} If no templates found for this country/language/channel
 */
export function loadTemplates(countryCode, languageCode, channel) {
  let templateChannel = channel;
  if (!_TEMPLATE_CHANNELS.includes(channel)) {
    logger.warn(`Channel ${channel} not supported for templates, using email`);
    templateChannel = 'email';
  }

  const rawLang = String(languageCode || 'en').toLowerCase();
  // Own-property check so values like "constructor" never resolve to prototype members
  const lang = Object.prototype.hasOwnProperty.call(_ISO639_2_TO_1, rawLang)
    ? _ISO639_2_TO_1[rawLang] // eslint-disable-line security/detect-object-injection
    : rawLang;

  const countryDir = join(projectRoot, `data/templates/${countryCode}`);

  // 1. Language-specific subdirectory, then
  // 2. legacy flat path (English) — also the fallback for unrecognized/unsupported langs,
  //    e.g. 'zxx' (no linguistic content), 'fr_ca' (malformed BCP 47), etc.
  const primaryPaths = [
    join(countryDir, lang, `${templateChannel}.json`),
    join(countryDir, `${templateChannel}.json`),
  ];
  for (const filePath of primaryPaths) {
    const templates = _readTemplateFile(filePath);
    if (templates) return templates;
  }

  // 3. Native language fallback — for countries without English flat templates (DE, FR, IT, …):
  //    use the first other language subdirectory that has templates for this channel.
  let entries = [];
  try {
    entries = readdirSync(countryDir, { withFileTypes: true });
  } catch (_) {
    // country dir missing or not readable — handled by the throw below
  }
  for (const entry of entries) {
    if (!entry.isDirectory() || entry.name === lang) continue; // skip already-tried lang
    const templates = _readTemplateFile(join(countryDir, entry.name, `${templateChannel}.json`));
    if (templates) return templates;
  }

  throw new Error(`No templates for ${countryCode}/${lang}/${templateChannel}`);
}

/**
 * Select a template for this outreach.
 *   1. When a firstname is available, restrict the pool to templates that reference
 *      `[firstname` in body or subject (if any exist).
 *   2. Templates where BOTH compared sides have 1000+ sends are ordered by conversion
 *      rate (highest first); otherwise the template with fewer sends wins (rotation testing).
 *   3. Ties are broken uniformly at random.
 *
 * @param {Array<Object>} templates - Available templates
 * @param {Object} fields - Template fields from extractTemplateFields() (currently unused)
 * @param {string} channel - Contact channel (used in the error message only)
 * @param {boolean} [hasFirstname=false] - Whether a contact first name is available
 * @returns {Object} Selected template
 * @throws {Error} If `templates` is missing or empty
 */
export function selectTemplate(templates, fields, channel, hasFirstname = false) {
  if (!templates || templates.length === 0) {
    throw new Error(`No templates available for channel: ${channel}`);
  }

  // When a firstname is available, prefer templates that actually use it.
  let pool = templates;
  if (hasFirstname) {
    const named = templates.filter(
      t =>
        (t.body_spintax || '').includes('[firstname') ||
        (t.subject_spintax || '').includes('[firstname')
    );
    if (named.length > 0) pool = named;
  }

  // Fisher–Yates shuffle on a copy: an unbiased random order, so the stable sort below
  // rotates between equally-ranked templates instead of favouring file order.
  const candidates = [...pool];
  for (let i = candidates.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [candidates[i], candidates[j]] = [candidates[j], candidates[i]];
  }

  candidates.sort((a, b) => {
    const sendsA = a.sends || 0;
    const sendsB = b.sends || 0;

    // After 1000+ sends on both, weight by conversion rate (higher first)
    if (sendsA >= 1000 && sendsB >= 1000) {
      return (b.conversions || 0) / sendsB - (a.conversions || 0) / sendsA;
    }

    // Otherwise, prefer the template with fewer sends (rotation testing)
    return sendsA - sendsB;
  });

  return candidates[0];
}

// Non-person label patterns — safety net after LLM name extraction
const NON_PERSON_WORDS = new Set([
  'office', 'info', 'sales', 'admin', 'support', 'contact', 'hello', 'team',
  'reception', 'enquiries', 'enquiry', 'general', 'main', 'emergency', 'after', 'hours',
  'mobile', 'phone', 'fax', 'sms', 'toll', 'free', 'tollfree', 'hotline',
  'helpdesk', 'helpline', 'service', 'services', 'department', 'head', 'headquarters', 'hq',
  'branch', 'accounts', 'billing', 'booking', 'bookings', 'reservations', 'store', 'shop',
  'centre', 'center', 'clinic', 'group', 'media', 'marketing', 'hr', 'recruiter',
  'recruitment', 'website',
  // Generic directional/scale words that appear in email local-parts but are never firstnames
  'north', 'south', 'east', 'west', 'central', 'national', 'regional', 'local',
]);

/**
 * Deterministic check: does this string look like a real human first name?
 * Safety net for edge cases (city names, phone-type labels, etc.) behind the
 * LLM-based name filtering done upstream.
 *
 * @param {*} name - Candidate name
 * @returns {boolean} false for non-strings, lengths outside 2-25, digits, two or more
 *   hyphens, any known non-person word, or labels of three or more words
 */
function isPersonFirstname(name) {
  if (!name || typeof name !== 'string') return false;
  const trimmed = name.trim();
  if (!trimmed || trimmed.length < 2 || trimmed.length > 25) return false;
  if (/\d/.test(trimmed)) return false; // contains digits
  if (trimmed.split('-').length > 2) return false; // two or more hyphens
  // Reject if any word in the label is a known non-person word
  const words = trimmed.toLowerCase().split(/[\s-]+/);
  if (words.some(w => NON_PERSON_WORDS.has(w))) return false;
  // Reject labels with 3+ words (e.g. "Head Of Marketing")
  return words.length < 3;
}

/**
 * Populate a spintax template with extracted fields.
 *
 * Tokens are written `[key]` or `[key|fallback]`. They are resolved BEFORE spinning:
 * `[firstname|there]` inside a `{…}` spintax group contains a `|` that the spintax
 * engine would otherwise split on, producing garbled output like "Hey [firstname"
 * or "there]!" as separate spin options. A token with no value resolves to its
 * fallback, or to '' when it has none (unknown keys included).
 *
 * @param {string} template - Template string with [tokens] and {spin|tax}
 * @param {Object} fields - Template fields (grade, score, impact, optional sites_scored)
 * @param {Object} siteData - Site data (domain is required; keyword is optional)
 * @param {Object|null} [contact] - Contact info (name, channel, uri)
 * @param {Object|null} [analysisData] - Pass 1 output {industry, recommendation, recommendation_sms}
 * @returns {string} Populated proposal text
 */
export function populateTemplate(template, fields, siteData, contact = null, analysisData = null) {
  // Business name from the first domain label (simple approach); a leading "www."
  // is dropped so "www.joes-plumbing.com" does not become "www".
  const businessName = siteData.domain
    .replace(/^www\./i, '')
    .split('.')[0]
    .replace(/-/g, ' ');

  // Greeting: first word of the contact name, capitalised — only when it passes the
  // deterministic person-name check. Trim first so leading whitespace cannot produce
  // an empty first word. Empty string lets templates control their own locale fallback:
  //   [firstname|there] → "Hi there"; [firstname|] → empty, punctuation cleanup fixes "Hi ,"
  const firstname = typeof contact?.name === 'string' ? contact.name.trim() : '';
  const greetingRaw = isPersonFirstname(firstname) ? firstname.split(/\s+/)[0] : '';
  const greeting = greetingRaw ? greetingRaw.charAt(0).toUpperCase() + greetingRaw.slice(1) : '';

  const recommendationRaw = (analysisData?.recommendation || '').trim();
  const recommendation =
    recommendationRaw && !/[.!?]$/.test(recommendationRaw)
      ? `${recommendationRaw}.`
      : recommendationRaw;

  // A Map (not a plain object) so tokens such as [constructor] can never resolve to
  // inherited prototype members.
  const replacements = new Map([
    ['business_name', businessName],
    ['firstname', greeting],
    ['grade', fields.grade],
    ['score', fields.score],
    ['industry', analysisData?.industry || _extractIndustry(siteData.keyword)],
    ['impact', fields.impact],
    ['domain', siteData.domain],
    ['recommendation', recommendation],
    ['recommendation_sms', (analysisData?.recommendation_sms || '').trim()],
    ['sites_scored', fields.sites_scored || null],
  ]);

  const resolved = template.replace(/\[(\w+)(?:\|([^\]]*))?\]/g, (_match, key, fallback) => {
    const val = replacements.get(key);
    if (val !== null && val !== undefined && val !== '') return String(val);
    return fallback ?? '';
  });

  // Spin after variable resolution so {option1|option2} has clean, unambiguous input.
  // Then clean up spacing artifacts (e.g. empty value leaves "Hi ,"); collapse multiple
  // spaces only — \n\n paragraph breaks are preserved.
  return spin(resolved)
    .replace(/ ([,!?.;:])/g, '$1')
    .replace(/ {2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Throw if a lowercase `[field]` token (letters and underscores only) remains in the text.
 *
 * @param {string} text - Text to check (empty/missing text passes)
 * @param {string} label - Descriptive label for the error message (e.g. 'body', 'subject')
 * @throws {Error} Naming the first unfilled token found
 */
export function checkForUnfilledTokens(text, label) {
  if (!text) return;
  const match = text.match(/\[[a-z_]+\]/);
  if (match) {
    throw new Error(`Unfilled token ${match[0]} in ${label} — template population incomplete`);
  }
}

// Countries in the Southern hemisphere (seasons are flipped relative to Northern)
const SOUTHERN_COUNTRIES = new Set([
  'AU', 'NZ', 'ZA', 'AR', 'CL', 'BR', 'PE', 'UY', 'PY',
  'BO', 'EC', 'MZ', 'ZW', 'BW', 'NA', 'LS', 'SZ', 'MG',
]);

// Tropical/equatorial countries — no meaningful seasons
const TROPICAL_COUNTRIES = new Set([
  'SG', 'MY', 'ID', 'TH', 'PH', 'VN', 'KH', 'MM', 'BN', 'PG', 'FJ', 'TL',
]);
/**
 * Return the current meteorological season for a country.
 *
 * Returns null when no country code is given or the country is in
 * TROPICAL_COUNTRIES (no meaningful seasons). Countries in SOUTHERN_COUNTRIES get
 * flipped seasons; every other code — including unrecognised ones — is treated
 * as Northern hemisphere. The code is matched case-insensitively.
 *
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (e.g. 'AU', 'US')
 * @param {Date} [date=new Date()] - Date to use (defaults to today; local month is used)
 * @returns {'Spring'|'Summer'|'Autumn'|'Winter'|null}
 */
export function getCurrentSeason(countryCode, date = new Date()) {
  if (!countryCode) return null;
  // Normalise so 'au' / ' nz ' hit the upper-case sets instead of silently reading as Northern
  const code = String(countryCode).trim().toUpperCase();
  if (!code || TROPICAL_COUNTRIES.has(code)) return null;

  const month = date.getMonth(); // 0=Jan … 11=Dec
  const southern = SOUTHERN_COUNTRIES.has(code);

  // Northern meteorological seasons; flip for Southern
  if (month >= 2 && month <= 4) return southern ? 'Autumn' : 'Spring';
  if (month >= 5 && month <= 7) return southern ? 'Winter' : 'Summer';
  if (month >= 8 && month <= 10) return southern ? 'Spring' : 'Autumn';
  return southern ? 'Summer' : 'Winter'; // Dec, Jan, Feb
}

// ISO 639-1 code → full language name for the LLM prompts.
// Codes not listed here fall back to 'English' at the call sites.
const POLISH_LANG_NAMES = {
  en: 'English',
  de: 'German',
  fr: 'French',
  it: 'Italian',
  ja: 'Japanese',
  ko: 'Korean',
  zh: 'Chinese',
  es: 'Spanish',
  pt: 'Portuguese', // loadTemplates() normalises 'por' → 'pt', so the prompt must know it too
  nl: 'Dutch',
  pl: 'Polish',
  sv: 'Swedish',
  da: 'Danish',
  no: 'Norwegian',
  id: 'Indonesian',
  hi: 'Hindi',
};
/**
 * Haiku Pass 2: Polish a fully-populated proposal.
 * All [field] tokens must already be filled before calling this.
 * Tasks delegated to the LLM: fix grammar, capitalisation, punctuation; compress SMS
 * to ≤160 chars; polish the subject line. Does NOT fill tokens or analyse score_json.
 *
 * Never throws for LLM problems: on a thrown error, an invalid/empty response, or a
 * response that lost paragraph breaks, the original text is returned instead.
 *
 * @param {string} text - Proposal body (all tokens pre-filled)
 * @param {string} channel - Contact channel (sms, email, form, x, linkedin)
 * @param {string} languageCode - ISO 639-1 language code (en, de, fr, etc.)
 * @param {string|null} subjectLine - Email subject line (email/form only)
 * @param {string|null} countryCode - ISO 3166-1 alpha-2 code (for season constraint)
 * @returns {Promise<{text: string, subjectLine: string|null}>} Polished text and subject
 */
export async function polishProposal(
  text,
  channel,
  languageCode = 'en',
  subjectLine = null,
  countryCode = null
) {
  const forcePolish = process.env.HAIKU_POLISH_DEBUG === '1';

  // Circuit breaker: skip the LLM call when the JSON failure rate is high.
  // Bypass with HAIKU_POLISH_DEBUG=1 for troubleshooting.
  if (_polishBreaker.isOpen() && !forcePolish) {
    return { text, subjectLine };
  }

  const isEmailChannel = channel === 'email' || channel === 'form';

  // SMS-specific fast path: skip polish when text is already within the 160-char limit.
  // The only SMS task is compression — if already short, polishing adds nothing and
  // sometimes garbles short texts (merged words, structural JSON errors).
  // 155 chars = 5-char buffer for sender IDs that compliance may append after this call.
  if (channel === 'sms' && text.length <= 155 && !forcePolish) {
    return { text, subjectLine: null };
  }

  const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
  const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
  const season = getCurrentSeason(countryCode);
  const seasonSection = season ? `\nSEASON: ${season}` : '';

  const userMessage = `CHANNEL: ${channel}\nLANGUAGE: ${langName}${seasonSection}\n---\nPROPOSAL BODY:\n${text}${subjectSection}`;

  try {
    const { content } = await callLLM({
      model: POLISH_MODEL,
      temperature: 0,
      max_tokens: 3000,
      json_mode: true,
      messages: [
        { role: 'system', content: HAIKU_POLISH_PROMPT },
        { role: 'user', content: userMessage },
      ],
      stage: 'proposals',
    });

    const result = safeJsonParse(content);
    // Only a non-empty string counts as a body; type-check before trimming so a
    // numeric/object body is reported as invalid rather than throwing on .trim()
    const body = typeof result?.body === 'string' ? result.body.trim() : '';
    if (!body) {
      _polishBreaker.record();
      logger.warn(`Polish (${POLISH_MODEL}) returned invalid response — using original`, {
        hasResult: !!result,
        bodyType: result ? typeof result.body : 'n/a',
        bodyEmpty: typeof result?.body === 'string' && result.body.trim() === '',
        keys: result ? Object.keys(result) : [],
        contentSnippet: typeof content === 'string' ? content.substring(0, 100) : undefined,
        breakerFailures: _polishBreaker.failures.length,
      });
      return { text, subjectLine };
    }

    // Guard: paragraph breaks (double newlines) are structural — losing them breaks email
    // formatting. If the polished body has fewer breaks than the input, keep the original
    // body. Counted on the trimmed body so leading/trailing blank lines cannot mask a
    // collapsed internal break.
    const inputBreaks = (text.match(/\n\n/g) || []).length;
    const outputBreaks = (body.match(/\n\n/g) || []).length;
    let polishedBody = body;
    if (inputBreaks >= 1 && outputBreaks < inputBreaks) {
      logger.warn('Polish collapsed paragraph breaks — keeping original body');
      polishedBody = text;
    }

    // A missing or non-string subject must not discard the polished body
    const polishedSubject = typeof result.subject === 'string' ? result.subject.trim() : '';

    return {
      text: polishedBody,
      subjectLine: isEmailChannel ? polishedSubject || subjectLine : null,
    };
  } catch (err) {
    logger.warn(`Polish (${POLISH_MODEL}) failed: ${err.message} — using original`);
    return { text, subjectLine };
  }
}
860 * Kept for backwards compatibility — delegates to the new polish function. 861 */ 862 export async function shortenSmsWithHaiku(text) { 863 const polished = await polishProposal(text, 'sms', 'en', null, null); 864 return polished.text; 865 } 866 867 /** @deprecated Renamed to polishProposal */ 868 export const polishProposalWithHaiku = polishProposal; 869 870 /** 871 * Generate proposal using templates (replaces LLM generation) 872 * @param {Object} siteData - Site data from database (must include country_code, language_code) 873 * @param {Object} scoreData - Parsed score_json 874 * @param {Object} contact - Contact object {name, channel, uri} 875 * @returns {Promise<Object>} Generated proposal {proposalText, templateId, subjectLine} 876 * @throws {Error} If no templates found for this country/language/channel 877 */ 878 export async function generateTemplateProposal( 879 siteData, 880 scoreData, 881 contact, 882 cachedAnalysis = null 883 ) { 884 const fields = extractTemplateFields(scoreData); 885 const countryCode = siteData.country_code || 'AU'; 886 const languageCode = siteData.language_code || null; 887 const channel = contact.channel || 'email'; 888 889 // Pass 1: Haiku analyzes score_json → concrete recommendation sentence + industry. 890 // Use cachedAnalysis if provided (same result for all contacts on a site). 
891 const analysisData = 892 cachedAnalysis || 893 (await analyzeScoreJson( 894 scoreData, 895 siteData.keyword || null, 896 languageCode || 'en', 897 countryCode 898 )); 899 900 // Throws if no template found — caller handles the error 901 const templates = loadTemplates(countryCode, languageCode, channel); 902 const template = selectTemplate(templates, fields, channel, !!contact?.name); 903 904 // Populate template — all tokens including [recommendation]/[industry] filled here 905 let proposalText = populateTemplate( 906 template.body_spintax, 907 fields, 908 siteData, 909 contact, 910 analysisData 911 ); 912 913 // If a firstname is known but didn't land in this spin (optional greeting branch missed), 914 // re-spin up to 9 more times — (0.5)^10 ≈ 0.1% chance of still missing after 10 tries. 915 if (contact?.name) { 916 const { name } = contact; 917 for (let i = 0; i < 9 && !proposalText.includes(name); i++) { 918 proposalText = populateTemplate( 919 template.body_spintax, 920 fields, 921 siteData, 922 contact, 923 analysisData 924 ); 925 } 926 } 927 928 // SMS: re-spin up to 3 times to try to reach ≤160 chars before the polish pass 929 if (channel === 'sms' && proposalText.length > 160) { 930 for (let i = 0; i < 3; i++) { 931 const candidate = populateTemplate( 932 template.body_spintax, 933 fields, 934 siteData, 935 contact, 936 analysisData 937 ); 938 if (candidate.length < proposalText.length) proposalText = candidate; 939 if (proposalText.length <= 160) break; 940 } 941 } 942 943 // Email (and form-as-email) use subject lines; SMS/linkedin/x do not 944 const usesEmailTemplate = channel === 'email' || channel === 'form'; 945 let rawSubjectLine = null; 946 947 if (usesEmailTemplate) { 948 // Prefer selected template's subject_spintax, fall back to others in the same file. 949 // Throws if no usable subject_spintax exists — prevents silent blank subjects. 
950 const subjectCandidates = [template, ...templates.filter(t => t.id !== template.id)]; 951 let subjectSourceId = null; 952 953 for (const t of subjectCandidates) { 954 if (!t.subject_spintax?.trim()) continue; 955 const candidate = populateTemplate( 956 t.subject_spintax, 957 fields, 958 siteData, 959 contact, 960 analysisData 961 ); 962 if (candidate) { 963 rawSubjectLine = candidate; 964 subjectSourceId = t.id; 965 break; 966 } 967 } 968 969 if (!rawSubjectLine) { 970 throw new Error( 971 `No usable subject_spintax found in any template for ${countryCode}/${languageCode}/${channel}` 972 ); 973 } 974 975 if (subjectSourceId !== template.id) { 976 logger.warn( 977 `Template ${template.id} has no usable subject_spintax — using subject from ${subjectSourceId}` 978 ); 979 } 980 } 981 982 // Bracket check after population — any remaining [token] means a template bug 983 checkForUnfilledTokens(proposalText, 'body'); 984 if (rawSubjectLine) checkForUnfilledTokens(rawSubjectLine, 'subject'); 985 986 // Pass 2: polish grammar, capitalisation, punctuation; compress SMS to ≤160 chars. 987 // Does NOT fill tokens — all tokens are already resolved above. 988 const polished = await polishProposal( 989 proposalText, 990 channel, 991 languageCode || 'en', 992 rawSubjectLine, 993 countryCode || null 994 ); 995 996 // Bracket check after polish — catches any token LLM may have reintroduced (shouldn't happen) 997 checkForUnfilledTokens(polished.text, 'polished body'); 998 if (polished.subjectLine) checkForUnfilledTokens(polished.subjectLine, 'polished subject'); 999 1000 logger.info( 1001 `Generated ${channel} proposal using template ${template.id} for ${siteData.domain} (${countryCode}/${languageCode})` 1002 ); 1003 1004 return { 1005 proposalText: polished.text, 1006 templateId: template.id, 1007 subjectLine: polished.subjectLine ?? 
rawSubjectLine, 1008 }; 1009 } 1010 1011 export default { 1012 extractTemplateFields, 1013 analyzeScoreJson, 1014 loadTemplates, 1015 selectTemplate, 1016 populateTemplate, 1017 checkForUnfilledTokens, 1018 generateTemplateProposal, 1019 };