proposal-generator-v2.js
1 #!/usr/bin/env node 2 3 /** 4 * Proposal Generator V2 5 * Generates 3 variants of personalized outreach proposals using the new Proposal.md prompt 6 * Stores in messages table with pending approval_status for operator approval 7 */ 8 9 import { run, getOne, getAll } from './utils/db.js'; 10 import { execFileSync } from 'child_process'; 11 import { readFileSync, existsSync, readdirSync } from 'fs'; 12 import { join, dirname } from 'path'; 13 import { fileURLToPath } from 'url'; 14 import Logger from './utils/logger.js'; 15 import { retryWithBackoff, isRetryableError, safeJsonParse } from './utils/error-handler.js'; 16 import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js'; 17 import { spin } from './utils/spintax.js'; 18 import { openRouterLimiter } from './utils/rate-limiter.js'; 19 import { openRouterBreaker } from './utils/circuit-breaker.js'; 20 import { callLLM, getProviderDisplayName } from './utils/llm-provider.js'; 21 import { getCountryByCode, COUNTRIES } from './config/countries.js'; 22 import { computeGrade } from './score.js'; 23 import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js'; 24 import { addCountryCode } from './utils/phone-normalizer.js'; 25 import { generatePromptRecommendations } from './utils/prompt-learning.js'; 26 import { getCurrentSeason } from './utils/template-proposals.js'; 27 import { wrapUntrusted, stripInjectionMarkers, sanitizeLlmOutput } from './utils/llm-sanitizer.js'; 28 import { validateProposalResponse } from './utils/llm-response-validator.js'; 29 import { getScoreDataWithFallback } from './utils/score-storage.js'; 30 import { getContactsDataWithFallback } from './utils/contacts-storage.js'; 31 import { parseEnvSet } from './utils/load-env.js'; 32 33 const __filename = fileURLToPath(import.meta.url); 34 const __dirname = dirname(__filename); 35 const projectRoot = join(__dirname, '..'); 36 37 const logger = new Logger('ProposalGeneratorV2'); 38 39 // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS). 40 const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES); 41 42 // Load proposal prompt and best practices, injecting brand/persona env vars 43 const PROPOSAL_PROMPT = readFileSync(join(projectRoot, 'prompts/PROPOSAL.md'), 'utf-8') 44 .replaceAll('[BRAND_NAME]', process.env.BRAND_NAME || 'the brand') 45 .replaceAll('BRAND_DOMAIN', process.env.BRAND_DOMAIN || 'the site'); 46 const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8'); 47 const EMAIL_BEST_PRACTICES = readFileSync( 48 join(projectRoot, 'docs/05-outreach/email-best-practices.md'), 49 'utf-8' 50 ); 51 52 // SMS best practices (from CLAUDE.md compliance section) 53 const SMS_BEST_PRACTICES = `# SMS Best Practices (TCPA Compliance) 54 55 ## Core Requirements 56 57 **Prior Consent**: TCPA requires explicit prior written consent before sending marketing SMS. For cold outreach: 58 - Initial SMS must be informational, not promotional 59 - Must provide opt-out instructions 60 - Cannot include hard sales pitches without consent 61 62 **Opt-Out Instructions**: TCPA (US/CA only) requires opt-out instructions in every SMS: 63 - Required for: US, CA only 64 - Standard: "Reply STOP to opt out" 65 - NOT required for: AU, GB, NZ, IE, ZA, IN (different regulatory regimes) 66 - Must honor opt-outs immediately for all markets 67 - Store opt-outs permanently 68 69 **Business Hours**: SMS must be sent during reasonable hours: 70 - 8am - 9pm in recipient's local timezone 71 - Avoid weekends for initial outreach 72 - Consider industry-specific work hours 73 74 **Sender Identification**: Clearly identify who is sending: 75 - Include business name in first message 76 - Provide contact information 77 - Be transparent about purpose 78 79 **Message Length**: Keep SMS concise: 80 - Aim for <160 characters to avoid segmentation 81 - Ultra-short, urgent, personal tone 82 - Single clear call-to-action 83 84 **Compliance Checklist**: 85 - [ ] Explicit consent obtained (or informational-only content) 86 - [ ] Opt-out instructions included 87 - [ ] Sent during business hours (8am-9pm local time) 88 - [ ] Sender clearly identified 89 - [ ] Message under 160 characters 90 - [ ] No misleading or deceptive content 91 - [ ] Opt-outs honored immediately 92 `; 93 94 // Model configuration (from env or default) 95 const PROPOSAL_MODEL = 96 process.env.PROPOSAL_MODEL || process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5'; 97 98 // Language code → full name for polish prompt 99 const POLISH_LANG_NAMES = { 100 en: 'English', 101 de: 'German', 102 fr: 'French', 103 it: 'Italian', 104 ja: 'Japanese', 105 ko: 'Korean', 106 es: 'Spanish', 107 nl: 'Dutch', 108 pl: 'Polish', 109 sv: 'Swedish', 110 da: 'Danish', 111 id: 'Indonesian', 112 hi: 'Hindi', 113 }; 114 115 /** 116 * Polish a generated proposal via Claude CLI (uses Max subscription, zero cost). 117 * Falls back to OpenRouter if CLI is unavailable. 118 */ 119 async function polishProposal( 120 text, 121 channel, 122 languageCode = 'en', 123 subjectLine = null, 124 countryCode = null 125 ) { 126 const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection 127 const isEmailChannel = channel === 'email' || channel === 'form'; 128 129 const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : ''; 130 const season = getCurrentSeason(countryCode); 131 const seasonSection = season ? `\nSEASON: ${season}` : ''; 132 133 const userMessage = `CHANNEL: ${channel} 134 LANGUAGE: ${langName}${seasonSection} 135 --- 136 PROPOSAL BODY: 137 ${wrapUntrusted(text, 'proposal_draft')}${subjectSection}`; 138 139 // Try Claude CLI first (Max subscription, zero cost) 140 try { 141 const cliInput = `${HAIKU_POLISH_PROMPT}\n\n---\n\n${userMessage}`; 142 const output = execFileSync('claude', ['-p', '--model', 'haiku', '--output-format', 'json'], { 143 input: cliInput, 144 encoding: 'utf-8', 145 timeout: 30000, 146 maxBuffer: 1024 * 1024, 147 }); 148 149 // Claude CLI with --output-format json wraps response in a JSON object 150 const cliResult = safeJsonParse(output); 151 // The CLI returns {result: "..."} — the result contains the LLM output 152 const llmOutput = cliResult?.result || output; 153 const parsed = safeJsonParse(llmOutput); 154 155 if (parsed && typeof parsed.body === 'string' && parsed.body.trim() !== '') { 156 return { 157 text: parsed.body.trim(), 158 subjectLine: isEmailChannel ? parsed.subject?.trim() || subjectLine : null, 159 }; 160 } 161 logger.warn('Polish (claude CLI) returned invalid response — falling back to OpenRouter'); 162 } catch (cliErr) { 163 logger.warn(`Polish (claude CLI) failed: ${cliErr.message} — falling back to OpenRouter`); 164 } 165 166 // Fallback: OpenRouter 167 try { 168 const { content } = await openRouterBreaker.fire(async () => 169 openRouterLimiter.schedule(() => 170 callLLM({ 171 model: 'anthropic/claude-haiku-4-5', 172 temperature: 0, 173 max_tokens: 3000, 174 json_mode: true, 175 messages: [ 176 { role: 'system', content: HAIKU_POLISH_PROMPT }, 177 { role: 'user', content: userMessage }, 178 ], 179 stage: 'proposals', 180 }) 181 ) 182 ); 183 184 const result = safeJsonParse(content); 185 if (!result || typeof result.body !== 'string' || result.body.trim() === '') { 186 logger.warn('Polish (OpenRouter fallback) returned invalid response — using original'); 187 return { text, subjectLine }; 188 } 189 190 return { 191 text: result.body.trim(), 192 subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null, 193 }; 194 } catch (err) { 195 logger.warn(`Polish (OpenRouter fallback) failed: ${err.message} — using original`); 196 return { text, subjectLine }; 197 } 198 } 199 200 /** 201 * Generate proposal variants for a site and store in messages table 202 * @param {number} siteId - Site ID from database 203 * @param {string} reworkInstructions - Optional feedback for rework 204 * @returns {Promise<Object>} Generated proposals 205 */ 206 export async function generateProposalVariants(siteId, reworkInstructions = null) { 207 const _t0 = Date.now(); 208 209 // Get site data (including country metadata) 210 const siteData = await getOne( 211 `SELECT 212 s.*, 213 s.score, 214 s.grade, 215 s.country_code, 216 COALESCE(s.google_domain, 'google.com.au') as google_domain, 217 COALESCE(s.language_code, 'en') as language_code, 218 COALESCE(s.currency_code, 'AUD') as currency_code 219 FROM sites s 220 WHERE s.id = $1`, 221 [siteId] 222 ); 223 224 if (!siteData) { 225 throw new Error(`Site not found: ${siteId}`); 226 } 227 228 // Language override: if site language_code is 'en' but country's primary language isn't English 229 if (siteData.language_code === 'en' && siteData.country_code) { 230 const countryInfo = getCountryByCode(siteData.country_code); 231 if (countryInfo && countryInfo.language !== 'en') { 232 logger.info( 233 `[proposals] Language override: ${siteData.country_code} site has language_code='en' but country language is '${countryInfo.language}' — using '${countryInfo.language}'` 234 ); 235 siteData.language_code = countryInfo.language; 236 } 237 } 238 239 // Guard: country blocked from ALL outreach channels 240 const blockedCountries = new Set( 241 (process.env.OUTREACH_BLOCKED_COUNTRIES || '') 242 .split(',') 243 .map(s => s.trim().toUpperCase()) 244 .filter(Boolean) 245 ); 246 if (blockedCountries.has(siteData.country_code)) { 247 const msg = `country ${siteData.country_code} blocked via OUTREACH_BLOCKED_COUNTRIES — skipping proposal generation`; 248 logger.info(` ⏭️ ${siteData.domain}: ${msg}`); 249 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 250 } 251 252 // Guard: country_code must be known 253 if (!siteData.country_code) { 254 const msg = 255 'country_code is unknown — re-enrich site to detect country before generating proposals'; 256 await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]); 257 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 258 } 259 260 // Guard: a template must exist for this country 261 const templatesDir = join(__dirname, '../data/templates'); 262 const templatePath = join(templatesDir, siteData.country_code, 'email.json'); 263 if (!existsSync(templatePath)) { 264 const msg = `no email template for country ${siteData.country_code} — add data/templates/${siteData.country_code}/email.json to enable outreach`; 265 await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]); 266 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 267 } 268 269 // Check if site meets low-score threshold (unless this is a rework) 270 if (!reworkInstructions) { 271 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 272 if (siteData.score >= cutoff) { 273 throw new Error( 274 `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.` 275 ); 276 } 277 } 278 279 logger.info(`Generating proposal variants for ${siteData.domain} (${siteData.keyword})`); 280 281 // Parse JSON data (filesystem first, DB fallback) 282 const scoreData = getScoreDataWithFallback(siteData.id, siteData); 283 let contactData = getContactsDataWithFallback(siteData.id, siteData); 284 285 // Clean up invalid Twitter/X.com links (those without usernames) 286 if (contactData) { 287 contactData = cleanInvalidSocialLinks(contactData); 288 } 289 290 // Get all contacts with Haiku-resolved first names 291 const _tNames = Date.now(); 292 const allContacts = contactData 293 ? await getAllContactsWithNames(contactData, siteData.country_code) 294 : []; 295 logger.debug( 296 `[proposals] name-extract ${Date.now() - _tNames}ms (${allContacts.length} contacts) for ${siteData.domain}` 297 ); 298 299 if (allContacts.length === 0) { 300 logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`); 301 return { 302 siteId, 303 domain: siteData.domain, 304 keyword: siteData.keyword, 305 outreachIds: [], 306 variants: [], 307 reasoning: 'No contacts found', 308 contactCount: 0, 309 }; 310 } 311 312 // Filter out contacts that already have outbound messages 313 const existingMessages = await getAll( 314 `SELECT contact_method, contact_uri 315 FROM messages 316 WHERE site_id = $1 AND direction = 'outbound'`, 317 [siteId] 318 ); 319 320 const existingSet = new Set( 321 existingMessages.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`) 322 ); 323 324 const countryUpper = (siteData.country_code || '').toUpperCase(); 325 const availableContacts = allContacts.filter(contact => { 326 const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`; 327 if (existingSet.has(key)) return false; 328 // Skip SMS contacts for countries without legal basis (DR-121) 329 if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(countryUpper)) { 330 logger.info(`Skipping SMS contact for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`); 331 return false; 332 } 333 return true; 334 }); 335 336 if (availableContacts.length === 0) { 337 logger.warn( 338 `All ${allContacts.length} contacts for ${siteData.domain} already have messages - skipping` 339 ); 340 return { 341 siteId, 342 domain: siteData.domain, 343 keyword: siteData.keyword, 344 outreachIds: [], 345 variants: [], 346 reasoning: 'All contacts already have messages', 347 contactCount: allContacts.length, 348 }; 349 } 350 351 if (availableContacts.length < allContacts.length) { 352 logger.info( 353 `Filtered ${allContacts.length - availableContacts.length} contacts with existing messages` 354 ); 355 } 356 357 logger.info(`Found ${availableContacts.length} contacts - generating personalized proposals`); 358 359 // Generate one proposal per contact 360 const _tGen = Date.now(); 361 const proposalData = await generateProposalsWithAI( 362 siteData, 363 scoreData, 364 availableContacts, 365 reworkInstructions, 366 siteId 367 ); 368 369 logger.debug(`[proposals] generation ${Date.now() - _tGen}ms for ${siteData.domain}`); 370 371 // Validate that we got the right number of variants 372 if (proposalData.variants.length !== availableContacts.length) { 373 throw new Error( 374 `Expected ${availableContacts.length} variants but got ${proposalData.variants.length}` 375 ); 376 } 377 378 // Polish all variants concurrently then store 379 const tPolish = Date.now(); 380 const polishedVariants = await Promise.all( 381 availableContacts.map(async (contact, i) => { 382 const variant = proposalData.variants[i]; // eslint-disable-line security/detect-object-injection 383 const contactMethod = normalizeContactMethod(contact.channel); 384 const isEmailChannel = contactMethod === 'email' || contactMethod === 'form'; 385 const polished = await polishProposal( 386 variant.proposal_text, 387 contactMethod, 388 siteData.language_code || 'en', 389 isEmailChannel ? proposalData.subject_line : null, 390 siteData.country_code || null 391 ); 392 return { contact, variant, contactMethod, polished }; 393 }) 394 ); 395 logger.debug( 396 `[proposals] polish ${Date.now() - tPolish}ms for ${availableContacts.length} contacts` 397 ); 398 399 const outreachIds = []; 400 for (const { contact, variant, polished } of polishedVariants) { 401 // Store outreach entry (with GDPR filtering) using polished content 402 const outreachId = await storeProposalVariant( 403 siteId, 404 { ...variant, proposal_text: polished.text }, 405 polished.subjectLine ?? proposalData.subject_line, 406 contact, 407 siteData 408 ); 409 outreachIds.push(outreachId); 410 411 const nameInfo = contact.name ? ` (${contact.name})` : ''; 412 const contactMethod = normalizeContactMethod(contact.channel); 413 logger.success( 414 `Stored outreach #${outreachId}: V${variant.variant_number} → ${contactMethod}${nameInfo} (${contact.uri})` 415 ); 416 } 417 418 // Update site status 419 await run( 420 `UPDATE sites 421 SET status = 'proposals_drafted', 422 updated_at = CURRENT_TIMESTAMP 423 WHERE id = $1`, 424 [siteId] 425 ); 426 427 logger.info( 428 `[proposals] total ${Date.now() - _t0}ms for ${siteData.domain} (${allContacts.length} contacts)` 429 ); 430 logger.success( 431 `Generated ${proposalData.variants.length} proposal variants and created ${outreachIds.length} outreach entries for ${siteData.domain}` 432 ); 433 434 return { 435 siteId, 436 domain: siteData.domain, 437 keyword: siteData.keyword, 438 outreachIds, 439 variants: proposalData.variants, 440 reasoning: proposalData.reasoning, 441 contactCount: allContacts.length, 442 }; 443 } 444 445 /** 446 * Generate proposal variants using LLM API 447 * @param {Object} siteData - Site data 448 * @param {Object} scoreData - Scoring data 449 * @param {Array} contacts - Array of contacts from getAllContacts() 450 * @param {string} reworkInstructions - Optional rework instructions 451 * @param {number} siteId - Site ID for usage tracking 452 * @returns {Promise<Object>} Proposal data with variants 453 */ 454 455 async function generateProposalsWithAI( 456 siteData, 457 scoreData, 458 contacts, 459 reworkInstructions, 460 siteId 461 ) { 462 // Extract business type from keyword 463 const businessType = extractBusinessType(siteData.keyword); 464 465 // Build context for the AI 466 const context = await buildProposalContext(siteData, scoreData, contacts, businessType); 467 468 // Add rework instructions if provided 469 const reworkSection = reworkInstructions 470 ? `\n\nREWORK INSTRUCTIONS FROM OPERATOR:\n${reworkInstructions}\n\nPlease incorporate this feedback into the new variants.` 471 : ''; 472 473 const userPrompt = `${context}${reworkSection}`; 474 475 return retryWithBackoff( 476 async () => { 477 // Calculate max_tokens based on number of contacts 478 const maxTokens = Math.max(8192, contacts.length * 1200); 479 480 const response = await openRouterBreaker.fire(async () => 481 openRouterLimiter.schedule(() => 482 callLLM({ 483 model: PROPOSAL_MODEL, 484 messages: [ 485 { role: 'system', content: PROPOSAL_PROMPT }, 486 { role: 'user', content: userPrompt }, 487 ], 488 temperature: 0.7, 489 max_tokens: maxTokens, 490 json_mode: true, 491 stage: 'proposals', 492 siteId, 493 }) 494 ) 495 ); 496 497 const { content, usage } = response; 498 499 logger.info( 500 `Generated proposals using ${getProviderDisplayName()} - ${usage.promptTokens + usage.completionTokens} tokens` 501 ); 502 503 if (!content) { 504 throw new Error('No content in API response'); 505 } 506 507 const result = safeJsonParse(content); 508 509 if (!result || !result.variants || !Array.isArray(result.variants)) { 510 throw new Error('Invalid proposal response format'); 511 } 512 513 if (result.variants.length !== contacts.length) { 514 throw new Error(`Expected ${contacts.length} variants but got ${result.variants.length}`); 515 } 516 517 // Validate response structure and check for suspicious URLs 518 validateProposalResponse(result, contacts.length); 519 520 // Programmatic content validation — retry if LLM ignores constraints 521 const freePattern = 522 /\bfree\s+(audit|report|assessment|consultation|review|strategy\s+call|snapshot|analysis|evaluation|estimate|quote)\b|\bno\s+cost\b|\bat\s+no\s+charge\b|\bcomplimentary\b/i; 523 const phonePattern = /\+\d[\d\s\-().]{6,}/; 524 for (const variant of result.variants) { 525 const text = variant.proposal_text || ''; 526 if (freePattern.test(text)) { 527 throw new Error( 528 `LLM promised a free service in variant ${variant.variant_number} — retry required` 529 ); 530 } 531 if (phonePattern.test(text)) { 532 throw new Error( 533 `LLM hallucinated a phone number in variant ${variant.variant_number} — retry required` 534 ); 535 } 536 } 537 538 return result; 539 }, 540 { 541 maxRetries: 3, 542 shouldRetry: error => error.message.includes('retry required') || isRetryableError(error), 543 onRetry: (attempt, error) => { 544 logger.warn(`Retry ${attempt + 1}/3 for ${siteData.domain}: ${error.message}`); 545 }, 546 } 547 ); 548 } 549 550 /** 551 * Get top competitor for a keyword 552 * @param {Object} siteData - Site data 553 * @returns {Promise<Object|null>} Competitor info {domain, score, grade} or null 554 */ 555 async function getTopCompetitor(siteData) { 556 // Extract industry from score_json (filesystem first, DB fallback) 557 const scoreJson = getScoreDataWithFallback(siteData.id, siteData); 558 const industry = scoreJson?.industry_classification; 559 if (!industry) return null; 560 561 // Find all country codes sharing the same language 562 const prospectLang = COUNTRIES[siteData.country_code]?.language; 563 if (!prospectLang) return null; 564 565 const sameLanguageCodes = Object.entries(COUNTRIES) 566 .filter(([, c]) => c.language === prospectLang) 567 .map(([code]) => code); 568 569 const placeholders = sameLanguageCodes.map((_, i) => `$${i + 3}`).join(','); 570 const competitor = await getOne( 571 `SELECT domain, score, grade 572 FROM sites 573 WHERE industry_classification = $1 574 AND domain != $2 575 AND country_code IN (${placeholders}) 576 AND status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent') 577 AND score IS NOT NULL 578 ORDER BY score DESC 579 LIMIT 1`, 580 [industry, siteData.domain, ...sameLanguageCodes] 581 ); 582 583 return competitor || null; 584 } 585 586 /** 587 * Build context for proposal generation 588 * @param {Object} siteData - Site data 589 * @param {Object} scoreData - Scoring data 590 * @param {Array} contacts - Array of contacts from getAllContacts() 591 * @param {string} businessType - Business type extracted from keyword 592 * @returns {Promise<string>} Context string for AI 593 */ 594 async function buildProposalContext(siteData, scoreData, contacts, businessType) { 595 const weaknesses = extractKeyWeaknesses(scoreData); 596 597 // Get country configuration for localization 598 const country = getCountryByCode(siteData.country_code); 599 600 // Extract location from contacts_json (more reliable than keyword) 601 const contactData = getContactsDataWithFallback(siteData.id, siteData); 602 const city = contactData?.city || siteData.city || null; 603 const state = contactData?.state || siteData.state || null; 604 605 // Format contacts list for AI 606 const contactsList = contacts 607 .map((contact, i) => { 608 const safeName = contact.name ? stripInjectionMarkers(contact.name) : null; 609 const nameInfo = safeName ? ` (Name: ${safeName})` : ' (Name: Unknown)'; 610 return `${i + 1}. ${contact.channel}${nameInfo} - ${contact.uri}`; 611 }) 612 .join('\n'); 613 614 // Get top competitor and determine if score difference meets threshold 615 const competitor = await getTopCompetitor(siteData); 616 const scoreThreshold = parseFloat(process.env.COMPETITOR_SCORE_THRESHOLD || '10'); 617 618 let competitorSection = ''; 619 if (competitor) { 620 const scoreDifference = competitor.score - siteData.score; 621 if (scoreDifference >= scoreThreshold) { 622 competitorSection = `\nTOP COMPETITOR (same industry, same language): 623 - Domain: ${competitor.domain} 624 - Score: ${computeGrade(competitor.score)} (${competitor.score}/100) 625 - Difference: ${scoreDifference.toFixed(1)} points higher than target site 626 627 FRAMING RULES: 628 - Score = percentage of conversion best practices implemented (NOT a conversion rate) 629 - Use "our analysis suggests" — never state conversion claims as fact 630 - ~20-point gap ≈ roughly 2x conversion difference (heuristic, frame as estimate) 631 - Example: "Our analysis suggests ${competitor.domain} implements ${competitor.score}% of conversion best practices vs your ${siteData.score}% — that gap likely means they're converting roughly twice as many visitors."`; 632 } else { 633 competitorSection = `\nTOP COMPETITOR (same industry, same language): 634 - Domain: ${competitor.domain} 635 - Score: Similar to target site (do NOT mention specific scores - they're too close) 636 637 IMPORTANT: Do not mention competitor scores when they are clustered too tightly to be meaningful.`; 638 } 639 } 640 641 // Build localization requirements 642 const localizationSection = ` 643 LOCALIZATION REQUIREMENTS: 644 - Country: ${country.name} (${country.code}) 645 - Language: ${country.language === 'en' ? 'English' : country.language} 646 - Currency: ${country.currencySymbol} ${country.currency} 647 - Spelling: Use ${country.name} spelling conventions${['AU', 'GB', 'UK', 'NZ', 'IE', 'ZA', 'IN'].includes(country.code) ? ' (e.g., "optimise" not "optimize", "colour" not "color", "specialise" not "specialize")' : country.code === 'US' || country.code === 'CA' ? ' (e.g., "optimize" not "optimise", "color" not "colour", "specialize" not "specialise")' : ''} 648 - Cultural context: Reference ${country.name} business norms and expectations 649 - Date format: ${country.dateFormat} 650 - Phone format: ${country.phoneFormat} 651 ${country.timezone ? `- Timezone: ${country.timezone} (consider local business hours for call/response timing)` : ''}`; 652 653 // Build best practices section 654 const bestPracticesSection = ` 655 CHANNEL-SPECIFIC BEST PRACTICES: 656 657 === EMAIL BEST PRACTICES === 658 ${EMAIL_BEST_PRACTICES} 659 660 === SMS BEST PRACTICES === 661 ${SMS_BEST_PRACTICES} 662 663 IMPORTANT: Follow these best practices when crafting proposals for each channel. 664 - Email proposals should follow CAN-SPAM requirements and copywriting best practices 665 - SMS proposals MUST be <160 chars and follow local regulations. 666 - TCPA opt-out ("Reply STOP to opt out.") is REQUIRED for US and CA — include it in every US/CA SMS. 667 - CRITICAL: do NOT add any opt-out, STOP, or unsubscribe text for AU, GB, NZ, IE, ZA, IN or any non-US/CA market. These markets do not require TCPA opt-out and the extra text wastes precious SMS characters. 668 - All proposals must provide genuine value and avoid spam triggers 669 `; 670 671 // Build location string 672 let locationStr = 'Unknown'; 673 if (city && state) { 674 locationStr = `${city}, ${state}`; 675 } else if (city) { 676 locationStr = city; 677 } else if (state) { 678 locationStr = state; 679 } 680 681 // Aggregate stats for credibility (dynamic from DB) 682 const aggRow = await getOne( 683 `SELECT COUNT(*) AS total, ROUND(AVG(score)::numeric, 1) AS avg_score, ROUND(MAX(score)::numeric, 1) AS best_score 684 FROM sites WHERE score IS NOT NULL` 685 ); 686 const aggregateSection = ` 687 AGGREGATE STATS (real data — use for credibility): 688 - total_sites_scored: ${aggRow.total} 689 - avg_score: ${aggRow.avg_score} 690 - best_score: ${aggRow.best_score}`; 691 692 return `Generate personalized sales proposals for this website: 693 694 TARGET BUSINESS: 695 - Domain: ${siteData.domain} 696 - Business Name: ${contactData?.business_name || siteData.domain} 697 - URL: ${siteData.landing_page_url} 698 - Business Type: ${businessType} 699 - Location: ${locationStr} 700 701 CONVERSION ANALYSIS: 702 - Score: ${siteData.score ? computeGrade(siteData.score) : 'N/A'} (${siteData.score || 'N/A'}/100) 703 ${competitorSection} 704 ${aggregateSection} 705 706 KEY WEAKNESSES IDENTIFIED: 707 ${weaknesses} 708 709 CONTACTS TO REACH (generate ONE variant for EACH): 710 ${contactsList} 711 ${localizationSection} 712 713 ${bestPracticesSection} 714 715 Generate ${contacts.length} unique proposal variants - one for each contact listed above. 716 Vary your approach across the variants (don't send identical messages to everyone). 717 IMPORTANT: Use the exact "Business Name" above when referring to the company — do NOT guess or derive a name from the domain. 718 IMPORTANT: Apply all localization requirements above to ensure proposals are culturally appropriate and use correct local conventions. 719 CRITICAL: Follow the channel-specific best practices above - especially SMS length limits and compliance requirements.`; 720 } 721 722 /** 723 * Extract business type from keyword 724 */ 725 function extractBusinessType(keyword) { 726 const commonLocations = [ 727 'sydney', 728 'melbourne', 729 'brisbane', 730 'perth', 731 'adelaide', 732 'canberra', 733 'hobart', 734 'darwin', 735 ]; 736 737 let businessType = keyword; 738 for (const location of commonLocations) { 739 // eslint-disable-next-line security/detect-non-literal-regexp -- Safe: location is from hardcoded array 740 businessType = businessType.replace(new RegExp(location, 'gi'), '').trim(); 741 } 742 743 return businessType || keyword; 744 } 745 746 /** 747 * Extract location from keyword 748 */ 749 function extractLocation(keyword) { 750 const words = keyword.split(' '); 751 return words.slice(-2).join(' '); 752 } 753 754 /** 755 * Extract key weaknesses from score data 756 */ 757 function extractKeyWeaknesses(scoreData) { 758 if (!scoreData || !scoreData.factor_scores) { 759 return 'General conversion optimization opportunities'; 760 } 761 762 const weaknesses = []; 763 764 Object.entries(scoreData.factor_scores).forEach(([factorName, factor]) => { 765 if (factor && factor.score < 5 && factor.reasoning) { 766 const label = factorName.replace(/_/g, ' '); 767 weaknesses.push(`- ${label}: ${stripInjectionMarkers(factor.reasoning)}`); 768 } 769 }); 770 771 return weaknesses.length > 0 772 ? weaknesses.slice(0, 5).join('\n') 773 : 'General conversion optimization opportunities'; 774 } 775 776 /** 777 * Normalize contact method to match database schema 778 */ 779 function normalizeContactMethod(method) { 780 if (!method) return 'email'; // Default fallback 781 782 const normalized = method.toLowerCase().trim(); 783 784 const mapping = { 785 sms: 'sms', 786 email: 'email', 787 form: 'form', 788 'contact form': 'form', 789 contactform: 'form', 790 x: 'x', 791 twitter: 'x', 792 linkedin: 'linkedin', 793 facebook: 'email', 794 instagram: 'email', 795 }; 796 797 // eslint-disable-next-line security/detect-object-injection -- Safe: normalized is from our channel type strings 798 return mapping[normalized] || 'email'; 799 } 800 801 /** 802 * Store proposal variant in messages table 803 * @param {number} siteId - Site ID 804 * @param {Object} variant - Variant object with proposal_text, variant_number 805 * @param {string} subjectLine - Email subject line 806 * @param {Object|null} contact - Contact object {type, uri, name, channel} or null 807 * @param {Object} siteData - Site data including country_code and gdpr_verified 808 * @returns {Promise<number|null>} Message ID 809 */ 810 async function storeProposalVariant(siteId, variant, subjectLine, contact = null, siteData = null) { 811 // Validate variant data 812 if (!variant || typeof variant !== 'object') { 813 throw new Error('Invalid variant: must be an object'); 814 } 815 if (!variant.proposal_text || typeof variant.proposal_text !== 'string') { 816 throw new Error(`Invalid variant.proposal_text for variant ${variant.variant_number}`); 817 } 818 819 // Use contact info if available, otherwise use variant recommendation 820 const rawContactMethod = contact ? contact.channel : variant.recommended_channel || 'email'; 821 const contactMethod = normalizeContactMethod(rawContactMethod); 822 let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION'; 823 824 // Normalize SMS phone numbers with country code 825 if ( 826 contactMethod === 'sms' && 827 contactUri !== 'PENDING_CONTACT_EXTRACTION' && 828 siteData?.country_code 829 ) { 830 contactUri = addCountryCode(contactUri, siteData.country_code); 831 } 832 833 // GDPR compliance check: Block unverified emails in GDPR countries 834 let approvalStatus = 'pending'; 835 836 if ( 837 siteData && 838 contactMethod === 'email' && 839 siteData.gdpr_verified === 0 && 840 siteData.country_code 841 ) { 842 const country = getCountryByCode(siteData.country_code); 843 if (country && country.requiresGDPRCheck) { 844 approvalStatus = 'gdpr_blocked'; 845 logger.info( 846 ` ⚠️ GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)` 847 ); 848 } 849 } 850 851 // Government email check 852 if (contactMethod === 'email' && isGovernmentEmail(contactUri)) { 853 logger.info(` ⚠️ Skipping government email: ${contactUri}`); 854 return null; 855 } 856 857 // Education email check 858 if (contactMethod === 'email' && isEducationEmail(contactUri)) { 859 logger.info(` ⚠️ Skipping education email: ${contactUri}`); 860 return null; 861 } 862 863 // Demo email check 864 if (contactMethod === 'email' && isDemoEmail(contactUri)) { 865 logger.info(` ⚠️ Skipping demo email: ${contactUri}`); 866 return null; 867 } 868 869 // Derive contact_basis from channel 870 const CONTACT_BASIS = { 871 sms: 'public_website_phone', 872 email: 'public_website_email', 873 form: 'contact_form', 874 x: 'social_profile', 875 linkedin: 'social_profile', 876 }; 877 const contactBasis = CONTACT_BASIS[contactMethod] || null; 878 879 // Debug logging 880 const nameInfo = contact?.name ? ` (${contact.name})` : ''; 881 const statusInfo = approvalStatus === 'gdpr_blocked' ? ' [GDPR BLOCKED]' : ''; 882 logger.info( 883 `Storing variant ${variant.variant_number}: siteId=${siteId}, textLen=${variant.proposal_text?.length}, method=${contactMethod}${nameInfo}, uri=${contactUri}${statusInfo}` 884 ); 885 886 // Resolve spintax before storing — each message record has a final, human-readable version 887 const spunText = spin(variant.proposal_text); 888 const spunSubject = spin(subjectLine); 889 890 // OUTPUT SANITIZATION 891 const prospectDomain = siteData?.domain || null; 892 const { sanitized: sanitizedText, strippedUrls, strippedPatterns } = sanitizeLlmOutput( 893 spunText, 894 prospectDomain 895 ); 896 const { sanitized: sanitizedSubject } = sanitizeLlmOutput(spunSubject, prospectDomain); 897 898 if (strippedUrls.length > 0 || strippedPatterns.length > 0) { 899 logger.warn( 900 `[proposals] Output sanitization for site ${siteId}: removed ${strippedPatterns.length} dangerous patterns, ${strippedUrls.length} unauthorized URLs` 901 ); 902 } 903 904 // Reject broken template output 905 if (sanitizedText.startsWith('{') || sanitizedText.startsWith(', ')) { 906 logger.warn( 907 `[proposals] Broken template output for site ${siteId} — starts with '${sanitizedText.slice(0, 10)}' — skipping` 908 ); 909 return null; 910 } 911 912 // SMS: append dynamic signature + opt-out based on country and remaining char budget 913 const personaFirstName = process.env.PERSONA_FIRST_NAME; 914 const personaName = process.env.PERSONA_NAME; 915 const brandName = process.env.BRAND_NAME; 916 const brandDomain = process.env.BRAND_DOMAIN; 917 if (!personaFirstName) throw new Error('PERSONA_FIRST_NAME env var required'); 918 if (!personaName) throw new Error('PERSONA_NAME env var required'); 919 if (!brandName) throw new Error('BRAND_NAME env var required'); 920 if (!brandDomain) throw new Error('BRAND_DOMAIN env var required'); 921 922 let finalText = sanitizedText; 923 if (contactMethod === 'sms') { 924 const cc = (siteData?.country_code || '').toUpperCase(); 925 const needsStop = cc === 'US' || cc === 'CA'; 926 const sigStripPattern = new RegExp(`\\s*-\\s*${personaFirstName}\\s*(${personaName.split(' ').slice(1).join(' ')})?\\s*,?\\s*(${brandName.replace(/[&]/g, '\\s*&\\s*')})?\\s*$`, 'i'); 927 finalText = finalText 928 .replace(/\s*Reply STOP to opt out\.?\s*/gi, '') 929 .replace(sigStripPattern, '') 930 .trimEnd(); 931 const stopText = needsStop ? ' Reply STOP to opt out.' : ''; 932 const sigTiers = needsStop 933 ? [` - ${personaFirstName}, ${brandName}`, ` - ${brandName}`] 934 : [` - ${personaFirstName}`, '']; 935 let bestSig = sigTiers[sigTiers.length - 1]; 936 for (const sig of sigTiers) { 937 if (finalText.length + stopText.length + sig.length <= 160) { 938 bestSig = sig; 939 break; 940 } 941 } 942 finalText = `${finalText}${stopText}${bestSig}`; 943 } else if (contactMethod !== 'sms') { 944 const identityPattern = new RegExp(`${personaFirstName}|${brandName.replace(/[&]/g, '\\$&')}|${brandDomain.replace(/\./g, '\\.')}`, 'i'); 945 const hasIdentity = identityPattern.test(finalText); 946 if (!hasIdentity) { 947 finalText = `${finalText.trimEnd()}\n\n${personaName}, ${brandName} (${brandDomain})`; 948 } 949 } 950 951 // Hard character limit enforcement 952 const CHANNEL_MAX_CHARS = { sms: 160, x: 280 }; 953 const maxChars = CHANNEL_MAX_CHARS[contactMethod]; 954 if (maxChars && finalText.length > maxChars) { 955 logger.warn( 956 `[proposals] ${contactMethod} message too long (${finalText.length}/${maxChars} chars) for site ${siteId} — skipping` 957 ); 958 return null; 959 } 960 961 const result = await run( 962 `INSERT INTO messages ( 963 site_id, 964 direction, 965 message_body, 966 subject_line, 967 contact_method, 968 contact_uri, 969 approval_status, 970 contact_basis 971 ) VALUES ($1, 'outbound', $2, $3, $4, $5, $6, $7) 972 ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved') 973 DO UPDATE SET 974 message_body = EXCLUDED.message_body, 975 subject_line = EXCLUDED.subject_line, 976 approval_status = EXCLUDED.approval_status, 977 contact_basis = EXCLUDED.contact_basis 978 RETURNING id`, 979 [siteId, finalText, sanitizedSubject, contactMethod, contactUri, approvalStatus, contactBasis] 980 ); 981 982 return result.lastInsertRowid; 983 } 984 985 /** 986 * Get pending messages awaiting approval 987 */ 988 export async function getPendingOutreaches(limit = 100) { 989 return await getAll( 990 `SELECT 991 m.*, 992 s.domain, 993 s.keyword, 994 s.landing_page_url, 995 s.grade 996 FROM messages m 997 JOIN sites s ON m.site_id = s.id 998 WHERE m.direction = 'outbound' 999 AND m.approval_status = 'pending' 1000 ORDER BY m.created_at DESC 1001 LIMIT $1`, 1002 [limit] 1003 ); 1004 } 1005 1006 /** 1007 * Approve message for sending 1008 */ 1009 export async function approveOutreach(outreachId) { 1010 await run("UPDATE messages SET approval_status = 'approved' WHERE id = $1", [outreachId]); 1011 logger.success(`Approved message #${outreachId}`); 1012 } 1013 1014 /** 1015 * Mark message for rework 1016 */ 1017 export async function reworkOutreach(outreachId, reworkInstructions) { 1018 await run( 1019 "UPDATE messages SET approval_status = 'rework', rework_instructions = $1 WHERE id = $2", 1020 [reworkInstructions, outreachId] 1021 ); 1022 logger.info(`Marked message #${outreachId} for rework`); 1023 } 1024 1025 /** 1026 * Process rework requests (regenerate proposals with feedback) 1027 */ 1028 export async function processReworkQueue() { 1029 const reworkItems = await getAll( 1030 `SELECT DISTINCT site_id, rework_instructions 1031 FROM messages 1032 WHERE direction = 'outbound' 1033 AND approval_status = 'rework' 1034 GROUP BY site_id, rework_instructions` 1035 ); 1036 1037 logger.info(`Processing ${reworkItems.length} rework requests...`); 1038 1039 let processed = 0; 1040 let failed = 0; 1041 1042 for (const item of reworkItems) { 1043 try { 1044 // Delete old pending/rework messages for this site 1045 await run( 1046 `DELETE FROM messages 1047 WHERE site_id = $1 1048 AND direction = 'outbound' 1049 AND approval_status IN ('pending', 'rework')`, 1050 [item.site_id] 1051 ); 1052 1053 // Generate new variants with rework instructions 1054 await generateProposalVariants(item.site_id, item.rework_instructions); 1055 1056 logger.success(`Regenerated proposals for site #${item.site_id}`); 1057 processed++; 1058 } catch (error) { 1059 logger.error(`Failed to rework site #${item.site_id}: ${error.message}`); 1060 failed++; 1061 } 1062 } 1063 1064 logger.success( 1065 `Processed ${processed}/${reworkItems.length} rework requests${failed ? ` (${failed} failed)` : ''}` 1066 ); 1067 } 1068 1069 /** 1070 * Generate proposals for all low-scoring sites without outreaches 1071 */ 1072 export async function generateBulkProposals(limit = null) { 1073 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 1074 1075 const sql = `SELECT s.id, s.domain, s.keyword, s.score 1076 FROM sites s 1077 LEFT JOIN messages m ON s.id = m.site_id AND m.direction = 'outbound' 1078 WHERE s.status IN ('enriched', 'enriched_llm') 1079 AND s.score < $1 1080 AND m.id IS NULL 1081 ORDER BY s.score ASC 1082 ${limit ? `LIMIT ${limit}` : ''}`; 1083 1084 const sites = await getAll(sql, [cutoff]); 1085 1086 logger.info(`Generating proposals for ${sites.length} sites...`); 1087 1088 const results = []; 1089 1090 for (const site of sites) { 1091 try { 1092 const result = await generateProposalVariants(site.id); 1093 results.push(result); 1094 1095 // Rate limiting 1096 await new Promise(resolve => setTimeout(resolve, 2000)); 1097 } catch (error) { 1098 logger.error(`Failed for site #${site.id}`, error); 1099 results.push({ siteId: site.id, error: error.message }); 1100 } 1101 } 1102 1103 logger.success( 1104 `Generated proposals for ${results.filter(r => !r.error).length}/${results.length} sites` 1105 ); 1106 1107 return results; 1108 } 1109 1110 // CLI functionality 1111 if (import.meta.url === `file://${process.argv[1]}`) { 1112 const command = process.argv[2]; 1113 1114 if (command === 'generate') { 1115 const siteId = parseInt(process.argv[3], 10); 1116 if (!siteId) { 1117 console.error('Usage: node src/proposal-generator-v2.js generate <site_id>'); 1118 process.exit(1); 1119 } 1120 1121 generateProposalVariants(siteId) 1122 .then(result => { 1123 console.log('\n✅ Proposals generated!\n'); 1124 console.log(`Site: ${result.domain} (${result.keyword})`); 1125 console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`); 1126 process.exit(0); 1127 }) 1128 .catch(error => { 1129 console.error(`\n❌ Failed: ${error.message}`); 1130 process.exit(1); 1131 }); 1132 } else if (command === 'bulk') { 1133 const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null; 1134 1135 generateBulkProposals(limit) 1136 .then(() => process.exit(0)) 1137 .catch(error => { 1138 console.error(`\n❌ Failed: ${error.message}`); 1139 process.exit(1); 1140 }); 1141 } else if (command === 'pending') { 1142 const limit = process.argv[3] ? parseInt(process.argv[3], 10) : 100; 1143 getPendingOutreaches(limit) 1144 .then(msgs => { 1145 console.log(JSON.stringify(msgs, null, 2)); 1146 process.exit(0); 1147 }) 1148 .catch(error => { 1149 console.error(`\n❌ Failed: ${error.message}`); 1150 process.exit(1); 1151 }); 1152 } else { 1153 console.log('Usage:'); 1154 console.log(' generate <site_id> - Generate proposals for a site'); 1155 console.log(' bulk [limit] - Generate for all low-scoring sites'); 1156 console.log(' pending [limit] - List pending outreaches'); 1157 process.exit(1); 1158 } 1159 }