proposal-generator-v2.js
1 #!/usr/bin/env node 2 3 /** 4 * Proposal Generator V2 5 * Generates 3 variants of personalized outreach proposals using the new Proposal.md prompt 6 * Stores in messages table with pending approval_status for operator approval 7 */ 8 9 import { run, getOne, getAll } from './utils/db.js'; 10 import { execFileSync } from 'child_process'; 11 import { readFileSync, existsSync, readdirSync } from 'fs'; 12 import { join, dirname } from 'path'; 13 import { fileURLToPath } from 'url'; 14 import Logger from './utils/logger.js'; 15 import { retryWithBackoff, isRetryableError, safeJsonParse } from './utils/error-handler.js'; 16 import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js'; 17 import { spin } from './utils/spintax.js'; 18 import { openRouterLimiter } from './utils/rate-limiter.js'; 19 import { openRouterBreaker } from './utils/circuit-breaker.js'; 20 import { callLLM, getProviderDisplayName } from './utils/llm-provider.js'; 21 import { getCountryByCode, COUNTRIES } from './config/countries.js'; 22 import { computeGrade } from './score.js'; 23 import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js'; 24 import { addCountryCode } from './utils/phone-normalizer.js'; 25 import { generatePromptRecommendations } from './utils/prompt-learning.js'; 26 import { getCurrentSeason } from './utils/template-proposals.js'; 27 import { wrapUntrusted, stripInjectionMarkers, sanitizeLlmOutput } from './utils/llm-sanitizer.js'; 28 import { validateProposalResponse } from './utils/llm-response-validator.js'; 29 import { getScoreDataWithFallback } from './utils/score-storage.js'; 30 import { getContactsDataWithFallback } from './utils/contacts-storage.js'; 31 import { parseEnvSet } from './utils/load-env.js'; 32 33 const __filename = fileURLToPath(import.meta.url); 34 const __dirname = dirname(__filename); 35 const projectRoot = join(__dirname, '..'); 36 37 const logger = new Logger('ProposalGeneratorV2'); 38 39 // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS). 40 const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES); 41 42 // Load proposal prompt and best practices 43 const PROPOSAL_PROMPT = readFileSync(join(projectRoot, 'prompts/PROPOSAL.md'), 'utf-8'); 44 const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8'); 45 const EMAIL_BEST_PRACTICES = readFileSync( 46 join(projectRoot, 'docs/05-outreach/email-best-practices.md'), 47 'utf-8' 48 ); 49 50 // SMS best practices (from CLAUDE.md compliance section) 51 const SMS_BEST_PRACTICES = `# SMS Best Practices (TCPA Compliance) 52 53 ## Core Requirements 54 55 **Prior Consent**: TCPA requires explicit prior written consent before sending marketing SMS. For cold outreach: 56 - Initial SMS must be informational, not promotional 57 - Must provide opt-out instructions 58 - Cannot include hard sales pitches without consent 59 60 **Opt-Out Instructions**: TCPA (US/CA only) requires opt-out instructions in every SMS: 61 - Required for: US, CA only 62 - Standard: "Reply STOP to opt out" 63 - NOT required for: AU, GB, NZ, IE, ZA, IN (different regulatory regimes) 64 - Must honor opt-outs immediately for all markets 65 - Store opt-outs permanently 66 67 **Business Hours**: SMS must be sent during reasonable hours: 68 - 8am - 9pm in recipient's local timezone 69 - Avoid weekends for initial outreach 70 - Consider industry-specific work hours 71 72 **Sender Identification**: Clearly identify who is sending: 73 - Include business name in first message 74 - Provide contact information 75 - Be transparent about purpose 76 77 **Message Length**: Keep SMS concise: 78 - Aim for <160 characters to avoid segmentation 79 - Ultra-short, urgent, personal tone 80 - Single clear call-to-action 81 82 **Compliance Checklist**: 83 - [ ] Explicit consent obtained (or informational-only content) 84 - [ ] Opt-out instructions included 85 - [ ] Sent during business hours (8am-9pm local time) 86 - [ ] Sender clearly identified 87 - [ ] Message under 160 characters 88 - [ ] No misleading or deceptive content 89 - [ ] Opt-outs honored immediately 90 `; 91 92 // Model configuration (from env or default) 93 const PROPOSAL_MODEL = 94 process.env.PROPOSAL_MODEL || process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5'; 95 96 // Language code → full name for polish prompt 97 const POLISH_LANG_NAMES = { 98 en: 'English', 99 de: 'German', 100 fr: 'French', 101 it: 'Italian', 102 ja: 'Japanese', 103 ko: 'Korean', 104 es: 'Spanish', 105 nl: 'Dutch', 106 pl: 'Polish', 107 sv: 'Swedish', 108 da: 'Danish', 109 id: 'Indonesian', 110 hi: 'Hindi', 111 }; 112 113 /** 114 * Polish a generated proposal via Claude CLI (uses Max subscription, zero cost). 115 * Falls back to OpenRouter if CLI is unavailable. 116 */ 117 async function polishProposal( 118 text, 119 channel, 120 languageCode = 'en', 121 subjectLine = null, 122 countryCode = null 123 ) { 124 const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection 125 const isEmailChannel = channel === 'email' || channel === 'form'; 126 127 const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : ''; 128 const season = getCurrentSeason(countryCode); 129 const seasonSection = season ? `\nSEASON: ${season}` : ''; 130 131 const userMessage = `CHANNEL: ${channel} 132 LANGUAGE: ${langName}${seasonSection} 133 --- 134 PROPOSAL BODY: 135 ${wrapUntrusted(text, 'proposal_draft')}${subjectSection}`; 136 137 // Try Claude CLI first (Max subscription, zero cost) 138 try { 139 const cliInput = `${HAIKU_POLISH_PROMPT}\n\n---\n\n${userMessage}`; 140 const output = execFileSync('claude', ['-p', '--model', 'haiku', '--output-format', 'json'], { 141 input: cliInput, 142 encoding: 'utf-8', 143 timeout: 30000, 144 maxBuffer: 1024 * 1024, 145 }); 146 147 // Claude CLI with --output-format json wraps response in a JSON object 148 const cliResult = safeJsonParse(output); 149 // The CLI returns {result: "..."} — the result contains the LLM output 150 const llmOutput = cliResult?.result || output; 151 const parsed = safeJsonParse(llmOutput); 152 153 if (parsed && typeof parsed.body === 'string' && parsed.body.trim() !== '') { 154 return { 155 text: parsed.body.trim(), 156 subjectLine: isEmailChannel ? parsed.subject?.trim() || subjectLine : null, 157 }; 158 } 159 logger.warn('Polish (claude CLI) returned invalid response — falling back to OpenRouter'); 160 } catch (cliErr) { 161 logger.warn(`Polish (claude CLI) failed: ${cliErr.message} — falling back to OpenRouter`); 162 } 163 164 // Fallback: OpenRouter 165 try { 166 const { content } = await openRouterBreaker.fire(async () => 167 openRouterLimiter.schedule(() => 168 callLLM({ 169 model: 'anthropic/claude-haiku-4-5', 170 temperature: 0, 171 max_tokens: 3000, 172 json_mode: true, 173 messages: [ 174 { role: 'system', content: HAIKU_POLISH_PROMPT }, 175 { role: 'user', content: userMessage }, 176 ], 177 stage: 'proposals', 178 }) 179 ) 180 ); 181 182 const result = safeJsonParse(content); 183 if (!result || typeof result.body !== 'string' || result.body.trim() === '') { 184 logger.warn('Polish (OpenRouter fallback) returned invalid response — using original'); 185 return { text, subjectLine }; 186 } 187 188 return { 189 text: result.body.trim(), 190 subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null, 191 }; 192 } catch (err) { 193 logger.warn(`Polish (OpenRouter fallback) failed: ${err.message} — using original`); 194 return { text, subjectLine }; 195 } 196 } 197 198 /** 199 * Generate proposal variants for a site and store in messages table 200 * @param {number} siteId - Site ID from database 201 * @param {string} reworkInstructions - Optional feedback for rework 202 * @returns {Promise<Object>} Generated proposals 203 */ 204 export async function generateProposalVariants(siteId, reworkInstructions = null) { 205 const _t0 = Date.now(); 206 207 // Get site data (including country metadata) 208 const siteData = await getOne( 209 `SELECT 210 s.*, 211 s.score, 212 s.grade, 213 s.country_code, 214 COALESCE(s.google_domain, 'google.com.au') as google_domain, 215 COALESCE(s.language_code, 'en') as language_code, 216 COALESCE(s.currency_code, 'AUD') as currency_code 217 FROM sites s 218 WHERE s.id = $1`, 219 [siteId] 220 ); 221 222 if (!siteData) { 223 throw new Error(`Site not found: ${siteId}`); 224 } 225 226 // Language override: if site language_code is 'en' but country's primary language isn't English 227 if (siteData.language_code === 'en' && siteData.country_code) { 228 const countryInfo = getCountryByCode(siteData.country_code); 229 if (countryInfo && countryInfo.language !== 'en') { 230 logger.info( 231 `[proposals] Language override: ${siteData.country_code} site has language_code='en' but country language is '${countryInfo.language}' — using '${countryInfo.language}'` 232 ); 233 siteData.language_code = countryInfo.language; 234 } 235 } 236 237 // Guard: country blocked from ALL outreach channels 238 const blockedCountries = new Set( 239 (process.env.OUTREACH_BLOCKED_COUNTRIES || '') 240 .split(',') 241 .map(s => s.trim().toUpperCase()) 242 .filter(Boolean) 243 ); 244 if (blockedCountries.has(siteData.country_code)) { 245 const msg = `country ${siteData.country_code} blocked via OUTREACH_BLOCKED_COUNTRIES — skipping proposal generation`; 246 logger.info(` ⏭️ ${siteData.domain}: ${msg}`); 247 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 248 } 249 250 // Guard: country_code must be known 251 if (!siteData.country_code) { 252 const msg = 253 'country_code is unknown — re-enrich site to detect country before generating proposals'; 254 await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]); 255 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 256 } 257 258 // Guard: a template must exist for this country 259 const templatesDir = join(__dirname, '../data/templates'); 260 const templatePath = join(templatesDir, siteData.country_code, 'email.json'); 261 if (!existsSync(templatePath)) { 262 const msg = `no email template for country ${siteData.country_code} — add data/templates/${siteData.country_code}/email.json to enable outreach`; 263 await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]); 264 throw new Error(`Skipping ${siteData.domain}: ${msg}`); 265 } 266 267 // Check if site meets low-score threshold (unless this is a rework) 268 if (!reworkInstructions) { 269 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 270 if (siteData.score >= cutoff) { 271 throw new Error( 272 `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.` 273 ); 274 } 275 } 276 277 logger.info(`Generating proposal variants for ${siteData.domain} (${siteData.keyword})`); 278 279 // Parse JSON data (filesystem first, DB fallback) 280 const scoreData = getScoreDataWithFallback(siteData.id, siteData); 281 let contactData = getContactsDataWithFallback(siteData.id, siteData); 282 283 // Clean up invalid Twitter/X.com links (those without usernames) 284 if (contactData) { 285 contactData = cleanInvalidSocialLinks(contactData); 286 } 287 288 // Get all contacts with Haiku-resolved first names 289 const _tNames = Date.now(); 290 const allContacts = contactData 291 ? await getAllContactsWithNames(contactData, siteData.country_code) 292 : []; 293 logger.debug( 294 `[proposals] name-extract ${Date.now() - _tNames}ms (${allContacts.length} contacts) for ${siteData.domain}` 295 ); 296 297 if (allContacts.length === 0) { 298 logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`); 299 return { 300 siteId, 301 domain: siteData.domain, 302 keyword: siteData.keyword, 303 outreachIds: [], 304 variants: [], 305 reasoning: 'No contacts found', 306 contactCount: 0, 307 }; 308 } 309 310 // Filter out contacts that already have outbound messages 311 const existingMessages = await getAll( 312 `SELECT contact_method, contact_uri 313 FROM messages 314 WHERE site_id = $1 AND direction = 'outbound'`, 315 [siteId] 316 ); 317 318 const existingSet = new Set( 319 existingMessages.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`) 320 ); 321 322 const countryUpper = (siteData.country_code || '').toUpperCase(); 323 const availableContacts = allContacts.filter(contact => { 324 const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`; 325 if (existingSet.has(key)) return false; 326 // Skip SMS contacts for countries without legal basis (DR-121) 327 if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(countryUpper)) { 328 logger.info(`Skipping SMS contact for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`); 329 return false; 330 } 331 return true; 332 }); 333 334 if (availableContacts.length === 0) { 335 logger.warn( 336 `All ${allContacts.length} contacts for ${siteData.domain} already have messages - skipping` 337 ); 338 return { 339 siteId, 340 domain: siteData.domain, 341 keyword: siteData.keyword, 342 outreachIds: [], 343 variants: [], 344 reasoning: 'All contacts already have messages', 345 contactCount: allContacts.length, 346 }; 347 } 348 349 if (availableContacts.length < allContacts.length) { 350 logger.info( 351 `Filtered ${allContacts.length - availableContacts.length} contacts with existing messages` 352 ); 353 } 354 355 logger.info(`Found ${availableContacts.length} contacts - generating personalized proposals`); 356 357 // Generate one proposal per contact 358 const _tGen = Date.now(); 359 const proposalData = await generateProposalsWithAI( 360 siteData, 361 scoreData, 362 availableContacts, 363 reworkInstructions, 364 siteId 365 ); 366 367 logger.debug(`[proposals] generation ${Date.now() - _tGen}ms for ${siteData.domain}`); 368 369 // Validate that we got the right number of variants 370 if (proposalData.variants.length !== availableContacts.length) { 371 throw new Error( 372 `Expected ${availableContacts.length} variants but got ${proposalData.variants.length}` 373 ); 374 } 375 376 // Polish all variants concurrently then store 377 const tPolish = Date.now(); 378 const polishedVariants = await Promise.all( 379 availableContacts.map(async (contact, i) => { 380 const variant = proposalData.variants[i]; // eslint-disable-line security/detect-object-injection 381 const contactMethod = normalizeContactMethod(contact.channel); 382 const isEmailChannel = contactMethod === 'email' || contactMethod === 'form'; 383 const polished = await polishProposal( 384 variant.proposal_text, 385 contactMethod, 386 siteData.language_code || 'en', 387 isEmailChannel ? proposalData.subject_line : null, 388 siteData.country_code || null 389 ); 390 return { contact, variant, contactMethod, polished }; 391 }) 392 ); 393 logger.debug( 394 `[proposals] polish ${Date.now() - tPolish}ms for ${availableContacts.length} contacts` 395 ); 396 397 const outreachIds = []; 398 for (const { contact, variant, polished } of polishedVariants) { 399 // Store outreach entry (with GDPR filtering) using polished content 400 const outreachId = await storeProposalVariant( 401 siteId, 402 { ...variant, proposal_text: polished.text }, 403 polished.subjectLine ?? proposalData.subject_line, 404 contact, 405 siteData 406 ); 407 outreachIds.push(outreachId); 408 409 const nameInfo = contact.name ? ` (${contact.name})` : ''; 410 const contactMethod = normalizeContactMethod(contact.channel); 411 logger.success( 412 `Stored outreach #${outreachId}: V${variant.variant_number} → ${contactMethod}${nameInfo} (${contact.uri})` 413 ); 414 } 415 416 // Update site status 417 await run( 418 `UPDATE sites 419 SET status = 'proposals_drafted', 420 updated_at = CURRENT_TIMESTAMP 421 WHERE id = $1`, 422 [siteId] 423 ); 424 425 logger.info( 426 `[proposals] total ${Date.now() - _t0}ms for ${siteData.domain} (${allContacts.length} contacts)` 427 ); 428 logger.success( 429 `Generated ${proposalData.variants.length} proposal variants and created ${outreachIds.length} outreach entries for ${siteData.domain}` 430 ); 431 432 return { 433 siteId, 434 domain: siteData.domain, 435 keyword: siteData.keyword, 436 outreachIds, 437 variants: proposalData.variants, 438 reasoning: proposalData.reasoning, 439 contactCount: allContacts.length, 440 }; 441 } 442 443 /** 444 * Generate proposal variants using LLM API 445 * @param {Object} siteData - Site data 446 * @param {Object} scoreData - Scoring data 447 * @param {Array} contacts - Array of contacts from getAllContacts() 448 * @param {string} reworkInstructions - Optional rework instructions 449 * @param {number} siteId - Site ID for usage tracking 450 * @returns {Promise<Object>} Proposal data with variants 451 */ 452 453 async function generateProposalsWithAI( 454 siteData, 455 scoreData, 456 contacts, 457 reworkInstructions, 458 siteId 459 ) { 460 // Extract business type from keyword 461 const businessType = extractBusinessType(siteData.keyword); 462 463 // Build context for the AI 464 const context = await buildProposalContext(siteData, scoreData, contacts, businessType); 465 466 // Add rework instructions if provided 467 const reworkSection = reworkInstructions 468 ? `\n\nREWORK INSTRUCTIONS FROM OPERATOR:\n${reworkInstructions}\n\nPlease incorporate this feedback into the new variants.` 469 : ''; 470 471 const userPrompt = `${context}${reworkSection}`; 472 473 return retryWithBackoff( 474 async () => { 475 // Calculate max_tokens based on number of contacts 476 const maxTokens = Math.max(8192, contacts.length * 1200); 477 478 const response = await openRouterBreaker.fire(async () => 479 openRouterLimiter.schedule(() => 480 callLLM({ 481 model: PROPOSAL_MODEL, 482 messages: [ 483 { role: 'system', content: PROPOSAL_PROMPT }, 484 { role: 'user', content: userPrompt }, 485 ], 486 temperature: 0.7, 487 max_tokens: maxTokens, 488 json_mode: true, 489 stage: 'proposals', 490 siteId, 491 }) 492 ) 493 ); 494 495 const { content, usage } = response; 496 497 logger.info( 498 `Generated proposals using ${getProviderDisplayName()} - ${usage.promptTokens + usage.completionTokens} tokens` 499 ); 500 501 if (!content) { 502 throw new Error('No content in API response'); 503 } 504 505 const result = safeJsonParse(content); 506 507 if (!result || !result.variants || !Array.isArray(result.variants)) { 508 throw new Error('Invalid proposal response format'); 509 } 510 511 if (result.variants.length !== contacts.length) { 512 throw new Error(`Expected ${contacts.length} variants but got ${result.variants.length}`); 513 } 514 515 // Validate response structure and check for suspicious URLs 516 validateProposalResponse(result, contacts.length); 517 518 // Programmatic content validation — retry if LLM ignores constraints 519 const freePattern = 520 /\bfree\s+(audit|report|assessment|consultation|review|strategy\s+call|snapshot|analysis|evaluation|estimate|quote)\b|\bno\s+cost\b|\bat\s+no\s+charge\b|\bcomplimentary\b/i; 521 const phonePattern = /\+\d[\d\s\-().]{6,}/; 522 for (const variant of result.variants) { 523 const text = variant.proposal_text || ''; 524 if (freePattern.test(text)) { 525 throw new Error( 526 `LLM promised a free service in variant ${variant.variant_number} — retry required` 527 ); 528 } 529 if (phonePattern.test(text)) { 530 throw new Error( 531 `LLM hallucinated a phone number in variant ${variant.variant_number} — retry required` 532 ); 533 } 534 } 535 536 return result; 537 }, 538 { 539 maxRetries: 3, 540 shouldRetry: error => error.message.includes('retry required') || isRetryableError(error), 541 onRetry: (attempt, error) => { 542 logger.warn(`Retry ${attempt + 1}/3 for ${siteData.domain}: ${error.message}`); 543 }, 544 } 545 ); 546 } 547 548 /** 549 * Get top competitor for a keyword 550 * @param {Object} siteData - Site data 551 * @returns {Promise<Object|null>} Competitor info {domain, score, grade} or null 552 */ 553 async function getTopCompetitor(siteData) { 554 // Extract industry from score_json (filesystem first, DB fallback) 555 const scoreJson = getScoreDataWithFallback(siteData.id, siteData); 556 const industry = scoreJson?.industry_classification; 557 if (!industry) return null; 558 559 // Find all country codes sharing the same language 560 const prospectLang = COUNTRIES[siteData.country_code]?.language; 561 if (!prospectLang) return null; 562 563 const sameLanguageCodes = Object.entries(COUNTRIES) 564 .filter(([, c]) => c.language === prospectLang) 565 .map(([code]) => code); 566 567 const placeholders = sameLanguageCodes.map((_, i) => `$${i + 3}`).join(','); 568 const competitor = await getOne( 569 `SELECT domain, score, grade 570 FROM sites 571 WHERE industry_classification = $1 572 AND domain != $2 573 AND country_code IN (${placeholders}) 574 AND status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent') 575 AND score IS NOT NULL 576 ORDER BY score DESC 577 LIMIT 1`, 578 [industry, siteData.domain, ...sameLanguageCodes] 579 ); 580 581 return competitor || null; 582 } 583 584 /** 585 * Build context for proposal generation 586 * @param {Object} siteData - Site data 587 * @param {Object} scoreData - Scoring data 588 * @param {Array} contacts - Array of contacts from getAllContacts() 589 * @param {string} businessType - Business type extracted from keyword 590 * @returns {Promise<string>} Context string for AI 591 */ 592 async function buildProposalContext(siteData, scoreData, contacts, businessType) { 593 const weaknesses = extractKeyWeaknesses(scoreData); 594 595 // Get country configuration for localization 596 const country = getCountryByCode(siteData.country_code); 597 598 // Extract location from contacts_json (more reliable than keyword) 599 const contactData = getContactsDataWithFallback(siteData.id, siteData); 600 const city = contactData?.city || siteData.city || null; 601 const state = contactData?.state || siteData.state || null; 602 603 // Format contacts list for AI 604 const contactsList = contacts 605 .map((contact, i) => { 606 const safeName = contact.name ? stripInjectionMarkers(contact.name) : null; 607 const nameInfo = safeName ? ` (Name: ${safeName})` : ' (Name: Unknown)'; 608 return `${i + 1}. ${contact.channel}${nameInfo} - ${contact.uri}`; 609 }) 610 .join('\n'); 611 612 // Get top competitor and determine if score difference meets threshold 613 const competitor = await getTopCompetitor(siteData); 614 const scoreThreshold = parseFloat(process.env.COMPETITOR_SCORE_THRESHOLD || '10'); 615 616 let competitorSection = ''; 617 if (competitor) { 618 const scoreDifference = competitor.score - siteData.score; 619 if (scoreDifference >= scoreThreshold) { 620 competitorSection = `\nTOP COMPETITOR (same industry, same language): 621 - Domain: ${competitor.domain} 622 - Score: ${computeGrade(competitor.score)} (${competitor.score}/100) 623 - Difference: ${scoreDifference.toFixed(1)} points higher than target site 624 625 FRAMING RULES: 626 - Score = percentage of conversion best practices implemented (NOT a conversion rate) 627 - Use "our analysis suggests" — never state conversion claims as fact 628 - ~20-point gap ≈ roughly 2x conversion difference (heuristic, frame as estimate) 629 - Example: "Our analysis suggests ${competitor.domain} implements ${competitor.score}% of conversion best practices vs your ${siteData.score}% — that gap likely means they're converting roughly twice as many visitors."`; 630 } else { 631 competitorSection = `\nTOP COMPETITOR (same industry, same language): 632 - Domain: ${competitor.domain} 633 - Score: Similar to target site (do NOT mention specific scores - they're too close) 634 635 IMPORTANT: Do not mention competitor scores when they are clustered too tightly to be meaningful.`; 636 } 637 } 638 639 // Build localization requirements 640 const localizationSection = ` 641 LOCALIZATION REQUIREMENTS: 642 - Country: ${country.name} (${country.code}) 643 - Language: ${country.language === 'en' ? 'English' : country.language} 644 - Currency: ${country.currencySymbol} ${country.currency} 645 - Spelling: Use ${country.name} spelling conventions${['AU', 'GB', 'UK', 'NZ', 'IE', 'ZA', 'IN'].includes(country.code) ? ' (e.g., "optimise" not "optimize", "colour" not "color", "specialise" not "specialize")' : country.code === 'US' || country.code === 'CA' ? ' (e.g., "optimize" not "optimise", "color" not "colour", "specialize" not "specialise")' : ''} 646 - Cultural context: Reference ${country.name} business norms and expectations 647 - Date format: ${country.dateFormat} 648 - Phone format: ${country.phoneFormat} 649 ${country.timezone ? `- Timezone: ${country.timezone} (consider local business hours for call/response timing)` : ''}`; 650 651 // Build best practices section 652 const bestPracticesSection = ` 653 CHANNEL-SPECIFIC BEST PRACTICES: 654 655 === EMAIL BEST PRACTICES === 656 ${EMAIL_BEST_PRACTICES} 657 658 === SMS BEST PRACTICES === 659 ${SMS_BEST_PRACTICES} 660 661 IMPORTANT: Follow these best practices when crafting proposals for each channel. 662 - Email proposals should follow CAN-SPAM requirements and copywriting best practices 663 - SMS proposals MUST be <160 chars and follow local regulations. 664 - TCPA opt-out ("Reply STOP to opt out.") is REQUIRED for US and CA — include it in every US/CA SMS. 665 - CRITICAL: do NOT add any opt-out, STOP, or unsubscribe text for AU, GB, NZ, IE, ZA, IN or any non-US/CA market. These markets do not require TCPA opt-out and the extra text wastes precious SMS characters. 666 - All proposals must provide genuine value and avoid spam triggers 667 `; 668 669 // Build location string 670 let locationStr = 'Unknown'; 671 if (city && state) { 672 locationStr = `${city}, ${state}`; 673 } else if (city) { 674 locationStr = city; 675 } else if (state) { 676 locationStr = state; 677 } 678 679 // Aggregate stats for credibility (dynamic from DB) 680 const aggRow = await getOne( 681 `SELECT COUNT(*) AS total, ROUND(AVG(score)::numeric, 1) AS avg_score, ROUND(MAX(score)::numeric, 1) AS best_score 682 FROM sites WHERE score IS NOT NULL` 683 ); 684 const aggregateSection = ` 685 AGGREGATE STATS (real data — use for credibility): 686 - total_sites_scored: ${aggRow.total} 687 - avg_score: ${aggRow.avg_score} 688 - best_score: ${aggRow.best_score}`; 689 690 return `Generate personalized sales proposals for this website: 691 692 TARGET BUSINESS: 693 - Domain: ${siteData.domain} 694 - Business Name: ${contactData?.business_name || siteData.domain} 695 - URL: ${siteData.landing_page_url} 696 - Business Type: ${businessType} 697 - Location: ${locationStr} 698 699 CONVERSION ANALYSIS: 700 - Score: ${siteData.score ? computeGrade(siteData.score) : 'N/A'} (${siteData.score || 'N/A'}/100) 701 ${competitorSection} 702 ${aggregateSection} 703 704 KEY WEAKNESSES IDENTIFIED: 705 ${weaknesses} 706 707 CONTACTS TO REACH (generate ONE variant for EACH): 708 ${contactsList} 709 ${localizationSection} 710 711 ${bestPracticesSection} 712 713 Generate ${contacts.length} unique proposal variants - one for each contact listed above. 714 Vary your approach across the variants (don't send identical messages to everyone). 715 IMPORTANT: Use the exact "Business Name" above when referring to the company — do NOT guess or derive a name from the domain. 716 IMPORTANT: Apply all localization requirements above to ensure proposals are culturally appropriate and use correct local conventions. 717 CRITICAL: Follow the channel-specific best practices above - especially SMS length limits and compliance requirements.`; 718 } 719 720 /** 721 * Extract business type from keyword 722 */ 723 function extractBusinessType(keyword) { 724 const commonLocations = [ 725 'sydney', 726 'melbourne', 727 'brisbane', 728 'perth', 729 'adelaide', 730 'canberra', 731 'hobart', 732 'darwin', 733 ]; 734 735 let businessType = keyword; 736 for (const location of commonLocations) { 737 // eslint-disable-next-line security/detect-non-literal-regexp -- Safe: location is from hardcoded array 738 businessType = businessType.replace(new RegExp(location, 'gi'), '').trim(); 739 } 740 741 return businessType || keyword; 742 } 743 744 /** 745 * Extract location from keyword 746 */ 747 function extractLocation(keyword) { 748 const words = keyword.split(' '); 749 return words.slice(-2).join(' '); 750 } 751 752 /** 753 * Extract key weaknesses from score data 754 */ 755 function extractKeyWeaknesses(scoreData) { 756 if (!scoreData || !scoreData.factor_scores) { 757 return 'General conversion optimization opportunities'; 758 } 759 760 const weaknesses = []; 761 762 Object.entries(scoreData.factor_scores).forEach(([factorName, factor]) => { 763 if (factor && factor.score < 5 && factor.reasoning) { 764 const label = factorName.replace(/_/g, ' '); 765 weaknesses.push(`- ${label}: ${stripInjectionMarkers(factor.reasoning)}`); 766 } 767 }); 768 769 return weaknesses.length > 0 770 ? weaknesses.slice(0, 5).join('\n') 771 : 'General conversion optimization opportunities'; 772 } 773 774 /** 775 * Normalize contact method to match database schema 776 */ 777 function normalizeContactMethod(method) { 778 if (!method) return 'email'; // Default fallback 779 780 const normalized = method.toLowerCase().trim(); 781 782 const mapping = { 783 sms: 'sms', 784 email: 'email', 785 form: 'form', 786 'contact form': 'form', 787 contactform: 'form', 788 x: 'x', 789 twitter: 'x', 790 linkedin: 'linkedin', 791 facebook: 'email', 792 instagram: 'email', 793 }; 794 795 // eslint-disable-next-line security/detect-object-injection -- Safe: normalized is from our channel type strings 796 return mapping[normalized] || 'email'; 797 } 798 799 /** 800 * Store proposal variant in messages table 801 * @param {number} siteId - Site ID 802 * @param {Object} variant - Variant object with proposal_text, variant_number 803 * @param {string} subjectLine - Email subject line 804 * @param {Object|null} contact - Contact object {type, uri, name, channel} or null 805 * @param {Object} siteData - Site data including country_code and gdpr_verified 806 * @returns {Promise<number|null>} Message ID 807 */ 808 async function storeProposalVariant(siteId, variant, subjectLine, contact = null, siteData = null) { 809 // Validate variant data 810 if (!variant || typeof variant !== 'object') { 811 throw new Error('Invalid variant: must be an object'); 812 } 813 if (!variant.proposal_text || typeof variant.proposal_text !== 'string') { 814 throw new Error(`Invalid variant.proposal_text for variant ${variant.variant_number}`); 815 } 816 817 // Use contact info if available, otherwise use variant recommendation 818 const rawContactMethod = contact ? contact.channel : variant.recommended_channel || 'email'; 819 const contactMethod = normalizeContactMethod(rawContactMethod); 820 let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION'; 821 822 // Normalize SMS phone numbers with country code 823 if ( 824 contactMethod === 'sms' && 825 contactUri !== 'PENDING_CONTACT_EXTRACTION' && 826 siteData?.country_code 827 ) { 828 contactUri = addCountryCode(contactUri, siteData.country_code); 829 } 830 831 // GDPR compliance check: Block unverified emails in GDPR countries 832 let approvalStatus = 'pending'; 833 834 if ( 835 siteData && 836 contactMethod === 'email' && 837 siteData.gdpr_verified === 0 && 838 siteData.country_code 839 ) { 840 const country = getCountryByCode(siteData.country_code); 841 if (country && country.requiresGDPRCheck) { 842 approvalStatus = 'gdpr_blocked'; 843 logger.info( 844 ` ⚠️ GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)` 845 ); 846 } 847 } 848 849 // Government email check 850 if (contactMethod === 'email' && isGovernmentEmail(contactUri)) { 851 logger.info(` ⚠️ Skipping government email: ${contactUri}`); 852 return null; 853 } 854 855 // Education email check 856 if (contactMethod === 'email' && isEducationEmail(contactUri)) { 857 logger.info(` ⚠️ Skipping education email: ${contactUri}`); 858 return null; 859 } 860 861 // Demo email check 862 if (contactMethod === 'email' && isDemoEmail(contactUri)) { 863 logger.info(` ⚠️ Skipping demo email: ${contactUri}`); 864 return null; 865 } 866 867 // Derive contact_basis from channel 868 const CONTACT_BASIS = { 869 sms: 'public_website_phone', 870 email: 'public_website_email', 871 form: 'contact_form', 872 x: 'social_profile', 873 linkedin: 'social_profile', 874 }; 875 const contactBasis = CONTACT_BASIS[contactMethod] || null; 876 877 // Debug logging 878 const nameInfo = contact?.name ? ` (${contact.name})` : ''; 879 const statusInfo = approvalStatus === 'gdpr_blocked' ? ' [GDPR BLOCKED]' : ''; 880 logger.info( 881 `Storing variant ${variant.variant_number}: siteId=${siteId}, textLen=${variant.proposal_text?.length}, method=${contactMethod}${nameInfo}, uri=${contactUri}${statusInfo}` 882 ); 883 884 // Resolve spintax before storing — each message record has a final, human-readable version 885 const spunText = spin(variant.proposal_text); 886 const spunSubject = spin(subjectLine); 887 888 // OUTPUT SANITIZATION 889 const prospectDomain = siteData?.domain || null; 890 const { sanitized: sanitizedText, strippedUrls, strippedPatterns } = sanitizeLlmOutput( 891 spunText, 892 prospectDomain 893 ); 894 const { sanitized: sanitizedSubject } = sanitizeLlmOutput(spunSubject, prospectDomain); 895 896 if (strippedUrls.length > 0 || strippedPatterns.length > 0) { 897 logger.warn( 898 `[proposals] Output sanitization for site ${siteId}: removed ${strippedPatterns.length} dangerous patterns, ${strippedUrls.length} unauthorized URLs` 899 ); 900 } 901 902 // Reject broken template output 903 if (sanitizedText.startsWith('{') || sanitizedText.startsWith(', ')) { 904 logger.warn( 905 `[proposals] Broken template output for site ${siteId} — starts with '${sanitizedText.slice(0, 10)}' — skipping` 906 ); 907 return null; 908 } 909 910 // SMS: append dynamic signature + opt-out based on country and remaining char budget 911 let finalText = sanitizedText; 912 if (contactMethod === 'sms') { 913 const cc = (siteData?.country_code || '').toUpperCase(); 914 const needsStop = cc === 'US' || cc === 'CA'; 915 finalText = finalText 916 .replace(/\s*Reply STOP to opt out\.?\s*/gi, '') 917 .replace(/\s*-\s*Marcus\s*(Webb)?\s*,?\s*(Audit\s*&\s*Fix)?\s*$/i, '') 918 .trimEnd(); 919 const stopText = needsStop ? ' Reply STOP to opt out.' : ''; 920 const sigTiers = needsStop 921 ? [' - Marcus, Audit&Fix', ' - Audit&Fix'] 922 : [' - Marcus', '']; 923 let bestSig = sigTiers[sigTiers.length - 1]; 924 for (const sig of sigTiers) { 925 if (finalText.length + stopText.length + sig.length <= 160) { 926 bestSig = sig; 927 break; 928 } 929 } 930 finalText = `${finalText}${stopText}${bestSig}`; 931 } else if (contactMethod !== 'sms') { 932 const hasIdentity = /Marcus|Audit&Fix|AuditFix|auditandfix/i.test(finalText); 933 if (!hasIdentity) { 934 finalText = `${finalText.trimEnd()}\n\nMarcus Webb, Audit&Fix (auditandfix.com)`; 935 } 936 } 937 938 // Hard character limit enforcement 939 const CHANNEL_MAX_CHARS = { sms: 160, x: 280 }; 940 const maxChars = CHANNEL_MAX_CHARS[contactMethod]; 941 if (maxChars && finalText.length > maxChars) { 942 logger.warn( 943 `[proposals] ${contactMethod} message too long (${finalText.length}/${maxChars} chars) for site ${siteId} — skipping` 944 ); 945 return null; 946 } 947 948 const result = await run( 949 `INSERT INTO messages ( 950 site_id, 951 direction, 952 message_body, 953 subject_line, 954 contact_method, 955 contact_uri, 956 approval_status, 957 contact_basis 958 ) VALUES ($1, 'outbound', $2, $3, $4, $5, $6, $7) 959 ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved') 960 DO UPDATE SET 961 message_body = EXCLUDED.message_body, 962 subject_line = EXCLUDED.subject_line, 963 approval_status = EXCLUDED.approval_status, 964 contact_basis = EXCLUDED.contact_basis 965 RETURNING id`, 966 [siteId, finalText, sanitizedSubject, contactMethod, contactUri, approvalStatus, contactBasis] 967 ); 968 969 return result.lastInsertRowid; 970 } 971 972 /** 973 * Get pending messages awaiting approval 974 */ 975 export async function getPendingOutreaches(limit = 100) { 976 return await getAll( 977 `SELECT 978 m.*, 979 s.domain, 980 s.keyword, 981 s.landing_page_url, 982 s.grade 983 FROM messages m 984 JOIN sites s ON m.site_id = s.id 985 WHERE m.direction = 'outbound' 986 AND m.approval_status = 'pending' 987 ORDER BY m.created_at DESC 988 LIMIT $1`, 989 [limit] 990 ); 991 } 992 993 /** 994 * Approve message for sending 995 */ 996 export async function approveOutreach(outreachId) { 997 await run("UPDATE messages SET approval_status = 'approved' WHERE id = $1", [outreachId]); 998 logger.success(`Approved message #${outreachId}`); 999 } 1000 1001 /** 1002 * Mark message for rework 1003 */ 1004 export async function reworkOutreach(outreachId, reworkInstructions) { 1005 await run( 1006 "UPDATE messages SET approval_status = 'rework', rework_instructions = $1 WHERE id = $2", 1007 [reworkInstructions, outreachId] 1008 ); 1009 logger.info(`Marked message #${outreachId} for rework`); 1010 } 1011 1012 /** 1013 * Process rework requests (regenerate proposals with feedback) 1014 */ 1015 export async function processReworkQueue() { 1016 const reworkItems = await getAll( 1017 `SELECT DISTINCT site_id, rework_instructions 1018 FROM messages 1019 WHERE direction = 'outbound' 1020 AND approval_status = 'rework' 1021 GROUP BY site_id, rework_instructions` 1022 ); 1023 1024 logger.info(`Processing ${reworkItems.length} rework requests...`); 1025 1026 let processed = 0; 1027 let failed = 0; 1028 1029 for (const item of reworkItems) { 1030 try { 1031 // Delete old pending/rework messages for this site 1032 await run( 1033 `DELETE FROM messages 1034 WHERE site_id = $1 1035 AND direction = 'outbound' 1036 AND approval_status IN ('pending', 'rework')`, 1037 [item.site_id] 1038 ); 1039 1040 // Generate new variants with rework instructions 1041 await generateProposalVariants(item.site_id, item.rework_instructions); 1042 1043 logger.success(`Regenerated proposals for site #${item.site_id}`); 1044 processed++; 1045 } catch (error) { 1046 logger.error(`Failed to rework site #${item.site_id}: ${error.message}`); 1047 failed++; 1048 } 1049 } 1050 1051 logger.success( 1052 `Processed ${processed}/${reworkItems.length} rework requests${failed ? ` (${failed} failed)` : ''}` 1053 ); 1054 } 1055 1056 /** 1057 * Generate proposals for all low-scoring sites without outreaches 1058 */ 1059 export async function generateBulkProposals(limit = null) { 1060 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 1061 1062 const sql = `SELECT s.id, s.domain, s.keyword, s.score 1063 FROM sites s 1064 LEFT JOIN messages m ON s.id = m.site_id AND m.direction = 'outbound' 1065 WHERE s.status IN ('enriched', 'enriched_llm') 1066 AND s.score < $1 1067 AND m.id IS NULL 1068 ORDER BY s.score ASC 1069 ${limit ? `LIMIT ${limit}` : ''}`; 1070 1071 const sites = await getAll(sql, [cutoff]); 1072 1073 logger.info(`Generating proposals for ${sites.length} sites...`); 1074 1075 const results = []; 1076 1077 for (const site of sites) { 1078 try { 1079 const result = await generateProposalVariants(site.id); 1080 results.push(result); 1081 1082 // Rate limiting 1083 await new Promise(resolve => setTimeout(resolve, 2000)); 1084 } catch (error) { 1085 logger.error(`Failed for site #${site.id}`, error); 1086 results.push({ siteId: site.id, error: error.message }); 1087 } 1088 } 1089 1090 logger.success( 1091 `Generated proposals for ${results.filter(r => !r.error).length}/${results.length} sites` 1092 ); 1093 1094 return results; 1095 } 1096 1097 // CLI functionality 1098 if (import.meta.url === `file://${process.argv[1]}`) { 1099 const command = process.argv[2]; 1100 1101 if (command === 'generate') { 1102 const siteId = parseInt(process.argv[3], 10); 1103 if (!siteId) { 1104 console.error('Usage: node src/proposal-generator-v2.js generate <site_id>'); 1105 process.exit(1); 1106 } 1107 1108 generateProposalVariants(siteId) 1109 .then(result => { 1110 console.log('\n✅ Proposals generated!\n'); 1111 console.log(`Site: ${result.domain} (${result.keyword})`); 1112 console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`); 1113 process.exit(0); 1114 }) 1115 .catch(error => { 1116 console.error(`\n❌ Failed: ${error.message}`); 1117 process.exit(1); 1118 }); 1119 } else if (command === 'bulk') { 1120 const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null; 1121 1122 generateBulkProposals(limit) 1123 .then(() => process.exit(0)) 1124 .catch(error => { 1125 console.error(`\n❌ Failed: ${error.message}`); 1126 process.exit(1); 1127 }); 1128 } else if (command === 'pending') { 1129 const limit = process.argv[3] ? parseInt(process.argv[3], 10) : 100; 1130 getPendingOutreaches(limit) 1131 .then(msgs => { 1132 console.log(JSON.stringify(msgs, null, 2)); 1133 process.exit(0); 1134 }) 1135 .catch(error => { 1136 console.error(`\n❌ Failed: ${error.message}`); 1137 process.exit(1); 1138 }); 1139 } else { 1140 console.log('Usage:'); 1141 console.log(' generate <site_id> - Generate proposals for a site'); 1142 console.log(' bulk [limit] - Generate for all low-scoring sites'); 1143 console.log(' pending [limit] - List pending outreaches'); 1144 process.exit(1); 1145 } 1146 }