proposal-generator-templates.js
1 #!/usr/bin/env node 2 3 /** 4 * Template-Based Proposal Generator 5 * Zero-cost alternative to LLM-based proposals 6 * 7 * Uses pre-written, tested templates instead of generating proposals with AI 8 * Savings: $0.18/site → $0/site (79% of total API costs) 9 * 10 * Activated when USE_LLM_PROPOSALS=false in .env 11 */ 12 13 import { run, getOne, getAll } from './utils/db.js'; 14 import { readFileSync } from 'fs'; 15 import { join, dirname } from 'path'; 16 import { fileURLToPath } from 'url'; 17 import Logger from './utils/logger.js'; 18 import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js'; 19 import { 20 generateTemplateProposal, 21 analyzeScoreJson, 22 shortenSmsWithHaiku, 23 } from './utils/template-proposals.js'; 24 import { validateCompliance } from './utils/compliance-validator.js'; 25 import { spin } from './utils/spintax.js'; 26 import { getCountryByCode } from './config/countries.js'; 27 import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js'; 28 import { addCountryCode } from './utils/phone-normalizer.js'; 29 import { getScoreDataWithFallback } from './utils/score-storage.js'; 30 import { getContactsDataWithFallback } from './utils/contacts-storage.js'; 31 import { parseEnvSet } from './utils/load-env.js'; 32 33 const __filename = fileURLToPath(import.meta.url); 34 const __dirname = dirname(__filename); 35 const projectRoot = join(__dirname, '..'); 36 37 const logger = new Logger('ProposalGeneratorTemplates'); 38 39 // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS). 40 const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES); 41 42 /** 43 * Normalize contact method to match database schema 44 */ 45 function normalizeContactMethod(method) { 46 if (!method) return 'email'; 47 48 const normalized = method.toLowerCase().trim(); 49 50 const mapping = { 51 sms: 'sms', 52 email: 'email', 53 form: 'form', 54 'contact form': 'form', 55 contactform: 'form', 56 x: 'x', 57 twitter: 'x', 58 linkedin: 'linkedin', 59 facebook: 'email', 60 instagram: 'email', 61 }; 62 63 return mapping[normalized] || 'email'; 64 } 65 66 /** 67 * Store proposal variant in messages table 68 */ 69 async function storeProposalVariant( 70 siteId, 71 proposalText, 72 subjectLine, 73 variantNumber, 74 contact, 75 siteData, 76 templateId = null 77 ) { 78 const rawContactMethod = contact ? contact.channel : 'email'; 79 const contactMethod = normalizeContactMethod(rawContactMethod); 80 let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION'; 81 82 // Normalize SMS phone numbers with country code 83 if ( 84 contactMethod === 'sms' && 85 contactUri !== 'PENDING_CONTACT_EXTRACTION' && 86 siteData?.country_code 87 ) { 88 contactUri = addCountryCode(contactUri, siteData.country_code); 89 } 90 91 // GDPR/Government/Education/Demo email filtering 92 let outreachStatus = 'pending'; 93 let errorMessage = null; 94 95 if ( 96 siteData && 97 contactMethod === 'email' && 98 siteData.gdpr_verified === 0 && 99 siteData.country_code 100 ) { 101 const country = getCountryByCode(siteData.country_code); 102 if (country && country.requiresGDPRCheck) { 103 outreachStatus = 'gdpr_blocked'; 104 errorMessage = `GDPR: Email failed company verification (free email or uncertain). Country: ${siteData.country_code}`; 105 logger.info( 106 ` ⚠️ GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)` 107 ); 108 } 109 } 110 111 if (contactMethod === 'email' && isGovernmentEmail(contactUri)) { 112 outreachStatus = 'rejected'; 113 errorMessage = `Government email blocked: ${contactUri}`; 114 logger.info(` ⚠️ Government email blocked: ${contactUri}`); 115 } 116 117 if (contactMethod === 'email' && isEducationEmail(contactUri)) { 118 outreachStatus = 'rejected'; 119 errorMessage = `Education email blocked: ${contactUri}`; 120 logger.info(` ⚠️ Education email blocked: ${contactUri}`); 121 } 122 123 if (contactMethod === 'email' && isDemoEmail(contactUri)) { 124 outreachStatus = 'rejected'; 125 errorMessage = `Demo email blocked: ${contactUri}`; 126 logger.info(` ⚠️ Demo email blocked: ${contactUri}`); 127 } 128 129 const result = await run( 130 `INSERT INTO messages ( 131 site_id, 132 message_body, 133 subject_line, 134 contact_method, 135 contact_uri, 136 direction, 137 approval_status, 138 delivery_status, 139 error_message, 140 template_id 141 ) VALUES ($1, $2, $3, $4, $5, 'outbound', $6, NULL, $7, $8) 142 ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved') 143 DO UPDATE SET 144 message_body = EXCLUDED.message_body, 145 subject_line = EXCLUDED.subject_line, 146 approval_status = EXCLUDED.approval_status, 147 error_message = EXCLUDED.error_message, 148 template_id = EXCLUDED.template_id 149 RETURNING id`, 150 [ 151 siteId, 152 proposalText, 153 subjectLine, 154 contactMethod, 155 contactUri, 156 outreachStatus, 157 errorMessage, 158 templateId, 159 ] 160 ); 161 162 return result.lastInsertRowid; 163 } 164 165 /** 166 * Generate template-based proposals for a site 167 * @param {number} siteId - Site ID from database 168 * @returns {Promise<Object>} Generated proposals 169 */ 170 export async function generateProposalVariants(siteId) { 171 // Get site data — score and grade are top-level columns, not embedded in score_json 172 const siteData = await getOne( 173 `SELECT 174 s.*, 175 s.score, 176 s.grade, 177 s.country_code, 178 s.google_domain, 179 s.language_code, 180 s.currency_code 181 FROM sites s 182 WHERE s.id = $1`, 183 [siteId] 184 ); 185 186 if (!siteData) { 187 throw new Error(`Site not found: ${siteId}`); 188 } 189 190 // Check score threshold 191 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 192 if (siteData.score >= cutoff) { 193 throw new Error( 194 `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.` 195 ); 196 } 197 198 // Check language pause list — non-English templates have outstanding retranslation work. 199 const lang = siteData.language_code; 200 if (lang && lang !== 'en') { 201 let pausedLangs = []; 202 try { 203 const pausedData = JSON.parse( 204 readFileSync(join(projectRoot, 'data/compliance/paused-languages.json'), 'utf-8') 205 ); 206 pausedLangs = pausedData.paused || []; 207 } catch (_) { 208 // File missing or malformed — treat as no languages paused 209 } 210 if (pausedLangs.includes(lang)) { 211 await run( 212 `UPDATE sites SET error_message = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2`, 213 [ 214 `Paused: ${lang} template retranslation pending (see data/compliance/paused-languages.json)`, 215 siteId, 216 ] 217 ); 218 logger.info(`Skipping ${siteData.domain} — language '${lang}' is paused for retranslation`); 219 return { 220 siteId, 221 domain: siteData.domain, 222 keyword: siteData.keyword, 223 outreachIds: [], 224 variants: [], 225 reasoning: `Language '${lang}' paused for retranslation`, 226 contactCount: 0, 227 }; 228 } 229 } 230 231 logger.info(`Generating template proposals for ${siteData.domain} (${siteData.keyword})`); 232 233 // Parse JSON data (filesystem first, DB fallback) 234 const scoreData = getScoreDataWithFallback(siteData.id, siteData); 235 let contactData = getContactsDataWithFallback(siteData.id, siteData); 236 237 // Clean up invalid social links 238 if (contactData) { 239 contactData = cleanInvalidSocialLinks(contactData); 240 } 241 242 // Get all contacts — uses Haiku to filter non-person names (e.g. "Mobile", "Brisbane") 243 const allContacts = contactData 244 ? await getAllContactsWithNames(contactData, siteData.country_code, siteData.domain) 245 : []; 246 247 if (allContacts.length === 0) { 248 logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`); 249 // Advance to proposals_drafted to prevent infinite loop on next run 250 await run( 251 `UPDATE sites SET status = 'proposals_drafted', updated_at = CURRENT_TIMESTAMP WHERE id = $1`, 252 [siteId] 253 ); 254 return { 255 siteId, 256 domain: siteData.domain, 257 keyword: siteData.keyword, 258 outreachIds: [], 259 variants: [], 260 reasoning: 'No contacts found', 261 contactCount: 0, 262 }; 263 } 264 265 // Filter out contacts that already have outbound messages 266 const existingOutreaches = await getAll( 267 `SELECT contact_method, contact_uri 268 FROM messages 269 WHERE direction = 'outbound' AND site_id = $1`, 270 [siteId] 271 ); 272 273 const existingSet = new Set( 274 existingOutreaches.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`) 275 ); 276 277 const availableContacts = allContacts.filter(contact => { 278 const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`; 279 return !existingSet.has(key); 280 }); 281 282 if (availableContacts.length === 0) { 283 logger.warn( 284 `All ${allContacts.length} contacts for ${siteData.domain} already have outreaches - skipping` 285 ); 286 // Advance to proposals_drafted to prevent infinite loop on next run 287 await run( 288 `UPDATE sites SET status = 'proposals_drafted', updated_at = CURRENT_TIMESTAMP WHERE id = $1`, 289 [siteId] 290 ); 291 return { 292 siteId, 293 domain: siteData.domain, 294 keyword: siteData.keyword, 295 outreachIds: [], 296 variants: [], 297 reasoning: 'All contacts already have outreaches', 298 contactCount: allContacts.length, 299 }; 300 } 301 302 logger.info(`Found ${availableContacts.length} contacts - generating template-based proposals`); 303 304 // Analyze score_json ONCE per site — result is the same for all contacts. 305 let cachedAnalysis = null; 306 try { 307 cachedAnalysis = await analyzeScoreJson( 308 scoreData, 309 siteData.keyword || null, 310 siteData.language_code || 'en', 311 siteData.country_code 312 ); 313 } catch (err) { 314 logger.warn( 315 `analyzeScoreJson failed for ${siteData.domain}: ${err.message} — each contact will retry individually` 316 ); 317 } 318 319 // Generate template-based proposals for each contact 320 const outreachIds = []; 321 const variants = []; 322 const missingTemplateReasons = []; 323 const siteCountryUpper = (siteData.country_code || '').toUpperCase(); 324 325 for (let i = 0; i < availableContacts.length; i++) { 326 const contact = availableContacts[i]; 327 328 if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(siteCountryUpper)) { 329 logger.info(`Skipping SMS proposal for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`); 330 continue; 331 } 332 333 try { 334 // Generate proposal from template (throws if no template for this country/language/channel) 335 const { 336 proposalText: rawProposalText, 337 templateId, 338 subjectLine, 339 } = await generateTemplateProposal(siteData, scoreData, contact, cachedAnalysis); 340 341 // Compliance pre-storage check 342 const compliance = validateCompliance( 343 rawProposalText, 344 contact.channel, 345 siteData.country_code 346 ); 347 if (compliance.blocked) { 348 logger.warn(`Compliance block for contact ${contact.uri}: ${compliance.reason}`); 349 missingTemplateReasons.push(`Compliance: ${compliance.reason}`); 350 continue; 351 } 352 // Resolve spintax before storage so stored text is ready-to-send 353 let spunText = spin(compliance.modifiedText || rawProposalText); 354 const spunSubject = subjectLine ? spin(subjectLine) : subjectLine; 355 356 // Post-compliance SMS length check 357 if (contact.channel === 'sms' && spunText.length > 160) { 358 spunText = await shortenSmsWithHaiku(spunText); 359 if (spunText.length > 160) { 360 logger.warn( 361 `SMS still ${spunText.length} chars after Haiku shortening — hard trimming` 362 ); 363 const truncated = spunText.slice(0, 157); 364 const lastSentence = truncated.lastIndexOf('. '); 365 spunText = lastSentence > 80 ? spunText.slice(0, lastSentence + 1) : `${truncated}...`; 366 } 367 } 368 369 // Email must have a subject line — block without one 370 if (contact.channel === 'email' && !spunSubject) { 371 logger.error(`Email proposal for ${contact.uri} has no subject line — skipping`); 372 missingTemplateReasons.push('Email missing subject line'); 373 continue; 374 } 375 376 // Store outreach entry 377 const outreachId = await storeProposalVariant( 378 siteId, 379 spunText, 380 spunSubject, 381 i + 1, // variant_number 382 contact, 383 siteData, 384 templateId 385 ); 386 387 outreachIds.push(outreachId); 388 variants.push({ 389 variant_number: i + 1, 390 proposal_text: spunText, 391 template_id: templateId, 392 contact_channel: contact.channel, 393 contact_name: contact.name, 394 }); 395 396 const nameInfo = contact.name ? ` (${contact.name})` : ''; 397 logger.success( 398 `Stored outreach #${outreachId}: Template ${templateId} → ${contact.channel}${nameInfo} (${contact.uri})` 399 ); 400 } catch (contactError) { 401 if (contactError.message.startsWith('No templates for')) { 402 missingTemplateReasons.push(contactError.message); 403 logger.warn(`No template: ${contactError.message} — skipping contact ${contact.uri}`); 404 } else if (contactError.message.includes('recommendation_sms blank after 2')) { 405 // Score_json produced no usable SMS fragment — same for all contacts, bail early 406 missingTemplateReasons.push(contactError.message); 407 logger.warn(`recommendation_sms blank for ${siteData.domain} — skipping all contacts`); 408 break; 409 } else { 410 logger.error( 411 `Failed to generate template proposal for contact ${contact.uri}`, 412 contactError 413 ); 414 } 415 } 416 } 417 418 // Only advance to proposals_drafted if at least one outreach was stored 419 if (outreachIds.length > 0) { 420 await run( 421 `UPDATE sites 422 SET status = 'proposals_drafted', 423 updated_at = CURRENT_TIMESTAMP 424 WHERE id = $1`, 425 [siteId] 426 ); 427 } else if (missingTemplateReasons.some(r => r.includes('recommendation_sms blank'))) { 428 const errorMsg = missingTemplateReasons.find(r => r.includes('recommendation_sms blank')); 429 await run( 430 `UPDATE sites 431 SET status = 'proposals_drafted', 432 error_message = $1, 433 updated_at = CURRENT_TIMESTAMP 434 WHERE id = $2`, 435 [errorMsg, siteId] 436 ); 437 logger.warn( 438 `recommendation_sms blank for ${siteData.domain} — advanced to proposals_drafted with error` 439 ); 440 } else if (missingTemplateReasons.length > 0) { 441 const errorMsg = missingTemplateReasons[0]; 442 await run( 443 `UPDATE sites 444 SET error_message = $1, 445 updated_at = CURRENT_TIMESTAMP 446 WHERE id = $2`, 447 [errorMsg, siteId] 448 ); 449 logger.warn(`No outreaches stored for ${siteData.domain}: ${errorMsg} — leaving at enriched`); 450 } 451 452 logger.success( 453 `Generated ${variants.length} template proposals and created ${outreachIds.length} outreach entries for ${siteData.domain}` 454 ); 455 456 return { 457 siteId, 458 domain: siteData.domain, 459 keyword: siteData.keyword, 460 outreachIds, 461 variants, 462 reasoning: 'Generated from pre-written templates (zero LLM cost)', 463 contactCount: allContacts.length, 464 }; 465 } 466 467 /** 468 * Generate template proposals for all low-scoring sites without outreaches 469 */ 470 export async function generateBulkProposals(limit = null) { 471 const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82'); 472 473 const sql = `SELECT s.id, s.domain, s.keyword, s.score 474 FROM sites s 475 LEFT JOIN messages o ON s.id = o.site_id AND o.direction = 'outbound' 476 WHERE s.status IN ('enriched', 'enriched_llm') 477 AND s.score < $1 478 AND o.id IS NULL 479 ORDER BY s.score ASC 480 ${limit ? `LIMIT ${limit}` : ''}`; 481 482 const sites = await getAll(sql, [cutoff]); 483 484 logger.info(`Generating template proposals for ${sites.length} sites...`); 485 486 const results = []; 487 488 for (const site of sites) { 489 try { 490 const result = await generateProposalVariants(site.id); 491 results.push(result); 492 493 // Rate limiting (minimal since no API calls) 494 await new Promise(resolve => setTimeout(resolve, 100)); 495 } catch (error) { 496 logger.error(`Failed for site #${site.id}`, error); 497 results.push({ siteId: site.id, error: error.message }); 498 } 499 } 500 501 logger.success( 502 `Generated template proposals for ${results.filter(r => !r.error).length}/${results.length} sites` 503 ); 504 505 return results; 506 } 507 508 // CLI functionality 509 if (import.meta.url === `file://${process.argv[1]}`) { 510 const command = process.argv[2]; 511 512 if (command === 'generate') { 513 const siteId = parseInt(process.argv[3], 10); 514 if (!siteId) { 515 console.error('Usage: node src/proposal-generator-templates.js generate <site_id>'); 516 process.exit(1); 517 } 518 519 generateProposalVariants(siteId) 520 .then(result => { 521 console.log('\n✅ Template proposals generated!\n'); 522 console.log(`Site: ${result.domain} (${result.keyword})`); 523 console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`); 524 console.log(`\nReasoning: ${result.reasoning}\n`); 525 process.exit(0); 526 }) 527 .catch(error => { 528 console.error(`\n❌ Failed: ${error.message}`); 529 process.exit(1); 530 }); 531 } else if (command === 'bulk') { 532 const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null; 533 534 generateBulkProposals(limit) 535 .then(() => process.exit(0)) 536 .catch(error => { 537 console.error(`\n❌ Failed: ${error.message}`); 538 process.exit(1); 539 }); 540 } else { 541 console.log('Usage:'); 542 console.log(' generate <site_id> - Generate template proposals for a site'); 543 console.log(' bulk [limit] - Generate for all low-scoring sites'); 544 console.log('\nTemplate-based proposals:'); 545 console.log(' - Zero LLM API cost ($0.18/site → $0/site)'); 546 console.log(' - Pre-written, tested templates'); 547 console.log(' - No approval workflow needed (templates are pre-approved)'); 548 console.log(' - Tracks template performance for optimization'); 549 process.exit(1); 550 } 551 } 552 553 export default { 554 generateProposalVariants, 555 generateBulkProposals, 556 };