/ src / proposal-generator-templates.js
proposal-generator-templates.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Template-Based Proposal Generator
  5   * Zero-cost alternative to LLM-based proposals
  6   *
  7   * Uses pre-written, tested templates instead of generating proposals with AI
  8   * Savings: $0.18/site → $0/site (79% of total API costs)
  9   *
 10   * Activated when USE_LLM_PROPOSALS=false in .env
 11   */
 12  
 13  import { run, getOne, getAll } from './utils/db.js';
 14  import { readFileSync } from 'fs';
 15  import { join, dirname } from 'path';
 16  import { fileURLToPath } from 'url';
 17  import Logger from './utils/logger.js';
 18  import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js';
 19  import {
 20    generateTemplateProposal,
 21    analyzeScoreJson,
 22    shortenSmsWithHaiku,
 23  } from './utils/template-proposals.js';
 24  import { validateCompliance } from './utils/compliance-validator.js';
 25  import { spin } from './utils/spintax.js';
 26  import { getCountryByCode } from './config/countries.js';
 27  import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js';
 28  import { addCountryCode } from './utils/phone-normalizer.js';
 29  import { getScoreDataWithFallback } from './utils/score-storage.js';
 30  import { getContactsDataWithFallback } from './utils/contacts-storage.js';
 31  import { parseEnvSet } from './utils/load-env.js';
 32  
 33  const __filename = fileURLToPath(import.meta.url);
 34  const __dirname = dirname(__filename);
 35  const projectRoot = join(__dirname, '..');
 36  
 37  const logger = new Logger('ProposalGeneratorTemplates');
 38  
 39  // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS).
 40  const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES);
 41  
 42  /**
 43   * Normalize contact method to match database schema
 44   */
 45  function normalizeContactMethod(method) {
 46    if (!method) return 'email';
 47  
 48    const normalized = method.toLowerCase().trim();
 49  
 50    const mapping = {
 51      sms: 'sms',
 52      email: 'email',
 53      form: 'form',
 54      'contact form': 'form',
 55      contactform: 'form',
 56      x: 'x',
 57      twitter: 'x',
 58      linkedin: 'linkedin',
 59      facebook: 'email',
 60      instagram: 'email',
 61    };
 62  
 63    return mapping[normalized] || 'email';
 64  }
 65  
 66  /**
 67   * Store proposal variant in messages table
 68   */
 69  async function storeProposalVariant(
 70    siteId,
 71    proposalText,
 72    subjectLine,
 73    variantNumber,
 74    contact,
 75    siteData,
 76    templateId = null
 77  ) {
 78    const rawContactMethod = contact ? contact.channel : 'email';
 79    const contactMethod = normalizeContactMethod(rawContactMethod);
 80    let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION';
 81  
 82    // Normalize SMS phone numbers with country code
 83    if (
 84      contactMethod === 'sms' &&
 85      contactUri !== 'PENDING_CONTACT_EXTRACTION' &&
 86      siteData?.country_code
 87    ) {
 88      contactUri = addCountryCode(contactUri, siteData.country_code);
 89    }
 90  
 91    // GDPR/Government/Education/Demo email filtering
 92    let outreachStatus = 'pending';
 93    let errorMessage = null;
 94  
 95    if (
 96      siteData &&
 97      contactMethod === 'email' &&
 98      siteData.gdpr_verified === 0 &&
 99      siteData.country_code
100    ) {
101      const country = getCountryByCode(siteData.country_code);
102      if (country && country.requiresGDPRCheck) {
103        outreachStatus = 'gdpr_blocked';
104        errorMessage = `GDPR: Email failed company verification (free email or uncertain). Country: ${siteData.country_code}`;
105        logger.info(
106          `  ⚠️  GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)`
107        );
108      }
109    }
110  
111    if (contactMethod === 'email' && isGovernmentEmail(contactUri)) {
112      outreachStatus = 'rejected';
113      errorMessage = `Government email blocked: ${contactUri}`;
114      logger.info(`  ⚠️  Government email blocked: ${contactUri}`);
115    }
116  
117    if (contactMethod === 'email' && isEducationEmail(contactUri)) {
118      outreachStatus = 'rejected';
119      errorMessage = `Education email blocked: ${contactUri}`;
120      logger.info(`  ⚠️  Education email blocked: ${contactUri}`);
121    }
122  
123    if (contactMethod === 'email' && isDemoEmail(contactUri)) {
124      outreachStatus = 'rejected';
125      errorMessage = `Demo email blocked: ${contactUri}`;
126      logger.info(`  ⚠️  Demo email blocked: ${contactUri}`);
127    }
128  
129    const result = await run(
130      `INSERT INTO messages (
131        site_id,
132        message_body,
133        subject_line,
134        contact_method,
135        contact_uri,
136        direction,
137        approval_status,
138        delivery_status,
139        error_message,
140        template_id
141      ) VALUES ($1, $2, $3, $4, $5, 'outbound', $6, NULL, $7, $8)
142      ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved')
143      DO UPDATE SET
144        message_body = EXCLUDED.message_body,
145        subject_line = EXCLUDED.subject_line,
146        approval_status = EXCLUDED.approval_status,
147        error_message = EXCLUDED.error_message,
148        template_id = EXCLUDED.template_id
149      RETURNING id`,
150      [
151        siteId,
152        proposalText,
153        subjectLine,
154        contactMethod,
155        contactUri,
156        outreachStatus,
157        errorMessage,
158        templateId,
159      ]
160    );
161  
162    return result.lastInsertRowid;
163  }
164  
165  /**
166   * Generate template-based proposals for a site
167   * @param {number} siteId - Site ID from database
168   * @returns {Promise<Object>} Generated proposals
169   */
170  export async function generateProposalVariants(siteId) {
171    // Get site data — score and grade are top-level columns, not embedded in score_json
172    const siteData = await getOne(
173      `SELECT
174        s.*,
175        s.score,
176        s.grade,
177        s.country_code,
178        s.google_domain,
179        s.language_code,
180        s.currency_code
181      FROM sites s
182      WHERE s.id = $1`,
183      [siteId]
184    );
185  
186    if (!siteData) {
187      throw new Error(`Site not found: ${siteId}`);
188    }
189  
190    // Check score threshold
191    const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
192    if (siteData.score >= cutoff) {
193      throw new Error(
194        `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.`
195      );
196    }
197  
198    // Check language pause list — non-English templates have outstanding retranslation work.
199    const lang = siteData.language_code;
200    if (lang && lang !== 'en') {
201      let pausedLangs = [];
202      try {
203        const pausedData = JSON.parse(
204          readFileSync(join(projectRoot, 'data/compliance/paused-languages.json'), 'utf-8')
205        );
206        pausedLangs = pausedData.paused || [];
207      } catch (_) {
208        // File missing or malformed — treat as no languages paused
209      }
210      if (pausedLangs.includes(lang)) {
211        await run(
212          `UPDATE sites SET error_message = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2`,
213          [
214            `Paused: ${lang} template retranslation pending (see data/compliance/paused-languages.json)`,
215            siteId,
216          ]
217        );
218        logger.info(`Skipping ${siteData.domain} — language '${lang}' is paused for retranslation`);
219        return {
220          siteId,
221          domain: siteData.domain,
222          keyword: siteData.keyword,
223          outreachIds: [],
224          variants: [],
225          reasoning: `Language '${lang}' paused for retranslation`,
226          contactCount: 0,
227        };
228      }
229    }
230  
231    logger.info(`Generating template proposals for ${siteData.domain} (${siteData.keyword})`);
232  
233    // Parse JSON data (filesystem first, DB fallback)
234    const scoreData = getScoreDataWithFallback(siteData.id, siteData);
235    let contactData = getContactsDataWithFallback(siteData.id, siteData);
236  
237    // Clean up invalid social links
238    if (contactData) {
239      contactData = cleanInvalidSocialLinks(contactData);
240    }
241  
242    // Get all contacts — uses Haiku to filter non-person names (e.g. "Mobile", "Brisbane")
243    const allContacts = contactData
244      ? await getAllContactsWithNames(contactData, siteData.country_code, siteData.domain)
245      : [];
246  
247    if (allContacts.length === 0) {
248      logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`);
249      // Advance to proposals_drafted to prevent infinite loop on next run
250      await run(
251        `UPDATE sites SET status = 'proposals_drafted', updated_at = CURRENT_TIMESTAMP WHERE id = $1`,
252        [siteId]
253      );
254      return {
255        siteId,
256        domain: siteData.domain,
257        keyword: siteData.keyword,
258        outreachIds: [],
259        variants: [],
260        reasoning: 'No contacts found',
261        contactCount: 0,
262      };
263    }
264  
265    // Filter out contacts that already have outbound messages
266    const existingOutreaches = await getAll(
267      `SELECT contact_method, contact_uri
268       FROM messages
269       WHERE direction = 'outbound' AND site_id = $1`,
270      [siteId]
271    );
272  
273    const existingSet = new Set(
274      existingOutreaches.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`)
275    );
276  
277    const availableContacts = allContacts.filter(contact => {
278      const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`;
279      return !existingSet.has(key);
280    });
281  
282    if (availableContacts.length === 0) {
283      logger.warn(
284        `All ${allContacts.length} contacts for ${siteData.domain} already have outreaches - skipping`
285      );
286      // Advance to proposals_drafted to prevent infinite loop on next run
287      await run(
288        `UPDATE sites SET status = 'proposals_drafted', updated_at = CURRENT_TIMESTAMP WHERE id = $1`,
289        [siteId]
290      );
291      return {
292        siteId,
293        domain: siteData.domain,
294        keyword: siteData.keyword,
295        outreachIds: [],
296        variants: [],
297        reasoning: 'All contacts already have outreaches',
298        contactCount: allContacts.length,
299      };
300    }
301  
302    logger.info(`Found ${availableContacts.length} contacts - generating template-based proposals`);
303  
304    // Analyze score_json ONCE per site — result is the same for all contacts.
305    let cachedAnalysis = null;
306    try {
307      cachedAnalysis = await analyzeScoreJson(
308        scoreData,
309        siteData.keyword || null,
310        siteData.language_code || 'en',
311        siteData.country_code
312      );
313    } catch (err) {
314      logger.warn(
315        `analyzeScoreJson failed for ${siteData.domain}: ${err.message} — each contact will retry individually`
316      );
317    }
318  
319    // Generate template-based proposals for each contact
320    const outreachIds = [];
321    const variants = [];
322    const missingTemplateReasons = [];
323    const siteCountryUpper = (siteData.country_code || '').toUpperCase();
324  
325    for (let i = 0; i < availableContacts.length; i++) {
326      const contact = availableContacts[i];
327  
328      if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(siteCountryUpper)) {
329        logger.info(`Skipping SMS proposal for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`);
330        continue;
331      }
332  
333      try {
334        // Generate proposal from template (throws if no template for this country/language/channel)
335        const {
336          proposalText: rawProposalText,
337          templateId,
338          subjectLine,
339        } = await generateTemplateProposal(siteData, scoreData, contact, cachedAnalysis);
340  
341        // Compliance pre-storage check
342        const compliance = validateCompliance(
343          rawProposalText,
344          contact.channel,
345          siteData.country_code
346        );
347        if (compliance.blocked) {
348          logger.warn(`Compliance block for contact ${contact.uri}: ${compliance.reason}`);
349          missingTemplateReasons.push(`Compliance: ${compliance.reason}`);
350          continue;
351        }
352        // Resolve spintax before storage so stored text is ready-to-send
353        let spunText = spin(compliance.modifiedText || rawProposalText);
354        const spunSubject = subjectLine ? spin(subjectLine) : subjectLine;
355  
356        // Post-compliance SMS length check
357        if (contact.channel === 'sms' && spunText.length > 160) {
358          spunText = await shortenSmsWithHaiku(spunText);
359          if (spunText.length > 160) {
360            logger.warn(
361              `SMS still ${spunText.length} chars after Haiku shortening — hard trimming`
362            );
363            const truncated = spunText.slice(0, 157);
364            const lastSentence = truncated.lastIndexOf('. ');
365            spunText = lastSentence > 80 ? spunText.slice(0, lastSentence + 1) : `${truncated}...`;
366          }
367        }
368  
369        // Email must have a subject line — block without one
370        if (contact.channel === 'email' && !spunSubject) {
371          logger.error(`Email proposal for ${contact.uri} has no subject line — skipping`);
372          missingTemplateReasons.push('Email missing subject line');
373          continue;
374        }
375  
376        // Store outreach entry
377        const outreachId = await storeProposalVariant(
378          siteId,
379          spunText,
380          spunSubject,
381          i + 1, // variant_number
382          contact,
383          siteData,
384          templateId
385        );
386  
387        outreachIds.push(outreachId);
388        variants.push({
389          variant_number: i + 1,
390          proposal_text: spunText,
391          template_id: templateId,
392          contact_channel: contact.channel,
393          contact_name: contact.name,
394        });
395  
396        const nameInfo = contact.name ? ` (${contact.name})` : '';
397        logger.success(
398          `Stored outreach #${outreachId}: Template ${templateId} → ${contact.channel}${nameInfo} (${contact.uri})`
399        );
400      } catch (contactError) {
401        if (contactError.message.startsWith('No templates for')) {
402          missingTemplateReasons.push(contactError.message);
403          logger.warn(`No template: ${contactError.message} — skipping contact ${contact.uri}`);
404        } else if (contactError.message.includes('recommendation_sms blank after 2')) {
405          // Score_json produced no usable SMS fragment — same for all contacts, bail early
406          missingTemplateReasons.push(contactError.message);
407          logger.warn(`recommendation_sms blank for ${siteData.domain} — skipping all contacts`);
408          break;
409        } else {
410          logger.error(
411            `Failed to generate template proposal for contact ${contact.uri}`,
412            contactError
413          );
414        }
415      }
416    }
417  
418    // Only advance to proposals_drafted if at least one outreach was stored
419    if (outreachIds.length > 0) {
420      await run(
421        `UPDATE sites
422         SET status = 'proposals_drafted',
423             updated_at = CURRENT_TIMESTAMP
424         WHERE id = $1`,
425        [siteId]
426      );
427    } else if (missingTemplateReasons.some(r => r.includes('recommendation_sms blank'))) {
428      const errorMsg = missingTemplateReasons.find(r => r.includes('recommendation_sms blank'));
429      await run(
430        `UPDATE sites
431         SET status = 'proposals_drafted',
432             error_message = $1,
433             updated_at = CURRENT_TIMESTAMP
434         WHERE id = $2`,
435        [errorMsg, siteId]
436      );
437      logger.warn(
438        `recommendation_sms blank for ${siteData.domain} — advanced to proposals_drafted with error`
439      );
440    } else if (missingTemplateReasons.length > 0) {
441      const errorMsg = missingTemplateReasons[0];
442      await run(
443        `UPDATE sites
444         SET error_message = $1,
445             updated_at = CURRENT_TIMESTAMP
446         WHERE id = $2`,
447        [errorMsg, siteId]
448      );
449      logger.warn(`No outreaches stored for ${siteData.domain}: ${errorMsg} — leaving at enriched`);
450    }
451  
452    logger.success(
453      `Generated ${variants.length} template proposals and created ${outreachIds.length} outreach entries for ${siteData.domain}`
454    );
455  
456    return {
457      siteId,
458      domain: siteData.domain,
459      keyword: siteData.keyword,
460      outreachIds,
461      variants,
462      reasoning: 'Generated from pre-written templates (zero LLM cost)',
463      contactCount: allContacts.length,
464    };
465  }
466  
467  /**
468   * Generate template proposals for all low-scoring sites without outreaches
469   */
470  export async function generateBulkProposals(limit = null) {
471    const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
472  
473    const sql = `SELECT s.id, s.domain, s.keyword, s.score
474         FROM sites s
475         LEFT JOIN messages o ON s.id = o.site_id AND o.direction = 'outbound'
476         WHERE s.status IN ('enriched', 'enriched_llm')
477         AND s.score < $1
478         AND o.id IS NULL
479         ORDER BY s.score ASC
480         ${limit ? `LIMIT ${limit}` : ''}`;
481  
482    const sites = await getAll(sql, [cutoff]);
483  
484    logger.info(`Generating template proposals for ${sites.length} sites...`);
485  
486    const results = [];
487  
488    for (const site of sites) {
489      try {
490        const result = await generateProposalVariants(site.id);
491        results.push(result);
492  
493        // Rate limiting (minimal since no API calls)
494        await new Promise(resolve => setTimeout(resolve, 100));
495      } catch (error) {
496        logger.error(`Failed for site #${site.id}`, error);
497        results.push({ siteId: site.id, error: error.message });
498      }
499    }
500  
501    logger.success(
502      `Generated template proposals for ${results.filter(r => !r.error).length}/${results.length} sites`
503    );
504  
505    return results;
506  }
507  
508  // CLI functionality
509  if (import.meta.url === `file://${process.argv[1]}`) {
510    const command = process.argv[2];
511  
512    if (command === 'generate') {
513      const siteId = parseInt(process.argv[3], 10);
514      if (!siteId) {
515        console.error('Usage: node src/proposal-generator-templates.js generate <site_id>');
516        process.exit(1);
517      }
518  
519      generateProposalVariants(siteId)
520        .then(result => {
521          console.log('\n✅ Template proposals generated!\n');
522          console.log(`Site: ${result.domain} (${result.keyword})`);
523          console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`);
524          console.log(`\nReasoning: ${result.reasoning}\n`);
525          process.exit(0);
526        })
527        .catch(error => {
528          console.error(`\n❌ Failed: ${error.message}`);
529          process.exit(1);
530        });
531    } else if (command === 'bulk') {
532      const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null;
533  
534      generateBulkProposals(limit)
535        .then(() => process.exit(0))
536        .catch(error => {
537          console.error(`\n❌ Failed: ${error.message}`);
538          process.exit(1);
539        });
540    } else {
541      console.log('Usage:');
542      console.log('  generate <site_id>     - Generate template proposals for a site');
543      console.log('  bulk [limit]           - Generate for all low-scoring sites');
544      console.log('\nTemplate-based proposals:');
545      console.log('  - Zero LLM API cost ($0.18/site → $0/site)');
546      console.log('  - Pre-written, tested templates');
547      console.log('  - No approval workflow needed (templates are pre-approved)');
548      console.log('  - Tracks template performance for optimization');
549      process.exit(1);
550    }
551  }
552  
553  export default {
554    generateProposalVariants,
555    generateBulkProposals,
556  };