/ src / proposal-generator-v2.js
proposal-generator-v2.js
   1  #!/usr/bin/env node
   2  
   3  /**
   4   * Proposal Generator V2
   5   * Generates 3 variants of personalized outreach proposals using the new Proposal.md prompt
   6   * Stores in messages table with pending approval_status for operator approval
   7   */
   8  
   9  import { run, getOne, getAll } from './utils/db.js';
  10  import { execFileSync } from 'child_process';
  11  import { readFileSync, existsSync, readdirSync } from 'fs';
  12  import { join, dirname } from 'path';
  13  import { fileURLToPath } from 'url';
  14  import Logger from './utils/logger.js';
  15  import { retryWithBackoff, isRetryableError, safeJsonParse } from './utils/error-handler.js';
  16  import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js';
  17  import { spin } from './utils/spintax.js';
  18  import { openRouterLimiter } from './utils/rate-limiter.js';
  19  import { openRouterBreaker } from './utils/circuit-breaker.js';
  20  import { callLLM, getProviderDisplayName } from './utils/llm-provider.js';
  21  import { getCountryByCode, COUNTRIES } from './config/countries.js';
  22  import { computeGrade } from './score.js';
  23  import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js';
  24  import { addCountryCode } from './utils/phone-normalizer.js';
  25  import { generatePromptRecommendations } from './utils/prompt-learning.js';
  26  import { getCurrentSeason } from './utils/template-proposals.js';
  27  import { wrapUntrusted, stripInjectionMarkers, sanitizeLlmOutput } from './utils/llm-sanitizer.js';
  28  import { validateProposalResponse } from './utils/llm-response-validator.js';
  29  import { getScoreDataWithFallback } from './utils/score-storage.js';
  30  import { getContactsDataWithFallback } from './utils/contacts-storage.js';
  31  import { parseEnvSet } from './utils/load-env.js';
  32  
  33  const __filename = fileURLToPath(import.meta.url);
  34  const __dirname = dirname(__filename);
  35  const projectRoot = join(__dirname, '..');
  36  
  37  const logger = new Logger('ProposalGeneratorV2');
  38  
  39  // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS).
  40  const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES);
  41  
  42  // Load proposal prompt and best practices, injecting brand/persona env vars
  43  const PROPOSAL_PROMPT = readFileSync(join(projectRoot, 'prompts/PROPOSAL.md'), 'utf-8')
  44    .replaceAll('[BRAND_NAME]', process.env.BRAND_NAME || 'the brand')
  45    .replaceAll('BRAND_DOMAIN', process.env.BRAND_DOMAIN || 'the site');
  46  const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8');
  47  const EMAIL_BEST_PRACTICES = readFileSync(
  48    join(projectRoot, 'docs/05-outreach/email-best-practices.md'),
  49    'utf-8'
  50  );
  51  
  52  // SMS best practices (from CLAUDE.md compliance section)
  53  const SMS_BEST_PRACTICES = `# SMS Best Practices (TCPA Compliance)
  54  
  55  ## Core Requirements
  56  
  57  **Prior Consent**: TCPA requires explicit prior written consent before sending marketing SMS. For cold outreach:
  58  - Initial SMS must be informational, not promotional
  59  - Must provide opt-out instructions
  60  - Cannot include hard sales pitches without consent
  61  
  62  **Opt-Out Instructions**: TCPA (US/CA only) requires opt-out instructions in every SMS:
  63  - Required for: US, CA only
  64  - Standard: "Reply STOP to opt out"
  65  - NOT required for: AU, GB, NZ, IE, ZA, IN (different regulatory regimes)
  66  - Must honor opt-outs immediately for all markets
  67  - Store opt-outs permanently
  68  
  69  **Business Hours**: SMS must be sent during reasonable hours:
  70  - 8am - 9pm in recipient's local timezone
  71  - Avoid weekends for initial outreach
  72  - Consider industry-specific work hours
  73  
  74  **Sender Identification**: Clearly identify who is sending:
  75  - Include business name in first message
  76  - Provide contact information
  77  - Be transparent about purpose
  78  
  79  **Message Length**: Keep SMS concise:
  80  - Aim for <160 characters to avoid segmentation
  81  - Ultra-short, urgent, personal tone
  82  - Single clear call-to-action
  83  
  84  **Compliance Checklist**:
  85  - [ ] Explicit consent obtained (or informational-only content)
  86  - [ ] Opt-out instructions included
  87  - [ ] Sent during business hours (8am-9pm local time)
  88  - [ ] Sender clearly identified
  89  - [ ] Message under 160 characters
  90  - [ ] No misleading or deceptive content
  91  - [ ] Opt-outs honored immediately
  92  `;
  93  
  94  // Model configuration (from env or default)
  95  const PROPOSAL_MODEL =
  96    process.env.PROPOSAL_MODEL || process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';
  97  
  98  // Language code → full name for polish prompt
  99  const POLISH_LANG_NAMES = {
 100    en: 'English',
 101    de: 'German',
 102    fr: 'French',
 103    it: 'Italian',
 104    ja: 'Japanese',
 105    ko: 'Korean',
 106    es: 'Spanish',
 107    nl: 'Dutch',
 108    pl: 'Polish',
 109    sv: 'Swedish',
 110    da: 'Danish',
 111    id: 'Indonesian',
 112    hi: 'Hindi',
 113  };
 114  
 115  /**
 116   * Polish a generated proposal via Claude CLI (uses Max subscription, zero cost).
 117   * Falls back to OpenRouter if CLI is unavailable.
 118   */
 119  async function polishProposal(
 120    text,
 121    channel,
 122    languageCode = 'en',
 123    subjectLine = null,
 124    countryCode = null
 125  ) {
 126    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 127    const isEmailChannel = channel === 'email' || channel === 'form';
 128  
 129    const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
 130    const season = getCurrentSeason(countryCode);
 131    const seasonSection = season ? `\nSEASON: ${season}` : '';
 132  
 133    const userMessage = `CHANNEL: ${channel}
 134  LANGUAGE: ${langName}${seasonSection}
 135  ---
 136  PROPOSAL BODY:
 137  ${wrapUntrusted(text, 'proposal_draft')}${subjectSection}`;
 138  
 139    // Try Claude CLI first (Max subscription, zero cost)
 140    try {
 141      const cliInput = `${HAIKU_POLISH_PROMPT}\n\n---\n\n${userMessage}`;
 142      const output = execFileSync('claude', ['-p', '--model', 'haiku', '--output-format', 'json'], {
 143        input: cliInput,
 144        encoding: 'utf-8',
 145        timeout: 30000,
 146        maxBuffer: 1024 * 1024,
 147      });
 148  
 149      // Claude CLI with --output-format json wraps response in a JSON object
 150      const cliResult = safeJsonParse(output);
 151      // The CLI returns {result: "..."} — the result contains the LLM output
 152      const llmOutput = cliResult?.result || output;
 153      const parsed = safeJsonParse(llmOutput);
 154  
 155      if (parsed && typeof parsed.body === 'string' && parsed.body.trim() !== '') {
 156        return {
 157          text: parsed.body.trim(),
 158          subjectLine: isEmailChannel ? parsed.subject?.trim() || subjectLine : null,
 159        };
 160      }
 161      logger.warn('Polish (claude CLI) returned invalid response — falling back to OpenRouter');
 162    } catch (cliErr) {
 163      logger.warn(`Polish (claude CLI) failed: ${cliErr.message} — falling back to OpenRouter`);
 164    }
 165  
 166    // Fallback: OpenRouter
 167    try {
 168      const { content } = await openRouterBreaker.fire(async () =>
 169        openRouterLimiter.schedule(() =>
 170          callLLM({
 171            model: 'anthropic/claude-haiku-4-5',
 172            temperature: 0,
 173            max_tokens: 3000,
 174            json_mode: true,
 175            messages: [
 176              { role: 'system', content: HAIKU_POLISH_PROMPT },
 177              { role: 'user', content: userMessage },
 178            ],
 179            stage: 'proposals',
 180          })
 181        )
 182      );
 183  
 184      const result = safeJsonParse(content);
 185      if (!result || typeof result.body !== 'string' || result.body.trim() === '') {
 186        logger.warn('Polish (OpenRouter fallback) returned invalid response — using original');
 187        return { text, subjectLine };
 188      }
 189  
 190      return {
 191        text: result.body.trim(),
 192        subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null,
 193      };
 194    } catch (err) {
 195      logger.warn(`Polish (OpenRouter fallback) failed: ${err.message} — using original`);
 196      return { text, subjectLine };
 197    }
 198  }
 199  
 200  /**
 201   * Generate proposal variants for a site and store in messages table
 202   * @param {number} siteId - Site ID from database
 203   * @param {string} reworkInstructions - Optional feedback for rework
 204   * @returns {Promise<Object>} Generated proposals
 205   */
 206  export async function generateProposalVariants(siteId, reworkInstructions = null) {
 207    const _t0 = Date.now();
 208  
 209    // Get site data (including country metadata)
 210    const siteData = await getOne(
 211      `SELECT
 212        s.*,
 213        s.score,
 214        s.grade,
 215        s.country_code,
 216        COALESCE(s.google_domain, 'google.com.au') as google_domain,
 217        COALESCE(s.language_code, 'en') as language_code,
 218        COALESCE(s.currency_code, 'AUD') as currency_code
 219      FROM sites s
 220      WHERE s.id = $1`,
 221      [siteId]
 222    );
 223  
 224    if (!siteData) {
 225      throw new Error(`Site not found: ${siteId}`);
 226    }
 227  
 228    // Language override: if site language_code is 'en' but country's primary language isn't English
 229    if (siteData.language_code === 'en' && siteData.country_code) {
 230      const countryInfo = getCountryByCode(siteData.country_code);
 231      if (countryInfo && countryInfo.language !== 'en') {
 232        logger.info(
 233          `[proposals] Language override: ${siteData.country_code} site has language_code='en' but country language is '${countryInfo.language}' — using '${countryInfo.language}'`
 234        );
 235        siteData.language_code = countryInfo.language;
 236      }
 237    }
 238  
 239    // Guard: country blocked from ALL outreach channels
 240    const blockedCountries = new Set(
 241      (process.env.OUTREACH_BLOCKED_COUNTRIES || '')
 242        .split(',')
 243        .map(s => s.trim().toUpperCase())
 244        .filter(Boolean)
 245    );
 246    if (blockedCountries.has(siteData.country_code)) {
 247      const msg = `country ${siteData.country_code} blocked via OUTREACH_BLOCKED_COUNTRIES — skipping proposal generation`;
 248      logger.info(`  ⏭️  ${siteData.domain}: ${msg}`);
 249      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 250    }
 251  
 252    // Guard: country_code must be known
 253    if (!siteData.country_code) {
 254      const msg =
 255        'country_code is unknown — re-enrich site to detect country before generating proposals';
 256      await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]);
 257      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 258    }
 259  
 260    // Guard: a template must exist for this country
 261    const templatesDir = join(__dirname, '../data/templates');
 262    const templatePath = join(templatesDir, siteData.country_code, 'email.json');
 263    if (!existsSync(templatePath)) {
 264      const msg = `no email template for country ${siteData.country_code} — add data/templates/${siteData.country_code}/email.json to enable outreach`;
 265      await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]);
 266      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 267    }
 268  
 269    // Check if site meets low-score threshold (unless this is a rework)
 270    if (!reworkInstructions) {
 271      const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
 272      if (siteData.score >= cutoff) {
 273        throw new Error(
 274          `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.`
 275        );
 276      }
 277    }
 278  
 279    logger.info(`Generating proposal variants for ${siteData.domain} (${siteData.keyword})`);
 280  
 281    // Parse JSON data (filesystem first, DB fallback)
 282    const scoreData = getScoreDataWithFallback(siteData.id, siteData);
 283    let contactData = getContactsDataWithFallback(siteData.id, siteData);
 284  
 285    // Clean up invalid Twitter/X.com links (those without usernames)
 286    if (contactData) {
 287      contactData = cleanInvalidSocialLinks(contactData);
 288    }
 289  
 290    // Get all contacts with Haiku-resolved first names
 291    const _tNames = Date.now();
 292    const allContacts = contactData
 293      ? await getAllContactsWithNames(contactData, siteData.country_code)
 294      : [];
 295    logger.debug(
 296      `[proposals] name-extract ${Date.now() - _tNames}ms (${allContacts.length} contacts) for ${siteData.domain}`
 297    );
 298  
 299    if (allContacts.length === 0) {
 300      logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`);
 301      return {
 302        siteId,
 303        domain: siteData.domain,
 304        keyword: siteData.keyword,
 305        outreachIds: [],
 306        variants: [],
 307        reasoning: 'No contacts found',
 308        contactCount: 0,
 309      };
 310    }
 311  
 312    // Filter out contacts that already have outbound messages
 313    const existingMessages = await getAll(
 314      `SELECT contact_method, contact_uri
 315       FROM messages
 316       WHERE site_id = $1 AND direction = 'outbound'`,
 317      [siteId]
 318    );
 319  
 320    const existingSet = new Set(
 321      existingMessages.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`)
 322    );
 323  
 324    const countryUpper = (siteData.country_code || '').toUpperCase();
 325    const availableContacts = allContacts.filter(contact => {
 326      const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`;
 327      if (existingSet.has(key)) return false;
 328      // Skip SMS contacts for countries without legal basis (DR-121)
 329      if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(countryUpper)) {
 330        logger.info(`Skipping SMS contact for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`);
 331        return false;
 332      }
 333      return true;
 334    });
 335  
 336    if (availableContacts.length === 0) {
 337      logger.warn(
 338        `All ${allContacts.length} contacts for ${siteData.domain} already have messages - skipping`
 339      );
 340      return {
 341        siteId,
 342        domain: siteData.domain,
 343        keyword: siteData.keyword,
 344        outreachIds: [],
 345        variants: [],
 346        reasoning: 'All contacts already have messages',
 347        contactCount: allContacts.length,
 348      };
 349    }
 350  
 351    if (availableContacts.length < allContacts.length) {
 352      logger.info(
 353        `Filtered ${allContacts.length - availableContacts.length} contacts with existing messages`
 354      );
 355    }
 356  
 357    logger.info(`Found ${availableContacts.length} contacts - generating personalized proposals`);
 358  
 359    // Generate one proposal per contact
 360    const _tGen = Date.now();
 361    const proposalData = await generateProposalsWithAI(
 362      siteData,
 363      scoreData,
 364      availableContacts,
 365      reworkInstructions,
 366      siteId
 367    );
 368  
 369    logger.debug(`[proposals] generation ${Date.now() - _tGen}ms for ${siteData.domain}`);
 370  
 371    // Validate that we got the right number of variants
 372    if (proposalData.variants.length !== availableContacts.length) {
 373      throw new Error(
 374        `Expected ${availableContacts.length} variants but got ${proposalData.variants.length}`
 375      );
 376    }
 377  
 378    // Polish all variants concurrently then store
 379    const tPolish = Date.now();
 380    const polishedVariants = await Promise.all(
 381      availableContacts.map(async (contact, i) => {
 382        const variant = proposalData.variants[i]; // eslint-disable-line security/detect-object-injection
 383        const contactMethod = normalizeContactMethod(contact.channel);
 384        const isEmailChannel = contactMethod === 'email' || contactMethod === 'form';
 385        const polished = await polishProposal(
 386          variant.proposal_text,
 387          contactMethod,
 388          siteData.language_code || 'en',
 389          isEmailChannel ? proposalData.subject_line : null,
 390          siteData.country_code || null
 391        );
 392        return { contact, variant, contactMethod, polished };
 393      })
 394    );
 395    logger.debug(
 396      `[proposals] polish ${Date.now() - tPolish}ms for ${availableContacts.length} contacts`
 397    );
 398  
 399    const outreachIds = [];
 400    for (const { contact, variant, polished } of polishedVariants) {
 401      // Store outreach entry (with GDPR filtering) using polished content
 402      const outreachId = await storeProposalVariant(
 403        siteId,
 404        { ...variant, proposal_text: polished.text },
 405        polished.subjectLine ?? proposalData.subject_line,
 406        contact,
 407        siteData
 408      );
 409      outreachIds.push(outreachId);
 410  
 411      const nameInfo = contact.name ? ` (${contact.name})` : '';
 412      const contactMethod = normalizeContactMethod(contact.channel);
 413      logger.success(
 414        `Stored outreach #${outreachId}: V${variant.variant_number} → ${contactMethod}${nameInfo} (${contact.uri})`
 415      );
 416    }
 417  
 418    // Update site status
 419    await run(
 420      `UPDATE sites
 421       SET status = 'proposals_drafted',
 422           updated_at = CURRENT_TIMESTAMP
 423       WHERE id = $1`,
 424      [siteId]
 425    );
 426  
 427    logger.info(
 428      `[proposals] total ${Date.now() - _t0}ms for ${siteData.domain} (${allContacts.length} contacts)`
 429    );
 430    logger.success(
 431      `Generated ${proposalData.variants.length} proposal variants and created ${outreachIds.length} outreach entries for ${siteData.domain}`
 432    );
 433  
 434    return {
 435      siteId,
 436      domain: siteData.domain,
 437      keyword: siteData.keyword,
 438      outreachIds,
 439      variants: proposalData.variants,
 440      reasoning: proposalData.reasoning,
 441      contactCount: allContacts.length,
 442    };
 443  }
 444  
 445  /**
 446   * Generate proposal variants using LLM API
 447   * @param {Object} siteData - Site data
 448   * @param {Object} scoreData - Scoring data
 449   * @param {Array} contacts - Array of contacts from getAllContacts()
 450   * @param {string} reworkInstructions - Optional rework instructions
 451   * @param {number} siteId - Site ID for usage tracking
 452   * @returns {Promise<Object>} Proposal data with variants
 453   */
 454   
 455  async function generateProposalsWithAI(
 456    siteData,
 457    scoreData,
 458    contacts,
 459    reworkInstructions,
 460    siteId
 461  ) {
 462    // Extract business type from keyword
 463    const businessType = extractBusinessType(siteData.keyword);
 464  
 465    // Build context for the AI
 466    const context = await buildProposalContext(siteData, scoreData, contacts, businessType);
 467  
 468    // Add rework instructions if provided
 469    const reworkSection = reworkInstructions
 470      ? `\n\nREWORK INSTRUCTIONS FROM OPERATOR:\n${reworkInstructions}\n\nPlease incorporate this feedback into the new variants.`
 471      : '';
 472  
 473    const userPrompt = `${context}${reworkSection}`;
 474  
 475    return retryWithBackoff(
 476      async () => {
 477        // Calculate max_tokens based on number of contacts
 478        const maxTokens = Math.max(8192, contacts.length * 1200);
 479  
 480        const response = await openRouterBreaker.fire(async () =>
 481          openRouterLimiter.schedule(() =>
 482            callLLM({
 483              model: PROPOSAL_MODEL,
 484              messages: [
 485                { role: 'system', content: PROPOSAL_PROMPT },
 486                { role: 'user', content: userPrompt },
 487              ],
 488              temperature: 0.7,
 489              max_tokens: maxTokens,
 490              json_mode: true,
 491              stage: 'proposals',
 492              siteId,
 493            })
 494          )
 495        );
 496  
 497        const { content, usage } = response;
 498  
 499        logger.info(
 500          `Generated proposals using ${getProviderDisplayName()} - ${usage.promptTokens + usage.completionTokens} tokens`
 501        );
 502  
 503        if (!content) {
 504          throw new Error('No content in API response');
 505        }
 506  
 507        const result = safeJsonParse(content);
 508  
 509        if (!result || !result.variants || !Array.isArray(result.variants)) {
 510          throw new Error('Invalid proposal response format');
 511        }
 512  
 513        if (result.variants.length !== contacts.length) {
 514          throw new Error(`Expected ${contacts.length} variants but got ${result.variants.length}`);
 515        }
 516  
 517        // Validate response structure and check for suspicious URLs
 518        validateProposalResponse(result, contacts.length);
 519  
 520        // Programmatic content validation — retry if LLM ignores constraints
 521        const freePattern =
 522          /\bfree\s+(audit|report|assessment|consultation|review|strategy\s+call|snapshot|analysis|evaluation|estimate|quote)\b|\bno\s+cost\b|\bat\s+no\s+charge\b|\bcomplimentary\b/i;
 523        const phonePattern = /\+\d[\d\s\-().]{6,}/;
 524        for (const variant of result.variants) {
 525          const text = variant.proposal_text || '';
 526          if (freePattern.test(text)) {
 527            throw new Error(
 528              `LLM promised a free service in variant ${variant.variant_number} — retry required`
 529            );
 530          }
 531          if (phonePattern.test(text)) {
 532            throw new Error(
 533              `LLM hallucinated a phone number in variant ${variant.variant_number} — retry required`
 534            );
 535          }
 536        }
 537  
 538        return result;
 539      },
 540      {
 541        maxRetries: 3,
 542        shouldRetry: error => error.message.includes('retry required') || isRetryableError(error),
 543        onRetry: (attempt, error) => {
 544          logger.warn(`Retry ${attempt + 1}/3 for ${siteData.domain}: ${error.message}`);
 545        },
 546      }
 547    );
 548  }
 549  
 550  /**
 551   * Get top competitor for a keyword
 552   * @param {Object} siteData - Site data
 553   * @returns {Promise<Object|null>} Competitor info {domain, score, grade} or null
 554   */
 555  async function getTopCompetitor(siteData) {
 556    // Extract industry from score_json (filesystem first, DB fallback)
 557    const scoreJson = getScoreDataWithFallback(siteData.id, siteData);
 558    const industry = scoreJson?.industry_classification;
 559    if (!industry) return null;
 560  
 561    // Find all country codes sharing the same language
 562    const prospectLang = COUNTRIES[siteData.country_code]?.language;
 563    if (!prospectLang) return null;
 564  
 565    const sameLanguageCodes = Object.entries(COUNTRIES)
 566      .filter(([, c]) => c.language === prospectLang)
 567      .map(([code]) => code);
 568  
 569    const placeholders = sameLanguageCodes.map((_, i) => `$${i + 3}`).join(',');
 570    const competitor = await getOne(
 571      `SELECT domain, score, grade
 572       FROM sites
 573       WHERE industry_classification = $1
 574         AND domain != $2
 575         AND country_code IN (${placeholders})
 576         AND status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent')
 577         AND score IS NOT NULL
 578       ORDER BY score DESC
 579       LIMIT 1`,
 580      [industry, siteData.domain, ...sameLanguageCodes]
 581    );
 582  
 583    return competitor || null;
 584  }
 585  
 586  /**
 587   * Build context for proposal generation
 588   * @param {Object} siteData - Site data
 589   * @param {Object} scoreData - Scoring data
 590   * @param {Array} contacts - Array of contacts from getAllContacts()
 591   * @param {string} businessType - Business type extracted from keyword
 592   * @returns {Promise<string>} Context string for AI
 593   */
 594  async function buildProposalContext(siteData, scoreData, contacts, businessType) {
 595    const weaknesses = extractKeyWeaknesses(scoreData);
 596  
 597    // Get country configuration for localization
 598    const country = getCountryByCode(siteData.country_code);
 599  
 600    // Extract location from contacts_json (more reliable than keyword)
 601    const contactData = getContactsDataWithFallback(siteData.id, siteData);
 602    const city = contactData?.city || siteData.city || null;
 603    const state = contactData?.state || siteData.state || null;
 604  
 605    // Format contacts list for AI
 606    const contactsList = contacts
 607      .map((contact, i) => {
 608        const safeName = contact.name ? stripInjectionMarkers(contact.name) : null;
 609        const nameInfo = safeName ? ` (Name: ${safeName})` : ' (Name: Unknown)';
 610        return `${i + 1}. ${contact.channel}${nameInfo} - ${contact.uri}`;
 611      })
 612      .join('\n');
 613  
 614    // Get top competitor and determine if score difference meets threshold
 615    const competitor = await getTopCompetitor(siteData);
 616    const scoreThreshold = parseFloat(process.env.COMPETITOR_SCORE_THRESHOLD || '10');
 617  
 618    let competitorSection = '';
 619    if (competitor) {
 620      const scoreDifference = competitor.score - siteData.score;
 621      if (scoreDifference >= scoreThreshold) {
 622        competitorSection = `\nTOP COMPETITOR (same industry, same language):
 623  - Domain: ${competitor.domain}
 624  - Score: ${computeGrade(competitor.score)} (${competitor.score}/100)
 625  - Difference: ${scoreDifference.toFixed(1)} points higher than target site
 626  
 627  FRAMING RULES:
 628  - Score = percentage of conversion best practices implemented (NOT a conversion rate)
 629  - Use "our analysis suggests" — never state conversion claims as fact
 630  - ~20-point gap ≈ roughly 2x conversion difference (heuristic, frame as estimate)
 631  - Example: "Our analysis suggests ${competitor.domain} implements ${competitor.score}% of conversion best practices vs your ${siteData.score}% — that gap likely means they're converting roughly twice as many visitors."`;
 632      } else {
 633        competitorSection = `\nTOP COMPETITOR (same industry, same language):
 634  - Domain: ${competitor.domain}
 635  - Score: Similar to target site (do NOT mention specific scores - they're too close)
 636  
 637  IMPORTANT: Do not mention competitor scores when they are clustered too tightly to be meaningful.`;
 638      }
 639    }
 640  
 641    // Build localization requirements
 642    const localizationSection = `
 643  LOCALIZATION REQUIREMENTS:
 644  - Country: ${country.name} (${country.code})
 645  - Language: ${country.language === 'en' ? 'English' : country.language}
 646  - Currency: ${country.currencySymbol} ${country.currency}
 647  - Spelling: Use ${country.name} spelling conventions${['AU', 'GB', 'UK', 'NZ', 'IE', 'ZA', 'IN'].includes(country.code) ? ' (e.g., "optimise" not "optimize", "colour" not "color", "specialise" not "specialize")' : country.code === 'US' || country.code === 'CA' ? ' (e.g., "optimize" not "optimise", "color" not "colour", "specialize" not "specialise")' : ''}
 648  - Cultural context: Reference ${country.name} business norms and expectations
 649  - Date format: ${country.dateFormat}
 650  - Phone format: ${country.phoneFormat}
 651  ${country.timezone ? `- Timezone: ${country.timezone} (consider local business hours for call/response timing)` : ''}`;
 652  
 653    // Build best practices section
 654    const bestPracticesSection = `
 655  CHANNEL-SPECIFIC BEST PRACTICES:
 656  
 657  === EMAIL BEST PRACTICES ===
 658  ${EMAIL_BEST_PRACTICES}
 659  
 660  === SMS BEST PRACTICES ===
 661  ${SMS_BEST_PRACTICES}
 662  
 663  IMPORTANT: Follow these best practices when crafting proposals for each channel.
 664  - Email proposals should follow CAN-SPAM requirements and copywriting best practices
 665  - SMS proposals MUST be <160 chars and follow local regulations.
 666  - TCPA opt-out ("Reply STOP to opt out.") is REQUIRED for US and CA — include it in every US/CA SMS.
 667  - CRITICAL: do NOT add any opt-out, STOP, or unsubscribe text for AU, GB, NZ, IE, ZA, IN or any non-US/CA market. These markets do not require TCPA opt-out and the extra text wastes precious SMS characters.
 668  - All proposals must provide genuine value and avoid spam triggers
 669  `;
 670  
 671    // Build location string
 672    let locationStr = 'Unknown';
 673    if (city && state) {
 674      locationStr = `${city}, ${state}`;
 675    } else if (city) {
 676      locationStr = city;
 677    } else if (state) {
 678      locationStr = state;
 679    }
 680  
 681    // Aggregate stats for credibility (dynamic from DB)
 682    const aggRow = await getOne(
 683      `SELECT COUNT(*) AS total, ROUND(AVG(score)::numeric, 1) AS avg_score, ROUND(MAX(score)::numeric, 1) AS best_score
 684       FROM sites WHERE score IS NOT NULL`
 685    );
 686    const aggregateSection = `
 687  AGGREGATE STATS (real data — use for credibility):
 688  - total_sites_scored: ${aggRow.total}
 689  - avg_score: ${aggRow.avg_score}
 690  - best_score: ${aggRow.best_score}`;
 691  
 692    return `Generate personalized sales proposals for this website:
 693  
 694  TARGET BUSINESS:
 695  - Domain: ${siteData.domain}
 696  - Business Name: ${contactData?.business_name || siteData.domain}
 697  - URL: ${siteData.landing_page_url}
 698  - Business Type: ${businessType}
 699  - Location: ${locationStr}
 700  
 701  CONVERSION ANALYSIS:
 702  - Score: ${siteData.score ? computeGrade(siteData.score) : 'N/A'} (${siteData.score || 'N/A'}/100)
 703  ${competitorSection}
 704  ${aggregateSection}
 705  
 706  KEY WEAKNESSES IDENTIFIED:
 707  ${weaknesses}
 708  
 709  CONTACTS TO REACH (generate ONE variant for EACH):
 710  ${contactsList}
 711  ${localizationSection}
 712  
 713  ${bestPracticesSection}
 714  
 715  Generate ${contacts.length} unique proposal variants - one for each contact listed above.
 716  Vary your approach across the variants (don't send identical messages to everyone).
 717  IMPORTANT: Use the exact "Business Name" above when referring to the company — do NOT guess or derive a name from the domain.
 718  IMPORTANT: Apply all localization requirements above to ensure proposals are culturally appropriate and use correct local conventions.
 719  CRITICAL: Follow the channel-specific best practices above - especially SMS length limits and compliance requirements.`;
 720  }
 721  
 722  /**
 723   * Extract business type from keyword
 724   */
 725  function extractBusinessType(keyword) {
 726    const commonLocations = [
 727      'sydney',
 728      'melbourne',
 729      'brisbane',
 730      'perth',
 731      'adelaide',
 732      'canberra',
 733      'hobart',
 734      'darwin',
 735    ];
 736  
 737    let businessType = keyword;
 738    for (const location of commonLocations) {
 739      // eslint-disable-next-line security/detect-non-literal-regexp -- Safe: location is from hardcoded array
 740      businessType = businessType.replace(new RegExp(location, 'gi'), '').trim();
 741    }
 742  
 743    return businessType || keyword;
 744  }
 745  
 746  /**
 747   * Extract location from keyword
 748   */
 749  function extractLocation(keyword) {
 750    const words = keyword.split(' ');
 751    return words.slice(-2).join(' ');
 752  }
 753  
 754  /**
 755   * Extract key weaknesses from score data
 756   */
 757  function extractKeyWeaknesses(scoreData) {
 758    if (!scoreData || !scoreData.factor_scores) {
 759      return 'General conversion optimization opportunities';
 760    }
 761  
 762    const weaknesses = [];
 763  
 764    Object.entries(scoreData.factor_scores).forEach(([factorName, factor]) => {
 765      if (factor && factor.score < 5 && factor.reasoning) {
 766        const label = factorName.replace(/_/g, ' ');
 767        weaknesses.push(`- ${label}: ${stripInjectionMarkers(factor.reasoning)}`);
 768      }
 769    });
 770  
 771    return weaknesses.length > 0
 772      ? weaknesses.slice(0, 5).join('\n')
 773      : 'General conversion optimization opportunities';
 774  }
 775  
 776  /**
 777   * Normalize contact method to match database schema
 778   */
 779  function normalizeContactMethod(method) {
 780    if (!method) return 'email'; // Default fallback
 781  
 782    const normalized = method.toLowerCase().trim();
 783  
 784    const mapping = {
 785      sms: 'sms',
 786      email: 'email',
 787      form: 'form',
 788      'contact form': 'form',
 789      contactform: 'form',
 790      x: 'x',
 791      twitter: 'x',
 792      linkedin: 'linkedin',
 793      facebook: 'email',
 794      instagram: 'email',
 795    };
 796  
 797    // eslint-disable-next-line security/detect-object-injection -- Safe: normalized is from our channel type strings
 798    return mapping[normalized] || 'email';
 799  }
 800  
 801  /**
 802   * Store proposal variant in messages table
 803   * @param {number} siteId - Site ID
 804   * @param {Object} variant - Variant object with proposal_text, variant_number
 805   * @param {string} subjectLine - Email subject line
 806   * @param {Object|null} contact - Contact object {type, uri, name, channel} or null
 807   * @param {Object} siteData - Site data including country_code and gdpr_verified
 808   * @returns {Promise<number|null>} Message ID
 809   */
 810  async function storeProposalVariant(siteId, variant, subjectLine, contact = null, siteData = null) {
 811    // Validate variant data
 812    if (!variant || typeof variant !== 'object') {
 813      throw new Error('Invalid variant: must be an object');
 814    }
 815    if (!variant.proposal_text || typeof variant.proposal_text !== 'string') {
 816      throw new Error(`Invalid variant.proposal_text for variant ${variant.variant_number}`);
 817    }
 818  
 819    // Use contact info if available, otherwise use variant recommendation
 820    const rawContactMethod = contact ? contact.channel : variant.recommended_channel || 'email';
 821    const contactMethod = normalizeContactMethod(rawContactMethod);
 822    let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION';
 823  
 824    // Normalize SMS phone numbers with country code
 825    if (
 826      contactMethod === 'sms' &&
 827      contactUri !== 'PENDING_CONTACT_EXTRACTION' &&
 828      siteData?.country_code
 829    ) {
 830      contactUri = addCountryCode(contactUri, siteData.country_code);
 831    }
 832  
 833    // GDPR compliance check: Block unverified emails in GDPR countries
 834    let approvalStatus = 'pending';
 835  
 836    if (
 837      siteData &&
 838      contactMethod === 'email' &&
 839      siteData.gdpr_verified === 0 &&
 840      siteData.country_code
 841    ) {
 842      const country = getCountryByCode(siteData.country_code);
 843      if (country && country.requiresGDPRCheck) {
 844        approvalStatus = 'gdpr_blocked';
 845        logger.info(
 846          `  ⚠️  GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)`
 847        );
 848      }
 849    }
 850  
 851    // Government email check
 852    if (contactMethod === 'email' && isGovernmentEmail(contactUri)) {
 853      logger.info(`  ⚠️  Skipping government email: ${contactUri}`);
 854      return null;
 855    }
 856  
 857    // Education email check
 858    if (contactMethod === 'email' && isEducationEmail(contactUri)) {
 859      logger.info(`  ⚠️  Skipping education email: ${contactUri}`);
 860      return null;
 861    }
 862  
 863    // Demo email check
 864    if (contactMethod === 'email' && isDemoEmail(contactUri)) {
 865      logger.info(`  ⚠️  Skipping demo email: ${contactUri}`);
 866      return null;
 867    }
 868  
 869    // Derive contact_basis from channel
 870    const CONTACT_BASIS = {
 871      sms: 'public_website_phone',
 872      email: 'public_website_email',
 873      form: 'contact_form',
 874      x: 'social_profile',
 875      linkedin: 'social_profile',
 876    };
 877    const contactBasis = CONTACT_BASIS[contactMethod] || null;
 878  
 879    // Debug logging
 880    const nameInfo = contact?.name ? ` (${contact.name})` : '';
 881    const statusInfo = approvalStatus === 'gdpr_blocked' ? ' [GDPR BLOCKED]' : '';
 882    logger.info(
 883      `Storing variant ${variant.variant_number}: siteId=${siteId}, textLen=${variant.proposal_text?.length}, method=${contactMethod}${nameInfo}, uri=${contactUri}${statusInfo}`
 884    );
 885  
 886    // Resolve spintax before storing — each message record has a final, human-readable version
 887    const spunText = spin(variant.proposal_text);
 888    const spunSubject = spin(subjectLine);
 889  
 890    // OUTPUT SANITIZATION
 891    const prospectDomain = siteData?.domain || null;
 892    const { sanitized: sanitizedText, strippedUrls, strippedPatterns } = sanitizeLlmOutput(
 893      spunText,
 894      prospectDomain
 895    );
 896    const { sanitized: sanitizedSubject } = sanitizeLlmOutput(spunSubject, prospectDomain);
 897  
 898    if (strippedUrls.length > 0 || strippedPatterns.length > 0) {
 899      logger.warn(
 900        `[proposals] Output sanitization for site ${siteId}: removed ${strippedPatterns.length} dangerous patterns, ${strippedUrls.length} unauthorized URLs`
 901      );
 902    }
 903  
 904    // Reject broken template output
 905    if (sanitizedText.startsWith('{') || sanitizedText.startsWith(', ')) {
 906      logger.warn(
 907        `[proposals] Broken template output for site ${siteId} — starts with '${sanitizedText.slice(0, 10)}' — skipping`
 908      );
 909      return null;
 910    }
 911  
 912    // SMS: append dynamic signature + opt-out based on country and remaining char budget
 913    const personaFirstName = process.env.PERSONA_FIRST_NAME;
 914    const personaName = process.env.PERSONA_NAME;
 915    const brandName = process.env.BRAND_NAME;
 916    const brandDomain = process.env.BRAND_DOMAIN;
 917    if (!personaFirstName) throw new Error('PERSONA_FIRST_NAME env var required');
 918    if (!personaName) throw new Error('PERSONA_NAME env var required');
 919    if (!brandName) throw new Error('BRAND_NAME env var required');
 920    if (!brandDomain) throw new Error('BRAND_DOMAIN env var required');
 921  
 922    let finalText = sanitizedText;
 923    if (contactMethod === 'sms') {
 924      const cc = (siteData?.country_code || '').toUpperCase();
 925      const needsStop = cc === 'US' || cc === 'CA';
 926      const sigStripPattern = new RegExp(`\\s*-\\s*${personaFirstName}\\s*(${personaName.split(' ').slice(1).join(' ')})?\\s*,?\\s*(${brandName.replace(/[&]/g, '\\s*&\\s*')})?\\s*$`, 'i');
 927      finalText = finalText
 928        .replace(/\s*Reply STOP to opt out\.?\s*/gi, '')
 929        .replace(sigStripPattern, '')
 930        .trimEnd();
 931      const stopText = needsStop ? ' Reply STOP to opt out.' : '';
 932      const sigTiers = needsStop
 933        ? [` - ${personaFirstName}, ${brandName}`, ` - ${brandName}`]
 934        : [` - ${personaFirstName}`, ''];
 935      let bestSig = sigTiers[sigTiers.length - 1];
 936      for (const sig of sigTiers) {
 937        if (finalText.length + stopText.length + sig.length <= 160) {
 938          bestSig = sig;
 939          break;
 940        }
 941      }
 942      finalText = `${finalText}${stopText}${bestSig}`;
 943    } else if (contactMethod !== 'sms') {
 944      const identityPattern = new RegExp(`${personaFirstName}|${brandName.replace(/[&]/g, '\\$&')}|${brandDomain.replace(/\./g, '\\.')}`, 'i');
 945      const hasIdentity = identityPattern.test(finalText);
 946      if (!hasIdentity) {
 947        finalText = `${finalText.trimEnd()}\n\n${personaName}, ${brandName} (${brandDomain})`;
 948      }
 949    }
 950  
 951    // Hard character limit enforcement
 952    const CHANNEL_MAX_CHARS = { sms: 160, x: 280 };
 953    const maxChars = CHANNEL_MAX_CHARS[contactMethod];
 954    if (maxChars && finalText.length > maxChars) {
 955      logger.warn(
 956        `[proposals] ${contactMethod} message too long (${finalText.length}/${maxChars} chars) for site ${siteId} — skipping`
 957      );
 958      return null;
 959    }
 960  
 961    const result = await run(
 962      `INSERT INTO messages (
 963        site_id,
 964        direction,
 965        message_body,
 966        subject_line,
 967        contact_method,
 968        contact_uri,
 969        approval_status,
 970        contact_basis
 971      ) VALUES ($1, 'outbound', $2, $3, $4, $5, $6, $7)
 972      ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved')
 973      DO UPDATE SET
 974        message_body = EXCLUDED.message_body,
 975        subject_line = EXCLUDED.subject_line,
 976        approval_status = EXCLUDED.approval_status,
 977        contact_basis = EXCLUDED.contact_basis
 978      RETURNING id`,
 979      [siteId, finalText, sanitizedSubject, contactMethod, contactUri, approvalStatus, contactBasis]
 980    );
 981  
 982    return result.lastInsertRowid;
 983  }
 984  
 985  /**
 986   * Get pending messages awaiting approval
 987   */
 988  export async function getPendingOutreaches(limit = 100) {
 989    return await getAll(
 990      `SELECT
 991        m.*,
 992        s.domain,
 993        s.keyword,
 994        s.landing_page_url,
 995        s.grade
 996      FROM messages m
 997      JOIN sites s ON m.site_id = s.id
 998      WHERE m.direction = 'outbound'
 999        AND m.approval_status = 'pending'
1000      ORDER BY m.created_at DESC
1001      LIMIT $1`,
1002      [limit]
1003    );
1004  }
1005  
1006  /**
1007   * Approve message for sending
1008   */
1009  export async function approveOutreach(outreachId) {
1010    await run("UPDATE messages SET approval_status = 'approved' WHERE id = $1", [outreachId]);
1011    logger.success(`Approved message #${outreachId}`);
1012  }
1013  
1014  /**
1015   * Mark message for rework
1016   */
1017  export async function reworkOutreach(outreachId, reworkInstructions) {
1018    await run(
1019      "UPDATE messages SET approval_status = 'rework', rework_instructions = $1 WHERE id = $2",
1020      [reworkInstructions, outreachId]
1021    );
1022    logger.info(`Marked message #${outreachId} for rework`);
1023  }
1024  
1025  /**
1026   * Process rework requests (regenerate proposals with feedback)
1027   */
1028  export async function processReworkQueue() {
1029    const reworkItems = await getAll(
1030      `SELECT DISTINCT site_id, rework_instructions
1031       FROM messages
1032       WHERE direction = 'outbound'
1033         AND approval_status = 'rework'
1034       GROUP BY site_id, rework_instructions`
1035    );
1036  
1037    logger.info(`Processing ${reworkItems.length} rework requests...`);
1038  
1039    let processed = 0;
1040    let failed = 0;
1041  
1042    for (const item of reworkItems) {
1043      try {
1044        // Delete old pending/rework messages for this site
1045        await run(
1046          `DELETE FROM messages
1047           WHERE site_id = $1
1048           AND direction = 'outbound'
1049           AND approval_status IN ('pending', 'rework')`,
1050          [item.site_id]
1051        );
1052  
1053        // Generate new variants with rework instructions
1054        await generateProposalVariants(item.site_id, item.rework_instructions);
1055  
1056        logger.success(`Regenerated proposals for site #${item.site_id}`);
1057        processed++;
1058      } catch (error) {
1059        logger.error(`Failed to rework site #${item.site_id}: ${error.message}`);
1060        failed++;
1061      }
1062    }
1063  
1064    logger.success(
1065      `Processed ${processed}/${reworkItems.length} rework requests${failed ? ` (${failed} failed)` : ''}`
1066    );
1067  }
1068  
1069  /**
1070   * Generate proposals for all low-scoring sites without outreaches
1071   */
1072  export async function generateBulkProposals(limit = null) {
1073    const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
1074  
1075    const sql = `SELECT s.id, s.domain, s.keyword, s.score
1076         FROM sites s
1077         LEFT JOIN messages m ON s.id = m.site_id AND m.direction = 'outbound'
1078         WHERE s.status IN ('enriched', 'enriched_llm')
1079         AND s.score < $1
1080         AND m.id IS NULL
1081         ORDER BY s.score ASC
1082         ${limit ? `LIMIT ${limit}` : ''}`;
1083  
1084    const sites = await getAll(sql, [cutoff]);
1085  
1086    logger.info(`Generating proposals for ${sites.length} sites...`);
1087  
1088    const results = [];
1089  
1090    for (const site of sites) {
1091      try {
1092        const result = await generateProposalVariants(site.id);
1093        results.push(result);
1094  
1095        // Rate limiting
1096        await new Promise(resolve => setTimeout(resolve, 2000));
1097      } catch (error) {
1098        logger.error(`Failed for site #${site.id}`, error);
1099        results.push({ siteId: site.id, error: error.message });
1100      }
1101    }
1102  
1103    logger.success(
1104      `Generated proposals for ${results.filter(r => !r.error).length}/${results.length} sites`
1105    );
1106  
1107    return results;
1108  }
1109  
1110  // CLI functionality
1111  if (import.meta.url === `file://${process.argv[1]}`) {
1112    const command = process.argv[2];
1113  
1114    if (command === 'generate') {
1115      const siteId = parseInt(process.argv[3], 10);
1116      if (!siteId) {
1117        console.error('Usage: node src/proposal-generator-v2.js generate <site_id>');
1118        process.exit(1);
1119      }
1120  
1121      generateProposalVariants(siteId)
1122        .then(result => {
1123          console.log('\n✅ Proposals generated!\n');
1124          console.log(`Site: ${result.domain} (${result.keyword})`);
1125          console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`);
1126          process.exit(0);
1127        })
1128        .catch(error => {
1129          console.error(`\n❌ Failed: ${error.message}`);
1130          process.exit(1);
1131        });
1132    } else if (command === 'bulk') {
1133      const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null;
1134  
1135      generateBulkProposals(limit)
1136        .then(() => process.exit(0))
1137        .catch(error => {
1138          console.error(`\n❌ Failed: ${error.message}`);
1139          process.exit(1);
1140        });
1141    } else if (command === 'pending') {
1142      const limit = process.argv[3] ? parseInt(process.argv[3], 10) : 100;
1143      getPendingOutreaches(limit)
1144        .then(msgs => {
1145          console.log(JSON.stringify(msgs, null, 2));
1146          process.exit(0);
1147        })
1148        .catch(error => {
1149          console.error(`\n❌ Failed: ${error.message}`);
1150          process.exit(1);
1151        });
1152    } else {
1153      console.log('Usage:');
1154      console.log('  generate <site_id>     - Generate proposals for a site');
1155      console.log('  bulk [limit]           - Generate for all low-scoring sites');
1156      console.log('  pending [limit]        - List pending outreaches');
1157      process.exit(1);
1158    }
1159  }