/ src / proposal-generator-v2.js
proposal-generator-v2.js
   1  #!/usr/bin/env node
   2  
   3  /**
   4   * Proposal Generator V2
   5   * Generates 3 variants of personalized outreach proposals using the new Proposal.md prompt
   6   * Stores in messages table with pending approval_status for operator approval
   7   */
   8  
   9  import { run, getOne, getAll } from './utils/db.js';
  10  import { execFileSync } from 'child_process';
  11  import { readFileSync, existsSync, readdirSync } from 'fs';
  12  import { join, dirname } from 'path';
  13  import { fileURLToPath } from 'url';
  14  import Logger from './utils/logger.js';
  15  import { retryWithBackoff, isRetryableError, safeJsonParse } from './utils/error-handler.js';
  16  import { getAllContactsWithNames, cleanInvalidSocialLinks } from './contacts/prioritize.js';
  17  import { spin } from './utils/spintax.js';
  18  import { openRouterLimiter } from './utils/rate-limiter.js';
  19  import { openRouterBreaker } from './utils/circuit-breaker.js';
  20  import { callLLM, getProviderDisplayName } from './utils/llm-provider.js';
  21  import { getCountryByCode, COUNTRIES } from './config/countries.js';
  22  import { computeGrade } from './score.js';
  23  import { isDemoEmail, isGovernmentEmail, isEducationEmail } from './utils/site-filters.js';
  24  import { addCountryCode } from './utils/phone-normalizer.js';
  25  import { generatePromptRecommendations } from './utils/prompt-learning.js';
  26  import { getCurrentSeason } from './utils/template-proposals.js';
  27  import { wrapUntrusted, stripInjectionMarkers, sanitizeLlmOutput } from './utils/llm-sanitizer.js';
  28  import { validateProposalResponse } from './utils/llm-response-validator.js';
  29  import { getScoreDataWithFallback } from './utils/score-storage.js';
  30  import { getContactsDataWithFallback } from './utils/contacts-storage.js';
  31  import { parseEnvSet } from './utils/load-env.js';
  32  
  33  const __filename = fileURLToPath(import.meta.url);
  34  const __dirname = dirname(__filename);
  35  const projectRoot = join(__dirname, '..');
  36  
  37  const logger = new Logger('ProposalGeneratorV2');
  38  
  39  // Countries blocked from SMS outreach (DR-121 — only AU/NZ have legal basis for cold SMS).
  40  const BLOCKED_SMS_COUNTRIES = parseEnvSet(process.env.OUTREACH_BLOCKED_SMS_COUNTRIES);
  41  
  42  // Load proposal prompt and best practices
  43  const PROPOSAL_PROMPT = readFileSync(join(projectRoot, 'prompts/PROPOSAL.md'), 'utf-8');
  44  const HAIKU_POLISH_PROMPT = readFileSync(join(projectRoot, 'prompts/HAIKU-POLISH.md'), 'utf-8');
  45  const EMAIL_BEST_PRACTICES = readFileSync(
  46    join(projectRoot, 'docs/05-outreach/email-best-practices.md'),
  47    'utf-8'
  48  );
  49  
  50  // SMS best practices (from CLAUDE.md compliance section)
  51  const SMS_BEST_PRACTICES = `# SMS Best Practices (TCPA Compliance)
  52  
  53  ## Core Requirements
  54  
  55  **Prior Consent**: TCPA requires explicit prior written consent before sending marketing SMS. For cold outreach:
  56  - Initial SMS must be informational, not promotional
  57  - Must provide opt-out instructions
  58  - Cannot include hard sales pitches without consent
  59  
  60  **Opt-Out Instructions**: TCPA (US/CA only) requires opt-out instructions in every SMS:
  61  - Required for: US, CA only
  62  - Standard: "Reply STOP to opt out"
  63  - NOT required for: AU, GB, NZ, IE, ZA, IN (different regulatory regimes)
  64  - Must honor opt-outs immediately for all markets
  65  - Store opt-outs permanently
  66  
  67  **Business Hours**: SMS must be sent during reasonable hours:
  68  - 8am - 9pm in recipient's local timezone
  69  - Avoid weekends for initial outreach
  70  - Consider industry-specific work hours
  71  
  72  **Sender Identification**: Clearly identify who is sending:
  73  - Include business name in first message
  74  - Provide contact information
  75  - Be transparent about purpose
  76  
  77  **Message Length**: Keep SMS concise:
  78  - Aim for <160 characters to avoid segmentation
  79  - Ultra-short, urgent, personal tone
  80  - Single clear call-to-action
  81  
  82  **Compliance Checklist**:
  83  - [ ] Explicit consent obtained (or informational-only content)
  84  - [ ] Opt-out instructions included
  85  - [ ] Sent during business hours (8am-9pm local time)
  86  - [ ] Sender clearly identified
  87  - [ ] Message under 160 characters
  88  - [ ] No misleading or deceptive content
  89  - [ ] Opt-outs honored immediately
  90  `;
  91  
  92  // Model configuration (from env or default)
  93  const PROPOSAL_MODEL =
  94    process.env.PROPOSAL_MODEL || process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5';
  95  
  96  // Language code → full name for polish prompt
  97  const POLISH_LANG_NAMES = {
  98    en: 'English',
  99    de: 'German',
 100    fr: 'French',
 101    it: 'Italian',
 102    ja: 'Japanese',
 103    ko: 'Korean',
 104    es: 'Spanish',
 105    nl: 'Dutch',
 106    pl: 'Polish',
 107    sv: 'Swedish',
 108    da: 'Danish',
 109    id: 'Indonesian',
 110    hi: 'Hindi',
 111  };
 112  
 113  /**
 114   * Polish a generated proposal via Claude CLI (uses Max subscription, zero cost).
 115   * Falls back to OpenRouter if CLI is unavailable.
 116   */
 117  async function polishProposal(
 118    text,
 119    channel,
 120    languageCode = 'en',
 121    subjectLine = null,
 122    countryCode = null
 123  ) {
 124    const langName = POLISH_LANG_NAMES[languageCode] || 'English'; // eslint-disable-line security/detect-object-injection
 125    const isEmailChannel = channel === 'email' || channel === 'form';
 126  
 127    const subjectSection = isEmailChannel && subjectLine ? `\nSUBJECT LINE:\n${subjectLine}` : '';
 128    const season = getCurrentSeason(countryCode);
 129    const seasonSection = season ? `\nSEASON: ${season}` : '';
 130  
 131    const userMessage = `CHANNEL: ${channel}
 132  LANGUAGE: ${langName}${seasonSection}
 133  ---
 134  PROPOSAL BODY:
 135  ${wrapUntrusted(text, 'proposal_draft')}${subjectSection}`;
 136  
 137    // Try Claude CLI first (Max subscription, zero cost)
 138    try {
 139      const cliInput = `${HAIKU_POLISH_PROMPT}\n\n---\n\n${userMessage}`;
 140      const output = execFileSync('claude', ['-p', '--model', 'haiku', '--output-format', 'json'], {
 141        input: cliInput,
 142        encoding: 'utf-8',
 143        timeout: 30000,
 144        maxBuffer: 1024 * 1024,
 145      });
 146  
 147      // Claude CLI with --output-format json wraps response in a JSON object
 148      const cliResult = safeJsonParse(output);
 149      // The CLI returns {result: "..."} — the result contains the LLM output
 150      const llmOutput = cliResult?.result || output;
 151      const parsed = safeJsonParse(llmOutput);
 152  
 153      if (parsed && typeof parsed.body === 'string' && parsed.body.trim() !== '') {
 154        return {
 155          text: parsed.body.trim(),
 156          subjectLine: isEmailChannel ? parsed.subject?.trim() || subjectLine : null,
 157        };
 158      }
 159      logger.warn('Polish (claude CLI) returned invalid response — falling back to OpenRouter');
 160    } catch (cliErr) {
 161      logger.warn(`Polish (claude CLI) failed: ${cliErr.message} — falling back to OpenRouter`);
 162    }
 163  
 164    // Fallback: OpenRouter
 165    try {
 166      const { content } = await openRouterBreaker.fire(async () =>
 167        openRouterLimiter.schedule(() =>
 168          callLLM({
 169            model: 'anthropic/claude-haiku-4-5',
 170            temperature: 0,
 171            max_tokens: 3000,
 172            json_mode: true,
 173            messages: [
 174              { role: 'system', content: HAIKU_POLISH_PROMPT },
 175              { role: 'user', content: userMessage },
 176            ],
 177            stage: 'proposals',
 178          })
 179        )
 180      );
 181  
 182      const result = safeJsonParse(content);
 183      if (!result || typeof result.body !== 'string' || result.body.trim() === '') {
 184        logger.warn('Polish (OpenRouter fallback) returned invalid response — using original');
 185        return { text, subjectLine };
 186      }
 187  
 188      return {
 189        text: result.body.trim(),
 190        subjectLine: isEmailChannel ? result.subject?.trim() || subjectLine : null,
 191      };
 192    } catch (err) {
 193      logger.warn(`Polish (OpenRouter fallback) failed: ${err.message} — using original`);
 194      return { text, subjectLine };
 195    }
 196  }
 197  
 198  /**
 199   * Generate proposal variants for a site and store in messages table
 200   * @param {number} siteId - Site ID from database
 201   * @param {string} reworkInstructions - Optional feedback for rework
 202   * @returns {Promise<Object>} Generated proposals
 203   */
 204  export async function generateProposalVariants(siteId, reworkInstructions = null) {
 205    const _t0 = Date.now();
 206  
 207    // Get site data (including country metadata)
 208    const siteData = await getOne(
 209      `SELECT
 210        s.*,
 211        s.score,
 212        s.grade,
 213        s.country_code,
 214        COALESCE(s.google_domain, 'google.com.au') as google_domain,
 215        COALESCE(s.language_code, 'en') as language_code,
 216        COALESCE(s.currency_code, 'AUD') as currency_code
 217      FROM sites s
 218      WHERE s.id = $1`,
 219      [siteId]
 220    );
 221  
 222    if (!siteData) {
 223      throw new Error(`Site not found: ${siteId}`);
 224    }
 225  
 226    // Language override: if site language_code is 'en' but country's primary language isn't English
 227    if (siteData.language_code === 'en' && siteData.country_code) {
 228      const countryInfo = getCountryByCode(siteData.country_code);
 229      if (countryInfo && countryInfo.language !== 'en') {
 230        logger.info(
 231          `[proposals] Language override: ${siteData.country_code} site has language_code='en' but country language is '${countryInfo.language}' — using '${countryInfo.language}'`
 232        );
 233        siteData.language_code = countryInfo.language;
 234      }
 235    }
 236  
 237    // Guard: country blocked from ALL outreach channels
 238    const blockedCountries = new Set(
 239      (process.env.OUTREACH_BLOCKED_COUNTRIES || '')
 240        .split(',')
 241        .map(s => s.trim().toUpperCase())
 242        .filter(Boolean)
 243    );
 244    if (blockedCountries.has(siteData.country_code)) {
 245      const msg = `country ${siteData.country_code} blocked via OUTREACH_BLOCKED_COUNTRIES — skipping proposal generation`;
 246      logger.info(`  ⏭️  ${siteData.domain}: ${msg}`);
 247      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 248    }
 249  
 250    // Guard: country_code must be known
 251    if (!siteData.country_code) {
 252      const msg =
 253        'country_code is unknown — re-enrich site to detect country before generating proposals';
 254      await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]);
 255      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 256    }
 257  
 258    // Guard: a template must exist for this country
 259    const templatesDir = join(__dirname, '../data/templates');
 260    const templatePath = join(templatesDir, siteData.country_code, 'email.json');
 261    if (!existsSync(templatePath)) {
 262      const msg = `no email template for country ${siteData.country_code} — add data/templates/${siteData.country_code}/email.json to enable outreach`;
 263      await run('UPDATE sites SET error_message = $1 WHERE id = $2', [msg, siteId]);
 264      throw new Error(`Skipping ${siteData.domain}: ${msg}`);
 265    }
 266  
 267    // Check if site meets low-score threshold (unless this is a rework)
 268    if (!reworkInstructions) {
 269      const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
 270      if (siteData.score >= cutoff) {
 271        throw new Error(
 272          `Site ${siteData.domain} has score ${siteData.score} which is above the cutoff ${cutoff}. Proposals are only generated for low-scoring sites.`
 273        );
 274      }
 275    }
 276  
 277    logger.info(`Generating proposal variants for ${siteData.domain} (${siteData.keyword})`);
 278  
 279    // Parse JSON data (filesystem first, DB fallback)
 280    const scoreData = getScoreDataWithFallback(siteData.id, siteData);
 281    let contactData = getContactsDataWithFallback(siteData.id, siteData);
 282  
 283    // Clean up invalid Twitter/X.com links (those without usernames)
 284    if (contactData) {
 285      contactData = cleanInvalidSocialLinks(contactData);
 286    }
 287  
 288    // Get all contacts with Haiku-resolved first names
 289    const _tNames = Date.now();
 290    const allContacts = contactData
 291      ? await getAllContactsWithNames(contactData, siteData.country_code)
 292      : [];
 293    logger.debug(
 294      `[proposals] name-extract ${Date.now() - _tNames}ms (${allContacts.length} contacts) for ${siteData.domain}`
 295    );
 296  
 297    if (allContacts.length === 0) {
 298      logger.warn(`No contacts found for ${siteData.domain} - skipping proposal generation`);
 299      return {
 300        siteId,
 301        domain: siteData.domain,
 302        keyword: siteData.keyword,
 303        outreachIds: [],
 304        variants: [],
 305        reasoning: 'No contacts found',
 306        contactCount: 0,
 307      };
 308    }
 309  
 310    // Filter out contacts that already have outbound messages
 311    const existingMessages = await getAll(
 312      `SELECT contact_method, contact_uri
 313       FROM messages
 314       WHERE site_id = $1 AND direction = 'outbound'`,
 315      [siteId]
 316    );
 317  
 318    const existingSet = new Set(
 319      existingMessages.map(o => `${normalizeContactMethod(o.contact_method)}:${o.contact_uri}`)
 320    );
 321  
 322    const countryUpper = (siteData.country_code || '').toUpperCase();
 323    const availableContacts = allContacts.filter(contact => {
 324      const key = `${normalizeContactMethod(contact.channel)}:${contact.uri}`;
 325      if (existingSet.has(key)) return false;
 326      // Skip SMS contacts for countries without legal basis (DR-121)
 327      if (contact.channel === 'sms' && BLOCKED_SMS_COUNTRIES.has(countryUpper)) {
 328        logger.info(`Skipping SMS contact for ${siteData.domain} — SMS blocked for ${siteData.country_code} (DR-121)`);
 329        return false;
 330      }
 331      return true;
 332    });
 333  
 334    if (availableContacts.length === 0) {
 335      logger.warn(
 336        `All ${allContacts.length} contacts for ${siteData.domain} already have messages - skipping`
 337      );
 338      return {
 339        siteId,
 340        domain: siteData.domain,
 341        keyword: siteData.keyword,
 342        outreachIds: [],
 343        variants: [],
 344        reasoning: 'All contacts already have messages',
 345        contactCount: allContacts.length,
 346      };
 347    }
 348  
 349    if (availableContacts.length < allContacts.length) {
 350      logger.info(
 351        `Filtered ${allContacts.length - availableContacts.length} contacts with existing messages`
 352      );
 353    }
 354  
 355    logger.info(`Found ${availableContacts.length} contacts - generating personalized proposals`);
 356  
 357    // Generate one proposal per contact
 358    const _tGen = Date.now();
 359    const proposalData = await generateProposalsWithAI(
 360      siteData,
 361      scoreData,
 362      availableContacts,
 363      reworkInstructions,
 364      siteId
 365    );
 366  
 367    logger.debug(`[proposals] generation ${Date.now() - _tGen}ms for ${siteData.domain}`);
 368  
 369    // Validate that we got the right number of variants
 370    if (proposalData.variants.length !== availableContacts.length) {
 371      throw new Error(
 372        `Expected ${availableContacts.length} variants but got ${proposalData.variants.length}`
 373      );
 374    }
 375  
 376    // Polish all variants concurrently then store
 377    const tPolish = Date.now();
 378    const polishedVariants = await Promise.all(
 379      availableContacts.map(async (contact, i) => {
 380        const variant = proposalData.variants[i]; // eslint-disable-line security/detect-object-injection
 381        const contactMethod = normalizeContactMethod(contact.channel);
 382        const isEmailChannel = contactMethod === 'email' || contactMethod === 'form';
 383        const polished = await polishProposal(
 384          variant.proposal_text,
 385          contactMethod,
 386          siteData.language_code || 'en',
 387          isEmailChannel ? proposalData.subject_line : null,
 388          siteData.country_code || null
 389        );
 390        return { contact, variant, contactMethod, polished };
 391      })
 392    );
 393    logger.debug(
 394      `[proposals] polish ${Date.now() - tPolish}ms for ${availableContacts.length} contacts`
 395    );
 396  
 397    const outreachIds = [];
 398    for (const { contact, variant, polished } of polishedVariants) {
 399      // Store outreach entry (with GDPR filtering) using polished content
 400      const outreachId = await storeProposalVariant(
 401        siteId,
 402        { ...variant, proposal_text: polished.text },
 403        polished.subjectLine ?? proposalData.subject_line,
 404        contact,
 405        siteData
 406      );
 407      outreachIds.push(outreachId);
 408  
 409      const nameInfo = contact.name ? ` (${contact.name})` : '';
 410      const contactMethod = normalizeContactMethod(contact.channel);
 411      logger.success(
 412        `Stored outreach #${outreachId}: V${variant.variant_number} → ${contactMethod}${nameInfo} (${contact.uri})`
 413      );
 414    }
 415  
 416    // Update site status
 417    await run(
 418      `UPDATE sites
 419       SET status = 'proposals_drafted',
 420           updated_at = CURRENT_TIMESTAMP
 421       WHERE id = $1`,
 422      [siteId]
 423    );
 424  
 425    logger.info(
 426      `[proposals] total ${Date.now() - _t0}ms for ${siteData.domain} (${allContacts.length} contacts)`
 427    );
 428    logger.success(
 429      `Generated ${proposalData.variants.length} proposal variants and created ${outreachIds.length} outreach entries for ${siteData.domain}`
 430    );
 431  
 432    return {
 433      siteId,
 434      domain: siteData.domain,
 435      keyword: siteData.keyword,
 436      outreachIds,
 437      variants: proposalData.variants,
 438      reasoning: proposalData.reasoning,
 439      contactCount: allContacts.length,
 440    };
 441  }
 442  
 443  /**
 444   * Generate proposal variants using LLM API
 445   * @param {Object} siteData - Site data
 446   * @param {Object} scoreData - Scoring data
 447   * @param {Array} contacts - Array of contacts from getAllContacts()
 448   * @param {string} reworkInstructions - Optional rework instructions
 449   * @param {number} siteId - Site ID for usage tracking
 450   * @returns {Promise<Object>} Proposal data with variants
 451   */
 452   
 453  async function generateProposalsWithAI(
 454    siteData,
 455    scoreData,
 456    contacts,
 457    reworkInstructions,
 458    siteId
 459  ) {
 460    // Extract business type from keyword
 461    const businessType = extractBusinessType(siteData.keyword);
 462  
 463    // Build context for the AI
 464    const context = await buildProposalContext(siteData, scoreData, contacts, businessType);
 465  
 466    // Add rework instructions if provided
 467    const reworkSection = reworkInstructions
 468      ? `\n\nREWORK INSTRUCTIONS FROM OPERATOR:\n${reworkInstructions}\n\nPlease incorporate this feedback into the new variants.`
 469      : '';
 470  
 471    const userPrompt = `${context}${reworkSection}`;
 472  
 473    return retryWithBackoff(
 474      async () => {
 475        // Calculate max_tokens based on number of contacts
 476        const maxTokens = Math.max(8192, contacts.length * 1200);
 477  
 478        const response = await openRouterBreaker.fire(async () =>
 479          openRouterLimiter.schedule(() =>
 480            callLLM({
 481              model: PROPOSAL_MODEL,
 482              messages: [
 483                { role: 'system', content: PROPOSAL_PROMPT },
 484                { role: 'user', content: userPrompt },
 485              ],
 486              temperature: 0.7,
 487              max_tokens: maxTokens,
 488              json_mode: true,
 489              stage: 'proposals',
 490              siteId,
 491            })
 492          )
 493        );
 494  
 495        const { content, usage } = response;
 496  
 497        logger.info(
 498          `Generated proposals using ${getProviderDisplayName()} - ${usage.promptTokens + usage.completionTokens} tokens`
 499        );
 500  
 501        if (!content) {
 502          throw new Error('No content in API response');
 503        }
 504  
 505        const result = safeJsonParse(content);
 506  
 507        if (!result || !result.variants || !Array.isArray(result.variants)) {
 508          throw new Error('Invalid proposal response format');
 509        }
 510  
 511        if (result.variants.length !== contacts.length) {
 512          throw new Error(`Expected ${contacts.length} variants but got ${result.variants.length}`);
 513        }
 514  
 515        // Validate response structure and check for suspicious URLs
 516        validateProposalResponse(result, contacts.length);
 517  
 518        // Programmatic content validation — retry if LLM ignores constraints
 519        const freePattern =
 520          /\bfree\s+(audit|report|assessment|consultation|review|strategy\s+call|snapshot|analysis|evaluation|estimate|quote)\b|\bno\s+cost\b|\bat\s+no\s+charge\b|\bcomplimentary\b/i;
 521        const phonePattern = /\+\d[\d\s\-().]{6,}/;
 522        for (const variant of result.variants) {
 523          const text = variant.proposal_text || '';
 524          if (freePattern.test(text)) {
 525            throw new Error(
 526              `LLM promised a free service in variant ${variant.variant_number} — retry required`
 527            );
 528          }
 529          if (phonePattern.test(text)) {
 530            throw new Error(
 531              `LLM hallucinated a phone number in variant ${variant.variant_number} — retry required`
 532            );
 533          }
 534        }
 535  
 536        return result;
 537      },
 538      {
 539        maxRetries: 3,
 540        shouldRetry: error => error.message.includes('retry required') || isRetryableError(error),
 541        onRetry: (attempt, error) => {
 542          logger.warn(`Retry ${attempt + 1}/3 for ${siteData.domain}: ${error.message}`);
 543        },
 544      }
 545    );
 546  }
 547  
 548  /**
 549   * Get top competitor for a keyword
 550   * @param {Object} siteData - Site data
 551   * @returns {Promise<Object|null>} Competitor info {domain, score, grade} or null
 552   */
 553  async function getTopCompetitor(siteData) {
 554    // Extract industry from score_json (filesystem first, DB fallback)
 555    const scoreJson = getScoreDataWithFallback(siteData.id, siteData);
 556    const industry = scoreJson?.industry_classification;
 557    if (!industry) return null;
 558  
 559    // Find all country codes sharing the same language
 560    const prospectLang = COUNTRIES[siteData.country_code]?.language;
 561    if (!prospectLang) return null;
 562  
 563    const sameLanguageCodes = Object.entries(COUNTRIES)
 564      .filter(([, c]) => c.language === prospectLang)
 565      .map(([code]) => code);
 566  
 567    const placeholders = sameLanguageCodes.map((_, i) => `$${i + 3}`).join(',');
 568    const competitor = await getOne(
 569      `SELECT domain, score, grade
 570       FROM sites
 571       WHERE industry_classification = $1
 572         AND domain != $2
 573         AND country_code IN (${placeholders})
 574         AND status IN ('prog_scored', 'semantic_scored', 'vision_scored', 'enriched', 'proposals_drafted', 'outreach_partial', 'outreach_sent')
 575         AND score IS NOT NULL
 576       ORDER BY score DESC
 577       LIMIT 1`,
 578      [industry, siteData.domain, ...sameLanguageCodes]
 579    );
 580  
 581    return competitor || null;
 582  }
 583  
 584  /**
 585   * Build context for proposal generation
 586   * @param {Object} siteData - Site data
 587   * @param {Object} scoreData - Scoring data
 588   * @param {Array} contacts - Array of contacts from getAllContacts()
 589   * @param {string} businessType - Business type extracted from keyword
 590   * @returns {Promise<string>} Context string for AI
 591   */
 592  async function buildProposalContext(siteData, scoreData, contacts, businessType) {
 593    const weaknesses = extractKeyWeaknesses(scoreData);
 594  
 595    // Get country configuration for localization
 596    const country = getCountryByCode(siteData.country_code);
 597  
 598    // Extract location from contacts_json (more reliable than keyword)
 599    const contactData = getContactsDataWithFallback(siteData.id, siteData);
 600    const city = contactData?.city || siteData.city || null;
 601    const state = contactData?.state || siteData.state || null;
 602  
 603    // Format contacts list for AI
 604    const contactsList = contacts
 605      .map((contact, i) => {
 606        const safeName = contact.name ? stripInjectionMarkers(contact.name) : null;
 607        const nameInfo = safeName ? ` (Name: ${safeName})` : ' (Name: Unknown)';
 608        return `${i + 1}. ${contact.channel}${nameInfo} - ${contact.uri}`;
 609      })
 610      .join('\n');
 611  
 612    // Get top competitor and determine if score difference meets threshold
 613    const competitor = await getTopCompetitor(siteData);
 614    const scoreThreshold = parseFloat(process.env.COMPETITOR_SCORE_THRESHOLD || '10');
 615  
 616    let competitorSection = '';
 617    if (competitor) {
 618      const scoreDifference = competitor.score - siteData.score;
 619      if (scoreDifference >= scoreThreshold) {
 620        competitorSection = `\nTOP COMPETITOR (same industry, same language):
 621  - Domain: ${competitor.domain}
 622  - Score: ${computeGrade(competitor.score)} (${competitor.score}/100)
 623  - Difference: ${scoreDifference.toFixed(1)} points higher than target site
 624  
 625  FRAMING RULES:
 626  - Score = percentage of conversion best practices implemented (NOT a conversion rate)
 627  - Use "our analysis suggests" — never state conversion claims as fact
 628  - ~20-point gap ≈ roughly 2x conversion difference (heuristic, frame as estimate)
 629  - Example: "Our analysis suggests ${competitor.domain} implements ${competitor.score}% of conversion best practices vs your ${siteData.score}% — that gap likely means they're converting roughly twice as many visitors."`;
 630      } else {
 631        competitorSection = `\nTOP COMPETITOR (same industry, same language):
 632  - Domain: ${competitor.domain}
 633  - Score: Similar to target site (do NOT mention specific scores - they're too close)
 634  
 635  IMPORTANT: Do not mention competitor scores when they are clustered too tightly to be meaningful.`;
 636      }
 637    }
 638  
 639    // Build localization requirements
 640    const localizationSection = `
 641  LOCALIZATION REQUIREMENTS:
 642  - Country: ${country.name} (${country.code})
 643  - Language: ${country.language === 'en' ? 'English' : country.language}
 644  - Currency: ${country.currencySymbol} ${country.currency}
 645  - Spelling: Use ${country.name} spelling conventions${['AU', 'GB', 'UK', 'NZ', 'IE', 'ZA', 'IN'].includes(country.code) ? ' (e.g., "optimise" not "optimize", "colour" not "color", "specialise" not "specialize")' : country.code === 'US' || country.code === 'CA' ? ' (e.g., "optimize" not "optimise", "color" not "colour", "specialize" not "specialise")' : ''}
 646  - Cultural context: Reference ${country.name} business norms and expectations
 647  - Date format: ${country.dateFormat}
 648  - Phone format: ${country.phoneFormat}
 649  ${country.timezone ? `- Timezone: ${country.timezone} (consider local business hours for call/response timing)` : ''}`;
 650  
 651    // Build best practices section
 652    const bestPracticesSection = `
 653  CHANNEL-SPECIFIC BEST PRACTICES:
 654  
 655  === EMAIL BEST PRACTICES ===
 656  ${EMAIL_BEST_PRACTICES}
 657  
 658  === SMS BEST PRACTICES ===
 659  ${SMS_BEST_PRACTICES}
 660  
 661  IMPORTANT: Follow these best practices when crafting proposals for each channel.
 662  - Email proposals should follow CAN-SPAM requirements and copywriting best practices
 663  - SMS proposals MUST be <160 chars and follow local regulations.
 664  - TCPA opt-out ("Reply STOP to opt out.") is REQUIRED for US and CA — include it in every US/CA SMS.
 665  - CRITICAL: do NOT add any opt-out, STOP, or unsubscribe text for AU, GB, NZ, IE, ZA, IN or any non-US/CA market. These markets do not require TCPA opt-out and the extra text wastes precious SMS characters.
 666  - All proposals must provide genuine value and avoid spam triggers
 667  `;
 668  
 669    // Build location string
 670    let locationStr = 'Unknown';
 671    if (city && state) {
 672      locationStr = `${city}, ${state}`;
 673    } else if (city) {
 674      locationStr = city;
 675    } else if (state) {
 676      locationStr = state;
 677    }
 678  
 679    // Aggregate stats for credibility (dynamic from DB)
 680    const aggRow = await getOne(
 681      `SELECT COUNT(*) AS total, ROUND(AVG(score)::numeric, 1) AS avg_score, ROUND(MAX(score)::numeric, 1) AS best_score
 682       FROM sites WHERE score IS NOT NULL`
 683    );
 684    const aggregateSection = `
 685  AGGREGATE STATS (real data — use for credibility):
 686  - total_sites_scored: ${aggRow.total}
 687  - avg_score: ${aggRow.avg_score}
 688  - best_score: ${aggRow.best_score}`;
 689  
 690    return `Generate personalized sales proposals for this website:
 691  
 692  TARGET BUSINESS:
 693  - Domain: ${siteData.domain}
 694  - Business Name: ${contactData?.business_name || siteData.domain}
 695  - URL: ${siteData.landing_page_url}
 696  - Business Type: ${businessType}
 697  - Location: ${locationStr}
 698  
 699  CONVERSION ANALYSIS:
 700  - Score: ${siteData.score ? computeGrade(siteData.score) : 'N/A'} (${siteData.score || 'N/A'}/100)
 701  ${competitorSection}
 702  ${aggregateSection}
 703  
 704  KEY WEAKNESSES IDENTIFIED:
 705  ${weaknesses}
 706  
 707  CONTACTS TO REACH (generate ONE variant for EACH):
 708  ${contactsList}
 709  ${localizationSection}
 710  
 711  ${bestPracticesSection}
 712  
 713  Generate ${contacts.length} unique proposal variants - one for each contact listed above.
 714  Vary your approach across the variants (don't send identical messages to everyone).
 715  IMPORTANT: Use the exact "Business Name" above when referring to the company — do NOT guess or derive a name from the domain.
 716  IMPORTANT: Apply all localization requirements above to ensure proposals are culturally appropriate and use correct local conventions.
 717  CRITICAL: Follow the channel-specific best practices above - especially SMS length limits and compliance requirements.`;
 718  }
 719  
 720  /**
 721   * Extract business type from keyword
 722   */
 723  function extractBusinessType(keyword) {
 724    const commonLocations = [
 725      'sydney',
 726      'melbourne',
 727      'brisbane',
 728      'perth',
 729      'adelaide',
 730      'canberra',
 731      'hobart',
 732      'darwin',
 733    ];
 734  
 735    let businessType = keyword;
 736    for (const location of commonLocations) {
 737      // eslint-disable-next-line security/detect-non-literal-regexp -- Safe: location is from hardcoded array
 738      businessType = businessType.replace(new RegExp(location, 'gi'), '').trim();
 739    }
 740  
 741    return businessType || keyword;
 742  }
 743  
 744  /**
 745   * Extract location from keyword
 746   */
 747  function extractLocation(keyword) {
 748    const words = keyword.split(' ');
 749    return words.slice(-2).join(' ');
 750  }
 751  
 752  /**
 753   * Extract key weaknesses from score data
 754   */
 755  function extractKeyWeaknesses(scoreData) {
 756    if (!scoreData || !scoreData.factor_scores) {
 757      return 'General conversion optimization opportunities';
 758    }
 759  
 760    const weaknesses = [];
 761  
 762    Object.entries(scoreData.factor_scores).forEach(([factorName, factor]) => {
 763      if (factor && factor.score < 5 && factor.reasoning) {
 764        const label = factorName.replace(/_/g, ' ');
 765        weaknesses.push(`- ${label}: ${stripInjectionMarkers(factor.reasoning)}`);
 766      }
 767    });
 768  
 769    return weaknesses.length > 0
 770      ? weaknesses.slice(0, 5).join('\n')
 771      : 'General conversion optimization opportunities';
 772  }
 773  
 774  /**
 775   * Normalize contact method to match database schema
 776   */
 777  function normalizeContactMethod(method) {
 778    if (!method) return 'email'; // Default fallback
 779  
 780    const normalized = method.toLowerCase().trim();
 781  
 782    const mapping = {
 783      sms: 'sms',
 784      email: 'email',
 785      form: 'form',
 786      'contact form': 'form',
 787      contactform: 'form',
 788      x: 'x',
 789      twitter: 'x',
 790      linkedin: 'linkedin',
 791      facebook: 'email',
 792      instagram: 'email',
 793    };
 794  
 795    // eslint-disable-next-line security/detect-object-injection -- Safe: normalized is from our channel type strings
 796    return mapping[normalized] || 'email';
 797  }
 798  
 799  /**
 800   * Store proposal variant in messages table
 801   * @param {number} siteId - Site ID
 802   * @param {Object} variant - Variant object with proposal_text, variant_number
 803   * @param {string} subjectLine - Email subject line
 804   * @param {Object|null} contact - Contact object {type, uri, name, channel} or null
 805   * @param {Object} siteData - Site data including country_code and gdpr_verified
 806   * @returns {Promise<number|null>} Message ID
 807   */
 808  async function storeProposalVariant(siteId, variant, subjectLine, contact = null, siteData = null) {
 809    // Validate variant data
 810    if (!variant || typeof variant !== 'object') {
 811      throw new Error('Invalid variant: must be an object');
 812    }
 813    if (!variant.proposal_text || typeof variant.proposal_text !== 'string') {
 814      throw new Error(`Invalid variant.proposal_text for variant ${variant.variant_number}`);
 815    }
 816  
 817    // Use contact info if available, otherwise use variant recommendation
 818    const rawContactMethod = contact ? contact.channel : variant.recommended_channel || 'email';
 819    const contactMethod = normalizeContactMethod(rawContactMethod);
 820    let contactUri = contact ? contact.uri : 'PENDING_CONTACT_EXTRACTION';
 821  
 822    // Normalize SMS phone numbers with country code
 823    if (
 824      contactMethod === 'sms' &&
 825      contactUri !== 'PENDING_CONTACT_EXTRACTION' &&
 826      siteData?.country_code
 827    ) {
 828      contactUri = addCountryCode(contactUri, siteData.country_code);
 829    }
 830  
 831    // GDPR compliance check: Block unverified emails in GDPR countries
 832    let approvalStatus = 'pending';
 833  
 834    if (
 835      siteData &&
 836      contactMethod === 'email' &&
 837      siteData.gdpr_verified === 0 &&
 838      siteData.country_code
 839    ) {
 840      const country = getCountryByCode(siteData.country_code);
 841      if (country && country.requiresGDPRCheck) {
 842        approvalStatus = 'gdpr_blocked';
 843        logger.info(
 844          `  ⚠️  GDPR blocked: ${contactUri} in ${siteData.country_code} (unverified company email)`
 845        );
 846      }
 847    }
 848  
 849    // Government email check
 850    if (contactMethod === 'email' && isGovernmentEmail(contactUri)) {
 851      logger.info(`  ⚠️  Skipping government email: ${contactUri}`);
 852      return null;
 853    }
 854  
 855    // Education email check
 856    if (contactMethod === 'email' && isEducationEmail(contactUri)) {
 857      logger.info(`  ⚠️  Skipping education email: ${contactUri}`);
 858      return null;
 859    }
 860  
 861    // Demo email check
 862    if (contactMethod === 'email' && isDemoEmail(contactUri)) {
 863      logger.info(`  ⚠️  Skipping demo email: ${contactUri}`);
 864      return null;
 865    }
 866  
 867    // Derive contact_basis from channel
 868    const CONTACT_BASIS = {
 869      sms: 'public_website_phone',
 870      email: 'public_website_email',
 871      form: 'contact_form',
 872      x: 'social_profile',
 873      linkedin: 'social_profile',
 874    };
 875    const contactBasis = CONTACT_BASIS[contactMethod] || null;
 876  
 877    // Debug logging
 878    const nameInfo = contact?.name ? ` (${contact.name})` : '';
 879    const statusInfo = approvalStatus === 'gdpr_blocked' ? ' [GDPR BLOCKED]' : '';
 880    logger.info(
 881      `Storing variant ${variant.variant_number}: siteId=${siteId}, textLen=${variant.proposal_text?.length}, method=${contactMethod}${nameInfo}, uri=${contactUri}${statusInfo}`
 882    );
 883  
 884    // Resolve spintax before storing — each message record has a final, human-readable version
 885    const spunText = spin(variant.proposal_text);
 886    const spunSubject = spin(subjectLine);
 887  
 888    // OUTPUT SANITIZATION
 889    const prospectDomain = siteData?.domain || null;
 890    const { sanitized: sanitizedText, strippedUrls, strippedPatterns } = sanitizeLlmOutput(
 891      spunText,
 892      prospectDomain
 893    );
 894    const { sanitized: sanitizedSubject } = sanitizeLlmOutput(spunSubject, prospectDomain);
 895  
 896    if (strippedUrls.length > 0 || strippedPatterns.length > 0) {
 897      logger.warn(
 898        `[proposals] Output sanitization for site ${siteId}: removed ${strippedPatterns.length} dangerous patterns, ${strippedUrls.length} unauthorized URLs`
 899      );
 900    }
 901  
 902    // Reject broken template output
 903    if (sanitizedText.startsWith('{') || sanitizedText.startsWith(', ')) {
 904      logger.warn(
 905        `[proposals] Broken template output for site ${siteId} — starts with '${sanitizedText.slice(0, 10)}' — skipping`
 906      );
 907      return null;
 908    }
 909  
 910    // SMS: append dynamic signature + opt-out based on country and remaining char budget
 911    let finalText = sanitizedText;
 912    if (contactMethod === 'sms') {
 913      const cc = (siteData?.country_code || '').toUpperCase();
 914      const needsStop = cc === 'US' || cc === 'CA';
 915      finalText = finalText
 916        .replace(/\s*Reply STOP to opt out\.?\s*/gi, '')
 917        .replace(/\s*-\s*Marcus\s*(Webb)?\s*,?\s*(Audit\s*&\s*Fix)?\s*$/i, '')
 918        .trimEnd();
 919      const stopText = needsStop ? ' Reply STOP to opt out.' : '';
 920      const sigTiers = needsStop
 921        ? [' - Marcus, Audit&Fix', ' - Audit&Fix']
 922        : [' - Marcus', ''];
 923      let bestSig = sigTiers[sigTiers.length - 1];
 924      for (const sig of sigTiers) {
 925        if (finalText.length + stopText.length + sig.length <= 160) {
 926          bestSig = sig;
 927          break;
 928        }
 929      }
 930      finalText = `${finalText}${stopText}${bestSig}`;
 931    } else if (contactMethod !== 'sms') {
 932      const hasIdentity = /Marcus|Audit&Fix|AuditFix|auditandfix/i.test(finalText);
 933      if (!hasIdentity) {
 934        finalText = `${finalText.trimEnd()}\n\nMarcus Webb, Audit&Fix (auditandfix.com)`;
 935      }
 936    }
 937  
 938    // Hard character limit enforcement
 939    const CHANNEL_MAX_CHARS = { sms: 160, x: 280 };
 940    const maxChars = CHANNEL_MAX_CHARS[contactMethod];
 941    if (maxChars && finalText.length > maxChars) {
 942      logger.warn(
 943        `[proposals] ${contactMethod} message too long (${finalText.length}/${maxChars} chars) for site ${siteId} — skipping`
 944      );
 945      return null;
 946    }
 947  
 948    const result = await run(
 949      `INSERT INTO messages (
 950        site_id,
 951        direction,
 952        message_body,
 953        subject_line,
 954        contact_method,
 955        contact_uri,
 956        approval_status,
 957        contact_basis
 958      ) VALUES ($1, 'outbound', $2, $3, $4, $5, $6, $7)
 959      ON CONFLICT (site_id, contact_method, contact_uri) WHERE direction = 'outbound' AND approval_status IN ('pending', 'approved')
 960      DO UPDATE SET
 961        message_body = EXCLUDED.message_body,
 962        subject_line = EXCLUDED.subject_line,
 963        approval_status = EXCLUDED.approval_status,
 964        contact_basis = EXCLUDED.contact_basis
 965      RETURNING id`,
 966      [siteId, finalText, sanitizedSubject, contactMethod, contactUri, approvalStatus, contactBasis]
 967    );
 968  
 969    return result.lastInsertRowid;
 970  }
 971  
 972  /**
 973   * Get pending messages awaiting approval
 974   */
 975  export async function getPendingOutreaches(limit = 100) {
 976    return await getAll(
 977      `SELECT
 978        m.*,
 979        s.domain,
 980        s.keyword,
 981        s.landing_page_url,
 982        s.grade
 983      FROM messages m
 984      JOIN sites s ON m.site_id = s.id
 985      WHERE m.direction = 'outbound'
 986        AND m.approval_status = 'pending'
 987      ORDER BY m.created_at DESC
 988      LIMIT $1`,
 989      [limit]
 990    );
 991  }
 992  
 993  /**
 994   * Approve message for sending
 995   */
 996  export async function approveOutreach(outreachId) {
 997    await run("UPDATE messages SET approval_status = 'approved' WHERE id = $1", [outreachId]);
 998    logger.success(`Approved message #${outreachId}`);
 999  }
1000  
1001  /**
1002   * Mark message for rework
1003   */
1004  export async function reworkOutreach(outreachId, reworkInstructions) {
1005    await run(
1006      "UPDATE messages SET approval_status = 'rework', rework_instructions = $1 WHERE id = $2",
1007      [reworkInstructions, outreachId]
1008    );
1009    logger.info(`Marked message #${outreachId} for rework`);
1010  }
1011  
1012  /**
1013   * Process rework requests (regenerate proposals with feedback)
1014   */
1015  export async function processReworkQueue() {
1016    const reworkItems = await getAll(
1017      `SELECT DISTINCT site_id, rework_instructions
1018       FROM messages
1019       WHERE direction = 'outbound'
1020         AND approval_status = 'rework'
1021       GROUP BY site_id, rework_instructions`
1022    );
1023  
1024    logger.info(`Processing ${reworkItems.length} rework requests...`);
1025  
1026    let processed = 0;
1027    let failed = 0;
1028  
1029    for (const item of reworkItems) {
1030      try {
1031        // Delete old pending/rework messages for this site
1032        await run(
1033          `DELETE FROM messages
1034           WHERE site_id = $1
1035           AND direction = 'outbound'
1036           AND approval_status IN ('pending', 'rework')`,
1037          [item.site_id]
1038        );
1039  
1040        // Generate new variants with rework instructions
1041        await generateProposalVariants(item.site_id, item.rework_instructions);
1042  
1043        logger.success(`Regenerated proposals for site #${item.site_id}`);
1044        processed++;
1045      } catch (error) {
1046        logger.error(`Failed to rework site #${item.site_id}: ${error.message}`);
1047        failed++;
1048      }
1049    }
1050  
1051    logger.success(
1052      `Processed ${processed}/${reworkItems.length} rework requests${failed ? ` (${failed} failed)` : ''}`
1053    );
1054  }
1055  
1056  /**
1057   * Generate proposals for all low-scoring sites without outreaches
1058   */
1059  export async function generateBulkProposals(limit = null) {
1060    const cutoff = parseFloat(process.env.LOW_SCORE_CUTOFF || '82');
1061  
1062    const sql = `SELECT s.id, s.domain, s.keyword, s.score
1063         FROM sites s
1064         LEFT JOIN messages m ON s.id = m.site_id AND m.direction = 'outbound'
1065         WHERE s.status IN ('enriched', 'enriched_llm')
1066         AND s.score < $1
1067         AND m.id IS NULL
1068         ORDER BY s.score ASC
1069         ${limit ? `LIMIT ${limit}` : ''}`;
1070  
1071    const sites = await getAll(sql, [cutoff]);
1072  
1073    logger.info(`Generating proposals for ${sites.length} sites...`);
1074  
1075    const results = [];
1076  
1077    for (const site of sites) {
1078      try {
1079        const result = await generateProposalVariants(site.id);
1080        results.push(result);
1081  
1082        // Rate limiting
1083        await new Promise(resolve => setTimeout(resolve, 2000));
1084      } catch (error) {
1085        logger.error(`Failed for site #${site.id}`, error);
1086        results.push({ siteId: site.id, error: error.message });
1087      }
1088    }
1089  
1090    logger.success(
1091      `Generated proposals for ${results.filter(r => !r.error).length}/${results.length} sites`
1092    );
1093  
1094    return results;
1095  }
1096  
1097  // CLI functionality
1098  if (import.meta.url === `file://${process.argv[1]}`) {
1099    const command = process.argv[2];
1100  
1101    if (command === 'generate') {
1102      const siteId = parseInt(process.argv[3], 10);
1103      if (!siteId) {
1104        console.error('Usage: node src/proposal-generator-v2.js generate <site_id>');
1105        process.exit(1);
1106      }
1107  
1108      generateProposalVariants(siteId)
1109        .then(result => {
1110          console.log('\n✅ Proposals generated!\n');
1111          console.log(`Site: ${result.domain} (${result.keyword})`);
1112          console.log(`Outreach IDs: ${result.outreachIds.join(', ')}`);
1113          process.exit(0);
1114        })
1115        .catch(error => {
1116          console.error(`\n❌ Failed: ${error.message}`);
1117          process.exit(1);
1118        });
1119    } else if (command === 'bulk') {
1120      const limit = process.argv[3] ? parseInt(process.argv[3], 10) : null;
1121  
1122      generateBulkProposals(limit)
1123        .then(() => process.exit(0))
1124        .catch(error => {
1125          console.error(`\n❌ Failed: ${error.message}`);
1126          process.exit(1);
1127        });
1128    } else if (command === 'pending') {
1129      const limit = process.argv[3] ? parseInt(process.argv[3], 10) : 100;
1130      getPendingOutreaches(limit)
1131        .then(msgs => {
1132          console.log(JSON.stringify(msgs, null, 2));
1133          process.exit(0);
1134        })
1135        .catch(error => {
1136          console.error(`\n❌ Failed: ${error.message}`);
1137          process.exit(1);
1138        });
1139    } else {
1140      console.log('Usage:');
1141      console.log('  generate <site_id>     - Generate proposals for a site');
1142      console.log('  bulk [limit]           - Generate for all low-scoring sites');
1143      console.log('  pending [limit]        - List pending outreaches');
1144      process.exit(1);
1145    }
1146  }