Cradicle Explorer

/ tests / pipeline / pipeline-e2e.test.js
pipeline-e2e.test.js
   1  /**
   2   * End-to-End Pipeline Test
   3   * Tests complete pipeline flow for 1 site through all stages
   4   * - Actually sends SMS, email, and submits contact forms
   5   * - Validates outputs against expected values
   6   * - Saves test artifacts for manual review
   7   * - Stops at first failure
   8   */
   9  
  10  import 'dotenv/config';
  11  import { describe, test, before, after, mock } from 'node:test';
  12  import assert from 'node:assert';
  13  import Database from 'better-sqlite3';
  14  import { createPgMock } from '../helpers/pg-mock.js';
  15  import { join, dirname } from 'path';
  16  import { fileURLToPath } from 'url';
  17  import { existsSync, mkdirSync, rmSync, readFileSync, writeFileSync } from 'fs';
  18  import { getScoreJson, deleteScoreJson } from '../../src/utils/score-storage.js';
  19  import { getContactsJson, getContactsData, deleteContactsJson } from '../../src/utils/contacts-storage.js';
  20  
// ES modules do not define __filename/__dirname; derive them from import.meta.url
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Two directory levels above the directory containing this file
const projectRoot = join(__dirname, '../..');

// Test configuration from environment
// (TEST_E2E_URL / TEST_E2E_KEYWORD override the fallbacks below)
const TEST_URL = process.env.TEST_E2E_URL || 'https://example.com';
const TEST_KEYWORD = process.env.TEST_E2E_KEYWORD || 'test keyword';
// Database file and storage directory live under /tmp and embed process.pid in their names
const TEST_DB_PATH = join('/tmp', `test-e2e-${process.pid}.db`);
const TEST_STORAGE_BASE = join('/tmp', `test-e2e-storage-${process.pid}`);
const TEST_SCREENSHOT_DIR = join(projectRoot, 'screenshots-test-e2e');
// Expected-value fixtures. Note the contacts fixture is resolved one directory
// ABOVE this file's directory, unlike expected-e2e.json which sits beside it.
const EXPECTED_VALUES_PATH = join(__dirname, 'expected-e2e.json');
const EXPECTED_CONTACTS_PATH = join(__dirname, '..', 'expected-e2e-contacts.json');
// Output file that saveActualValues() writes for manual review
const ACTUAL_VALUES_PATH = join(projectRoot, 'test-results-e2e.json');

// Track site ID and actual values across tests
let siteId;
let keywordId;
let expectedValues = null;
let testFailed = false; // Track if any test has failed (for fail-fast behavior)
// Values observed during the run; each stage stores its results under `stages`
const actualValues = {
  url: TEST_URL,
  keyword: TEST_KEYWORD,
  timestamp: new Date().toISOString(),
  stages: {},
};
  46  
  47  /**
  48   * Log debug info with timestamp
  49   */
  50  function debug(message, data = null) {
  51    const timestamp = new Date().toISOString();
  52    console.log(`\n[${timestamp}] ${message}`);
  53    if (data) {
  54      console.log(JSON.stringify(data, null, 2));
  55    }
  56  }
  57  
  58  /**
  59   * Check if previous test failed and skip if so (fail-fast behavior)
  60   */
  61  function checkFailFast(testName) {
  62    if (testFailed) {
  63      debug(`⏭️  Skipping ${testName} (previous test failed - fail-fast mode)`);
  64      throw new Error('Skipping test due to previous failure');
  65    }
  66  }
  67  
/**
 * Record that a stage failed, then propagate the failure.
 *
 * Sets the module-level `testFailed` flag (which checkFailFast() reads) and
 * rethrows the very same error object, so this function never returns.
 *
 * @param {Error} error - The error caught by the failing stage.
 * @throws {Error} Always rethrows `error` unchanged.
 */
function markTestFailed(error) {
  // Set the flag before throwing; nothing after the throw would execute.
  testFailed = true;
  throw error;
}
  75  
  76  /**
  77   * Validate database record exists and has expected fields
  78   */
  79  function validateRecord(record, expectedFields, recordType) {
  80    assert.ok(record, `${recordType} record should exist`);
  81    for (const field of expectedFields) {
  82      assert.ok(
  83        record[field] !== null && record[field] !== undefined,
  84        `${recordType} should have ${field}`
  85      );
  86    }
  87    debug(`✅ ${recordType} validation passed`);
  88  }
  89  
  90  /**
  91   * Load expected values from JSON files if they exist
  92   */
  93  function loadExpectedValues() {
  94    let expected = {};
  95  
  96    // Load main expected values (score, html, etc.)
  97    if (existsSync(EXPECTED_VALUES_PATH)) {
  98      try {
  99        const content = readFileSync(EXPECTED_VALUES_PATH, 'utf-8');
 100        const data = JSON.parse(content);
 101  
 102        // Check if this is a template or has actual expected values
 103        if (
 104          data.expected &&
 105          data.expected.conversion_score_json &&
 106          !data.expected.conversion_score_json.note
 107        ) {
 108          debug('Loaded expected values from expected-e2e.json');
 109          expected = { ...data.expected };
 110        } else {
 111          debug('⚠️  expected-e2e.json is still a template. Score validation will be skipped.');
 112        }
 113      } catch (err) {
 114        debug(`⚠️  Could not parse expected-e2e.json: ${err.message}`);
 115      }
 116    } else {
 117      debug('⚠️  expected-e2e.json not found. Score validation will be skipped.');
 118    }
 119  
 120    // Load expected contacts
 121    if (existsSync(EXPECTED_CONTACTS_PATH)) {
 122      try {
 123        const contactsContent = readFileSync(EXPECTED_CONTACTS_PATH, 'utf-8');
 124        const contactsData = JSON.parse(contactsContent);
 125        debug('Loaded expected contacts from expected-e2e-contacts.json');
 126        expected.contacts_json = contactsData;
 127      } catch (err) {
 128        debug(`⚠️  Could not parse expected-e2e-contacts.json: ${err.message}`);
 129      }
 130    } else {
 131      debug('⚠️  expected-e2e-contacts.json not found. Contact validation will be skipped.');
 132    }
 133  
 134    return Object.keys(expected).length > 0 ? expected : null;
 135  }
 136  
 137  /**
 138   * Save actual values to JSON file for manual review
 139   */
 140  function saveActualValues() {
 141    try {
 142      writeFileSync(ACTUAL_VALUES_PATH, JSON.stringify(actualValues, null, 2));
 143      debug(`✅ Saved actual values to ${ACTUAL_VALUES_PATH}`);
 144    } catch (err) {
 145      debug(`⚠️  Could not save actual values: ${err.message}`);
 146    }
 147  }
 148  
 149  /**
 150   * Validate conversion_score_json against expected values
 151   */
 152  function validateScoreJson(actual, expected) {
 153    if (!expected || !expected.conversion_score_json) {
 154      debug('⏭️  Skipping conversion_score_json validation (no expected values)');
 155      return;
 156    }
 157  
 158    debug('Validating conversion_score_json...');
 159  
 160    // Check score is in expected range
 161    if (expected.score_range) {
 162      const actualScore = actual.overall_calculation?.conversion_score;
 163      assert.ok(
 164        actualScore >= expected.score_range.min && actualScore <= expected.score_range.max,
 165        `Score ${actualScore} should be in range ${expected.score_range.min}-${expected.score_range.max}`
 166      );
 167      debug(`✅ Score ${actualScore} is within expected range`);
 168    }
 169  
 170    // Check expected grade if specified
 171    if (expected.expected_grade) {
 172      const actualGrade = actual.overall_calculation?.grade;
 173      assert.strictEqual(
 174        actualGrade,
 175        expected.expected_grade,
 176        `Grade should be ${expected.expected_grade}`
 177      );
 178      debug(`✅ Grade ${actualGrade} matches expected`);
 179    }
 180  }
 181  
 182  /**
 183   * Check if actual label matches any variation of expected label
 184   * @param {string} actualLabel - Label from extraction
 185   * @param {Array<string>} labelVariations - Acceptable label variations
 186   * @returns {boolean} True if label matches any variation
 187   */
 188  function labelMatches(actualLabel, labelVariations) {
 189    if (!actualLabel || !labelVariations) return false;
 190    const normalized = actualLabel.toLowerCase().trim();
 191    return labelVariations.some(
 192      variation =>
 193        variation.toLowerCase().trim() === normalized ||
 194        normalized.includes(variation.toLowerCase().trim()) ||
 195        variation.toLowerCase().trim().includes(normalized)
 196    );
 197  }
 198  
 199  /**
 200   * Validate contacts_json against expected patterns
 201   */
 202  function validateContactsJson(actual, expected) {
 203    if (!expected || !expected.contacts_json) {
 204      debug('⏭️  Skipping contacts_json validation (no expected values)');
 205      return;
 206    }
 207  
 208    debug('Validating contacts_json...');
 209  
 210    assert.ok(actual, 'contacts_json should exist');
 211  
 212    const expectedContacts = expected.contacts_json;
 213    const visionEnabled = process.env.ENABLE_VISION === 'true';
 214  
 215    // Validate email addresses if expected
 216    if (expectedContacts.email_addresses && Array.isArray(expectedContacts.email_addresses)) {
 217      assert.ok(
 218        Array.isArray(actual.email_addresses),
 219        'contacts_json should have email_addresses array'
 220      );
 221      const applicableEmails = expectedContacts.email_addresses.filter(
 222        e => !e.requires_vision || visionEnabled
 223      );
 224      const actualCount = actual.email_addresses?.length || 0;
 225      const expectedCount = applicableEmails.length;
 226      assert.strictEqual(
 227        actualCount,
 228        expectedCount,
 229        `Should have ${expectedCount} email addresses, got ${actualCount}`
 230      );
 231  
 232      // Validate each applicable email
 233      applicableEmails.forEach(expected => {
 234        const actualEmail = actual.email_addresses.find(
 235          a => (typeof a === 'object' ? a.email : a) === expected.email
 236        );
 237        assert.ok(actualEmail, `Should have email ${expected.email}`);
 238  
 239        // Validate label (flexible matching)
 240        if (expected.label_variations && typeof actualEmail === 'object') {
 241          const labelOk = labelMatches(actualEmail.label, expected.label_variations);
 242          if (!labelOk) {
 243            debug(
 244              `⚠️  Email ${expected.email} label "${actualEmail.label}" doesn't match variations: ${expected.label_variations.join(', ')}`
 245            );
 246          }
 247        }
 248      });
 249  
 250      debug(`✅ Found ${actualCount} email addresses (expected ${expectedCount})`);
 251    }
 252  
 253    // Validate phone numbers if expected
 254    if (expectedContacts.phone_numbers && Array.isArray(expectedContacts.phone_numbers)) {
 255      assert.ok(Array.isArray(actual.phone_numbers), 'contacts_json should have phone_numbers array');
 256      const applicablePhones = expectedContacts.phone_numbers.filter(
 257        p => !p.requires_vision || visionEnabled
 258      );
 259      const actualCount = actual.phone_numbers?.length || 0;
 260      const expectedCount = applicablePhones.length;
 261      assert.strictEqual(
 262        actualCount,
 263        expectedCount,
 264        `Should have ${expectedCount} phone numbers, got ${actualCount}`
 265      );
 266  
 267      // Validate each applicable phone (normalize to digits-only for comparison)
 268      const normalizeNum = num => num.replace(/\D/g, '');
 269      applicablePhones.forEach(expected => {
 270        const actualPhone = actual.phone_numbers.find(a => {
 271          const actualNum = typeof a === 'object' ? a.number : a;
 272          return normalizeNum(actualNum) === normalizeNum(expected.number);
 273        });
 274        assert.ok(actualPhone, `Should have phone ${expected.number}`);
 275  
 276        // Validate label (flexible matching)
 277        if (expected.label_variations && typeof actualPhone === 'object') {
 278          const labelOk = labelMatches(actualPhone.label, expected.label_variations);
 279          if (!labelOk) {
 280            debug(
 281              `⚠️  Phone ${expected.number} label "${actualPhone.label}" doesn't match variations: ${expected.label_variations.join(', ')}`
 282            );
 283          }
 284        }
 285      });
 286  
 287      debug(`✅ Found ${actualCount} phone numbers (expected ${expectedCount})`);
 288    }
 289  
 290    // Validate contact form if expected
 291    if (expectedContacts.primary_contact_form) {
 292      assert.ok(actual.primary_contact_form, 'contacts_json should have primary_contact_form');
 293  
 294      // Field count check only applies post-LLM enrichment when actual form has fields populated.
 295      // After regex-only enrichment, primary_contact_form is {form_url, form_action_url} with no fields key.
 296      if (
 297        expectedContacts.primary_contact_form.fields &&
 298        typeof actual.primary_contact_form === 'object' &&
 299        actual.primary_contact_form !== null &&
 300        actual.primary_contact_form.fields
 301      ) {
 302        const expectedFieldCount = Object.keys(expectedContacts.primary_contact_form.fields).length;
 303        const actualFieldCount = Object.keys(actual.primary_contact_form.fields).length;
 304        assert.strictEqual(
 305          actualFieldCount,
 306          expectedFieldCount,
 307          `Form should have exactly ${expectedFieldCount} fields, got ${actualFieldCount}`
 308        );
 309        debug(`✅ Form has ${actualFieldCount} fields (expected ${expectedFieldCount})`);
 310      } else {
 311        const formDesc =
 312          typeof actual.primary_contact_form === 'string'
 313            ? actual.primary_contact_form
 314            : actual.primary_contact_form?.form_url;
 315        debug(`⏭️  Form field validation skipped (fields not yet extracted; form = ${formDesc})`);
 316      }
 317    }
 318  
 319    // Validate social profiles if expected
 320    if (expectedContacts.social_profiles && Array.isArray(expectedContacts.social_profiles)) {
 321      assert.ok(
 322        Array.isArray(actual.social_profiles),
 323        'contacts_json should have social_profiles array'
 324      );
 325      const actualCount = actual.social_profiles?.length || 0;
 326      const expectedCount = expectedContacts.social_profiles.length;
 327      assert.strictEqual(
 328        actualCount,
 329        expectedCount,
 330        `Should have ${expectedCount} social profiles, got ${actualCount}`
 331      );
 332  
 333      // Validate each social profile
 334      expectedContacts.social_profiles.forEach(expected => {
 335        const actualSocial = actual.social_profiles.find(a => {
 336          const actualUrl = (typeof a === 'object' ? a.url : a).replace(/\/$/, '');
 337          const expectedUrl = (typeof expected === 'object' ? expected.url : expected).replace(/\/$/, '');
 338          return actualUrl === expectedUrl;
 339        });
 340        const expectedUrl = typeof expected === 'object' ? expected.url : expected;
 341        assert.ok(actualSocial, `Should have social profile ${expectedUrl}`);
 342  
 343        // Validate label (flexible matching)
 344        if (
 345          typeof expected === 'object' &&
 346          expected.label_variations &&
 347          typeof actualSocial === 'object'
 348        ) {
 349          const labelOk = labelMatches(actualSocial.label, expected.label_variations);
 350          if (!labelOk) {
 351            debug(
 352              `⚠️  Social ${expectedUrl} label "${actualSocial.label}" doesn't match variations: ${expected.label_variations.join(', ')}`
 353            );
 354          }
 355        }
 356      });
 357  
 358      debug(`✅ Found ${actualCount} social profiles (expected ${expectedCount})`);
 359    }
 360  
 361    // Business name (optional - LLM may not always extract it)
 362    if (expectedContacts.business_name && actual.business_name) {
 363      assert.strictEqual(
 364        actual.business_name,
 365        expectedContacts.business_name,
 366        `Business name should be "${expectedContacts.business_name}"`
 367      );
 368      debug(`✅ Business name: ${actual.business_name}`);
 369    } else if (expectedContacts.business_name && !actual.business_name) {
 370      debug(`⚠️  Business name not extracted by LLM (expected: ${expectedContacts.business_name})`);
 371    }
 372  
 373    // City (optional - LLM may not always extract it)
 374    if (expectedContacts.city && actual.city) {
 375      assert.strictEqual(actual.city, expectedContacts.city, 'City should match');
 376      debug(`✅ City: ${actual.city}`);
 377    } else if (expectedContacts.city && !actual.city) {
 378      debug(`⚠️  City not extracted by LLM (expected: ${expectedContacts.city})`);
 379    }
 380  
 381    // Country code (optional - LLM may not always extract it)
 382    if (expectedContacts.country_code && actual.country_code) {
 383      assert.strictEqual(
 384        actual.country_code,
 385        expectedContacts.country_code,
 386        'Country code should match'
 387      );
 388      debug(`✅ Country code: ${actual.country_code}`);
 389    } else if (expectedContacts.country_code && !actual.country_code) {
 390      debug(`⚠️  Country code not extracted by LLM (expected: ${expectedContacts.country_code})`);
 391    }
 392  
 393    // State (optional - LLM may not always extract it)
 394    if (expectedContacts.state && actual.state) {
 395      assert.strictEqual(actual.state, expectedContacts.state, 'State should match');
 396      debug(`✅ State: ${actual.state}`);
 397    } else if (expectedContacts.state && !actual.state) {
 398      debug(`⚠️  State not extracted by LLM (expected: ${expectedContacts.state})`);
 399    }
 400  
 401    debug('✅ contacts_json validation passed');
 402  }
 403  
 404  /**
 405   * Validate HTML DOM against expected patterns
 406   */
 407  function validateHtmlDom(actual, expected) {
 408    if (!expected || !expected.html_dom) {
 409      debug('⏭️  Skipping html_dom validation (no expected values)');
 410      return;
 411    }
 412  
 413    debug('Validating html_dom...');
 414  
 415    const htmlExpected = expected.html_dom;
 416  
 417    // Check minimum length
 418    if (htmlExpected.min_length) {
 419      assert.ok(
 420        actual.length >= htmlExpected.min_length,
 421        `HTML length ${actual.length} should be >= ${htmlExpected.min_length}`
 422      );
 423      debug(`✅ HTML length ${actual.length} meets minimum`);
 424    }
 425  
 426    // Check for required patterns
 427    if (htmlExpected.patterns) {
 428      for (const pattern of htmlExpected.patterns) {
 429        assert.ok(actual.includes(pattern), `HTML should contain pattern: ${pattern}`);
 430      }
 431      debug(`✅ All required patterns found in HTML`);
 432    }
 433  
 434    // Check for required content
 435    if (htmlExpected.should_contain) {
 436      for (const content of htmlExpected.should_contain) {
 437        assert.ok(actual.includes(content), `HTML should contain: ${content}`);
 438      }
 439      debug(`✅ All required content found in HTML`);
 440    }
 441  }
 442  
/**
 * Initialize test database with schema
 *
 * Steps, in order:
 *  1. Execute db/schema.sql inside an explicit BEGIN/COMMIT.
 *  2. Apply a fixed list of migration files from db/migrations (missing files
 *     are skipped; migration errors are logged at most, never thrown).
 *  3. Attempt to rebuild `messages` into an `outreaches` table (best effort;
 *     any error is logged as a warning).
 *  4. Upsert the `low_score_cutoff` config row.
 *
 * @param {object} db - Open database handle exposing exec() and prepare()
 *   (the caller in this file passes a better-sqlite3 Database).
 * @throws {Error} When schema execution fails for a reason other than
 *   "already exists", or when the final config insert fails.
 */
function initTestDatabase(db) {
  debug('Initializing test database...');

  // Read and execute schema
  const schemaPath = join(projectRoot, 'db/schema.sql');
  const schema = readFileSync(schemaPath, 'utf-8');

  // Execute entire schema at once (better-sqlite3 supports multiple statements).
  // Wrap in transaction to avoid 20+ second fsync overhead on Docker overlay
  // filesystems (135+ DDL statements each doing individual fsync).
  try {
    db.exec('BEGIN;');
    db.exec(schema);
    db.exec('COMMIT;');
  } catch (err) {
    // Best-effort rollback; it may itself fail (e.g. if BEGIN never succeeded).
    try { db.exec('ROLLBACK;'); } catch { /* ignore */ }
    // Only ignore "already exists" errors
    if (!err.message.includes('already exists')) {
      debug(`Schema execution error: ${err.message}`);
      throw err;
    }
  }

  // Apply migrations
  const migrations = [
    'add-keywords-table.sql',
    '005-add-screenshot-path.sql',
    'add-config-low-score-cutoff.sql',
  ];

  for (const migration of migrations) {
    const migrationPath = join(projectRoot, 'db/migrations', migration);
    // Migration files that do not exist on disk are silently skipped.
    if (existsSync(migrationPath)) {
      const sql = readFileSync(migrationPath, 'utf-8');
      try {
        db.exec(sql);
      } catch (err) {
        // Ignore "already exists" and "duplicate column" errors
        // Any other error is only logged; initialization continues regardless.
        if (!err.message.includes('already exists') && !err.message.includes('duplicate column')) {
          debug(`Migration warning (${migration}): ${err.message}`);
        }
      }
    }
  }

  // Fix outreaches table for new proposal generator
  // The new generator creates N proposals (one per contact) instead of 3 fixed variants
  // The messages table uses a unified schema with direction, approval_status, delivery_status
  //
  // NOTE(review): messages_temp declares 19 columns, but the INSERT ... SELECT
  // below supplies 20 expressions (it also selects `direction`, which has no
  // counterpart in messages_temp). Presumably SQLite rejects that INSERT, in
  // which case the DROP/RENAME/CREATE INDEX statements never run, `messages`
  // stays in place, `outreaches` is never created, and only the warning in the
  // catch block is logged - confirm whether that outcome is what the stages
  // under test actually rely on before "fixing" the column list.
  // NOTE(review): this exec is not wrapped in BEGIN/COMMIT here, so statements
  // that ran before a failure (e.g. CREATE TABLE messages_temp) would
  // presumably persist - verify.
  try {
    db.exec(`
      CREATE TABLE IF NOT EXISTS messages_temp (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        site_id INTEGER NOT NULL,
        contact_method TEXT NOT NULL CHECK(contact_method IN ('sms', 'email', 'form', 'x', 'linkedin')),
        contact_uri TEXT NOT NULL,
        our_account TEXT,
        message_body TEXT NOT NULL,
        status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'sent', 'delivered', 'failed', 'bounced', 'opened', 'clicked', 'replied', 'scheduled', 'gdpr_blocked')),
        sent_at TIMESTAMP,
        delivered_at TIMESTAMP,
        error_message TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        subject_line TEXT,
        resulted_in_sale BOOLEAN DEFAULT 0,
        sale_amount DECIMAL(10, 2),
        tracking_clicked_at DATETIME,
        opened_at DATETIME,
        email_id TEXT,
        unsubscribed BOOLEAN DEFAULT 0,
        FOREIGN KEY (site_id) REFERENCES sites(id) ON DELETE CASCADE
      );

      -- Copy existing data if any
      INSERT INTO messages_temp SELECT
        id, site_id, direction, contact_method, contact_uri, our_account,
        message_body, COALESCE(delivery_status, approval_status, 'pending') as status,
        sent_at, delivered_at, NULL as error_message,
        created_at, created_at as updated_at, subject_line,
        resulted_in_sale, sale_amount, tracking_clicked_at, opened_at, email_id, unsubscribed
      FROM messages;

      -- Replace old table
      DROP TABLE messages;
      ALTER TABLE messages_temp RENAME TO outreaches;

      -- Recreate indexes
      CREATE INDEX idx_outreaches_site_id ON outreaches(site_id);
      CREATE INDEX idx_outreaches_status ON outreaches(status);
      CREATE INDEX idx_outreaches_contact_method ON outreaches(contact_method);
    `);
  } catch (err) {
    // Deliberately non-fatal: the rebuild is best effort.
    debug(`Warning updating outreaches table: ${err.message}`);
  }

  // Insert default config
  // (INSERT OR REPLACE so an existing low_score_cutoff row is overwritten with '82')
  db.prepare(
    `
    INSERT OR REPLACE INTO config (key, value)
    VALUES ('low_score_cutoff', '82')
  `
  ).run();

  debug('✅ Test database initialized');
}
 551  
 552  describe.skip('End-to-End Pipeline Test — skipped: requires Playwright browser in container', { concurrency: false }, () => {
 553    let db;
 554  
 555    before(() => {
 556      debug('='.repeat(80));
 557      debug('STARTING END-TO-END PIPELINE TEST');
 558      debug('='.repeat(80));
 559      debug('Test Configuration', {
 560        url: TEST_URL,
 561        keyword: TEST_KEYWORD,
 562        dbPath: TEST_DB_PATH,
 563        screenshotDir: TEST_SCREENSHOT_DIR,
 564        expectedValuesPath: EXPECTED_VALUES_PATH,
 565        actualValuesPath: ACTUAL_VALUES_PATH,
 566      });
 567  
 568      // Load expected values
 569      expectedValues = loadExpectedValues();
 570  
 571      // Clean up any existing test artifacts
 572      if (existsSync(TEST_DB_PATH)) {
 573        debug('Removing existing test database...');
 574        rmSync(TEST_DB_PATH);
 575      }
 576      if (existsSync(TEST_SCREENSHOT_DIR)) {
 577        debug('Removing existing test screenshots...');
 578        rmSync(TEST_SCREENSHOT_DIR, { recursive: true });
 579      }
 580      if (existsSync(ACTUAL_VALUES_PATH)) {
 581        debug('Removing existing test results...');
 582        rmSync(ACTUAL_VALUES_PATH);
 583      }
 584  
 585      // Create test screenshot directory
 586      mkdirSync(TEST_SCREENSHOT_DIR, { recursive: true });
 587  
 588      // Set environment variables for test
 589      process.env.DATABASE_PATH = TEST_DB_PATH;
 590      process.env.SCREENSHOT_BASE_PATH = TEST_SCREENSHOT_DIR;
 591      // Isolate score/contacts filesystem storage to avoid cross-test contamination
 592      // when node --test runs files in parallel (other tests delete data/scores/1.json etc.)
 593      process.env.SCORE_STORAGE_BASE = TEST_STORAGE_BASE;
 594      process.env.CONTACTS_STORAGE_BASE = TEST_STORAGE_BASE;
 595      // Force programmatic scoring (not orchestrator mode) so the scoring stage runs in tests
 596      process.env.ENABLE_LLM_SCORING = 'false';
 597      // Programmatic scoring cannot detect country from the test page, setting country_code=null.
 598      // ENGLISH_ONLY_MARKETS filter (NULL IN ...) would then block enrichment. Disable for tests.
 599      // Set to empty string (not delete) so load-env.js dotenv.config() doesn't re-set it from .env.
 600      process.env.ENGLISH_ONLY_MARKETS = '';
 601  
 602      // Create and initialize database
 603      db = new Database(TEST_DB_PATH);
 604      initTestDatabase(db);
 605  
 606      // Register pg mock so any db.js imports by stages route through the same SQLite file
 607      mock.module('../../src/utils/db.js', { namedExports: createPgMock(db) });
 608    });
 609  
 610    after(() => {
 611      if (db) {
 612        db.close();
 613      }
 614  
 615      // Clean up isolated storage directory (contains scores/ and contacts/ subdirs)
 616      try {
 617        if (existsSync(TEST_STORAGE_BASE)) {
 618          rmSync(TEST_STORAGE_BASE, { recursive: true, force: true });
 619        }
 620      } catch { /* ignore */ }
 621  
 622      // Restore env
 623      delete process.env.ENABLE_LLM_SCORING;
 624      delete process.env.ENGLISH_ONLY_MARKETS;
 625      delete process.env.DATABASE_PATH;
 626      delete process.env.SCREENSHOT_BASE_PATH;
 627      delete process.env.SCORE_STORAGE_BASE;
 628      delete process.env.CONTACTS_STORAGE_BASE;
 629  
 630      // Save actual values for manual review
 631      saveActualValues();
 632  
 633      // Keep test artifacts for manual inspection
 634      debug('='.repeat(80));
 635      debug('TEST ARTIFACTS PRESERVED');
 636      debug('='.repeat(80));
 637      debug('Test artifacts location:', {
 638        database: TEST_DB_PATH,
 639        screenshots: TEST_SCREENSHOT_DIR,
 640        actualValues: ACTUAL_VALUES_PATH,
 641      });
 642      debug('\nTo inspect:');
 643      debug(`  Database:     sqlite3 ${TEST_DB_PATH}`);
 644      debug(`  Screenshots:  ls -la ${TEST_SCREENSHOT_DIR}`);
 645      debug(`  Results:      cat ${ACTUAL_VALUES_PATH}`);
 646  
 647      debug('\n='.repeat(80));
 648      debug('END-TO-END PIPELINE TEST COMPLETED');
 649      debug('='.repeat(80));
 650    });
 651  
 652    test('Stage 0: Setup - Add keyword and site to database', async () => {
 653      debug('-'.repeat(80));
 654      debug('STAGE 0: SETUP');
 655      debug('-'.repeat(80));
 656  
 657      // Insert keyword with required country_code and google_domain
 658      const insertKeyword = db.prepare(`
 659        INSERT INTO keywords (keyword, country_code, google_domain)
 660        VALUES (?, ?, ?)
 661      `);
 662      const keywordResult = insertKeyword.run(TEST_KEYWORD, 'US', 'google.com');
 663      keywordId = keywordResult.lastInsertRowid;
 664  
 665      debug(`Inserted keyword: "${TEST_KEYWORD}" (ID: ${keywordId})`);
 666  
 667      // Verify keyword
 668      const keyword = db.prepare('SELECT * FROM keywords WHERE id = ?').get(keywordId);
 669      validateRecord(keyword, ['id', 'keyword'], 'Keyword');
 670      assert.strictEqual(keyword.keyword, TEST_KEYWORD);
 671  
 672      // Insert site (extract domain from URL)
 673      const url = new URL(TEST_URL);
 674      const domain = url.hostname;
 675  
 676      const insertSite = db.prepare(`
 677        INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
 678        VALUES (?, ?, ?, 'found', 'US', 'google.com')
 679      `);
 680      const siteResult = insertSite.run(domain, TEST_URL, TEST_KEYWORD);
 681      siteId = siteResult.lastInsertRowid;
 682  
 683      debug(`Inserted site: ${TEST_URL} (ID: ${siteId})`);
 684  
 685      // Verify site
 686      const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 687      validateRecord(site, ['id', 'domain', 'landing_page_url', 'keyword', 'status'], 'Site');
 688      assert.strictEqual(site.domain, domain);
 689      assert.strictEqual(site.landing_page_url, TEST_URL);
 690      assert.strictEqual(site.keyword, TEST_KEYWORD);
 691      assert.strictEqual(site.status, 'found');
 692  
 693      actualValues.stages.setup = {
 694        keywordId,
 695        siteId,
 696        status: 'completed',
 697      };
 698  
 699      debug('✅ STAGE 0 COMPLETE: Setup successful');
 700    });
 701  
 702    test('Stage 1: Assets - Capture screenshots', async () => {
 703      checkFailFast('Stage 1: Assets');
 704      try {
 705        debug('-'.repeat(80));
 706        debug('STAGE 1: ASSETS');
 707        debug('-'.repeat(80));
 708  
 709        // Import and run assets stage
 710        const { runAssetsStage } = await import('../../src/stages/assets.js');
 711  
 712        debug('Starting screenshot capture...');
 713        const startTime = Date.now();
 714        const stats = await runAssetsStage({ limit: 1 });
 715        const duration = Date.now() - startTime;
 716  
 717        debug('Assets stage completed', {
 718          duration: `${duration}ms`,
 719          stats,
 720        });
 721  
 722        // Validate stats
 723        assert.strictEqual(stats.processed, 1, 'Should process 1 site');
 724        assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site');
 725        assert.strictEqual(stats.failed, 0, 'Should have 0 failures');
 726  
 727        // Verify site record updated
 728        const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 729        validateRecord(site, ['status'], 'Site after assets');
 730        assert.strictEqual(site.status, 'assets_captured', 'Status should be "assets_captured"');
 731  
 732        const visionEnabled = process.env.ENABLE_VISION === 'true';
 733  
 734        if (visionEnabled && site.screenshot_path) {
 735          // Verify screenshot directory exists (vision mode only)
 736          const screenshotSiteId = site.screenshot_path.split('/').pop();
 737          const screenshotPath = join(TEST_SCREENSHOT_DIR, screenshotSiteId);
 738          assert.ok(existsSync(screenshotPath), 'Screenshot directory should exist');
 739  
 740          const expectedFiles = [
 741            'desktop_above.jpg',
 742            'desktop_above_uncropped.jpg',
 743            'desktop_below.jpg',
 744            'desktop_below_uncropped.jpg',
 745            'mobile_above.jpg',
 746            'mobile_above_uncropped.jpg',
 747          ];
 748  
 749          for (const file of expectedFiles) {
 750            const filePath = join(screenshotPath, file);
 751            assert.ok(existsSync(filePath), `Screenshot file should exist: ${file}`);
 752          }
 753  
 754          actualValues.stages.assets = {
 755            screenshotPath: site.screenshot_path,
 756            filesCreated: expectedFiles,
 757            duration,
 758            status: 'completed',
 759          };
 760  
 761          debug('✅ STAGE 1 COMPLETE: Screenshots captured', {
 762            screenshotPath: site.screenshot_path,
 763            filesCreated: expectedFiles.length,
 764          });
 765        } else {
 766          // HTML-only mode: verify html_dom was captured instead
 767          assert.ok(site.html_dom, 'Site should have html_dom in HTML-only mode');
 768          actualValues.stages.assets = {
 769            htmlOnly: true,
 770            htmlDomLength: site.html_dom?.length || 0,
 771            duration,
 772            status: 'completed',
 773          };
 774          debug('✅ STAGE 1 COMPLETE: DOM HTML captured (vision disabled)', {
 775            htmlDomLength: site.html_dom?.length || 0,
 776          });
 777        }
 778      } catch (error) {
 779        markTestFailed(error);
 780      }
 781    });
 782  
 783    test('Stage 2: Scoring - Initial AI scoring', async () => {
 784      checkFailFast('Stage 2: Scoring');
 785      try {
 786        debug('-'.repeat(80));
 787        debug('STAGE 2: SCORING');
 788        debug('-'.repeat(80));
 789  
 790        // Import and run scoring stage
 791        const { runScoringStage } = await import('../../src/stages/scoring.js');
 792  
 793        debug('Starting AI scoring...');
 794        const startTime = Date.now();
 795        const stats = await runScoringStage({ limit: 1 });
 796        const duration = Date.now() - startTime;
 797  
 798        debug('Scoring stage completed', {
 799          duration: `${duration}ms`,
 800          stats,
 801        });
 802  
 803        // Validate stats
 804        assert.strictEqual(stats.processed, 1, 'Should process 1 site');
 805        assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site');
 806        assert.strictEqual(stats.failed, 0, 'Should have 0 failures');
 807  
 808        // Verify site record updated
 809        const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 810        validateRecord(
 811          site,
 812          ['score', 'grade', 'scored_at', 'status'],
 813          'Site after scoring'
 814        );
 815        // Scoring always sets status to 'prog_scored', 'semantic_scored', or 'high_score'
 816        assert.ok(
 817          ['prog_scored', 'semantic_scored', 'vision_scored', 'high_score'].includes(site.status),
 818          `Status should be prog_scored/semantic_scored/vision_scored/high_score, got: ${site.status}`
 819        );
 820  
 821        // Validate score
 822        assert.ok(typeof site.score === 'number', 'Score should be a number');
 823        assert.ok(site.score >= 0 && site.score <= 100, `Score ${site.score} should be 0-100`);
 824  
 825        // Validate grade
 826        const validGrades = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
 827        assert.ok(validGrades.includes(site.grade), `Grade ${site.grade} should be valid`);
 828  
 829        // Validate reasoning exists (score_json now on filesystem)
 830        const scoreJsonRaw = getScoreJson(siteId);
 831        assert.ok(scoreJsonRaw && scoreJsonRaw.length > 0, 'Should have score_json on filesystem');
 832  
 833        // Parse and validate conversion_score_json if expected values exist
 834        let scoringJson = null;
 835        try {
 836          scoringJson = JSON.parse(scoreJsonRaw);
 837          validateScoreJson(scoringJson, expectedValues);
 838        } catch (err) {
 839          debug(`⚠️  Could not parse score_json as JSON: ${err.message}`);
 840        }
 841  
 842        actualValues.stages.scoring = {
 843          score: site.score,
 844          grade: site.grade,
 845          scoringReasoning: scoringJson || scoreJsonRaw,
 846          duration,
 847          status: 'completed',
 848        };
 849  
 850        debug('✅ STAGE 2 COMPLETE: Site scored', {
 851          score: site.score,
 852          grade: site.grade,
 853          reasoningLength: scoreJsonRaw?.length || 0,
 854        });
 855      } catch (error) {
 856        markTestFailed(error);
 857      }
 858    });
 859  
 860    test('Stage 3: Rescoring - Rescore if B- or below', async () => {
 861      checkFailFast('Stage 3: Rescoring');
 862      debug('-'.repeat(80));
 863      debug('STAGE 3: RESCORING');
 864      debug('-'.repeat(80));
 865  
 866      // Check if site needs rescoring
 867      const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 868      // In HTML-only mode (ENABLE_VISION=false), scoring promotes directly to 'rescored'
 869      // Rescoring stage only processes sites with status='scored' (vision mode only)
 870      const needsRescoring = site.status === 'prog_scored' && site.score <= 82;
 871  
 872      debug(`Site score: ${site.score} (${site.grade})`);
 873      debug(`Site status: ${site.status}`);
 874      debug(`Needs rescoring: ${needsRescoring}`);
 875  
 876      if (needsRescoring) {
 877        // Import and run rescoring stage
 878        const { runRescoringStage } = await import('../../src/stages/rescoring.js');
 879  
 880        debug('Starting rescoring with below-fold screenshots...');
 881        const startTime = Date.now();
 882        const stats = await runRescoringStage({ limit: 1 });
 883        const duration = Date.now() - startTime;
 884  
 885        debug('Rescoring stage completed', {
 886          duration: `${duration}ms`,
 887          stats,
 888        });
 889  
 890        // Validate stats
 891        assert.strictEqual(stats.processed, 1, 'Should process 1 site');
 892        assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site');
 893        assert.strictEqual(stats.failed, 0, 'Should have 0 failures');
 894  
 895        // Verify site record updated
 896        const rescoredSite = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 897        validateRecord(rescoredSite, ['score', 'grade', 'rescored_at'], 'Site after rescoring');
 898  
 899        // Validate city/country_code populated from score_json (now on filesystem)
 900        const rescoredScoreJsonRaw = getScoreJson(siteId);
 901        if (rescoredScoreJsonRaw) {
 902          const scoreJson = JSON.parse(rescoredScoreJsonRaw);
 903          if (scoreJson.contact_details?.city) {
 904            assert.strictEqual(
 905              rescoredSite.city,
 906              scoreJson.contact_details.city,
 907              'City column should match contact_details.city'
 908            );
 909            debug(`✅ City populated: ${rescoredSite.city}`);
 910          }
 911          if (scoreJson.contact_details?.country_code) {
 912            assert.strictEqual(
 913              rescoredSite.country_code,
 914              scoreJson.contact_details.country_code,
 915              'Country code column should match contact_details.country_code'
 916            );
 917            debug(`✅ Country code populated: ${rescoredSite.country_code}`);
 918          }
 919          if (scoreJson.contact_details?.state) {
 920            assert.strictEqual(
 921              rescoredSite.state,
 922              scoreJson.contact_details.state,
 923              'State column should match contact_details.state'
 924            );
 925            debug(`✅ State populated: ${rescoredSite.state}`);
 926          }
 927        }
 928  
 929        // Validate score changed or stayed same
 930        assert.ok(typeof rescoredSite.score === 'number', 'Score should be a number');
 931        assert.ok(
 932          rescoredSite.score >= 0 && rescoredSite.score <= 100,
 933          `Score ${rescoredSite.score} should be 0-100`
 934        );
 935  
 936        actualValues.stages.rescoring = {
 937          oldScore: site.score,
 938          newScore: rescoredSite.score,
 939          oldGrade: site.grade,
 940          newGrade: rescoredSite.grade,
 941          improved: stats.improved > 0,
 942          duration,
 943          status: 'completed',
 944        };
 945  
 946        debug('✅ STAGE 3 COMPLETE: Site rescored', {
 947          oldScore: site.score,
 948          newScore: rescoredSite.score,
 949          oldGrade: site.grade,
 950          newGrade: rescoredSite.grade,
 951          improved: stats.improved,
 952        });
 953      } else {
 954        const skipReason =
 955          site.status === 'semantic_scored'
 956            ? 'HTML-only mode: scoring already promoted to semantic_scored'
 957            : 'Score above cutoff (B- or 82)';
 958        actualValues.stages.rescoring = {
 959          status: 'skipped',
 960          reason: skipReason,
 961        };
 962        debug(`⏭️  STAGE 3 SKIPPED: ${skipReason}`);
 963      }
 964    });
 965  
 966    test('Stage 5: Enrichment - Browse key pages for contact info', async () => {
 967      checkFailFast('Stage 5: Enrichment');
 968      try {
 969        debug('-'.repeat(80));
 970        debug('STAGE 5: ENRICHMENT');
 971        debug('-'.repeat(80));
 972  
 973        // Get current site status
 974        const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
 975  
 976        debug(`Site status: ${site.status}`);
 977        debug(`Score: ${site.score} (${site.grade})`);
 978  
 979        // Enrichment runs on rescored sites (or scored sites if score <= 82)
 980        const needsEnrichment =
 981          site.status === 'semantic_scored' ||
 982          site.status === 'vision_scored' ||
 983          site.status === 'prog_scored';
 984  
 985        if (needsEnrichment) {
 986          // Import and run enrichment stage
 987          const { runEnrichmentStage } = await import('../../src/stages/enrich.js');
 988  
 989          debug('Browsing key pages for additional contact info...');
 990          const startTime = Date.now();
 991          const stats = await runEnrichmentStage({ limit: 1 });
 992          const duration = Date.now() - startTime;
 993  
 994          debug('Enrichment stage completed', {
 995            duration: `${duration}ms`,
 996            stats,
 997          });
 998  
 999          // Verify site record updated
1000          const enrichedSite = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
1001  
1002          // Site should be enriched or skipped (if already had contact form)
1003          if (stats.skipped > 0) {
1004            debug('Enrichment skipped (already has contact form)');
1005            actualValues.stages.enrichment = {
1006              status: 'skipped',
1007              reason: 'Already has contact form',
1008              duration,
1009            };
1010          } else {
1011            validateRecord(enrichedSite, ['enriched_at'], 'Site after enrichment');
1012            assert.ok(
1013              ['enriched_regex', 'enriched_llm', 'enriched'].includes(enrichedSite.status),
1014              `Status should be an enriched variant, got: ${enrichedSite.status}`
1015            );
1016  
1017            // Parse and check enriched contacts (contacts_json now on filesystem)
1018            const enrichedContactsRaw = getContactsJson(siteId);
1019            assert.ok(enrichedContactsRaw, 'contacts_json should exist on filesystem');
1020            const enrichedContacts = JSON.parse(enrichedContactsRaw);
1021  
1022            // Validate against expected contacts if provided
1023            validateContactsJson(enrichedContacts, expectedValues);
1024  
1025            // Validate city/country_code columns populated from contacts_json
1026            if (enrichedContacts.city) {
1027              assert.strictEqual(
1028                enrichedSite.city,
1029                enrichedContacts.city,
1030                'City column should match contacts_json.city'
1031              );
1032              debug(`✅ City column populated: ${enrichedSite.city}`);
1033            }
1034            if (enrichedContacts.country_code) {
1035              assert.strictEqual(
1036                enrichedSite.country_code,
1037                enrichedContacts.country_code,
1038                'Country code column should match contacts_json.country_code'
1039              );
1040              debug(`✅ Country code column populated: ${enrichedSite.country_code}`);
1041            }
1042            if (enrichedContacts.state) {
1043              assert.strictEqual(
1044                enrichedSite.state,
1045                enrichedContacts.state,
1046                'State column should match contacts_json.state'
1047              );
1048              debug(`✅ State column populated: ${enrichedSite.state}`);
1049            }
1050  
1051            actualValues.stages.enrichment = {
1052              formsFound: stats.formsFound || 0,
1053              emailsFound: stats.emailsFound || 0,
1054              phonesFound: stats.phonesFound || 0,
1055              hasContactForm: !!enrichedContacts.primary_contact_form,
1056              emailCount: enrichedContacts.email_addresses?.length || 0,
1057              phoneCount: enrichedContacts.phone_numbers?.length || 0,
1058              duration,
1059              status: 'completed',
1060            };
1061  
1062            debug('✅ STAGE 5 COMPLETE: Site enriched', {
1063              formsFound: stats.formsFound,
1064              emailsFound: stats.emailsFound,
1065              phonesFound: stats.phonesFound,
1066            });
1067          }
1068        } else {
1069          actualValues.stages.enrichment = {
1070            status: 'skipped',
1071            reason: 'Site not at rescored/scored stage',
1072          };
1073          debug('⏭️  STAGE 5 SKIPPED: Site not at rescored/scored stage');
1074        }
1075      } catch (error) {
1076        markTestFailed(error);
1077      }
1078    });
1079  
1080    test('Stage 6: Proposals - Generate proposals for low-scoring sites', async () => {
1081      checkFailFast('Stage 6: Proposals');
1082      debug('-'.repeat(80));
1083      debug('STAGE 6: PROPOSALS');
1084      debug('-'.repeat(80));
1085  
1086      // Bump enriched_regex → enriched_llm so proposals stage can find the site.
1087      // Also set country_code = 'AU' if null (regex-only enrichment can't extract country;
1088      // the LLM pass would normally do this). 'AU' has proposal templates and is safe for testing.
1089      // In production the orchestrator LLM pass handles both status promotion and geo extraction.
1090      db.prepare(
1091        `UPDATE sites
1092         SET status = 'enriched_llm',
1093             country_code = COALESCE(country_code, 'AU')
1094         WHERE id = ? AND status = 'enriched_regex'`
1095      ).run(siteId);
1096  
1097      // Check if site needs proposals
1098      const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
1099      const needsProposals = site.score <= 82;
1100  
1101      debug(`Site score: ${site.score} (${site.grade})`);
1102      debug(`Needs proposals: ${needsProposals}`);
1103  
1104      if (needsProposals) {
1105        // Import and run proposals stage
1106        const { runProposalsStage } = await import('../../src/stages/proposals.js');
1107  
1108        debug('Generating proposal variants...');
1109        const startTime = Date.now();
1110        const stats = await runProposalsStage({ limit: 1 });
1111        const duration = Date.now() - startTime;
1112  
1113        debug('Proposals stage completed', {
1114          duration: `${duration}ms`,
1115          stats,
1116        });
1117  
1118        // Validate stats
1119        assert.strictEqual(stats.processed, 1, 'Should process 1 site');
1120        assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site');
1121        assert.strictEqual(stats.failed, 0, 'Should have 0 failures');
1122  
1123        // Verify outreach records created
1124        // Note: Number of outreach variants depends on number of contacts found
1125        // The new proposal generator creates N proposals for N contacts
1126        const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId);
1127        debug(`Created ${messages.length} outreach variants (depends on contacts found)`);
1128  
1129        const variants = [];
1130  
1131        if (messages.length > 0) {
1132          // Validate each outreach
1133          for (let i = 0; i < messages.length; i++) {
1134            const outreach = messages[i];
1135            validateRecord(
1136              outreach,
1137              ['id', 'site_id', 'message_body', 'contact_method'],
1138              `Outreach variant ${i + 1}`
1139            );
1140            assert.strictEqual(outreach.direction, 'outbound', `Message should be outbound`);
1141            assert.strictEqual(outreach.approval_status, 'pending', 'Status should be "pending"');
1142            assert.ok(outreach.message_body.length > 0, 'Should have proposal text');
1143  
1144            const subjectOrProposal = outreach.subject_line || outreach.message_body.substring(0, 50);
1145            variants.push({
1146              variant: i + 1,
1147              subject: subjectOrProposal,
1148              messageLength: outreach.message_body.length,
1149            });
1150  
1151            debug(`Variant ${i + 1}`, {
1152              subject: `${subjectOrProposal}...`,
1153              messageLength: outreach.message_body.length,
1154            });
1155          }
1156  
1157          actualValues.stages.proposals = {
1158            variants,
1159            duration,
1160            status: 'completed',
1161          };
1162  
1163          debug('✅ STAGE 6 COMPLETE: Proposals generated', {
1164            variants: messages.length,
1165          });
1166        } else {
1167          actualValues.stages.proposals = {
1168            variants: [],
1169            duration,
1170            status: 'completed_no_contacts',
1171            reason: 'No contacts found - no proposals generated',
1172          };
1173  
1174          debug('⚠️  STAGE 6 COMPLETE: No proposals generated (no contacts found)');
1175        }
1176      } else {
1177        actualValues.stages.proposals = {
1178          status: 'skipped',
1179          reason: 'Score above cutoff (no proposals needed)',
1180        };
1181        debug('⏭️  STAGE 6 SKIPPED: Score above cutoff (no proposals needed)');
1182      }
1183    });
1184  
1185    test('Stage 7: Outreach - Actually send messages', async () => {
1186      checkFailFast('Stage 7: Outreach');
1187      debug('-'.repeat(80));
1188      debug('STAGE 7: OUTREACH (ACTUAL SENDING)');
1189      debug('-'.repeat(80));
1190      debug('⚠️  WARNING: This will send real SMS, emails, and submit contact forms!');
1191  
1192      // Check if site has outreach records
1193      const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId);
1194  
1195      debug(`Found ${messages.length} outreach records`);
1196  
1197      if (messages.length > 0) {
1198        // Import prioritize function to assign contact methods
1199        const { prioritizeContacts } = await import('../../src/contacts/prioritize.js');
1200  
1201        debug('Prioritizing contact methods...');
1202        const prioritized = await prioritizeContacts();
1203  
1204        debug(`Prioritized ${prioritized} outreaches`);
1205  
1206        // Get updated outreaches with contact methods
1207        const updatedOutreaches = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId);
1208  
1209        // Import outreach stage to actually send
1210        const { runOutreachStage } = await import('../../src/stages/outreach.js');
1211  
1212        debug('Sending outreach messages...');
1213        const startTime = Date.now();
1214        // Send more outreaches to include SMS (emails are first 3, SMS are next 2)
1215        const stats = await runOutreachStage({ limit: 10, useBulk: false });
1216        const duration = Date.now() - startTime;
1217  
1218        debug('Outreach stage completed', {
1219          duration: `${duration}ms`,
1220          stats,
1221        });
1222  
1223        // Get final outreach states
1224        const sentOutreaches = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId);
1225  
1226        const outreachResults = sentOutreaches.map(o => ({
1227          contactMethod: o.contact_method,
1228          contactUri: o.contact_uri,
1229          status: o.status,
1230          sentAt: o.sent_at,
1231        }));
1232  
1233        actualValues.stages.outreach = {
1234          stats,
1235          outreaches: outreachResults,
1236          duration,
1237          status: 'completed',
1238        };
1239  
1240        for (const outreach of sentOutreaches) {
1241          debug(`Outreach ${outreach.id}`, {
1242            contactMethod: outreach.contact_method,
1243            contactUri: outreach.contact_uri,
1244            delivery_status: outreach.delivery_status,
1245            sentAt: outreach.sent_at,
1246          });
1247        }
1248  
1249        debug('✅ STAGE 7 COMPLETE: Outreach sent', {
1250          totalOutreaches: sentOutreaches.length,
1251          byChannel: stats.byChannel,
1252        });
1253      } else {
1254        actualValues.stages.outreach = {
1255          status: 'skipped',
1256          reason: 'No outreach records (high-scoring site)',
1257        };
1258        debug('⏭️  STAGE 7 SKIPPED: No outreach records (high-scoring site)');
1259      }
1260    });
1261  
1262    test('Stage 8: Final Verification - Validate complete pipeline', async () => {
1263      checkFailFast('Stage 8: Final Verification');
1264      debug('-'.repeat(80));
1265      debug('STAGE 8: FINAL VERIFICATION');
1266      debug('-'.repeat(80));
1267  
1268      // Get final site state
1269      const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId);
1270      const keyword = db.prepare('SELECT * FROM keywords WHERE id = ?').get(keywordId);
1271      const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId);
1272  
1273      debug('Final Site State', {
1274        id: site.id,
1275        url: site.url,
1276        keyword: keyword.keyword,
1277        status: site.status,
1278        score: site.score,
1279        grade: site.grade,
1280        hasScreenshots: !!site.screenshot_path,
1281        hasReasoning: !!site.scoring_reasoning,
1282        wasRescored: !!site.rescored_at,
1283        wasEnriched: !!site.enriched_at,
1284        outreachVariants: messages.length,
1285      });
1286  
1287      // Verify complete pipeline flow
1288      // After proposals and outreach stages, status should be proposals_drafted or outreach_sent
1289      const validStatuses = [
1290        'prog_scored',
1291        'semantic_scored',
1292        'vision_scored',
1293        'high_score',
1294        'enriched',
1295        'proposals_drafted',
1296        'outreach_sent',
1297      ];
1298      assert.ok(
1299        validStatuses.includes(site.status),
1300        `Final status should be one of ${validStatuses.join(', ')}, got "${site.status}"`
1301      );
1302      // In vision mode, screenshot_path is required; in HTML-only mode, html_dom is used instead
1303      const visionEnabledFinal = process.env.ENABLE_VISION !== 'false';
1304      if (visionEnabledFinal) {
1305        assert.ok(site.screenshot_path, 'Should have screenshots (vision mode)');
1306      } else {
1307        assert.ok(site.html_dom, 'Should have html_dom (HTML-only mode)');
1308      }
1309      assert.ok(site.score !== null, 'Should have score');
1310      assert.ok(site.grade, 'Should have grade');
1311      assert.ok(getScoreJson(siteId), 'Should have score_json on filesystem');
1312  
1313      // Verify outreach records if low-scoring
1314      // Note: Outreach variants depend on contacts being found
1315      // If no contacts found, no outreach records will be created (which is correct behavior)
1316      if (site.score <= 82 && messages.length > 0) {
1317        debug(`✅ Created ${messages.length} outreach variants for low-scoring site`);
1318      } else if (site.score <= 82 && messages.length === 0) {
1319        debug('⚠️  No outreach variants created (no contacts found on site)');
1320      } else if (site.score > 82) {
1321        assert.strictEqual(messages.length, 0, 'High-scoring site should have no outreach variants');
1322      }
1323  
1324      // Validate against expected values if provided
1325      if (expectedValues) {
1326        debug('Validating against expected values...');
1327  
1328        // Validate HTML DOM if available
1329        // Note: The schema uses BLOB for screenshots, not TEXT for html_dom
1330        // We'll need to check if html_dom is captured elsewhere
1331        // For now, we'll skip this validation
1332  
1333        debug('✅ Validation against expected values complete');
1334      } else {
1335        debug('⏭️  Skipping validation (no expected values provided)');
1336        debug('💡 Edit tests/expected-e2e.json with expected values to enable validation');
1337      }
1338  
1339      actualValues.stages.final = {
1340        finalScore: site.score,
1341        finalGrade: site.grade,
1342        finalStatus: site.status,
1343        wasRescored: !!site.rescored_at,
1344        wasEnriched: !!site.enriched_at,
1345        hasProposals: messages.length > 0,
1346        pipelineComplete: true,
1347      };
1348  
1349      debug('='.repeat(80));
1350      debug('✅ PIPELINE E2E TEST PASSED');
1351      debug('='.repeat(80));
1352      debug('Pipeline Summary', {
1353        url: TEST_URL,
1354        keyword: TEST_KEYWORD,
1355        finalScore: site.score,
1356        finalGrade: site.grade,
1357        wasRescored: !!site.rescored_at,
1358        wasEnriched: !!site.enriched_at,
1359        hasProposals: messages.length > 0,
1360        outreachSent: messages.some(o => o.delivery_status === 'sent'),
1361        pipelineComplete: true,
1362      });
1363    });
1364  });