pipeline-e2e.test.js
/**
 * End-to-End Pipeline Test
 * Tests complete pipeline flow for 1 site through all stages
 * - Actually sends SMS, email, and submits contact forms
 * - Validates outputs against expected values
 * - Saves test artifacts for manual review
 * - Stops at first failure
 */

import 'dotenv/config';
import { describe, test, before, after, mock } from 'node:test';
import assert from 'node:assert';
import Database from 'better-sqlite3';
import { createPgMock } from '../helpers/pg-mock.js';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { existsSync, mkdirSync, rmSync, readFileSync, writeFileSync } from 'fs';
import { getScoreJson, deleteScoreJson } from '../../src/utils/score-storage.js';
import { getContactsJson, getContactsData, deleteContactsJson } from '../../src/utils/contacts-storage.js';

// ES modules have no built-in __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Two directory levels above this test file.
const projectRoot = join(__dirname, '../..');

// Test configuration. TEST_E2E_URL / TEST_E2E_KEYWORD override the defaults below.
const TEST_URL = process.env.TEST_E2E_URL || 'https://example.com';
const TEST_KEYWORD = process.env.TEST_E2E_KEYWORD || 'test keyword';
// Scratch database and storage locations under /tmp, suffixed with the current process id.
const TEST_DB_PATH = join('/tmp', `test-e2e-${process.pid}.db`);
const TEST_STORAGE_BASE = join('/tmp', `test-e2e-storage-${process.pid}`);
// Screenshot output directory, located inside the project root.
const TEST_SCREENSHOT_DIR = join(projectRoot, 'screenshots-test-e2e');
// Expected-value fixtures.
// NOTE(review): the contacts fixture is resolved one directory ABOVE this file while
// the main fixture sits next to it — confirm that the difference is intentional.
const EXPECTED_VALUES_PATH = join(__dirname, 'expected-e2e.json');
const EXPECTED_CONTACTS_PATH = join(__dirname, '..', 'expected-e2e-contacts.json');
// Destination of the JSON report written by saveActualValues().
const ACTUAL_VALUES_PATH = join(projectRoot, 'test-results-e2e.json');

// Mutable state shared by the sequential stage tests.
let siteId; // row id of the site inserted in Stage 0
let keywordId; // row id of the keyword inserted in Stage 0
let expectedValues = null; // result of loadExpectedValues(); null when no fixtures are usable
let testFailed = false; // Set by markTestFailed(); checkFailFast() aborts later stages once true
// Observed values collected per stage; serialized to ACTUAL_VALUES_PATH for manual review.
const actualValues = {
  url: TEST_URL,
  keyword: TEST_KEYWORD,
  timestamp: new Date().toISOString(),
  stages: {},
};
/**
 * Print a timestamped diagnostic line, optionally followed by a JSON dump.
 *
 * @param {string} message - Text printed after the ISO-8601 timestamp.
 * @param {*} [data=null] - Optional payload, pretty-printed with two-space
 *   indentation. Only `null`/`undefined` suppress the dump, so falsy payloads
 *   such as `0`, `false` or `''` are still shown.
 * @returns {void}
 */
function debug(message, data = null) {
  const timestamp = new Date().toISOString();
  console.log(`\n[${timestamp}] ${message}`);
  // Explicit null check: a truthiness test would silently drop 0 / false / ''.
  if (data !== null && data !== undefined) {
    console.log(JSON.stringify(data, null, 2));
  }
}

/**
 * Fail-fast gate: abort the calling test when an earlier stage already failed.
 *
 * @param {string} testName - Name of the stage about to run (used in the log line).
 * @returns {void}
 * @throws {Error} 'Skipping test due to previous failure' when `testFailed` is set.
 */
function checkFailFast(testName) {
  if (!testFailed) {
    return;
  }
  debug(`⏭️ Skipping ${testName} (previous test failed - fail-fast mode)`);
  throw new Error('Skipping test due to previous failure');
}

/**
 * Record that a stage failed, then rethrow the original error unchanged so the
 * test runner still reports it.
 *
 * @param {Error} error - The error caught by the failing stage.
 * @returns {never}
 * @throws {Error} Always rethrows `error`.
 */
function markTestFailed(error) {
  testFailed = true;
  throw error;
}

/**
 * Assert that a database row exists and that each listed column is neither
 * `null` nor `undefined` (falsy values such as 0 or '' are accepted).
 *
 * @param {object} record - Row returned by the database.
 * @param {string[]} expectedFields - Column names that must be populated.
 * @param {string} recordType - Human-readable label used in assertion messages.
 * @returns {void}
 * @throws {AssertionError} When the row is missing or a field is null/undefined.
 */
function validateRecord(record, expectedFields, recordType) {
  assert.ok(record, `${recordType} record should exist`);
  for (const field of expectedFields) {
    const value = record[field];
    assert.ok(value !== null && value !== undefined, `${recordType} should have ${field}`);
  }
  debug(`✅ ${recordType} validation passed`);
}

/**
 * Load expected values from the two optional JSON fixtures.
 *
 * - EXPECTED_VALUES_PATH: its `expected` object is copied only when it has a
 *   `conversion_score_json` entry without a `note` key; otherwise the file is
 *   treated as an unfilled template.
 * - EXPECTED_CONTACTS_PATH: the parsed file is stored under `contacts_json`.
 *
 * Missing or unparsable files are logged and skipped; they never throw.
 *
 * @returns {object|null} Merged expectations, or `null` when nothing was loaded.
 */
function loadExpectedValues() {
  let expected = {};

  // Main expected values (score, html, etc.)
  if (existsSync(EXPECTED_VALUES_PATH)) {
    try {
      const data = JSON.parse(readFileSync(EXPECTED_VALUES_PATH, 'utf-8'));
      const scoreExpectation = data.expected?.conversion_score_json;

      // A `note` key marks the file as a template that was never filled in.
      if (scoreExpectation && !scoreExpectation.note) {
        debug('Loaded expected values from expected-e2e.json');
        expected = { ...data.expected };
      } else {
        debug('⚠️ expected-e2e.json is still a template. Score validation will be skipped.');
      }
    } catch (err) {
      debug(`⚠️ Could not parse expected-e2e.json: ${err.message}`);
    }
  } else {
    debug('⚠️ expected-e2e.json not found. Score validation will be skipped.');
  }

  // Expected contacts
  if (existsSync(EXPECTED_CONTACTS_PATH)) {
    try {
      const contactsData = JSON.parse(readFileSync(EXPECTED_CONTACTS_PATH, 'utf-8'));
      debug('Loaded expected contacts from expected-e2e-contacts.json');
      expected.contacts_json = contactsData;
    } catch (err) {
      debug(`⚠️ Could not parse expected-e2e-contacts.json: ${err.message}`);
    }
  } else {
    debug('⚠️ expected-e2e-contacts.json not found. Contact validation will be skipped.');
  }

  return Object.keys(expected).length > 0 ? expected : null;
}
/**
 * Write the collected `actualValues` object to ACTUAL_VALUES_PATH as
 * pretty-printed JSON so a human can inspect the run afterwards.
 * Best effort: any serialization or write error is logged, never thrown.
 *
 * @returns {void}
 */
function saveActualValues() {
  try {
    const payload = JSON.stringify(actualValues, null, 2);
    writeFileSync(ACTUAL_VALUES_PATH, payload);
    debug(`✅ Saved actual values to ${ACTUAL_VALUES_PATH}`);
  } catch (writeError) {
    debug(`⚠️ Could not save actual values: ${writeError.message}`);
  }
}

/**
 * Compare a parsed score document with the optional expectations.
 *
 * Validation is skipped entirely unless `expected.conversion_score_json` is
 * present. When it is, `expected.score_range` ({min, max}, inclusive) and
 * `expected.expected_grade` are each checked if they are set.
 *
 * @param {object} actual - Parsed score JSON; `overall_calculation.conversion_score`
 *   and `overall_calculation.grade` are read from it.
 * @param {object|null} expected - Expectations from loadExpectedValues().
 * @returns {void}
 * @throws {AssertionError} When the score is out of range or the grade differs.
 */
function validateScoreJson(actual, expected) {
  const hasExpectations = Boolean(expected && expected.conversion_score_json);
  if (!hasExpectations) {
    debug('⏭️ Skipping conversion_score_json validation (no expected values)');
    return;
  }

  debug('Validating conversion_score_json...');

  const { score_range: range, expected_grade: wantedGrade } = expected;

  // Score must fall inside the inclusive [min, max] window
  if (range) {
    const score = actual.overall_calculation?.conversion_score;
    const withinRange = score >= range.min && score <= range.max;
    assert.ok(withinRange, `Score ${score} should be in range ${range.min}-${range.max}`);
    debug(`✅ Score ${score} is within expected range`);
  }

  // Grade must match exactly when one is specified
  if (wantedGrade) {
    const grade = actual.overall_calculation?.grade;
    assert.strictEqual(grade, wantedGrade, `Grade should be ${wantedGrade}`);
    debug(`✅ Grade ${grade} matches expected`);
  }
}
/**
 * Check if actual label matches any variation of expected label.
 *
 * Matching is case-insensitive, ignores surrounding whitespace and accepts a
 * substring relationship in either direction. Blank labels, blank variations
 * and non-string values never match (an empty string is a substring of every
 * string, so without this guard a blank value would match anything).
 *
 * @param {string} actualLabel - Label from extraction
 * @param {Array<string>} labelVariations - Acceptable label variations
 * @returns {boolean} True if label matches any variation
 */
function labelMatches(actualLabel, labelVariations) {
  if (typeof actualLabel !== 'string' || !Array.isArray(labelVariations)) return false;

  const normalized = actualLabel.toLowerCase().trim();
  if (normalized === '') return false;

  return labelVariations.some(variation => {
    if (typeof variation !== 'string') return false;
    const candidate = variation.toLowerCase().trim();
    // Equality is covered by includes(); either side may contain the other.
    return candidate !== '' && (normalized.includes(candidate) || candidate.includes(normalized));
  });
}

// Read `key` from an object-shaped contact entry; bare values (e.g. strings) are returned as-is.
function contactValue(entry, key) {
  return entry !== null && typeof entry === 'object' ? entry[key] : entry;
}

// Reduce a phone number to its digits; null for values that cannot be a phone number.
function digitsOnly(value) {
  return typeof value === 'string' || typeof value === 'number'
    ? String(value).replace(/\D/g, '')
    : null;
}

// Drop a single trailing slash from a URL; null for non-string values.
function stripTrailingSlash(value) {
  return typeof value === 'string' ? value.replace(/\/$/, '') : null;
}

// Log (never fail) when an object-shaped entry carries a label outside the accepted variations.
function warnIfLabelMismatch(kind, identifier, actualEntry, variations) {
  if (!Array.isArray(variations) || actualEntry === null || typeof actualEntry !== 'object') return;
  if (!labelMatches(actualEntry.label, variations)) {
    debug(
      `⚠️ ${kind} ${identifier} label "${actualEntry.label}" doesn't match variations: ${variations.join(', ')}`
    );
  }
}

// Validate one contact list (emails, phones or social profiles): array shape, exact count, each expected entry present.
function validateContactList(actual, expectedContacts, spec) {
  const { listKey, valueKey, plural, singular, warnKind, normalize, visionGated, visionEnabled } = spec;

  const expectedList = expectedContacts[listKey];
  if (!Array.isArray(expectedList)) return;

  const actualList = actual[listKey];
  assert.ok(Array.isArray(actualList), `contacts_json should have ${listKey} array`);

  // Entries flagged requires_vision only apply when vision extraction is enabled.
  const applicable = visionGated
    ? expectedList.filter(item => !item?.requires_vision || visionEnabled)
    : expectedList;

  const actualCount = actualList.length;
  const expectedCount = applicable.length;
  assert.strictEqual(
    actualCount,
    expectedCount,
    `Should have ${expectedCount} ${plural}, got ${actualCount}`
  );

  for (const wanted of applicable) {
    const wantedValue = contactValue(wanted, valueKey);
    const target = normalize(wantedValue);
    const match = actualList.find(entry => {
      const candidate = normalize(contactValue(entry, valueKey));
      // null marks an unusable value; it must never match anything.
      return candidate !== null && candidate === target;
    });
    assert.ok(match, `Should have ${singular} ${wantedValue}`);

    // Label check is advisory only (flexible matching)
    warnIfLabelMismatch(warnKind, wantedValue, match, wanted?.label_variations);
  }

  debug(`✅ Found ${actualCount} ${plural} (expected ${expectedCount})`);
}

// Validate the primary contact form; the field count is compared only when both sides list fields.
function validatePrimaryContactForm(actualForm, expectedForm) {
  if (!expectedForm) return;

  assert.ok(actualForm, 'contacts_json should have primary_contact_form');

  // Field count check only applies post-LLM enrichment when actual form has fields populated.
  // After regex-only enrichment, primary_contact_form is {form_url, form_action_url} with no fields key.
  if (expectedForm.fields && typeof actualForm === 'object' && actualForm.fields) {
    const expectedFieldCount = Object.keys(expectedForm.fields).length;
    const actualFieldCount = Object.keys(actualForm.fields).length;
    assert.strictEqual(
      actualFieldCount,
      expectedFieldCount,
      `Form should have exactly ${expectedFieldCount} fields, got ${actualFieldCount}`
    );
    debug(`✅ Form has ${actualFieldCount} fields (expected ${expectedFieldCount})`);
    return;
  }

  const formDesc = typeof actualForm === 'string' ? actualForm : actualForm?.form_url;
  debug(`⏭️ Form field validation skipped (fields not yet extracted; form = ${formDesc})`);
}

// Validate an optional scalar: compared strictly when extracted, otherwise only a warning is logged.
function validateOptionalContactField(actual, expectedContacts, { key, label, message }) {
  const wanted = expectedContacts[key];
  if (!wanted) return;

  const found = actual[key];
  if (!found) {
    debug(`⚠️ ${label} not extracted by LLM (expected: ${wanted})`);
    return;
  }

  assert.strictEqual(found, wanted, message(wanted));
  debug(`✅ ${label}: ${found}`);
}

/**
 * Validate contacts_json against expected patterns.
 *
 * Skipped when `expected.contacts_json` is absent. Otherwise each section that
 * the expectations define is checked:
 * - email_addresses / phone_numbers: exact count and presence of every expected
 *   entry (entries flagged `requires_vision` count only when ENABLE_VISION is
 *   'true'; phones are compared digits-only)
 * - primary_contact_form: must exist; field count compared when both sides have fields
 * - social_profiles: exact count and presence (trailing slash ignored)
 * - business_name, city, country_code, state: compared only when extracted
 *
 * Label mismatches are logged as warnings and never fail the validation.
 * Malformed actual entries (null items, non-string phone/url values) are
 * treated as non-matching instead of raising a TypeError.
 *
 * @param {object} actual - Extracted contacts document.
 * @param {object|null} expected - Expectations from loadExpectedValues().
 * @returns {void}
 * @throws {AssertionError} On the first mismatch.
 */
function validateContactsJson(actual, expected) {
  if (!expected || !expected.contacts_json) {
    debug('⏭️ Skipping contacts_json validation (no expected values)');
    return;
  }

  debug('Validating contacts_json...');

  assert.ok(actual, 'contacts_json should exist');

  const expectedContacts = expected.contacts_json;
  const visionEnabled = process.env.ENABLE_VISION === 'true';

  validateContactList(actual, expectedContacts, {
    listKey: 'email_addresses',
    valueKey: 'email',
    plural: 'email addresses',
    singular: 'email',
    warnKind: 'Email',
    normalize: value => value,
    visionGated: true,
    visionEnabled,
  });

  validateContactList(actual, expectedContacts, {
    listKey: 'phone_numbers',
    valueKey: 'number',
    plural: 'phone numbers',
    singular: 'phone',
    warnKind: 'Phone',
    normalize: digitsOnly,
    visionGated: true,
    visionEnabled,
  });

  validatePrimaryContactForm(actual.primary_contact_form, expectedContacts.primary_contact_form);

  validateContactList(actual, expectedContacts, {
    listKey: 'social_profiles',
    valueKey: 'url',
    plural: 'social profiles',
    singular: 'social profile',
    warnKind: 'Social',
    normalize: stripTrailingSlash,
    visionGated: false,
    visionEnabled,
  });

  // Optional scalars - the LLM may not always extract them
  const optionalFields = [
    { key: 'business_name', label: 'Business name', message: wanted => `Business name should be "${wanted}"` },
    { key: 'city', label: 'City', message: () => 'City should match' },
    { key: 'country_code', label: 'Country code', message: () => 'Country code should match' },
    { key: 'state', label: 'State', message: () => 'State should match' },
  ];
  for (const field of optionalFields) {
    validateOptionalContactField(actual, expectedContacts, field);
  }

  debug('✅ contacts_json validation passed');
}
/**
 * Validate HTML DOM against expected patterns.
 *
 * Skipped when `expected.html_dom` is absent. Otherwise the captured HTML must
 * be a string and satisfy every configured check:
 * - `min_length`: minimum number of characters
 * - `patterns`: substrings that must all be present
 * - `should_contain`: further substrings that must all be present
 *
 * @param {string} actual - Captured page HTML.
 * @param {object|null} expected - Expectations from loadExpectedValues().
 * @returns {void}
 * @throws {AssertionError} When the HTML is missing/not a string or a check fails.
 */
function validateHtmlDom(actual, expected) {
  if (!expected || !expected.html_dom) {
    debug('⏭️ Skipping html_dom validation (no expected values)');
    return;
  }

  debug('Validating html_dom...');

  // Fail with a readable assertion instead of a TypeError on `.length` / `.includes`
  // when the HTML was never captured.
  assert.ok(typeof actual === 'string', `html_dom should be a string, got ${typeof actual}`);

  const htmlExpected = expected.html_dom;

  // Check minimum length
  if (htmlExpected.min_length) {
    assert.ok(
      actual.length >= htmlExpected.min_length,
      `HTML length ${actual.length} should be >= ${htmlExpected.min_length}`
    );
    debug(`✅ HTML length ${actual.length} meets minimum`);
  }

  // Check for required patterns
  if (htmlExpected.patterns) {
    for (const pattern of htmlExpected.patterns) {
      assert.ok(actual.includes(pattern), `HTML should contain pattern: ${pattern}`);
    }
    debug(`✅ All required patterns found in HTML`);
  }

  // Check for required content
  if (htmlExpected.should_contain) {
    for (const content of htmlExpected.should_contain) {
      assert.ok(actual.includes(content), `HTML should contain: ${content}`);
    }
    debug(`✅ All required content found in HTML`);
  }
}

/**
 * Initialize test database with schema.
 *
 * Steps, in order:
 * 1. Execute `db/schema.sql` (relative to projectRoot) inside one transaction.
 * 2. Apply the listed migration files from `db/migrations` that exist on disk.
 * 3. Attempt to rebuild `messages` into an `outreaches` table.
 * 4. Upsert the `low_score_cutoff` config row with value '82'.
 *
 * @param {object} db - Database handle exposing `exec(sql)` and
 *   `prepare(sql).run()` (a better-sqlite3 Database in this file).
 * @returns {void}
 * @throws {Error} When the schema fails for a reason other than "already exists",
 *   or when the final config insert fails.
 */
function initTestDatabase(db) {
  debug('Initializing test database...');

  // Read and execute schema
  const schemaPath = join(projectRoot, 'db/schema.sql');
  const schema = readFileSync(schemaPath, 'utf-8');

  // Execute entire schema at once (better-sqlite3 supports multiple statements).
  // Wrap in transaction to avoid 20+ second fsync overhead on Docker overlay
  // filesystems (135+ DDL statements each doing individual fsync).
  try {
    db.exec('BEGIN;');
    db.exec(schema);
    db.exec('COMMIT;');
  } catch (err) {
    // Best-effort rollback; a failure here (e.g. no open transaction) is irrelevant.
    try { db.exec('ROLLBACK;'); } catch { /* ignore */ }
    // Only ignore "already exists" errors
    // NOTE(review): the ROLLBACK above also discards every statement that ran
    // before the "already exists" error — confirm this path is only hit when
    // the schema is already fully in place.
    if (!err.message.includes('already exists')) {
      debug(`Schema execution error: ${err.message}`);
      throw err;
    }
  }

  // Apply migrations
  const migrations = [
    'add-keywords-table.sql',
    '005-add-screenshot-path.sql',
    'add-config-low-score-cutoff.sql',
  ];

  for (const migration of migrations) {
    const migrationPath = join(projectRoot, 'db/migrations', migration);
    if (existsSync(migrationPath)) {
      const sql = readFileSync(migrationPath, 'utf-8');
      try {
        db.exec(sql);
      } catch (err) {
        // Ignore "already exists" and "duplicate column" errors; any other
        // migration error is only logged and never aborts the setup.
        if (!err.message.includes('already exists') && !err.message.includes('duplicate column')) {
          debug(`Migration warning (${migration}): ${err.message}`);
        }
      }
    }
  }

  // Fix outreaches table for new proposal generator
  // The new generator creates N proposals (one per contact) instead of 3 fixed variants
  // The messages table uses a unified schema with direction, approval_status, delivery_status
  //
  // NOTE(review): messages_temp declares 19 columns, but the SELECT below supplies
  // 20 expressions (it includes `direction`, which has no target column). SQLite
  // rejects such an INSERT, so this script appears to stop after CREATE TABLE,
  // leaving `messages` untouched and an empty `messages_temp` behind, with only
  // the warning below logged. Left unchanged because making it succeed would DROP
  // `messages`; confirm which table the pipeline stages actually use before fixing.
  try {
    db.exec(`
      CREATE TABLE IF NOT EXISTS messages_temp (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        site_id INTEGER NOT NULL,
        contact_method TEXT NOT NULL CHECK(contact_method IN ('sms', 'email', 'form', 'x', 'linkedin')),
        contact_uri TEXT NOT NULL,
        our_account TEXT,
        message_body TEXT NOT NULL,
        status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'sent', 'delivered', 'failed', 'bounced', 'opened', 'clicked', 'replied', 'scheduled', 'gdpr_blocked')),
        sent_at TIMESTAMP,
        delivered_at TIMESTAMP,
        error_message TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        subject_line TEXT,
        resulted_in_sale BOOLEAN DEFAULT 0,
        sale_amount DECIMAL(10, 2),
        tracking_clicked_at DATETIME,
        opened_at DATETIME,
        email_id TEXT,
        unsubscribed BOOLEAN DEFAULT 0,
        FOREIGN KEY (site_id) REFERENCES sites(id) ON DELETE CASCADE
      );

      -- Copy existing data if any
      INSERT INTO messages_temp SELECT
        id, site_id, direction, contact_method, contact_uri, our_account,
        message_body, COALESCE(delivery_status, approval_status, 'pending') as status,
        sent_at, delivered_at, NULL as error_message,
        created_at, created_at as updated_at, subject_line,
        resulted_in_sale, sale_amount, tracking_clicked_at, opened_at, email_id, unsubscribed
      FROM messages;

      -- Replace old table
      DROP TABLE messages;
      ALTER TABLE messages_temp RENAME TO outreaches;

      -- Recreate indexes
      CREATE INDEX idx_outreaches_site_id ON outreaches(site_id);
      CREATE INDEX idx_outreaches_status ON outreaches(status);
      CREATE INDEX idx_outreaches_contact_method ON outreaches(contact_method);
    `);
  } catch (err) {
    // Deliberately non-fatal: the rebuild is best effort.
    debug(`Warning updating outreaches table: ${err.message}`);
  }

  // Insert default config
  db.prepare(
    `
    INSERT OR REPLACE INTO config (key, value)
    VALUES ('low_score_cutoff', '82')
  `
  ).run();

  debug('✅ Test database initialized');
}
CURRENT_TIMESTAMP, 508 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 509 subject_line TEXT, 510 resulted_in_sale BOOLEAN DEFAULT 0, 511 sale_amount DECIMAL(10, 2), 512 tracking_clicked_at DATETIME, 513 opened_at DATETIME, 514 email_id TEXT, 515 unsubscribed BOOLEAN DEFAULT 0, 516 FOREIGN KEY (site_id) REFERENCES sites(id) ON DELETE CASCADE 517 ); 518 519 -- Copy existing data if any 520 INSERT INTO messages_temp SELECT 521 id, site_id, direction, contact_method, contact_uri, our_account, 522 message_body, COALESCE(delivery_status, approval_status, 'pending') as status, 523 sent_at, delivered_at, NULL as error_message, 524 created_at, created_at as updated_at, subject_line, 525 resulted_in_sale, sale_amount, tracking_clicked_at, opened_at, email_id, unsubscribed 526 FROM messages; 527 528 -- Replace old table 529 DROP TABLE messages; 530 ALTER TABLE messages_temp RENAME TO outreaches; 531 532 -- Recreate indexes 533 CREATE INDEX idx_outreaches_site_id ON outreaches(site_id); 534 CREATE INDEX idx_outreaches_status ON outreaches(status); 535 CREATE INDEX idx_outreaches_contact_method ON outreaches(contact_method); 536 `); 537 } catch (err) { 538 debug(`Warning updating outreaches table: ${err.message}`); 539 } 540 541 // Insert default config 542 db.prepare( 543 ` 544 INSERT OR REPLACE INTO config (key, value) 545 VALUES ('low_score_cutoff', '82') 546 ` 547 ).run(); 548 549 debug('✅ Test database initialized'); 550 } 551 552 describe.skip('End-to-End Pipeline Test — skipped: requires Playwright browser in container', { concurrency: false }, () => { 553 let db; 554 555 before(() => { 556 debug('='.repeat(80)); 557 debug('STARTING END-TO-END PIPELINE TEST'); 558 debug('='.repeat(80)); 559 debug('Test Configuration', { 560 url: TEST_URL, 561 keyword: TEST_KEYWORD, 562 dbPath: TEST_DB_PATH, 563 screenshotDir: TEST_SCREENSHOT_DIR, 564 expectedValuesPath: EXPECTED_VALUES_PATH, 565 actualValuesPath: ACTUAL_VALUES_PATH, 566 }); 567 568 // Load expected values 569 
expectedValues = loadExpectedValues(); 570 571 // Clean up any existing test artifacts 572 if (existsSync(TEST_DB_PATH)) { 573 debug('Removing existing test database...'); 574 rmSync(TEST_DB_PATH); 575 } 576 if (existsSync(TEST_SCREENSHOT_DIR)) { 577 debug('Removing existing test screenshots...'); 578 rmSync(TEST_SCREENSHOT_DIR, { recursive: true }); 579 } 580 if (existsSync(ACTUAL_VALUES_PATH)) { 581 debug('Removing existing test results...'); 582 rmSync(ACTUAL_VALUES_PATH); 583 } 584 585 // Create test screenshot directory 586 mkdirSync(TEST_SCREENSHOT_DIR, { recursive: true }); 587 588 // Set environment variables for test 589 process.env.DATABASE_PATH = TEST_DB_PATH; 590 process.env.SCREENSHOT_BASE_PATH = TEST_SCREENSHOT_DIR; 591 // Isolate score/contacts filesystem storage to avoid cross-test contamination 592 // when node --test runs files in parallel (other tests delete data/scores/1.json etc.) 593 process.env.SCORE_STORAGE_BASE = TEST_STORAGE_BASE; 594 process.env.CONTACTS_STORAGE_BASE = TEST_STORAGE_BASE; 595 // Force programmatic scoring (not orchestrator mode) so the scoring stage runs in tests 596 process.env.ENABLE_LLM_SCORING = 'false'; 597 // Programmatic scoring cannot detect country from the test page, setting country_code=null. 598 // ENGLISH_ONLY_MARKETS filter (NULL IN ...) would then block enrichment. Disable for tests. 599 // Set to empty string (not delete) so load-env.js dotenv.config() doesn't re-set it from .env. 
600 process.env.ENGLISH_ONLY_MARKETS = ''; 601 602 // Create and initialize database 603 db = new Database(TEST_DB_PATH); 604 initTestDatabase(db); 605 606 // Register pg mock so any db.js imports by stages route through the same SQLite file 607 mock.module('../../src/utils/db.js', { namedExports: createPgMock(db) }); 608 }); 609 610 after(() => { 611 if (db) { 612 db.close(); 613 } 614 615 // Clean up isolated storage directory (contains scores/ and contacts/ subdirs) 616 try { 617 if (existsSync(TEST_STORAGE_BASE)) { 618 rmSync(TEST_STORAGE_BASE, { recursive: true, force: true }); 619 } 620 } catch { /* ignore */ } 621 622 // Restore env 623 delete process.env.ENABLE_LLM_SCORING; 624 delete process.env.ENGLISH_ONLY_MARKETS; 625 delete process.env.DATABASE_PATH; 626 delete process.env.SCREENSHOT_BASE_PATH; 627 delete process.env.SCORE_STORAGE_BASE; 628 delete process.env.CONTACTS_STORAGE_BASE; 629 630 // Save actual values for manual review 631 saveActualValues(); 632 633 // Keep test artifacts for manual inspection 634 debug('='.repeat(80)); 635 debug('TEST ARTIFACTS PRESERVED'); 636 debug('='.repeat(80)); 637 debug('Test artifacts location:', { 638 database: TEST_DB_PATH, 639 screenshots: TEST_SCREENSHOT_DIR, 640 actualValues: ACTUAL_VALUES_PATH, 641 }); 642 debug('\nTo inspect:'); 643 debug(` Database: sqlite3 ${TEST_DB_PATH}`); 644 debug(` Screenshots: ls -la ${TEST_SCREENSHOT_DIR}`); 645 debug(` Results: cat ${ACTUAL_VALUES_PATH}`); 646 647 debug('\n='.repeat(80)); 648 debug('END-TO-END PIPELINE TEST COMPLETED'); 649 debug('='.repeat(80)); 650 }); 651 652 test('Stage 0: Setup - Add keyword and site to database', async () => { 653 debug('-'.repeat(80)); 654 debug('STAGE 0: SETUP'); 655 debug('-'.repeat(80)); 656 657 // Insert keyword with required country_code and google_domain 658 const insertKeyword = db.prepare(` 659 INSERT INTO keywords (keyword, country_code, google_domain) 660 VALUES (?, ?, ?) 
661 `); 662 const keywordResult = insertKeyword.run(TEST_KEYWORD, 'US', 'google.com'); 663 keywordId = keywordResult.lastInsertRowid; 664 665 debug(`Inserted keyword: "${TEST_KEYWORD}" (ID: ${keywordId})`); 666 667 // Verify keyword 668 const keyword = db.prepare('SELECT * FROM keywords WHERE id = ?').get(keywordId); 669 validateRecord(keyword, ['id', 'keyword'], 'Keyword'); 670 assert.strictEqual(keyword.keyword, TEST_KEYWORD); 671 672 // Insert site (extract domain from URL) 673 const url = new URL(TEST_URL); 674 const domain = url.hostname; 675 676 const insertSite = db.prepare(` 677 INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain) 678 VALUES (?, ?, ?, 'found', 'US', 'google.com') 679 `); 680 const siteResult = insertSite.run(domain, TEST_URL, TEST_KEYWORD); 681 siteId = siteResult.lastInsertRowid; 682 683 debug(`Inserted site: ${TEST_URL} (ID: ${siteId})`); 684 685 // Verify site 686 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 687 validateRecord(site, ['id', 'domain', 'landing_page_url', 'keyword', 'status'], 'Site'); 688 assert.strictEqual(site.domain, domain); 689 assert.strictEqual(site.landing_page_url, TEST_URL); 690 assert.strictEqual(site.keyword, TEST_KEYWORD); 691 assert.strictEqual(site.status, 'found'); 692 693 actualValues.stages.setup = { 694 keywordId, 695 siteId, 696 status: 'completed', 697 }; 698 699 debug('✅ STAGE 0 COMPLETE: Setup successful'); 700 }); 701 702 test('Stage 1: Assets - Capture screenshots', async () => { 703 checkFailFast('Stage 1: Assets'); 704 try { 705 debug('-'.repeat(80)); 706 debug('STAGE 1: ASSETS'); 707 debug('-'.repeat(80)); 708 709 // Import and run assets stage 710 const { runAssetsStage } = await import('../../src/stages/assets.js'); 711 712 debug('Starting screenshot capture...'); 713 const startTime = Date.now(); 714 const stats = await runAssetsStage({ limit: 1 }); 715 const duration = Date.now() - startTime; 716 717 debug('Assets stage 
completed', { 718 duration: `${duration}ms`, 719 stats, 720 }); 721 722 // Validate stats 723 assert.strictEqual(stats.processed, 1, 'Should process 1 site'); 724 assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site'); 725 assert.strictEqual(stats.failed, 0, 'Should have 0 failures'); 726 727 // Verify site record updated 728 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 729 validateRecord(site, ['status'], 'Site after assets'); 730 assert.strictEqual(site.status, 'assets_captured', 'Status should be "assets_captured"'); 731 732 const visionEnabled = process.env.ENABLE_VISION === 'true'; 733 734 if (visionEnabled && site.screenshot_path) { 735 // Verify screenshot directory exists (vision mode only) 736 const screenshotSiteId = site.screenshot_path.split('/').pop(); 737 const screenshotPath = join(TEST_SCREENSHOT_DIR, screenshotSiteId); 738 assert.ok(existsSync(screenshotPath), 'Screenshot directory should exist'); 739 740 const expectedFiles = [ 741 'desktop_above.jpg', 742 'desktop_above_uncropped.jpg', 743 'desktop_below.jpg', 744 'desktop_below_uncropped.jpg', 745 'mobile_above.jpg', 746 'mobile_above_uncropped.jpg', 747 ]; 748 749 for (const file of expectedFiles) { 750 const filePath = join(screenshotPath, file); 751 assert.ok(existsSync(filePath), `Screenshot file should exist: ${file}`); 752 } 753 754 actualValues.stages.assets = { 755 screenshotPath: site.screenshot_path, 756 filesCreated: expectedFiles, 757 duration, 758 status: 'completed', 759 }; 760 761 debug('✅ STAGE 1 COMPLETE: Screenshots captured', { 762 screenshotPath: site.screenshot_path, 763 filesCreated: expectedFiles.length, 764 }); 765 } else { 766 // HTML-only mode: verify html_dom was captured instead 767 assert.ok(site.html_dom, 'Site should have html_dom in HTML-only mode'); 768 actualValues.stages.assets = { 769 htmlOnly: true, 770 htmlDomLength: site.html_dom?.length || 0, 771 duration, 772 status: 'completed', 773 }; 774 debug('✅ STAGE 1 
COMPLETE: DOM HTML captured (vision disabled)', { 775 htmlDomLength: site.html_dom?.length || 0, 776 }); 777 } 778 } catch (error) { 779 markTestFailed(error); 780 } 781 }); 782 783 test('Stage 2: Scoring - Initial AI scoring', async () => { 784 checkFailFast('Stage 2: Scoring'); 785 try { 786 debug('-'.repeat(80)); 787 debug('STAGE 2: SCORING'); 788 debug('-'.repeat(80)); 789 790 // Import and run scoring stage 791 const { runScoringStage } = await import('../../src/stages/scoring.js'); 792 793 debug('Starting AI scoring...'); 794 const startTime = Date.now(); 795 const stats = await runScoringStage({ limit: 1 }); 796 const duration = Date.now() - startTime; 797 798 debug('Scoring stage completed', { 799 duration: `${duration}ms`, 800 stats, 801 }); 802 803 // Validate stats 804 assert.strictEqual(stats.processed, 1, 'Should process 1 site'); 805 assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site'); 806 assert.strictEqual(stats.failed, 0, 'Should have 0 failures'); 807 808 // Verify site record updated 809 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 810 validateRecord( 811 site, 812 ['score', 'grade', 'scored_at', 'status'], 813 'Site after scoring' 814 ); 815 // Scoring always sets status to 'prog_scored', 'semantic_scored', or 'high_score' 816 assert.ok( 817 ['prog_scored', 'semantic_scored', 'vision_scored', 'high_score'].includes(site.status), 818 `Status should be prog_scored/semantic_scored/vision_scored/high_score, got: ${site.status}` 819 ); 820 821 // Validate score 822 assert.ok(typeof site.score === 'number', 'Score should be a number'); 823 assert.ok(site.score >= 0 && site.score <= 100, `Score ${site.score} should be 0-100`); 824 825 // Validate grade 826 const validGrades = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F']; 827 assert.ok(validGrades.includes(site.grade), `Grade ${site.grade} should be valid`); 828 829 // Validate reasoning exists (score_json now on filesystem) 830 
const scoreJsonRaw = getScoreJson(siteId); 831 assert.ok(scoreJsonRaw && scoreJsonRaw.length > 0, 'Should have score_json on filesystem'); 832 833 // Parse and validate conversion_score_json if expected values exist 834 let scoringJson = null; 835 try { 836 scoringJson = JSON.parse(scoreJsonRaw); 837 validateScoreJson(scoringJson, expectedValues); 838 } catch (err) { 839 debug(`⚠️ Could not parse score_json as JSON: ${err.message}`); 840 } 841 842 actualValues.stages.scoring = { 843 score: site.score, 844 grade: site.grade, 845 scoringReasoning: scoringJson || scoreJsonRaw, 846 duration, 847 status: 'completed', 848 }; 849 850 debug('✅ STAGE 2 COMPLETE: Site scored', { 851 score: site.score, 852 grade: site.grade, 853 reasoningLength: scoreJsonRaw?.length || 0, 854 }); 855 } catch (error) { 856 markTestFailed(error); 857 } 858 }); 859 860 test('Stage 3: Rescoring - Rescore if B- or below', async () => { 861 checkFailFast('Stage 3: Rescoring'); 862 debug('-'.repeat(80)); 863 debug('STAGE 3: RESCORING'); 864 debug('-'.repeat(80)); 865 866 // Check if site needs rescoring 867 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 868 // In HTML-only mode (ENABLE_VISION=false), scoring promotes directly to 'rescored' 869 // Rescoring stage only processes sites with status='scored' (vision mode only) 870 const needsRescoring = site.status === 'prog_scored' && site.score <= 82; 871 872 debug(`Site score: ${site.score} (${site.grade})`); 873 debug(`Site status: ${site.status}`); 874 debug(`Needs rescoring: ${needsRescoring}`); 875 876 if (needsRescoring) { 877 // Import and run rescoring stage 878 const { runRescoringStage } = await import('../../src/stages/rescoring.js'); 879 880 debug('Starting rescoring with below-fold screenshots...'); 881 const startTime = Date.now(); 882 const stats = await runRescoringStage({ limit: 1 }); 883 const duration = Date.now() - startTime; 884 885 debug('Rescoring stage completed', { 886 duration: `${duration}ms`, 887 
stats, 888 }); 889 890 // Validate stats 891 assert.strictEqual(stats.processed, 1, 'Should process 1 site'); 892 assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site'); 893 assert.strictEqual(stats.failed, 0, 'Should have 0 failures'); 894 895 // Verify site record updated 896 const rescoredSite = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 897 validateRecord(rescoredSite, ['score', 'grade', 'rescored_at'], 'Site after rescoring'); 898 899 // Validate city/country_code populated from score_json (now on filesystem) 900 const rescoredScoreJsonRaw = getScoreJson(siteId); 901 if (rescoredScoreJsonRaw) { 902 const scoreJson = JSON.parse(rescoredScoreJsonRaw); 903 if (scoreJson.contact_details?.city) { 904 assert.strictEqual( 905 rescoredSite.city, 906 scoreJson.contact_details.city, 907 'City column should match contact_details.city' 908 ); 909 debug(`✅ City populated: ${rescoredSite.city}`); 910 } 911 if (scoreJson.contact_details?.country_code) { 912 assert.strictEqual( 913 rescoredSite.country_code, 914 scoreJson.contact_details.country_code, 915 'Country code column should match contact_details.country_code' 916 ); 917 debug(`✅ Country code populated: ${rescoredSite.country_code}`); 918 } 919 if (scoreJson.contact_details?.state) { 920 assert.strictEqual( 921 rescoredSite.state, 922 scoreJson.contact_details.state, 923 'State column should match contact_details.state' 924 ); 925 debug(`✅ State populated: ${rescoredSite.state}`); 926 } 927 } 928 929 // Validate score changed or stayed same 930 assert.ok(typeof rescoredSite.score === 'number', 'Score should be a number'); 931 assert.ok( 932 rescoredSite.score >= 0 && rescoredSite.score <= 100, 933 `Score ${rescoredSite.score} should be 0-100` 934 ); 935 936 actualValues.stages.rescoring = { 937 oldScore: site.score, 938 newScore: rescoredSite.score, 939 oldGrade: site.grade, 940 newGrade: rescoredSite.grade, 941 improved: stats.improved > 0, 942 duration, 943 status: 'completed', 944 }; 
945 946 debug('✅ STAGE 3 COMPLETE: Site rescored', { 947 oldScore: site.score, 948 newScore: rescoredSite.score, 949 oldGrade: site.grade, 950 newGrade: rescoredSite.grade, 951 improved: stats.improved, 952 }); 953 } else { 954 const skipReason = 955 site.status === 'semantic_scored' 956 ? 'HTML-only mode: scoring already promoted to semantic_scored' 957 : 'Score above cutoff (B- or 82)'; 958 actualValues.stages.rescoring = { 959 status: 'skipped', 960 reason: skipReason, 961 }; 962 debug(`⏭️ STAGE 3 SKIPPED: ${skipReason}`); 963 } 964 }); 965 966 test('Stage 5: Enrichment - Browse key pages for contact info', async () => { 967 checkFailFast('Stage 5: Enrichment'); 968 try { 969 debug('-'.repeat(80)); 970 debug('STAGE 5: ENRICHMENT'); 971 debug('-'.repeat(80)); 972 973 // Get current site status 974 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 975 976 debug(`Site status: ${site.status}`); 977 debug(`Score: ${site.score} (${site.grade})`); 978 979 // Enrichment runs on rescored sites (or scored sites if score <= 82) 980 const needsEnrichment = 981 site.status === 'semantic_scored' || 982 site.status === 'vision_scored' || 983 site.status === 'prog_scored'; 984 985 if (needsEnrichment) { 986 // Import and run enrichment stage 987 const { runEnrichmentStage } = await import('../../src/stages/enrich.js'); 988 989 debug('Browsing key pages for additional contact info...'); 990 const startTime = Date.now(); 991 const stats = await runEnrichmentStage({ limit: 1 }); 992 const duration = Date.now() - startTime; 993 994 debug('Enrichment stage completed', { 995 duration: `${duration}ms`, 996 stats, 997 }); 998 999 // Verify site record updated 1000 const enrichedSite = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 1001 1002 // Site should be enriched or skipped (if already had contact form) 1003 if (stats.skipped > 0) { 1004 debug('Enrichment skipped (already has contact form)'); 1005 actualValues.stages.enrichment = { 1006 status: 
'skipped', 1007 reason: 'Already has contact form', 1008 duration, 1009 }; 1010 } else { 1011 validateRecord(enrichedSite, ['enriched_at'], 'Site after enrichment'); 1012 assert.ok( 1013 ['enriched_regex', 'enriched_llm', 'enriched'].includes(enrichedSite.status), 1014 `Status should be an enriched variant, got: ${enrichedSite.status}` 1015 ); 1016 1017 // Parse and check enriched contacts (contacts_json now on filesystem) 1018 const enrichedContactsRaw = getContactsJson(siteId); 1019 assert.ok(enrichedContactsRaw, 'contacts_json should exist on filesystem'); 1020 const enrichedContacts = JSON.parse(enrichedContactsRaw); 1021 1022 // Validate against expected contacts if provided 1023 validateContactsJson(enrichedContacts, expectedValues); 1024 1025 // Validate city/country_code columns populated from contacts_json 1026 if (enrichedContacts.city) { 1027 assert.strictEqual( 1028 enrichedSite.city, 1029 enrichedContacts.city, 1030 'City column should match contacts_json.city' 1031 ); 1032 debug(`✅ City column populated: ${enrichedSite.city}`); 1033 } 1034 if (enrichedContacts.country_code) { 1035 assert.strictEqual( 1036 enrichedSite.country_code, 1037 enrichedContacts.country_code, 1038 'Country code column should match contacts_json.country_code' 1039 ); 1040 debug(`✅ Country code column populated: ${enrichedSite.country_code}`); 1041 } 1042 if (enrichedContacts.state) { 1043 assert.strictEqual( 1044 enrichedSite.state, 1045 enrichedContacts.state, 1046 'State column should match contacts_json.state' 1047 ); 1048 debug(`✅ State column populated: ${enrichedSite.state}`); 1049 } 1050 1051 actualValues.stages.enrichment = { 1052 formsFound: stats.formsFound || 0, 1053 emailsFound: stats.emailsFound || 0, 1054 phonesFound: stats.phonesFound || 0, 1055 hasContactForm: !!enrichedContacts.primary_contact_form, 1056 emailCount: enrichedContacts.email_addresses?.length || 0, 1057 phoneCount: enrichedContacts.phone_numbers?.length || 0, 1058 duration, 1059 status: 
'completed', 1060 }; 1061 1062 debug('✅ STAGE 5 COMPLETE: Site enriched', { 1063 formsFound: stats.formsFound, 1064 emailsFound: stats.emailsFound, 1065 phonesFound: stats.phonesFound, 1066 }); 1067 } 1068 } else { 1069 actualValues.stages.enrichment = { 1070 status: 'skipped', 1071 reason: 'Site not at rescored/scored stage', 1072 }; 1073 debug('⏭️ STAGE 5 SKIPPED: Site not at rescored/scored stage'); 1074 } 1075 } catch (error) { 1076 markTestFailed(error); 1077 } 1078 }); 1079 1080 test('Stage 6: Proposals - Generate proposals for low-scoring sites', async () => { 1081 checkFailFast('Stage 6: Proposals'); 1082 debug('-'.repeat(80)); 1083 debug('STAGE 6: PROPOSALS'); 1084 debug('-'.repeat(80)); 1085 1086 // Bump enriched_regex → enriched_llm so proposals stage can find the site. 1087 // Also set country_code = 'AU' if null (regex-only enrichment can't extract country; 1088 // the LLM pass would normally do this). 'AU' has proposal templates and is safe for testing. 1089 // In production the orchestrator LLM pass handles both status promotion and geo extraction. 1090 db.prepare( 1091 `UPDATE sites 1092 SET status = 'enriched_llm', 1093 country_code = COALESCE(country_code, 'AU') 1094 WHERE id = ? 
AND status = 'enriched_regex'` 1095 ).run(siteId); 1096 1097 // Check if site needs proposals 1098 const site = db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 1099 const needsProposals = site.score <= 82; 1100 1101 debug(`Site score: ${site.score} (${site.grade})`); 1102 debug(`Needs proposals: ${needsProposals}`); 1103 1104 if (needsProposals) { 1105 // Import and run proposals stage 1106 const { runProposalsStage } = await import('../../src/stages/proposals.js'); 1107 1108 debug('Generating proposal variants...'); 1109 const startTime = Date.now(); 1110 const stats = await runProposalsStage({ limit: 1 }); 1111 const duration = Date.now() - startTime; 1112 1113 debug('Proposals stage completed', { 1114 duration: `${duration}ms`, 1115 stats, 1116 }); 1117 1118 // Validate stats 1119 assert.strictEqual(stats.processed, 1, 'Should process 1 site'); 1120 assert.strictEqual(stats.succeeded, 1, 'Should succeed for 1 site'); 1121 assert.strictEqual(stats.failed, 0, 'Should have 0 failures'); 1122 1123 // Verify outreach records created 1124 // Note: Number of outreach variants depends on number of contacts found 1125 // The new proposal generator creates N proposals for N contacts 1126 const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId); 1127 debug(`Created ${messages.length} outreach variants (depends on contacts found)`); 1128 1129 const variants = []; 1130 1131 if (messages.length > 0) { 1132 // Validate each outreach 1133 for (let i = 0; i < messages.length; i++) { 1134 const outreach = messages[i]; 1135 validateRecord( 1136 outreach, 1137 ['id', 'site_id', 'message_body', 'contact_method'], 1138 `Outreach variant ${i + 1}` 1139 ); 1140 assert.strictEqual(outreach.direction, 'outbound', `Message should be outbound`); 1141 assert.strictEqual(outreach.approval_status, 'pending', 'Status should be "pending"'); 1142 assert.ok(outreach.message_body.length > 0, 'Should have proposal text'); 1143 1144 const subjectOrProposal = 
outreach.subject_line || outreach.message_body.substring(0, 50); 1145 variants.push({ 1146 variant: i + 1, 1147 subject: subjectOrProposal, 1148 messageLength: outreach.message_body.length, 1149 }); 1150 1151 debug(`Variant ${i + 1}`, { 1152 subject: `${subjectOrProposal}...`, 1153 messageLength: outreach.message_body.length, 1154 }); 1155 } 1156 1157 actualValues.stages.proposals = { 1158 variants, 1159 duration, 1160 status: 'completed', 1161 }; 1162 1163 debug('✅ STAGE 6 COMPLETE: Proposals generated', { 1164 variants: messages.length, 1165 }); 1166 } else { 1167 actualValues.stages.proposals = { 1168 variants: [], 1169 duration, 1170 status: 'completed_no_contacts', 1171 reason: 'No contacts found - no proposals generated', 1172 }; 1173 1174 debug('⚠️ STAGE 6 COMPLETE: No proposals generated (no contacts found)'); 1175 } 1176 } else { 1177 actualValues.stages.proposals = { 1178 status: 'skipped', 1179 reason: 'Score above cutoff (no proposals needed)', 1180 }; 1181 debug('⏭️ STAGE 6 SKIPPED: Score above cutoff (no proposals needed)'); 1182 } 1183 }); 1184 1185 test('Stage 7: Outreach - Actually send messages', async () => { 1186 checkFailFast('Stage 7: Outreach'); 1187 debug('-'.repeat(80)); 1188 debug('STAGE 7: OUTREACH (ACTUAL SENDING)'); 1189 debug('-'.repeat(80)); 1190 debug('⚠️ WARNING: This will send real SMS, emails, and submit contact forms!'); 1191 1192 // Check if site has outreach records 1193 const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId); 1194 1195 debug(`Found ${messages.length} outreach records`); 1196 1197 if (messages.length > 0) { 1198 // Import prioritize function to assign contact methods 1199 const { prioritizeContacts } = await import('../../src/contacts/prioritize.js'); 1200 1201 debug('Prioritizing contact methods...'); 1202 const prioritized = await prioritizeContacts(); 1203 1204 debug(`Prioritized ${prioritized} outreaches`); 1205 1206 // Get updated outreaches with contact methods 1207 const 
updatedOutreaches = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId); 1208 1209 // Import outreach stage to actually send 1210 const { runOutreachStage } = await import('../../src/stages/outreach.js'); 1211 1212 debug('Sending outreach messages...'); 1213 const startTime = Date.now(); 1214 // Send more outreaches to include SMS (emails are first 3, SMS are next 2) 1215 const stats = await runOutreachStage({ limit: 10, useBulk: false }); 1216 const duration = Date.now() - startTime; 1217 1218 debug('Outreach stage completed', { 1219 duration: `${duration}ms`, 1220 stats, 1221 }); 1222 1223 // Get final outreach states 1224 const sentOutreaches = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId); 1225 1226 const outreachResults = sentOutreaches.map(o => ({ 1227 contactMethod: o.contact_method, 1228 contactUri: o.contact_uri, 1229 status: o.status, 1230 sentAt: o.sent_at, 1231 })); 1232 1233 actualValues.stages.outreach = { 1234 stats, 1235 outreaches: outreachResults, 1236 duration, 1237 status: 'completed', 1238 }; 1239 1240 for (const outreach of sentOutreaches) { 1241 debug(`Outreach ${outreach.id}`, { 1242 contactMethod: outreach.contact_method, 1243 contactUri: outreach.contact_uri, 1244 delivery_status: outreach.delivery_status, 1245 sentAt: outreach.sent_at, 1246 }); 1247 } 1248 1249 debug('✅ STAGE 7 COMPLETE: Outreach sent', { 1250 totalOutreaches: sentOutreaches.length, 1251 byChannel: stats.byChannel, 1252 }); 1253 } else { 1254 actualValues.stages.outreach = { 1255 status: 'skipped', 1256 reason: 'No outreach records (high-scoring site)', 1257 }; 1258 debug('⏭️ STAGE 7 SKIPPED: No outreach records (high-scoring site)'); 1259 } 1260 }); 1261 1262 test('Stage 8: Final Verification - Validate complete pipeline', async () => { 1263 checkFailFast('Stage 8: Final Verification'); 1264 debug('-'.repeat(80)); 1265 debug('STAGE 8: FINAL VERIFICATION'); 1266 debug('-'.repeat(80)); 1267 1268 // Get final site state 1269 const site = 
db.prepare('SELECT * FROM sites WHERE id = ?').get(siteId); 1270 const keyword = db.prepare('SELECT * FROM keywords WHERE id = ?').get(keywordId); 1271 const messages = db.prepare('SELECT * FROM messages WHERE site_id = ?').all(siteId); 1272 1273 debug('Final Site State', { 1274 id: site.id, 1275 url: site.url, 1276 keyword: keyword.keyword, 1277 status: site.status, 1278 score: site.score, 1279 grade: site.grade, 1280 hasScreenshots: !!site.screenshot_path, 1281 hasReasoning: !!site.scoring_reasoning, 1282 wasRescored: !!site.rescored_at, 1283 wasEnriched: !!site.enriched_at, 1284 outreachVariants: messages.length, 1285 }); 1286 1287 // Verify complete pipeline flow 1288 // After proposals and outreach stages, status should be proposals_drafted or outreach_sent 1289 const validStatuses = [ 1290 'prog_scored', 1291 'semantic_scored', 1292 'vision_scored', 1293 'high_score', 1294 'enriched', 1295 'proposals_drafted', 1296 'outreach_sent', 1297 ]; 1298 assert.ok( 1299 validStatuses.includes(site.status), 1300 `Final status should be one of ${validStatuses.join(', ')}, got "${site.status}"` 1301 ); 1302 // In vision mode, screenshot_path is required; in HTML-only mode, html_dom is used instead 1303 const visionEnabledFinal = process.env.ENABLE_VISION !== 'false'; 1304 if (visionEnabledFinal) { 1305 assert.ok(site.screenshot_path, 'Should have screenshots (vision mode)'); 1306 } else { 1307 assert.ok(site.html_dom, 'Should have html_dom (HTML-only mode)'); 1308 } 1309 assert.ok(site.score !== null, 'Should have score'); 1310 assert.ok(site.grade, 'Should have grade'); 1311 assert.ok(getScoreJson(siteId), 'Should have score_json on filesystem'); 1312 1313 // Verify outreach records if low-scoring 1314 // Note: Outreach variants depend on contacts being found 1315 // If no contacts found, no outreach records will be created (which is correct behavior) 1316 if (site.score <= 82 && messages.length > 0) { 1317 debug(`✅ Created ${messages.length} outreach variants for 
low-scoring site`); 1318 } else if (site.score <= 82 && messages.length === 0) { 1319 debug('⚠️ No outreach variants created (no contacts found on site)'); 1320 } else if (site.score > 82) { 1321 assert.strictEqual(messages.length, 0, 'High-scoring site should have no outreach variants'); 1322 } 1323 1324 // Validate against expected values if provided 1325 if (expectedValues) { 1326 debug('Validating against expected values...'); 1327 1328 // Validate HTML DOM if available 1329 // Note: The schema uses BLOB for screenshots, not TEXT for html_dom 1330 // We'll need to check if html_dom is captured elsewhere 1331 // For now, we'll skip this validation 1332 1333 debug('✅ Validation against expected values complete'); 1334 } else { 1335 debug('⏭️ Skipping validation (no expected values provided)'); 1336 debug('💡 Edit tests/expected-e2e.json with expected values to enable validation'); 1337 } 1338 1339 actualValues.stages.final = { 1340 finalScore: site.score, 1341 finalGrade: site.grade, 1342 finalStatus: site.status, 1343 wasRescored: !!site.rescored_at, 1344 wasEnriched: !!site.enriched_at, 1345 hasProposals: messages.length > 0, 1346 pipelineComplete: true, 1347 }; 1348 1349 debug('='.repeat(80)); 1350 debug('✅ PIPELINE E2E TEST PASSED'); 1351 debug('='.repeat(80)); 1352 debug('Pipeline Summary', { 1353 url: TEST_URL, 1354 keyword: TEST_KEYWORD, 1355 finalScore: site.score, 1356 finalGrade: site.grade, 1357 wasRescored: !!site.rescored_at, 1358 wasEnriched: !!site.enriched_at, 1359 hasProposals: messages.length > 0, 1360 outreachSent: messages.some(o => o.delivery_status === 'sent'), 1361 pipelineComplete: true, 1362 }); 1363 }); 1364 });