/ tests / stages / enrich-supplement.test.js
enrich-supplement.test.js
   1  /**
   2   * Enrichment Stage — Supplemental Coverage Tests
   3   *
   4   * Targets uncovered branches not exercised by enrich.test.js:
   5   *   - scrapePage: social media path (humanScroll/randomDelay), cloudflare unresolved warning,
   6   *                 waitForLoadState timeout catch (lines 884-885, 889-891, 901-902)
   7   *   - GDPR verification block inside enrichSite (after contact pages browsed) (lines 783-810)
   8   *   - extractInitialContacts: happy path, parse failure fallback, error fallback (lines 991-1062)
   9   *
  10   * MUST be run with --experimental-test-module-mocks.
  11   */
  12  
  13  import { describe, test, mock, beforeEach } from 'node:test';
  14  import assert from 'node:assert/strict';
  15  import * as realFs from 'node:fs';
  16  import { createPgMock } from '../helpers/pg-mock.js'; // eslint-disable-line no-unused-vars
  17  
  18  // ═══════════════════════════════════════════════════════════════
  19  // Environment setup BEFORE any mock.module() or imports
  20  // ═══════════════════════════════════════════════════════════════
  21  process.env.NODE_ENV = 'test';
  22  process.env.LOGS_DIR = '/tmp/test-logs';
  23  process.env.DATABASE_PATH = '/tmp/test-enrich-supp.db';
  24  process.env.ENABLE_VISION = 'true';
  25  process.env.ENRICHMENT_CONCURRENCY = '1';
  26  
  27  // Mock fetch globally to prevent real network calls from sitemap discovery
  28  globalThis.fetch = async () => ({
  29    ok: false,
  30    status: 404,
  31    text: async () => '',
  32  });
  33  
  34  // ═══════════════════════════════════════════════════════════════
  35  // Mutable state (reset per test)
  36  // ═══════════════════════════════════════════════════════════════
  37  let mockSiteRows = [];
  38  let runCalls = [];
  39  let getCalls = [];
  40  let dbClosed = false;
  41  let browserCloseCalled = false;
  42  let mockHtml = '<html><body>Contact us</body></html>';
  43  let mockScreenshot = Buffer.from('png_data');
  44  let mockLLMResponse = null;
  45  let llmCallCount = 0;
  46  let mockBlocklistResult = null;
  47  
  48  let mockCountryByCode = code => {
  49    if (code === 'GB') return { code: 'GB', requiresGDPRCheck: true };
  50    if (code === 'AU') return { code: 'AU', requiresGDPRCheck: false };
  51    return null;
  52  };
  53  
  54  let mockParseCountryFromGoogleDomain = _domain => 'AU';
  55  
  56  // ═══════════════════════════════════════════════════════════════
  57  // 1. fs — mock readFileSync for ENRICHMENT_PROMPT
  58  // ═══════════════════════════════════════════════════════════════
  59  mock.module('fs', {
  60    namedExports: {
  61      ...realFs,
  62      readFileSync: mock.fn((_path, _enc) => 'MOCK ENRICHMENT PROMPT'),
  63      existsSync: () => false,
  64    },
  65  });
  66  
  67  // ═══════════════════════════════════════════════════════════════
  68  // 2. better-sqlite3
  69  // ═══════════════════════════════════════════════════════════════
  70  class MockStatement {
  71    constructor(sql) {
  72      this.sql = sql;
  73    }
  74  
  75    all(..._args) {
  76      if (this.sql.includes('FROM sites') && this.sql.includes('enriched_at IS NULL')) {
  77        return mockSiteRows;
  78      }
  79      return [];
  80    }
  81  
  82    get(...args) {
  83      getCalls.push({ sql: this.sql, args });
  84      if (this.sql.includes('google_domain')) {
  85        const id = args[0];
  86        const site = mockSiteRows.find(s => s.id === id);
  87        if (site) return { google_domain: site.google_domain || 'google.com.au' };
  88        return null;
  89      }
  90      if (this.sql.includes('enriched_at IS NOT NULL')) {
  91        return { total_enriched: 5, with_forms: 2, with_emails: 4, with_phones: 1 };
  92      }
  93      return null;
  94    }
  95  
  96    run(...args) {
  97      runCalls.push({ sql: this.sql, args });
  98      return { changes: 1, lastInsertRowid: 1 };
  99    }
 100  }
 101  
 102  class MockDatabase {
 103    constructor(_path) {
 104      dbClosed = false;
 105    }
 106    prepare(sql) {
 107      return new MockStatement(sql);
 108    }
 109    pragma() {
 110      return undefined;
 111    }
 112    exec() {
 113      return undefined;
 114    }
 115    transaction(fn) {
 116      return (...args) => fn(...args);
 117    }
 118    close() {
 119      dbClosed = true;
 120    }
 121  }
 122  
 123  mock.module('better-sqlite3', {
 124    defaultExport: MockDatabase,
 125  });
 126  
 127  // ═══════════════════════════════════════════════════════════════
 128  // 2b. db.js mock — enrich.js uses db.js (not better-sqlite3 directly)
 129  // ═══════════════════════════════════════════════════════════════
 130  mock.module('../../src/utils/db.js', {
 131    namedExports: {
 132      getAll: async (sql, _params = []) => {
 133        if (sql.includes('FROM sites') && (sql.includes('enriched_at IS NULL') || sql.includes('enrich'))) {
 134          return mockSiteRows;
 135        }
 136        return [];
 137      },
 138      getOne: async (sql, params = []) => {
 139        if (sql.includes('google_domain')) {
 140          const id = params[0];
 141          const site = mockSiteRows.find(s => s.id === id);
 142          if (site) return { google_domain: site.google_domain || 'google.com.au' };
 143          return null;
 144        }
 145        if (sql.includes('enriched_at IS NOT NULL')) {
 146          return { total_enriched: 5, with_forms: 2, with_emails: 4, with_phones: 1 };
 147        }
 148        return null;
 149      },
 150      run: async (sql, args = []) => {
 151        runCalls.push({ sql, args });
 152        return { changes: 1, rowCount: 1 };
 153      },
 154      query: async (sql, params = []) => {
 155        runCalls.push({ sql, args: params });
 156        return { rows: [], rowCount: 0 };
 157      },
 158      withTransaction: async fn => fn({
 159        query: async (sql, params = []) => {
 160          runCalls.push({ sql, args: params });
 161          return { rows: [], rowCount: 0 };
 162        },
 163      }),
 164      closePool: async () => {},
 165      getPool: () => ({}),
 166      createDatabaseConnection: () => ({}),
 167      closeDatabaseConnection: async () => {},
 168    },
 169  });
 170  
 171  // ═══════════════════════════════════════════════════════════════
 172  // 3. Logger — silent no-op
 173  // ═══════════════════════════════════════════════════════════════
 174  mock.module('../../src/utils/logger.js', {
 175    defaultExport: class MockLogger {
 176      info() {}
 177      success() {}
 178      error() {}
 179      warn() {}
 180      debug() {}
 181    },
 182  });
 183  
 184  // ═══════════════════════════════════════════════════════════════
 185  // 4. Stealth browser — mock browser/context/page chain
 186  // ═══════════════════════════════════════════════════════════════
 187  const mockPageGoto = mock.fn(async () => {});
 188  const mockPageContent = mock.fn(async () => mockHtml);
 189  const mockPageScreenshot = mock.fn(async () => mockScreenshot);
 190  const mockPageClose = mock.fn(async () => {});
 191  const mockPageWaitForLoadState = mock.fn(async () => {});
 192  
 193  const mockPage = {
 194    goto: mockPageGoto,
 195    content: mockPageContent,
 196    screenshot: mockPageScreenshot,
 197    close: mockPageClose,
 198    waitForLoadState: mockPageWaitForLoadState,
 199  };
 200  
 201  const mockContextNewPage = mock.fn(async () => mockPage);
 202  const mockContextClose = mock.fn(async () => {});
 203  
 204  const mockContext = {
 205    newPage: mockContextNewPage,
 206    close: mockContextClose,
 207  };
 208  
 209  const mockBrowserClose = mock.fn(async () => {
 210    browserCloseCalled = true;
 211  });
 212  
 213  const mockBrowser = { close: mockBrowserClose };
 214  
 215  const launchStealthBrowserMock = mock.fn(async () => mockBrowser);
 216  const createStealthContextMock = mock.fn(async () => mockContext);
 217  const humanScrollMock = mock.fn(async () => {});
 218  const randomDelayMock = mock.fn(async () => {});
 219  const isSocialMediaUrlMock = mock.fn(() => false);
 220  const waitForCloudflareMock = mock.fn(async () => true);
 221  
 222  mock.module('../../src/utils/stealth-browser.js', {
 223    namedExports: {
 224      launchStealthBrowser: launchStealthBrowserMock,
 225      createStealthContext: createStealthContextMock,
 226      humanScroll: humanScrollMock,
 227      randomDelay: randomDelayMock,
 228      isSocialMediaUrl: isSocialMediaUrlMock,
 229      waitForCloudflare: waitForCloudflareMock,
 230    },
 231  });
 232  
 233  // ═══════════════════════════════════════════════════════════════
 234  // 5. LLM provider
 235  // ═══════════════════════════════════════════════════════════════
 236  const callLLMMock = mock.fn(async () => {
 237    llmCallCount++;
 238    return (
 239      mockLLMResponse || {
 240        content: JSON.stringify({
 241          business_name: 'Test Corp',
 242          email_addresses: [{ email: 'info@testcorp.com', label: 'General' }],
 243          phone_numbers: [],
 244          social_profiles: [],
 245          key_pages: [],
 246          primary_contact_form: null,
 247        }),
 248        usage: { promptTokens: 100, completionTokens: 50 },
 249      }
 250    );
 251  });
 252  
 253  mock.module('../../src/utils/llm-provider.js', {
 254    namedExports: {
 255      callLLM: callLLMMock,
 256      getProvider: mock.fn(() => 'openrouter'),
 257    },
 258  });
 259  
 260  // ═══════════════════════════════════════════════════════════════
 261  // 6. LLM usage tracker
 262  // ═══════════════════════════════════════════════════════════════
 263  mock.module('../../src/utils/llm-usage-tracker.js', {
 264    namedExports: {
 265      logLLMUsage: mock.fn(() => {}),
 266    },
 267  });
 268  
 269  // ═══════════════════════════════════════════════════════════════
 270  // 7. error-handler
 271  // ═══════════════════════════════════════════════════════════════
 272  mock.module('../../src/utils/error-handler.js', {
 273    namedExports: {
 274      processBatch: mock.fn(async (items, processor, _opts) => {
 275        const results = [];
 276        const errors = [];
 277        for (let i = 0; i < items.length; i++) {
 278          try {
 279            const r = await processor(items[i], i);
 280            results.push(r);
 281          } catch (err) {
 282            errors.push(err);
 283            results.push(null);
 284          }
 285        }
 286        return { results, errors };
 287      }),
 288      safeJsonParse: mock.fn((str, fallback = null) => {
 289        try {
 290          return str ? JSON.parse(str) : fallback;
 291        } catch {
 292          return fallback;
 293        }
 294      }),
 295      retryWithBackoff: mock.fn(async fn => fn()),
 296    },
 297  });
 298  
 299  // ═══════════════════════════════════════════════════════════════
 300  // 8. summary-generator
 301  // ═══════════════════════════════════════════════════════════════
 302  mock.module('../../src/utils/summary-generator.js', {
 303    namedExports: {
 304      generateStageCompletion: mock.fn(() => {}),
 305      displayProgress: mock.fn(() => {}),
 306    },
 307  });
 308  
 309  // ═══════════════════════════════════════════════════════════════
 310  // 9. adaptive-concurrency
 311  // ═══════════════════════════════════════════════════════════════
 312  mock.module('../../src/utils/adaptive-concurrency.js', {
 313    namedExports: {
 314      getAdaptiveConcurrencyFast: mock.fn(() => 1),
 315      getAdaptiveConcurrency: mock.fn(() => 1),
 316      isScreenActive: mock.fn(() => false),
 317    },
 318  });
 319  
 320  // ═══════════════════════════════════════════════════════════════
 321  // 10. site-filters
 322  // ═══════════════════════════════════════════════════════════════
 323  const checkBlocklistMock = mock.fn((_domain, _country) => mockBlocklistResult);
 324  
 325  mock.module('../../src/utils/site-filters.js', {
 326    namedExports: {
 327      checkBlocklist: checkBlocklistMock,
 328      DIRECTORY_DOMAINS: [],
 329      SOCIAL_MEDIA_DOMAINS: [],
 330      DEMO_EMAIL_DOMAINS: [],
 331      loadFranchiseDomains: mock.fn(() => []),
 332      isGovernmentDomain: mock.fn(() => false),
 333      isEducationDomain: mock.fn(() => false),
 334      isNonCommercialDomain: mock.fn(() => false),
 335      isDemoEmail: mock.fn(() => false),
 336      isGovernmentEmail: mock.fn(() => false),
 337    },
 338  });
 339  
 340  // ═══════════════════════════════════════════════════════════════
 341  // 11. gdpr-verification
 342  // ═══════════════════════════════════════════════════════════════
 343  const batchVerifyEmailsMock = mock.fn(() => [
 344    { isVerified: true, confidence: 'high', reason: 'Company domain' },
 345  ]);
 346  
 347  mock.module('../../src/utils/gdpr-verification.js', {
 348    namedExports: {
 349      verifyCompanyEmail: mock.fn(() => ({
 350        isVerified: true,
 351        confidence: 'high',
 352        reason: 'Company domain',
 353      })),
 354      batchVerifyEmails: batchVerifyEmailsMock,
 355      isFreeEmailProvider: mock.fn(() => false),
 356      searchCompanyTypes: mock.fn(() => []),
 357      searchCompanyKeywords: mock.fn(() => []),
 358      getKeyPageNames: mock.fn(() => []),
 359    },
 360  });
 361  
 362  // ═══════════════════════════════════════════════════════════════
 363  // 12. countries.js
 364  // ═══════════════════════════════════════════════════════════════
 365  const getCountryByCodeMock = mock.fn(code => mockCountryByCode(code));
 366  
 367  mock.module('../../src/config/countries.js', {
 368    namedExports: {
 369      getCountryByCode: getCountryByCodeMock,
 370      getCountryByGoogleDomain: mock.fn(() => null),
 371      normaliseCountryCode: mock.fn(code => code),
 372      COUNTRIES: {},
 373      FREE_EMAIL_PROVIDERS: [],
 374      isFreeEmailProvider: mock.fn(() => false),
 375      getSupportedCountries: mock.fn(() => []),
 376      getGDPRCountries: mock.fn(() => []),
 377      isMobileNumber: mock.fn(() => false),
 378    },
 379  });
 380  
 381  // ═══════════════════════════════════════════════════════════════
 382  // 13. retry-handler
 383  // ═══════════════════════════════════════════════════════════════
 384  const recordFailureMock = mock.fn(() => {});
 385  const resetRetriesMock = mock.fn(() => {});
 386  
 387  mock.module('../../src/utils/retry-handler.js', {
 388    namedExports: {
 389      recordFailure: recordFailureMock,
 390      resetRetries: resetRetriesMock,
 391      getRetryStats: mock.fn(() => ({})),
 392    },
 393  });
 394  
 395  // ═══════════════════════════════════════════════════════════════
 396  // 14. tld-detector
 397  // ═══════════════════════════════════════════════════════════════
 398  const parseCountryFromGoogleDomainMock = mock.fn(domain =>
 399    mockParseCountryFromGoogleDomain(domain)
 400  );
 401  
 402  mock.module('../../src/utils/tld-detector.js', {
 403    namedExports: {
 404      parseCountryFromGoogleDomain: parseCountryFromGoogleDomainMock,
 405      detectCountryFromTLD: mock.fn(() => null),
 406    },
 407  });
 408  
 409  // ═══════════════════════════════════════════════════════════════
 410  // 15. phone-normalizer
 411  // ═══════════════════════════════════════════════════════════════
 412  const normalizePhoneNumberMock = mock.fn(p => p);
 413  
 414  mock.module('../../src/utils/phone-normalizer.js', {
 415    namedExports: {
 416      normalizePhoneNumber: normalizePhoneNumberMock,
 417      normalizePhoneNumbers: mock.fn(ps => ps),
 418      addCountryCode: mock.fn(p => p),
 419      isFakeNumber: mock.fn(() => false),
 420      cleanPhoneNumbers: mock.fn(ps => ps),
 421      isValidSmsNumber: mock.fn(() => ({ valid: true })),
 422    },
 423  });
 424  
 425  // ═══════════════════════════════════════════════════════════════
 426  // 16. contacts/prioritize.js
 427  // ═══════════════════════════════════════════════════════════════
 428  const cleanInvalidSocialLinksMock = mock.fn(contacts => contacts || {});
 429  
 430  mock.module('../../src/contacts/prioritize.js', {
 431    namedExports: {
 432      cleanInvalidSocialLinks: cleanInvalidSocialLinksMock,
 433      getAllContacts: mock.fn(() => []),
 434      parseAvailableChannels: mock.fn(() => []),
 435      prioritizeContacts: mock.fn(() => []),
 436      updateOutreachContacts: mock.fn(() => {}),
 437      bulkUpdateOutreachContacts: mock.fn(() => {}),
 438      getOutreachReadinessReport: mock.fn(() => ({})),
 439      getAllContactsWithNames: mock.fn(async () => []),
 440    },
 441    defaultExport: {},
 442  });
 443  
 444  // ═══════════════════════════════════════════════════════════════
 445  // contacts-storage — use site row fallback, avoid real filesystem
 446  // ═══════════════════════════════════════════════════════════════
 447  mock.module('../../src/utils/contacts-storage.js', {
 448    namedExports: {
 449      getContactsJson: mock.fn(() => null),
 450      getContactsData: mock.fn(() => null),
 451      setContactsJson: mock.fn(() => {}),
 452      deleteContactsJson: mock.fn(() => false),
 453      hasContactsJson: mock.fn(() => false),
 454      getContactsJsonWithFallback: mock.fn((siteId, dbRow) => dbRow?.contacts_json || null),
 455      getContactsDataWithFallback: mock.fn((siteId, dbRow) => {
 456        const raw = dbRow?.contacts_json;
 457        if (!raw) return null;
 458        try { return JSON.parse(raw); } catch { return null; }
 459      }),
 460      DATA_DIR: '/tmp/test-contacts',
 461    },
 462  });
 463  
 464  // ═══════════════════════════════════════════════════════════════
 465  // html-storage — return html_dom from mockSiteRows
 466  // ═══════════════════════════════════════════════════════════════
 467  mock.module('../../src/utils/html-storage.js', {
 468    namedExports: {
 469      readHtmlDom: mock.fn(siteId => {
 470        const site = mockSiteRows.find(s => s.id === siteId);
 471        return site?.html_dom || null;
 472      }),
 473      writeKeyPagesHtml: mock.fn(() => {}),
 474      readKeyPagesHtml: mock.fn(() => null),
 475      deleteHtmlDom: mock.fn(() => {}),
 476      deleteKeyPagesHtml: mock.fn(() => {}),
 477    },
 478  });
 479  
 480  // ═══════════════════════════════════════════════════════════════
 481  // score-storage — use site row fallback
 482  // ═══════════════════════════════════════════════════════════════
 483  mock.module('../../src/utils/score-storage.js', {
 484    namedExports: {
 485      getScoreJson: mock.fn(() => null),
 486      getScoreData: mock.fn(() => null),
 487      setScoreJson: mock.fn(() => {}),
 488      getScoreJsonWithFallback: mock.fn((siteId, dbRow) => dbRow?.score_json || null),
 489      getScoreDataWithFallback: mock.fn((siteId, dbRow) => {
 490        const raw = dbRow?.score_json;
 491        if (!raw) return null;
 492        try { return JSON.parse(raw); } catch { return null; }
 493      }),
 494    },
 495  });
 496  
 497  // ═══════════════════════════════════════════════════════════════
 498  // Import module under test AFTER all mocks
 499  // ═══════════════════════════════════════════════════════════════
 500  const { runEnrichmentStage } = await import('../../src/stages/enrich.js');
 501  
 502  // ═══════════════════════════════════════════════════════════════
 503  // Helpers
 504  // ═══════════════════════════════════════════════════════════════
 505  function makeSite(overrides = {}) {
 506    return {
 507      id: 1,
 508      domain: 'example.com',
 509      url: 'https://example.com',
 510      contacts_json: null,
 511      html_dom: '<html><body>Test</body></html>',
 512      score_json: null,
 513      country_code: 'AU',
 514      google_domain: 'google.com.au',
 515      ...overrides,
 516    };
 517  }
 518  
 519  function makeContactsJson(extra = {}) {
 520    return JSON.stringify({
 521      business_name: 'Test Co',
 522      email_addresses: [],
 523      phone_numbers: [],
 524      social_profiles: [],
 525      key_pages: [],
 526      ...extra,
 527    });
 528  }
 529  
 530  function resetState() {
 531    mockSiteRows = [];
 532    runCalls = [];
 533    getCalls = [];
 534    dbClosed = false;
 535    browserCloseCalled = false;
 536    llmCallCount = 0;
 537    mockHtml = '<html><body>Contact us</body></html>';
 538    mockScreenshot = Buffer.from('png_data');
 539    mockLLMResponse = null;
 540    mockBlocklistResult = null;
 541  
 542    mockCountryByCode = code => {
 543      if (code === 'GB') return { code: 'GB', requiresGDPRCheck: true };
 544      if (code === 'AU') return { code: 'AU', requiresGDPRCheck: false };
 545      return null;
 546    };
 547    mockParseCountryFromGoogleDomain = _domain => 'AU';
 548  
 549    mockPageGoto.mock.resetCalls();
 550    mockPageContent.mock.resetCalls();
 551    mockPageScreenshot.mock.resetCalls();
 552    mockPageClose.mock.resetCalls();
 553    mockPageWaitForLoadState.mock.resetCalls();
 554    mockContextNewPage.mock.resetCalls();
 555    mockContextClose.mock.resetCalls();
 556    mockBrowserClose.mock.resetCalls();
 557    callLLMMock.mock.resetCalls();
 558    recordFailureMock.mock.resetCalls();
 559    resetRetriesMock.mock.resetCalls();
 560    checkBlocklistMock.mock.resetCalls();
 561    batchVerifyEmailsMock.mock.resetCalls();
 562    getCountryByCodeMock.mock.resetCalls();
 563    parseCountryFromGoogleDomainMock.mock.resetCalls();
 564    normalizePhoneNumberMock.mock.resetCalls();
 565    cleanInvalidSocialLinksMock.mock.resetCalls();
 566    launchStealthBrowserMock.mock.resetCalls();
 567    isSocialMediaUrlMock.mock.resetCalls();
 568    waitForCloudflareMock.mock.resetCalls();
 569    humanScrollMock.mock.resetCalls();
 570    randomDelayMock.mock.resetCalls();
 571  
 572    mockPageGoto.mock.mockImplementation(async () => {});
 573    mockPageContent.mock.mockImplementation(async () => mockHtml);
 574    mockPageScreenshot.mock.mockImplementation(async () => mockScreenshot);
 575    mockPageWaitForLoadState.mock.mockImplementation(async () => {});
 576    isSocialMediaUrlMock.mock.mockImplementation(() => false);
 577    waitForCloudflareMock.mock.mockImplementation(async () => true);
 578  
 579    callLLMMock.mock.mockImplementation(async () => {
 580      llmCallCount++;
 581      return (
 582        mockLLMResponse || {
 583          content: JSON.stringify({
 584            business_name: 'Test Corp',
 585            email_addresses: [{ email: 'info@testcorp.com', label: 'General' }],
 586            phone_numbers: [],
 587            social_profiles: [],
 588            key_pages: [],
 589            primary_contact_form: null,
 590          }),
 591          usage: { promptTokens: 100, completionTokens: 50 },
 592        }
 593      );
 594    });
 595    getCountryByCodeMock.mock.mockImplementation(code => mockCountryByCode(code));
 596    parseCountryFromGoogleDomainMock.mock.mockImplementation(domain =>
 597      mockParseCountryFromGoogleDomain(domain)
 598    );
 599  }
 600  
 601  // ═══════════════════════════════════════════════════════════════
 602  // Test Suites
 603  // ═══════════════════════════════════════════════════════════════
 604  
 605  describe('Enrichment Stage — Supplement Coverage', () => {
 606    // ─────────────────────────────────────────────────────────────
 607    // Suite: scrapePage — social media stealth path (lines 888-891)
 608    // ─────────────────────────────────────────────────────────────
 609    describe('scrapePage — social media stealth path', () => {
 610      beforeEach(() => {
 611        resetState();
 612      });
 613  
 614      test('calls humanScroll and randomDelay for social media URLs', async () => {
 615        // Make isSocialMediaUrl return true so the social branch in scrapePage executes
 616        isSocialMediaUrlMock.mock.mockImplementation(() => true);
 617  
 618        mockSiteRows = [
 619          makeSite({
 620            id: 1001,
 621            contacts_json: makeContactsJson({
 622              key_pages: ['https://facebook.com/testbiz/about'],
 623            }),
 624          }),
 625        ];
 626  
 627        await runEnrichmentStage();
 628  
 629        assert.ok(
 630          humanScrollMock.mock.calls.length >= 1,
 631          'humanScroll should be called for social media pages'
 632        );
 633        assert.ok(
 634          randomDelayMock.mock.calls.length >= 2,
 635          'randomDelay should be called multiple times for social media pages'
 636        );
 637      });
 638  
 639      test('uses aggressive stealth context for social media URLs', async () => {
 640        isSocialMediaUrlMock.mock.mockImplementation(() => true);
 641  
 642        mockSiteRows = [
 643          makeSite({
 644            id: 1002,
 645            contacts_json: makeContactsJson({
 646              key_pages: ['https://instagram.com/testbiz/contact'],
 647            }),
 648          }),
 649        ];
 650  
 651        await runEnrichmentStage();
 652  
 653        // createStealthContext should be called with stealthLevel: 'aggressive'
 654        const stealthCalls = createStealthContextMock.mock.calls;
 655        assert.ok(stealthCalls.length >= 1, 'createStealthContext should be called');
 656        const opts = stealthCalls[0].arguments[1];
 657        assert.equal(
 658          opts.stealthLevel,
 659          'aggressive',
 660          'should use aggressive stealth for social URLs'
 661        );
 662      });
 663  
 664      test('uses minimal stealth context for non-social URLs', async () => {
 665        // Reset call counts explicitly before this test to avoid cross-test contamination
 666        createStealthContextMock.mock.resetCalls();
 667        isSocialMediaUrlMock.mock.mockImplementation(() => false);
 668  
 669        mockSiteRows = [
 670          makeSite({
 671            id: 1003,
 672            contacts_json: makeContactsJson({
 673              key_pages: ['https://example.com/contact'],
 674            }),
 675          }),
 676        ];
 677  
 678        await runEnrichmentStage();
 679  
 680        const stealthCalls = createStealthContextMock.mock.calls;
 681        assert.ok(stealthCalls.length >= 1, 'createStealthContext should be called');
 682        // Find the call made during this test (last call if multiple)
 683        const lastCall = stealthCalls[stealthCalls.length - 1];
 684        const opts = lastCall.arguments[1];
 685        assert.equal(opts.stealthLevel, 'minimal', 'should use minimal stealth for non-social URLs');
 686      });
 687    });
 688  
 689    // ─────────────────────────────────────────────────────────────
 690    // Suite: scrapePage — Cloudflare not resolved warning (lines 883-885)
 691    // ─────────────────────────────────────────────────────────────
 692    describe('scrapePage — Cloudflare challenge not resolved', () => {
 693      beforeEach(() => {
 694        resetState();
 695      });
 696  
 697      test('logs warning but continues when Cloudflare is not resolved', async () => {
 698        // Make waitForCloudflare return false (challenge still blocking)
 699        waitForCloudflareMock.mock.mockImplementation(async () => false);
 700  
 701        mockSiteRows = [
 702          makeSite({
 703            id: 1010,
 704            contacts_json: makeContactsJson({
 705              key_pages: ['https://example.com/contact'],
 706            }),
 707          }),
 708        ];
 709  
 710        // Should still succeed (warning only, not a hard error)
 711        const result = await runEnrichmentStage();
 712  
 713        assert.ok(waitForCloudflareMock.mock.calls.length >= 1, 'waitForCloudflare should be called');
 714        // Site should succeed even with Cloudflare warning
 715        assert.equal(result.processed, 1, 'site should still be processed');
 716      });
 717  
 718      test('still reads page content after Cloudflare warning', async () => {
 719        waitForCloudflareMock.mock.mockImplementation(async () => false);
 720  
 721        mockSiteRows = [
 722          makeSite({
 723            id: 1011,
 724            contacts_json: makeContactsJson({
 725              key_pages: ['https://example.com/about'],
 726            }),
 727          }),
 728        ];
 729  
 730        await runEnrichmentStage();
 731  
 732        assert.ok(
 733          mockPageContent.mock.calls.length >= 1,
 734          'page.content() should still be called after Cloudflare warning'
 735        );
 736      });
 737    });
 738  
 739    // ─────────────────────────────────────────────────────────────
 740    // Suite: scrapePage — waitForLoadState timeout (lines 901-902)
 741    // ─────────────────────────────────────────────────────────────
 742    describe('scrapePage — waitForLoadState timeout caught', () => {
 743      beforeEach(() => {
 744        resetState();
 745      });
 746  
 747      test('continues enrichment when waitForLoadState times out', async () => {
 748        // Make waitForLoadState throw to exercise the inner try/catch
 749        mockPageWaitForLoadState.mock.mockImplementation(async () => {
 750          throw new Error('Timeout waiting for load state');
 751        });
 752  
 753        mockSiteRows = [
 754          makeSite({
 755            id: 1020,
 756            contacts_json: makeContactsJson({
 757              key_pages: ['https://example.com/contact'],
 758            }),
 759          }),
 760        ];
 761  
 762        // Should not throw — the timeout is caught and ignored
 763        const result = await runEnrichmentStage();
 764  
 765        assert.equal(result.processed, 1, 'should process site even when waitForLoadState times out');
 766      });
 767  
 768      test('still calls page.content() after waitForLoadState timeout', async () => {
 769        mockPageWaitForLoadState.mock.mockImplementation(async () => {
 770          throw new Error('Timed out waiting for load state');
 771        });
 772  
 773        mockSiteRows = [
 774          makeSite({
 775            id: 1021,
 776            contacts_json: makeContactsJson({
 777              key_pages: ['https://example.com/contact'],
 778            }),
 779          }),
 780        ];
 781  
 782        await runEnrichmentStage();
 783  
 784        assert.ok(
 785          mockPageContent.mock.calls.length >= 1,
 786          'page.content() should still be called after waitForLoadState timeout'
 787        );
 788      });
 789    });
 790  
 791    // ─────────────────────────────────────────────────────────────
 792    // Suite: ENABLE_VISION=false path (disables screenshot in scrapePage)
 793    // ─────────────────────────────────────────────────────────────
 794    describe('scrapePage — ENABLE_VISION=false skips screenshot', () => {
 795      beforeEach(() => {
 796        resetState();
 797        process.env.ENABLE_VISION = 'false';
 798      });
 799  
 800      // Restore after suite
 801      test('does not call page.screenshot() when ENABLE_VISION=false', async () => {
 802        mockSiteRows = [
 803          makeSite({
 804            id: 1030,
 805            contacts_json: makeContactsJson({
 806              key_pages: ['https://example.com/contact'],
 807            }),
 808          }),
 809        ];
 810  
 811        await runEnrichmentStage();
 812  
 813        assert.equal(
 814          mockPageScreenshot.mock.calls.length,
 815          0,
 816          'page.screenshot() should not be called when ENABLE_VISION=false'
 817        );
 818  
 819        // Restore
 820        process.env.ENABLE_VISION = 'true';
 821      });
 822  
 823      test('still enriches site without screenshot', async () => {
 824        mockSiteRows = [
 825          makeSite({
 826            id: 1031,
 827            contacts_json: makeContactsJson({
 828              key_pages: ['https://example.com/contact'],
 829            }),
 830          }),
 831        ];
 832  
 833        const result = await runEnrichmentStage();
 834  
 835        assert.equal(result.processed, 1, 'should process site without screenshot');
 836  
 837        process.env.ENABLE_VISION = 'true';
 838      });
 839    });
 840  
 841    // ─────────────────────────────────────────────────────────────
 842    // Suite: extractInitialContacts path (lines 991-1062)
 843    //        Triggered by: contacts_json=null, score_json=null,
 844    //        ENABLE_ENRICHMENT_LLM=false
 845    // ─────────────────────────────────────────────────────────────
 846    describe('extractInitialContacts path (ENABLE_ENRICHMENT_LLM=false)', () => {
 847      beforeEach(() => {
 848        resetState();
 849        process.env.ENABLE_ENRICHMENT_LLM = 'false';
 850      });
 851  
 852      test('calls LLM when ENABLE_ENRICHMENT_LLM=false and no contacts/score_json', async () => {
 853        mockSiteRows = [
 854          makeSite({
 855            id: 1040,
 856            contacts_json: null,
 857            score_json: null,
 858            html_dom: '<html><body>Contact page</body></html>',
 859          }),
 860        ];
 861  
 862        mockLLMResponse = {
 863          content: JSON.stringify({
 864            business_name: 'LLM Corp',
 865            email_addresses: [{ email: 'llm@llmcorp.com', label: 'Main' }],
 866            phone_numbers: [],
 867            social_profiles: [],
 868            key_pages: [],
 869          }),
 870          usage: { promptTokens: 100, completionTokens: 50 },
 871        };
 872  
 873        const result = await runEnrichmentStage();
 874  
 875        assert.equal(result.processed, 1, 'should process the site');
 876        assert.ok(llmCallCount >= 1, 'LLM should be called via extractInitialContacts');
 877  
 878        process.env.ENABLE_ENRICHMENT_LLM = 'true';
 879      });
 880  
 881      test('returns minimal contact structure when LLM parse fails', async () => {
 882        mockSiteRows = [
 883          makeSite({
 884            id: 1041,
 885            contacts_json: null,
 886            score_json: null,
 887            html_dom: '<html><body>Contact</body></html>',
 888          }),
 889        ];
 890  
 891        // Return invalid JSON so safeJsonParse returns null
 892        mockLLMResponse = {
 893          content: 'NOT JSON AT ALL <<<>>>',
 894          usage: { promptTokens: 100, completionTokens: 10 },
 895        };
 896  
 897        const result = await runEnrichmentStage();
 898  
 899        // With null parse result, extractInitialContacts returns minimal structure
 900        // which has no contacts → recordFailure is called (no contacts, no key_pages)
 901        assert.equal(result.processed, 1, 'should process site even with LLM parse failure');
 902  
 903        process.env.ENABLE_ENRICHMENT_LLM = 'true';
 904      });
 905  
 906      test('returns minimal structure when LLM call throws', async () => {
 907        mockSiteRows = [
 908          makeSite({
 909            id: 1042,
 910            contacts_json: null,
 911            score_json: null,
 912            html_dom: '<html><body>Contact</body></html>',
 913          }),
 914        ];
 915  
 916        // Make LLM throw an error to hit the catch block in extractInitialContacts
 917        callLLMMock.mock.mockImplementation(async () => {
 918          llmCallCount++;
 919          throw new Error('LLM API error');
 920        });
 921  
 922        // extractInitialContacts catches the error and returns minimal structure
 923        // minimal structure has no contacts → recordFailure path
 924        const result = await runEnrichmentStage();
 925  
 926        assert.equal(result.processed, 1, 'should process even when LLM throws');
 927  
 928        process.env.ENABLE_ENRICHMENT_LLM = 'true';
 929      });
 930  
 931      test('cleans invalid social links from LLM extractInitialContacts result', async () => {
 932        mockSiteRows = [
 933          makeSite({
 934            id: 1043,
 935            contacts_json: null,
 936            score_json: null,
 937            html_dom: '<html><body>Contact</body></html>',
 938          }),
 939        ];
 940  
 941        mockLLMResponse = {
 942          content: JSON.stringify({
 943            business_name: 'Social LLM Corp',
 944            email_addresses: [{ email: 'info@sociallm.com', label: 'Main' }],
 945            phone_numbers: [],
 946            social_profiles: [{ url: 'https://twitter.com/handle', label: 'Twitter' }],
 947            key_pages: [],
 948          }),
 949          usage: { promptTokens: 100, completionTokens: 50 },
 950        };
 951  
 952        await runEnrichmentStage();
 953  
 954        assert.ok(
 955          cleanInvalidSocialLinksMock.mock.calls.length >= 1,
 956          'cleanInvalidSocialLinks should be called on extractInitialContacts result'
 957        );
 958  
 959        process.env.ENABLE_ENRICHMENT_LLM = 'true';
 960      });
 961  
 962      test('normalizes phone numbers from LLM extractInitialContacts result', async () => {
 963        mockSiteRows = [
 964          makeSite({
 965            id: 1044,
 966            contacts_json: null,
 967            score_json: null,
 968            html_dom: '<html><body>Contact</body></html>',
 969          }),
 970        ];
 971  
 972        mockLLMResponse = {
 973          content: JSON.stringify({
 974            business_name: 'Phone LLM Corp',
 975            email_addresses: [{ email: 'info@phonellm.com', label: 'Main' }],
 976            phone_numbers: [{ number: '+61412345678', label: 'Office' }],
 977            social_profiles: [],
 978            key_pages: [],
 979          }),
 980          usage: { promptTokens: 100, completionTokens: 50 },
 981        };
 982  
 983        let normalizeCalled = false;
 984        normalizePhoneNumberMock.mock.mockImplementation(p => {
 985          normalizeCalled = true;
 986          return p;
 987        });
 988  
 989        await runEnrichmentStage();
 990  
 991        assert.ok(
 992          normalizeCalled,
 993          'normalizePhoneNumber should be called on phone numbers from extractInitialContacts'
 994        );
 995  
 996        process.env.ENABLE_ENRICHMENT_LLM = 'true';
 997      });
 998    });
 999  
1000    // ─────────────────────────────────────────────────────────────
1001    // Suite: GDPR verification in enrichSite after browsing
1002    //        (lines 783-810 — the block with companyProof / gdprVerified)
1003    // ─────────────────────────────────────────────────────────────
1004    describe('GDPR verification after contact page browsing', () => {
1005      beforeEach(() => {
1006        resetState();
1007      });
1008  
1009      test('runs GDPR verification and sets company_proof when GB site has emails after browsing', async () => {
1010        // Configure: GB requires GDPR, google.co.uk → GB (no country mismatch)
1011        mockParseCountryFromGoogleDomain = _domain => 'GB';
1012        mockCountryByCode = code => {
1013          if (code === 'GB') return { code: 'GB', requiresGDPRCheck: true };
1014          return null;
1015        };
1016        parseCountryFromGoogleDomainMock.mock.mockImplementation(domain =>
1017          mockParseCountryFromGoogleDomain(domain)
1018        );
1019        getCountryByCodeMock.mock.mockImplementation(code => mockCountryByCode(code));
1020  
1021        // Page HTML that regex extractor will detect emails from
1022        mockHtml = '<html><body><a href="mailto:info@uk-co.co.uk">info@uk-co.co.uk</a></body></html>';
1023  
1024        mockSiteRows = [
1025          makeSite({
1026            id: 1050,
1027            domain: 'uk-co.co.uk',
1028            country_code: 'GB',
1029            google_domain: 'google.co.uk',
1030            contacts_json: JSON.stringify({
1031              business_name: 'UK Co',
1032              email_addresses: [],
1033              phone_numbers: [],
1034              social_profiles: [],
1035              key_pages: ['https://uk-co.co.uk/contact'],
1036              country_code: 'GB',
1037            }),
1038          }),
1039        ];
1040  
1041        await runEnrichmentStage();
1042  
1043        // Should have called batchVerifyEmails (GDPR path)
1044        assert.ok(
1045          batchVerifyEmailsMock.mock.calls.length >= 1,
1046          'batchVerifyEmails should be called for GB site after browsing'
1047        );
1048  
1049        // The final UPDATE should include company_proof
1050        const gdprUpdate = runCalls.find(
1051          c => c.sql.includes('company_proof') && c.sql.includes("status = 'enriched_regex'")
1052        );
1053        assert.ok(gdprUpdate, 'should store company_proof for GDPR site after browser enrichment');
1054      });
1055  
1056      test('sets gdpr_verified=1 when at least one email has high confidence', async () => {
1057        mockParseCountryFromGoogleDomain = _domain => 'GB';
1058        mockCountryByCode = code => {
1059          if (code === 'GB') return { code: 'GB', requiresGDPRCheck: true };
1060          return null;
1061        };
1062        parseCountryFromGoogleDomainMock.mock.mockImplementation(domain =>
1063          mockParseCountryFromGoogleDomain(domain)
1064        );
1065        getCountryByCodeMock.mock.mockImplementation(code => mockCountryByCode(code));
1066  
1067        batchVerifyEmailsMock.mock.mockImplementation(() => [
1068          { isVerified: true, confidence: 'high', reason: 'Company domain' },
1069        ]);
1070  
1071        mockHtml =
1072          '<html><body><a href="mailto:info@gbsite.co.uk">info@gbsite.co.uk</a></body></html>';
1073  
1074        mockSiteRows = [
1075          makeSite({
1076            id: 1051,
1077            domain: 'gbsite.co.uk',
1078            country_code: 'GB',
1079            google_domain: 'google.co.uk',
1080            contacts_json: JSON.stringify({
1081              business_name: 'GB Site',
1082              email_addresses: [],
1083              phone_numbers: [],
1084              social_profiles: [],
1085              key_pages: ['https://gbsite.co.uk/contact'],
1086              country_code: 'GB',
1087            }),
1088          }),
1089        ];
1090  
1091        await runEnrichmentStage();
1092  
1093        const gdprUpdate = runCalls.find(
1094          c => c.sql.includes('gdpr_verified') && c.sql.includes("status = 'enriched_regex'")
1095        );
1096        assert.ok(gdprUpdate, 'should have a DB update with gdpr_verified field');
1097  
1098        // Find the gdpr_verified value in the run args (it's the 6th positional param)
1099        if (gdprUpdate) {
1100          const { args } = gdprUpdate;
1101          // Look for value 1 in args (gdpr_verified=1 for verified)
1102          assert.ok(args.includes(1), 'gdpr_verified should be 1 when high confidence email found');
1103        }
1104      });
1105  
1106      test('sets gdpr_verified=0 when all emails are unverified', async () => {
1107        mockParseCountryFromGoogleDomain = _domain => 'GB';
1108        mockCountryByCode = code => {
1109          if (code === 'GB') return { code: 'GB', requiresGDPRCheck: true };
1110          return null;
1111        };
1112        parseCountryFromGoogleDomainMock.mock.mockImplementation(domain =>
1113          mockParseCountryFromGoogleDomain(domain)
1114        );
1115        getCountryByCodeMock.mock.mockImplementation(code => mockCountryByCode(code));
1116  
1117        // Return unverified results
1118        batchVerifyEmailsMock.mock.mockImplementation(() => [
1119          { isVerified: false, confidence: 'low', reason: 'Free email provider' },
1120        ]);
1121  
1122        mockHtml =
1123          '<html><body><a href="mailto:info@gbsite2.co.uk">info@gbsite2.co.uk</a></body></html>';
1124  
1125        mockSiteRows = [
1126          makeSite({
1127            id: 1052,
1128            domain: 'gbsite2.co.uk',
1129            country_code: 'GB',
1130            google_domain: 'google.co.uk',
1131            contacts_json: JSON.stringify({
1132              business_name: 'GB Site 2',
1133              email_addresses: [],
1134              phone_numbers: [],
1135              social_profiles: [],
1136              key_pages: ['https://gbsite2.co.uk/contact'],
1137              country_code: 'GB',
1138            }),
1139          }),
1140        ];
1141  
1142        await runEnrichmentStage();
1143  
1144        const gdprUpdate = runCalls.find(
1145          c => c.sql.includes('gdpr_verified') && c.sql.includes("status = 'enriched_regex'")
1146        );
1147        assert.ok(gdprUpdate, 'should have a DB update with gdpr_verified field');
1148      });
1149  
1150      test('skips GDPR check when countryCode throws from getCountryByCode', async () => {
1151        mockParseCountryFromGoogleDomain = _domain => 'XX';
1152        mockCountryByCode = _code => {
1153          throw new Error('Unknown country code');
1154        };
1155        parseCountryFromGoogleDomainMock.mock.mockImplementation(domain =>
1156          mockParseCountryFromGoogleDomain(domain)
1157        );
1158        getCountryByCodeMock.mock.mockImplementation(code => mockCountryByCode(code));
1159  
1160        mockHtml = '<html><body><a href="mailto:info@xxsite.xx">info@xxsite.xx</a></body></html>';
1161  
1162        mockSiteRows = [
1163          makeSite({
1164            id: 1053,
1165            domain: 'xxsite.xx',
1166            country_code: 'XX',
1167            google_domain: 'google.xx',
1168            contacts_json: JSON.stringify({
1169              business_name: 'XX Site',
1170              email_addresses: [],
1171              phone_numbers: [],
1172              social_profiles: [],
1173              key_pages: ['https://xxsite.xx/contact'],
1174              country_code: 'XX',
1175            }),
1176          }),
1177        ];
1178  
1179        // Should not throw — exception from getCountryByCode is caught
1180        const result = await runEnrichmentStage();
1181        assert.equal(result.processed, 1, 'should process site even when getCountryByCode throws');
1182      });
1183    });
1184  
1185    // ─────────────────────────────────────────────────────────────
1186    // Suite: Social + Cloudflare combined
1187    // ─────────────────────────────────────────────────────────────
1188    describe('scrapePage — social media with Cloudflare unresolved', () => {
1189      beforeEach(() => {
1190        resetState();
1191      });
1192  
1193      test('social media page with Cloudflare unresolved still completes', async () => {
1194        humanScrollMock.mock.resetCalls();
1195        isSocialMediaUrlMock.mock.mockImplementation(() => true);
1196        waitForCloudflareMock.mock.mockImplementation(async () => false);
1197  
1198        // key_pages URL must match contactPagePattern to trigger browsing
1199        mockSiteRows = [
1200          makeSite({
1201            id: 1060,
1202            contacts_json: makeContactsJson({
1203              key_pages: ['https://facebook.com/testbiz/contact'],
1204            }),
1205          }),
1206        ];
1207  
1208        const result = await runEnrichmentStage();
1209  
1210        assert.equal(result.processed, 1, 'should process social page even with Cloudflare issue');
1211        assert.ok(
1212          humanScrollMock.mock.calls.length >= 1,
1213          'humanScroll should still be called for social page'
1214        );
1215      });
1216    });
1217  });