/ tests / process.test.js
process.test.js
   1  /**
   2   * Tests for src/process.js
   3   *
   4   * Covers:
   5   * - main(): URL direct processing path
   6   * - main(): keyword pipeline path (populateKeywords, getKeywordsToScrape, processKeyword)
   7   * - processSingleUrl(): domain extraction, site creation
   8   * - processSite(): existing site skip, HTTP error path, success path
   9   * - processKeyword(): scrape error, site processing, remaining limit
  10   *
  11   * Uses mock.module() to stub all external dependencies.
  12   *
  13   * NOTE: requires --experimental-test-module-mocks
  14   */
  15  
  16  import { test, describe, mock, before, after } from 'node:test';
  17  import assert from 'node:assert/strict';
  18  import RealDatabase from 'better-sqlite3';
  19  import { join } from 'path';
  20  import { tmpdir } from 'os';
  21  import { mkdirSync, rmSync } from 'fs';
  22  import { createLazyPgMock } from './helpers/pg-mock.js';
  23  
  24  // ── Mutable stubs ─────────────────────────────────────────────────────────────
  25  
  26  let scrapeSERPFn = async () => [];
  27  let captureWebsiteFn = async () => ({
  28    html: '<html>test</html>',
  29    httpStatusCode: 200,
  30    screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
  31    screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
  32  });
  33  let scoreWebsiteFn = async () => ({
  34    overall_calculation: { conversion_score: 75 },
  35    resubmitted: false,
  36  });
  37  let extractGradeFn = () => 'C';
  38  let saveScreenshotsFn = async () => 'screenshots/test';
  39  let generateKeywordCombinationsFn = () => [
  40    {
  41      keyword: 'plumber sydney',
  42      countryCode: 'AU',
  43      googleDomain: 'google.com.au',
  44      searchVolume: 100,
  45      priority: 1,
  46    },
  47    {
  48      keyword: 'electrician sydney',
  49      countryCode: 'AU',
  50      googleDomain: 'google.com.au',
  51      searchVolume: 80,
  52      priority: 2,
  53    },
  54  ];
  55  let upsertKeywordFn = () => {};
  56  let incrementProcessedCountFn = () => {};
  57  let incrementLowScoringCountFn = () => {};
  58  const incrementReworkLowScoringCountFn = () => {};
  59  
  60  // Active SQLite db used by the pg-mock — set before each test that needs it
  61  let activeDb = null;
  62  
  63  const TABLES_SQL = `
  64    CREATE TABLE IF NOT EXISTS sites (
  65      id INTEGER PRIMARY KEY AUTOINCREMENT,
  66      domain TEXT NOT NULL,
  67      landing_page_url TEXT,
  68      keyword TEXT,
  69      html_dom TEXT,
  70      http_status_code INTEGER,
  71      score_json TEXT,
  72      score REAL,
  73      grade TEXT,
  74      status TEXT DEFAULT 'found',
  75      screenshot_path TEXT,
  76      processing_status TEXT,
  77      error_log TEXT,
  78      scored_at TEXT,
  79      created_at TEXT DEFAULT CURRENT_TIMESTAMP,
  80      rescored_at DATETIME
  81    );
  82    CREATE TABLE IF NOT EXISTS keywords (
  83      id INTEGER PRIMARY KEY AUTOINCREMENT,
  84      keyword TEXT NOT NULL,
  85      country_code TEXT,
  86      google_domain TEXT,
  87      search_volume INTEGER,
  88      priority INTEGER DEFAULT 0,
  89      last_scraped_at TEXT,
  90      zenrows_count INTEGER DEFAULT 0
  91    );
  92  `;
  93  
  94  // ── Mock modules BEFORE import ────────────────────────────────────────────────
  95  
  96  // Mock db.js so processSite/processSingleUrl use the activeDb SQLite instance
  97  mock.module('../src/utils/db.js', {
  98    namedExports: createLazyPgMock(() => activeDb),
  99  });
 100  
 101  // Mock logger to suppress output
 102  mock.module('../src/utils/logger.js', {
 103    defaultExport: class { info() {} warn() {} error() {} success() {} debug() {} },
 104  });
 105  
 106  // Mock filesystem storage utilities (process.js writes HTML and score JSON to disk)
 107  mock.module('../src/utils/html-storage.js', {
 108    namedExports: {
 109      writeHtmlDom: () => {},
 110      readHtmlDom: () => null,
 111      hasHtmlDom: () => false,
 112      deleteHtmlDom: () => {},
 113      writeKeyPagesHtml: () => {},
 114      readKeyPagesHtml: () => null,
 115      deleteKeyPagesHtml: () => {},
 116    },
 117  });
 118  
 119  mock.module('../src/utils/score-storage.js', {
 120    namedExports: {
 121      setScoreJson: () => {},
 122      getScoreJson: () => null,
 123      deleteScoreJson: () => {},
 124    },
 125  });
 126  
 127  mock.module('../src/scrape.js', {
 128    namedExports: {
 129      scrapeSERP: (...args) => scrapeSERPFn(...args),
 130    },
 131  });
 132  
 133  mock.module('../src/capture.js', {
 134    namedExports: {
 135      captureWebsite: (...args) => captureWebsiteFn(...args),
 136    },
 137  });
 138  
 139  mock.module('../src/score.js', {
 140    namedExports: {
 141      scoreWebsite: (...args) => scoreWebsiteFn(...args),
 142      extractGrade: (...args) => extractGradeFn(...args),
 143    },
 144  });
 145  
 146  mock.module('../src/utils/screenshot-storage.js', {
 147    namedExports: {
 148      saveScreenshots: (...args) => saveScreenshotsFn(...args),
 149    },
 150  });
 151  
 152  mock.module('../src/utils/keyword-manager.js', {
 153    namedExports: {
 154      generateKeywordCombinations: (...args) => generateKeywordCombinationsFn(...args),
 155      upsertKeyword: (...args) => upsertKeywordFn(...args),
 156      incrementProcessedCount: (...args) => incrementProcessedCountFn(...args),
 157      incrementLowScoringCount: (...args) => incrementLowScoringCountFn(...args),
 158      incrementReworkLowScoringCount: (...args) => incrementReworkLowScoringCountFn(...args),
 159    },
 160  });
 161  
 162  mock.module('../src/utils/load-env.js', {
 163    namedExports: {},
 164  });
 165  
 166  // ── DB setup ──────────────────────────────────────────────────────────────────
 167  
 168  let tmpDir;
 169  let dbPath;
 170  
 171  before(() => {
 172    tmpDir = join(tmpdir(), `process-test-${Date.now()}`);
 173    mkdirSync(tmpDir, { recursive: true });
 174    dbPath = join(tmpDir, 'test.db');
 175    process.env.DATABASE_PATH = dbPath;
 176  });
 177  
 178  after(() => {
 179    delete process.env.DATABASE_PATH;
 180    rmSync(tmpDir, { recursive: true, force: true });
 181  });
 182  
 183  // Import AFTER mocks
 184  const { processSingleUrl, processSite, main } = await import('../src/process.js');
 185  
 186  // ── Helper ────────────────────────────────────────────────────────────────────
 187  
 188  function createTestDb() {
 189    const db = new RealDatabase(':memory:');
 190    db.exec(TABLES_SQL);
 191    return db;
 192  }
 193  
 194  // ── Tests ─────────────────────────────────────────────────────────────────────
 195  
 196  describe('processSingleUrl', () => {
 197    test('inserts site record for a new domain', async () => {
 198      const db = createTestDb();
 199      activeDb = db;
 200      captureWebsiteFn = async () => ({
 201        html: '<html>test</html>',
 202        httpStatusCode: 200,
 203        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 204        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 205      });
 206      scoreWebsiteFn = async () => ({
 207        overall_calculation: { conversion_score: 65 },
 208        resubmitted: false,
 209      });
 210      extractGradeFn = () => 'D';
 211      saveScreenshotsFn = async () => 'screenshots/123';
 212  
 213      await processSingleUrl('https://example.com/page');
 214      const site = db.prepare("SELECT * FROM sites WHERE domain = 'example.com'").get();
 215      db.close();
 216      activeDb = null;
 217  
 218      assert.ok(site, 'site should be inserted');
 219      assert.equal(site.keyword, 'manual', 'keyword should be "manual"');
 220      assert.equal(site.grade, 'D');
 221    });
 222  
 223    test('skips domain that already exists in DB', async () => {
 224      const db = createTestDb();
 225      activeDb = db;
 226      db.prepare("INSERT INTO sites (domain, status) VALUES ('existing.com', 'prog_scored')").run();
 227  
 228      let captureCalled = false;
 229      captureWebsiteFn = async () => {
 230        captureCalled = true;
 231        return { html: '', httpStatusCode: 200, screenshots: {}, screenshotsUncropped: {} };
 232      };
 233  
 234      await processSingleUrl('https://www.existing.com/');
 235      db.close();
 236      activeDb = null;
 237  
 238      assert.equal(captureCalled, false, 'capture should not be called for existing site');
 239    });
 240  });
 241  
 242  describe('processSite — HTTP error path', () => {
 243    test('stores site with failed status when HTTP 404', async () => {
 244      const db = createTestDb();
 245      activeDb = db;
 246      captureWebsiteFn = async () => ({
 247        html: '',
 248        httpStatusCode: 404,
 249        screenshots: {},
 250        screenshotsUncropped: {},
 251      });
 252  
 253      await processSite({ url: 'https://notfound.com', domain: 'notfound.com' }, 'test-keyword');
 254      const site = db.prepare("SELECT * FROM sites WHERE domain = 'notfound.com'").get();
 255      db.close();
 256      activeDb = null;
 257  
 258      assert.ok(site, 'site should be stored even on HTTP error');
 259      assert.equal(site.processing_status, 'failed');
 260      assert.ok(site.error_log?.includes('404'), `error_log should mention 404: ${site.error_log}`);
 261    });
 262  
 263    test('stores site with failed status when HTTP 500', async () => {
 264      const db = createTestDb();
 265      activeDb = db;
 266      captureWebsiteFn = async () => ({
 267        html: '',
 268        httpStatusCode: 500,
 269        screenshots: {},
 270        screenshotsUncropped: {},
 271      });
 272  
 273      await processSite({ url: 'https://error.com', domain: 'error.com' }, 'test');
 274      const site = db.prepare("SELECT * FROM sites WHERE domain = 'error.com'").get();
 275      db.close();
 276      activeDb = null;
 277      assert.equal(site.processing_status, 'failed');
 278    });
 279  });
 280  
 281  describe('processSite — success path', () => {
 282    test('inserts scored site with grade and screenshot_path', async () => {
 283      const db = createTestDb();
 284      activeDb = db;
 285      captureWebsiteFn = async () => ({
 286        html: '<html><head><title>Test</title></head></html>',
 287        httpStatusCode: 200,
 288        screenshots: { desktop_above: Buffer.from('img'), desktop_below: null, mobile_above: null },
 289        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 290      });
 291      scoreWebsiteFn = async () => ({
 292        overall_calculation: { conversion_score: 82 },
 293        resubmitted: false,
 294      });
 295      extractGradeFn = () => 'B';
 296      saveScreenshotsFn = async () => 'screenshots/101';
 297  
 298      await processSite({ url: 'https://good.com', domain: 'good.com' }, 'plumber');
 299      const site = db.prepare("SELECT * FROM sites WHERE domain = 'good.com'").get();
 300      db.close();
 301      activeDb = null;
 302  
 303      assert.ok(site, 'site should be inserted');
 304      assert.equal(site.grade, 'B');
 305      assert.equal(site.screenshot_path, 'screenshots/101');
 306      assert.equal(site.status, 'prog_scored');
 307    });
 308  
 309    test('increments low scoring count for D grade', async () => {
 310      const db = createTestDb();
 311      activeDb = db;
 312  
 313      captureWebsiteFn = async () => ({
 314        html: '<html></html>',
 315        httpStatusCode: 200,
 316        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 317        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 318      });
 319      scoreWebsiteFn = async () => ({
 320        overall_calculation: { conversion_score: 30 },
 321        resubmitted: false,
 322      });
 323      extractGradeFn = () => 'D';
 324      saveScreenshotsFn = async () => 'screenshots/102';
 325  
 326      // Can't easily verify the mock call count without mock.fn, but verify no throw
 327      await assert.doesNotReject(() =>
 328        processSite({ url: 'https://poor.com', domain: 'poor.com' }, 'test')
 329      );
 330      db.close();
 331      activeDb = null;
 332    });
 333  
 334    test('handles resubmitted flag for rework counting', async () => {
 335      const db = createTestDb();
 336      activeDb = db;
 337  
 338      captureWebsiteFn = async () => ({
 339        html: '<html></html>',
 340        httpStatusCode: 200,
 341        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 342        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 343      });
 344      scoreWebsiteFn = async () => ({
 345        overall_calculation: { conversion_score: 20 },
 346        resubmitted: true, // triggers incrementReworkLowScoringCount
 347      });
 348      extractGradeFn = () => 'F';
 349      saveScreenshotsFn = async () => 'screenshots/103';
 350  
 351      await assert.doesNotReject(() =>
 352        processSite({ url: 'https://fail.com', domain: 'fail.com' }, 'test')
 353      );
 354      db.close();
 355      activeDb = null;
 356    });
 357  
 358    test('handles null grade gracefully', async () => {
 359      const db = createTestDb();
 360      activeDb = db;
 361  
 362      captureWebsiteFn = async () => ({
 363        html: '<html></html>',
 364        httpStatusCode: 200,
 365        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 366        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 367      });
 368      scoreWebsiteFn = async () => ({ overall_calculation: null });
 369      extractGradeFn = () => null;
 370      saveScreenshotsFn = async () => 'screenshots/104';
 371  
 372      await assert.doesNotReject(() =>
 373        processSite({ url: 'https://nullgrade.com', domain: 'nullgrade.com' }, 'test')
 374      );
 375      db.close();
 376      activeDb = null;
 377    });
 378  });
 379  
 380  // ── main() — URL direct-processing path (covers lines 34-46) ────────────────
 381  
 382  describe('main — URL path', () => {
 383    test('processes a single URL when url argument provided', async () => {
 384      const db = createTestDb();
 385      activeDb = db;
 386  
 387      let capturedDomain = null;
 388      captureWebsiteFn = async url => {
 389        capturedDomain = new URL(url).hostname.replace(/^www\./, '');
 390        return {
 391          html: '<html>main-url-test</html>',
 392          httpStatusCode: 200,
 393          screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 394          screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 395        };
 396      };
 397      scoreWebsiteFn = async () => ({
 398        overall_calculation: { conversion_score: 55 },
 399        resubmitted: false,
 400      });
 401      extractGradeFn = () => 'C-';
 402      saveScreenshotsFn = async () => 'screenshots/main-url';
 403  
 404      await main(1, 'https://mainurl.com');
 405  
 406      db.close();
 407      activeDb = null;
 408  
 409      // We verify via the capture stub that processSingleUrl was called correctly
 410      assert.equal(capturedDomain, 'mainurl.com', 'should have captured mainurl.com');
 411    });
 412  });
 413  
 414  // ── main() — keyword pipeline path (covers lines 48-76, 85-124, 130-159, 167-179) ──
 415  
 416  describe('main — keyword pipeline path', () => {
 417    // Helper: make upsertKeyword actually insert rows so getKeywordsToScrape finds them
 418    // Note: upsertKeyword is now called as (keyword, data) without a db arg
 419    function makeRealUpsert() {
 420      upsertKeywordFn = async (keyword, data) => {
 421        // Use activeDb
 422        const db = activeDb;
 423        if (!db) return;
 424        // Insert if not exists (simplified version of real upsertKeyword)
 425        const existing = db.prepare('SELECT id FROM keywords WHERE keyword = ?').get(keyword);
 426        if (!existing) {
 427          db.prepare(
 428            'INSERT INTO keywords (keyword, country_code, google_domain, search_volume, priority) VALUES (?, ?, ?, ?, ?)'
 429          ).run(
 430            keyword,
 431            data.country_code || null,
 432            data.google_domain || null,
 433            data.search_volume || 0,
 434            data.priority || 0
 435          );
 436        } else if (data.zenrows_count !== undefined) {
 437          db.prepare(
 438            'UPDATE keywords SET zenrows_count = ?, last_scraped_at = ? WHERE keyword = ?'
 439          ).run(data.zenrows_count, data.last_scraped_at || null, keyword);
 440        }
 441      };
 442    }
 443  
 444    test('populates keywords, scrapes, and processes sites', async () => {
 445      const db = createTestDb();
 446      activeDb = db;
 447      makeRealUpsert();
 448  
 449      let processedCountIncrements = 0;
 450      incrementProcessedCountFn = () => {
 451        processedCountIncrements++;
 452      };
 453  
 454      // scrapeSERP returns 2 sites for the keyword
 455      scrapeSERPFn = async keyword => [
 456        {
 457          url: `https://${keyword.replace(/ /g, '-')}-1.com`,
 458          domain: `${keyword.replace(/ /g, '-')}-1.com`,
 459        },
 460        {
 461          url: `https://${keyword.replace(/ /g, '-')}-2.com`,
 462          domain: `${keyword.replace(/ /g, '-')}-2.com`,
 463        },
 464      ];
 465  
 466      captureWebsiteFn = async () => ({
 467        html: '<html>pipeline</html>',
 468        httpStatusCode: 200,
 469        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 470        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 471      });
 472      scoreWebsiteFn = async () => ({
 473        overall_calculation: { conversion_score: 70 },
 474        resubmitted: false,
 475      });
 476      extractGradeFn = () => 'B-';
 477      saveScreenshotsFn = async () => 'screenshots/pipeline';
 478  
 479      generateKeywordCombinationsFn = () => [
 480        {
 481          keyword: 'test kw',
 482          countryCode: 'AU',
 483          googleDomain: 'google.com.au',
 484          searchVolume: 50,
 485          priority: 1,
 486        },
 487      ];
 488  
 489      await main(10);
 490  
 491      db.close();
 492      activeDb = null;
 493  
 494      // processKeyword should have scraped and processed sites
 495      assert.ok(processedCountIncrements > 0, 'incrementProcessedCount should have been called');
 496    });
 497  
 498    test('respects site limit and stops processing', async () => {
 499      const db = createTestDb();
 500      activeDb = db;
 501      makeRealUpsert();
 502  
 503      let processedCountIncrements = 0;
 504      incrementProcessedCountFn = () => {
 505        processedCountIncrements++;
 506      };
 507  
 508      // Return 5 sites per keyword
 509      scrapeSERPFn = async () => [
 510        { url: 'https://a.com', domain: 'a.com' },
 511        { url: 'https://b.com', domain: 'b.com' },
 512        { url: 'https://c.com', domain: 'c.com' },
 513        { url: 'https://d.com', domain: 'd.com' },
 514        { url: 'https://e.com', domain: 'e.com' },
 515      ];
 516  
 517      captureWebsiteFn = async () => ({
 518        html: '<html></html>',
 519        httpStatusCode: 200,
 520        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 521        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 522      });
 523      scoreWebsiteFn = async () => ({
 524        overall_calculation: { conversion_score: 60 },
 525        resubmitted: false,
 526      });
 527      extractGradeFn = () => 'C';
 528      saveScreenshotsFn = async () => 'screenshots/limit';
 529      generateKeywordCombinationsFn = () => [
 530        {
 531          keyword: 'limit kw',
 532          countryCode: 'AU',
 533          googleDomain: 'google.com.au',
 534          searchVolume: 50,
 535          priority: 1,
 536        },
 537      ];
 538  
 539      // Limit to 2 sites
 540      await main(2);
 541  
 542      db.close();
 543      activeDb = null;
 544  
 545      assert.equal(processedCountIncrements, 2, 'should only process 2 sites (the limit)');
 546    });
 547  
 548    test('handles scrapeSERP failure gracefully (processKeyword catch)', async () => {
 549      const db = createTestDb();
 550      activeDb = db;
 551      makeRealUpsert();
 552      incrementProcessedCountFn = () => {};
 553  
 554      // scrapeSERP throws
 555      scrapeSERPFn = async () => {
 556        throw new Error('ZenRows API down');
 557      };
 558  
 559      generateKeywordCombinationsFn = () => [
 560        {
 561          keyword: 'fail kw',
 562          countryCode: 'AU',
 563          googleDomain: 'google.com.au',
 564          searchVolume: 50,
 565          priority: 1,
 566        },
 567      ];
 568  
 569      // Should not throw — processKeyword catches scrape errors
 570      await assert.doesNotReject(() => main(10));
 571  
 572      db.close();
 573      activeDb = null;
 574    });
 575  
 576    test('handles individual site failure in processKeyword loop', async () => {
 577      const db = createTestDb();
 578      activeDb = db;
 579      makeRealUpsert();
 580  
 581      let processedCountIncrements = 0;
 582      incrementProcessedCountFn = () => {
 583        processedCountIncrements++;
 584      };
 585  
 586      scrapeSERPFn = async () => [
 587        { url: 'https://ok1.com', domain: 'ok1.com' },
 588        { url: 'https://broken.com', domain: 'broken.com' },
 589        { url: 'https://ok2.com', domain: 'ok2.com' },
 590      ];
 591  
 592      captureWebsiteFn = async url => {
 593        if (url === 'https://broken.com') {
 594          throw new Error('capture failed');
 595        }
 596        return {
 597          html: '<html></html>',
 598          httpStatusCode: 200,
 599          screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 600          screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 601        };
 602      };
 603      scoreWebsiteFn = async () => ({
 604        overall_calculation: { conversion_score: 70 },
 605        resubmitted: false,
 606      });
 607      extractGradeFn = () => 'B';
 608      saveScreenshotsFn = async () => 'screenshots/mixed';
 609      generateKeywordCombinationsFn = () => [
 610        {
 611          keyword: 'mixed kw',
 612          countryCode: 'AU',
 613          googleDomain: 'google.com.au',
 614          searchVolume: 50,
 615          priority: 1,
 616        },
 617      ];
 618  
 619      await main(10);
 620  
 621      db.close();
 622      activeDb = null;
 623  
 624      // ok1.com and ok2.com processed, broken.com skipped
 625      assert.equal(processedCountIncrements, 2, 'should process 2 sites, skipping the broken one');
 626    });
 627  
 628    test('stops processing keywords when site limit is reached (lines 60-63)', async () => {
 629      const db = createTestDb();
 630      activeDb = db;
 631      makeRealUpsert();
 632  
 633      let processedCountIncrements = 0;
 634      incrementProcessedCountFn = () => {
 635        processedCountIncrements++;
 636      };
 637  
 638      // Each keyword returns 5 sites
 639      scrapeSERPFn = async keyword => [
 640        {
 641          url: `https://${keyword.replace(/ /g, '-')}-1.com`,
 642          domain: `${keyword.replace(/ /g, '-')}-1.com`,
 643        },
 644        {
 645          url: `https://${keyword.replace(/ /g, '-')}-2.com`,
 646          domain: `${keyword.replace(/ /g, '-')}-2.com`,
 647        },
 648        {
 649          url: `https://${keyword.replace(/ /g, '-')}-3.com`,
 650          domain: `${keyword.replace(/ /g, '-')}-3.com`,
 651        },
 652        {
 653          url: `https://${keyword.replace(/ /g, '-')}-4.com`,
 654          domain: `${keyword.replace(/ /g, '-')}-4.com`,
 655        },
 656        {
 657          url: `https://${keyword.replace(/ /g, '-')}-5.com`,
 658          domain: `${keyword.replace(/ /g, '-')}-5.com`,
 659        },
 660      ];
 661  
 662      captureWebsiteFn = async () => ({
 663        html: '<html></html>',
 664        httpStatusCode: 200,
 665        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 666        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 667      });
 668      scoreWebsiteFn = async () => ({
 669        overall_calculation: { conversion_score: 60 },
 670        resubmitted: false,
 671      });
 672      extractGradeFn = () => 'C';
 673      saveScreenshotsFn = async () => 'screenshots/limit-break';
 674  
 675      // 3 keywords, limit=30 -> ceil(30/10)=3 keywords fetched from DB.
 676      // kw1 returns 10 sites (total=10). kw2 returns 25 sites (remainingLimit=20,
 677      // processes 20, total=30). Loop iterates to kw3: if (30>=30) -> break (lines 60-63).
 678      generateKeywordCombinationsFn = () => [
 679        {
 680          keyword: 'brk-a',
 681          countryCode: 'AU',
 682          googleDomain: 'google.com.au',
 683          searchVolume: 50,
 684          priority: 1,
 685        },
 686        {
 687          keyword: 'brk-b',
 688          countryCode: 'AU',
 689          googleDomain: 'google.com.au',
 690          searchVolume: 40,
 691          priority: 2,
 692        },
 693        {
 694          keyword: 'brk-c',
 695          countryCode: 'AU',
 696          googleDomain: 'google.com.au',
 697          searchVolume: 30,
 698          priority: 3,
 699        },
 700      ];
 701  
 702      scrapeSERPFn = async keyword => {
 703        const count = keyword === 'brk-a' ? 10 : 25;
 704        const sites = [];
 705        for (let i = 0; i < count; i++) {
 706          sites.push({ url: `https://${keyword}-${i}.com`, domain: `${keyword}-${i}.com` });
 707        }
 708        return sites;
 709      };
 710  
 711      await main(30);
 712  
 713      db.close();
 714      activeDb = null;
 715  
 716      assert.equal(processedCountIncrements, 30, 'should stop at limit, skipping kw3');
 717    });
 718  
 719    test('processes multiple keywords and stops at site limit', async () => {
 720      const db = createTestDb();
 721      activeDb = db;
 722      makeRealUpsert();
 723  
 724      let processedCountIncrements = 0;
 725      incrementProcessedCountFn = () => {
 726        processedCountIncrements++;
 727      };
 728  
 729      // Each keyword returns 3 sites
 730      scrapeSERPFn = async keyword => [
 731        {
 732          url: `https://${keyword.replace(/ /g, '-')}-a.com`,
 733          domain: `${keyword.replace(/ /g, '-')}-a.com`,
 734        },
 735        {
 736          url: `https://${keyword.replace(/ /g, '-')}-b.com`,
 737          domain: `${keyword.replace(/ /g, '-')}-b.com`,
 738        },
 739        {
 740          url: `https://${keyword.replace(/ /g, '-')}-c.com`,
 741          domain: `${keyword.replace(/ /g, '-')}-c.com`,
 742        },
 743      ];
 744  
 745      captureWebsiteFn = async () => ({
 746        html: '<html></html>',
 747        httpStatusCode: 200,
 748        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 749        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 750      });
 751      scoreWebsiteFn = async () => ({
 752        overall_calculation: { conversion_score: 60 },
 753        resubmitted: false,
 754      });
 755      extractGradeFn = () => 'C';
 756      saveScreenshotsFn = async () => 'screenshots/multi';
 757  
 758      // Two keywords — need limit >= 20 so Math.ceil(limit/10) >= 2 keywords are fetched
 759      generateKeywordCombinationsFn = () => [
 760        {
 761          keyword: 'multi one',
 762          countryCode: 'AU',
 763          googleDomain: 'google.com.au',
 764          searchVolume: 50,
 765          priority: 1,
 766        },
 767        {
 768          keyword: 'multi two',
 769          countryCode: 'AU',
 770          googleDomain: 'google.com.au',
 771          searchVolume: 40,
 772          priority: 2,
 773        },
 774      ];
 775  
 776      // Limit 5: Math.ceil(5/10)=1 keyword fetched -> only 3 sites from 1 keyword, all under limit
 777      // Limit 20: Math.ceil(20/10)=2 keywords fetched -> 3+3=6, but capped at 5 if limit=5
 778      // Actually we need limit that fetches 2 keywords: Math.ceil(L/10)>=2 -> L>=11
 779      // Use limit=20 (fetches 2 keywords). First keyword processes 3, second processes 3, total 6.
 780      // The outer loop checks sitesProcessed >= limit after each keyword.
 781      // After kw1: sitesProcessed=3, 3<20, continue. After kw2: sitesProcessed=6, done.
 782      await main(20);
 783  
 784      db.close();
 785      activeDb = null;
 786  
 787      // 2 keywords * 3 sites each = 6 total
 788      assert.equal(processedCountIncrements, 6, 'should process all 6 sites across 2 keywords');
 789    });
 790  });
 791  
 792  // ── main() — error path (covers lines 70-76) ────────────────────────────────
 793  
 794  describe('main — error handling', () => {
 795    test('calls process.exit(1) on pipeline failure', async () => {
 796      const db = createTestDb();
 797      activeDb = db;
 798  
 799      let exitCode = null;
 800      const originalExit = process.exit;
 801      process.exit = code => {
 802        exitCode = code;
 803      };
 804  
 805      // Make populateKeywords fail by having generateKeywordCombinations throw
 806      generateKeywordCombinationsFn = () => {
 807        throw new Error('combos failed');
 808      };
 809  
 810      try {
 811        await main(10);
 812      } catch {
 813        // may or may not throw depending on timing
 814      }
 815  
 816      db.close();
 817      activeDb = null;
 818      process.exit = originalExit;
 819      assert.equal(exitCode, 1, 'process.exit should be called with code 1');
 820    });
 821  });
 822  
 823  // ── populateKeywords — existing keyword path (covers lines 130-159) ──────────
 824  
 825  describe('main — populateKeywords with existing keywords', () => {
 826    test('skips keywords that already exist in DB (existingKeywords++ path)', async () => {
 827      const upsertCalls = [];
 828      upsertKeywordFn = (keyword, data) => {
 829        upsertCalls.push({ keyword, data });
 830      };
 831      incrementProcessedCountFn = () => {};
 832  
 833      // Return no sites so we don't proceed past keyword population
 834      scrapeSERPFn = async () => [];
 835  
 836      generateKeywordCombinationsFn = () => [
 837        {
 838          keyword: 'existing kw',
 839          countryCode: 'AU',
 840          googleDomain: 'google.com.au',
 841          searchVolume: 50,
 842          priority: 1,
 843        },
 844        {
 845          keyword: 'new kw',
 846          countryCode: 'AU',
 847          googleDomain: 'google.com.au',
 848          searchVolume: 50,
 849          priority: 1,
 850        },
 851      ];
 852  
 853      // Pre-insert 'existing kw' into the DB before main() runs
 854      const db = createTestDb();
 855      db.prepare("INSERT INTO keywords (keyword, country_code) VALUES ('existing kw', 'AU')").run();
 856      activeDb = db;
 857  
 858      await main(10);
 859  
 860      db.close();
 861      activeDb = null;
 862  
 863      // Only 'new kw' should trigger upsertKeyword; 'existing kw' was pre-inserted
 864      // (covers the existingKeywords++ path, lines 152-153)
 865      const kwUpserts = upsertCalls.filter(c => c.data && c.data.country_code);
 866      assert.equal(kwUpserts.length, 1, 'only the new keyword should be upserted');
 867      assert.equal(kwUpserts[0].keyword, 'new kw');
 868    });
 869  });
 870  
 871  // ── processSite — low-grade variants (B-, C+, etc.) ──────────────────────────
 872  
 873  describe('processSite — low grade variants', () => {
 874    for (const grade of ['B-', 'C+', 'C', 'C-', 'D+', 'D-', 'F']) {
 875      test(`increments low scoring for grade ${grade}`, async () => {
 876        const db = createTestDb();
 877        let lowCalled = false;
 878        incrementLowScoringCountFn = () => {
 879          lowCalled = true;
 880        };
 881  
 882        captureWebsiteFn = async () => ({
 883          html: '<html></html>',
 884          httpStatusCode: 200,
 885          screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 886          screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 887        });
 888        scoreWebsiteFn = async () => ({
 889          overall_calculation: { conversion_score: 30 },
 890          resubmitted: false,
 891        });
 892        extractGradeFn = () => grade;
 893        saveScreenshotsFn = async () => 'screenshots/grade';
 894  
 895        const domain = `grade-${grade.replace('+', 'p').replace('-', 'm')}.com`;
 896        activeDb = db;
 897        await processSite({ url: `https://${domain}`, domain }, 'test');
 898        db.close();
 899        activeDb = null;
 900  
 901        assert.ok(lowCalled, `incrementLowScoringCount should be called for grade ${grade}`);
 902      });
 903    }
 904  
 905    test('does NOT increment low scoring for high grades (A, A-, B+, B)', async () => {
 906      for (const grade of ['A', 'A-', 'B+', 'B']) {
 907        const db = createTestDb();
 908        let lowCalled = false;
 909        incrementLowScoringCountFn = () => {
 910          lowCalled = true;
 911        };
 912  
 913        captureWebsiteFn = async () => ({
 914          html: '<html></html>',
 915          httpStatusCode: 200,
 916          screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 917          screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 918        });
 919        scoreWebsiteFn = async () => ({
 920          overall_calculation: { conversion_score: 90 },
 921          resubmitted: false,
 922        });
 923        extractGradeFn = () => grade;
 924        saveScreenshotsFn = async () => 'screenshots/high';
 925  
 926        const domain = `high-${grade.replace('+', 'p').replace('-', 'm')}.com`;
 927        activeDb = db;
 928        await processSite({ url: `https://${domain}`, domain }, 'test');
 929        db.close();
 930        activeDb = null;
 931  
 932        assert.equal(
 933          lowCalled,
 934          false,
 935          `incrementLowScoringCount should NOT be called for grade ${grade}`
 936        );
 937      }
 938    });
 939  });
 940  
 941  // ── processSite — HTTP status edge cases ─────────────────────────────────────
 942  
 943  describe('processSite — HTTP status edge cases', () => {
 944    test('treats status 399 as success (not >= 400)', async () => {
 945      const db = createTestDb();
 946      captureWebsiteFn = async () => ({
 947        html: '<html>redirect</html>',
 948        httpStatusCode: 399,
 949        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 950        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 951      });
 952      scoreWebsiteFn = async () => ({
 953        overall_calculation: { conversion_score: 70 },
 954        resubmitted: false,
 955      });
 956      extractGradeFn = () => 'B';
 957      saveScreenshotsFn = async () => 'screenshots/399';
 958  
 959      activeDb = db;
 960      await processSite({ url: 'https://redirect.com', domain: 'redirect.com' }, 'test');
 961      const site = db.prepare("SELECT * FROM sites WHERE domain = 'redirect.com'").get();
 962      db.close();
 963      activeDb = null;
 964  
 965      assert.equal(site.status, 'prog_scored', 'should be prog_scored, not failed');
 966      assert.equal(site.processing_status, null, 'processing_status should not be failed');
 967    });
 968  
 969    test('treats status 400 as HTTP error', async () => {
 970      const db = createTestDb();
 971      captureWebsiteFn = async () => ({
 972        html: '',
 973        httpStatusCode: 400,
 974        screenshots: {},
 975        screenshotsUncropped: {},
 976      });
 977  
 978      activeDb = db;
 979      await processSite({ url: 'https://bad400.com', domain: 'bad400.com' }, 'test');
 980      const site = db.prepare("SELECT * FROM sites WHERE domain = 'bad400.com'").get();
 981      db.close();
 982      activeDb = null;
 983  
 984      assert.equal(site.processing_status, 'failed');
 985    });
 986  
 987    test('handles null httpStatusCode (no error)', async () => {
 988      const db = createTestDb();
 989      captureWebsiteFn = async () => ({
 990        html: '<html></html>',
 991        httpStatusCode: null,
 992        screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
 993        screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
 994      });
 995      scoreWebsiteFn = async () => ({
 996        overall_calculation: { conversion_score: 50 },
 997        resubmitted: false,
 998      });
 999      extractGradeFn = () => 'C';
1000      saveScreenshotsFn = async () => 'screenshots/null-status';
1001  
1002      activeDb = db;
1003      await processSite({ url: 'https://nullstatus.com', domain: 'nullstatus.com' }, 'test');
1004      const site = db.prepare("SELECT * FROM sites WHERE domain = 'nullstatus.com'").get();
1005      db.close();
1006      activeDb = null;
1007  
1008      assert.equal(site.status, 'prog_scored', 'null httpStatusCode should proceed to scoring');
1009    });
1010  });