/ tests / stages / scoring.test.js
scoring.test.js
   1  /**
   2   * Unit Tests for Scoring Stage
   3   *
   4   * Tests the runScoringStage() function and its internal scoreSite() logic.
   5   *
   6   * Key behaviors tested:
   7   * - Happy path: site scored, grade/score saved to DB, status updated
   8   * - High score (>82): status set to 'high_score'
 * - Low score (<=82): status set to 'prog_scored'
  10   * - Business directory detected (is_business_directory=true): status='ignore'
  11   * - Non-local business detected (is_local_business=false): status='ignore'
  12   * - Error page permanent (isErrorPage + 404/403/410): status='ignore'
  13   * - Error page temporary (isErrorPage + 5xx): status='assets_captured'
  14   * - Broken site: recapture scheduled
  15   * - Broken site max retries exceeded: status='ignore'
  16   * - LOW_SCORE_CUTOFF env var respected
  17   * - ENABLE_VISION=false: contacts saved, status='rescored'
  18   * - No sites: returns early with 0 counts
  19   * - Blocklist filtering marks sites as 'ignore'
  20   *
  21   * SQL patterns used in scoring.js:
  22   *
 *   Blocklist UPDATE:
 *     UPDATE sites SET status = 'ignored', error_message = ? WHERE id = ?
 *     args = [reason, id]
  26   *
  27   *   Directory/Non-local/ErrorPage(perm)/BrokenMax UPDATE (ignore):
  28   *     UPDATE sites SET\n  status = 'ignore',\n  error_message = ?,\n  score_json = ?\n  WHERE id = ?
  29   *     args = [errorMsg, scoreJson, id]
  30   *
  31   *   Temporary error / broken site recapture UPDATE:
  32   *     UPDATE sites SET\n  status = 'assets_captured',...
  33   *     args = [errorMsg, scoreJson, id]  (temporary)
  34   *     args = [errorMsg, recaptureCount, scoreJson, id]  (broken)
  35   *
 *   Success UPDATE:
 *     UPDATE sites SET grade=?, score=?, city=?, country_code=?, state=?,
 *                      status=?, scored_at=..., WHERE id=?
 *     args = [grade, score, city, countryCode, state, status, id]
 *     indices:  0      1      2     3            4      5       6
 *     (score_json / contacts_json blobs are written to the filesystem via
 *      setScoreJson() / setContactsJson(), not through this UPDATE)
  41   *
  42   * Run with:
  43   *   NODE_ENV=test LOGS_DIR=/tmp/test-logs DATABASE_PATH=/tmp/test-sites.db \
  44   *   node --experimental-test-module-mocks --test tests/stages/scoring.test.js
  45   */
  46  
  47  import { test, describe, mock, beforeEach } from 'node:test';
  48  import assert from 'node:assert';
  49  import { createPgMock } from '../helpers/pg-mock.js'; // eslint-disable-line no-unused-vars
  50  
  51  // ============================================================================
  52  // MOCK DEFINITIONS — ALL mock.module() calls MUST come before any dynamic imports
  53  // ============================================================================
  54  
  55  // Shared state holders for mocks (mutated per-test via beforeEach)
  56  let mockSites = [];
  57  let mockSiteDetails = {};
  58  let mockGradeRows = [];
  59  let mockRecaptureData = { recapture_count: 0 };
  60  let mockKeywordData = null;
  61  
  62  // Track DB calls for assertions
  63  const dbCalls = {
  64    updates: [], // { sql: string, args: any[] }
  65  };
  66  
  67  /**
  68   * MockDatabase: simulates better-sqlite3 API.
  69   *
  70   * The constructor receives the dbPath string. prepare().all()/.get()/.run()
  71   * are intercepted to return controlled test data.
  72   *
  73   * SQL DISPATCH STRATEGY:
  74   * - SELECT queries dispatch by unique SQL fragments
  75   * - UPDATE queries captured with full SQL + args for assertion
  76   */
  77  class MockDatabase {
  78    constructor(_path) {
  79      this._closed = false;
  80    }
  81  
  82    prepare(sql) {
  83      const trimmed = sql.trim();
  84  
  85      return {
  86        // ---- SELECT (all) ------------------------------------------------
  87        all: () => {
  88          // Main query: SELECT sites needing scoring
  89          if (trimmed.includes("status = 'assets_captured'") && trimmed.includes('score IS NULL')) {
  90            return mockSites;
  91          }
  92          // Grade distribution query
  93          if (trimmed.includes('grade IS NOT NULL') && trimmed.includes('GROUP BY grade')) {
  94            return mockGradeRows;
  95          }
  96          return [];
  97        },
  98  
  99        // ---- SELECT (get, single row) ------------------------------------
 100        get: _id => {
 101          // Site detail query inside scoreSite (has ssl_status and landing_page_url)
 102          if (trimmed.includes('screenshot_path') && trimmed.includes('ssl_status')) {
 103            return mockSiteDetails[_id] || null;
 104          }
 105          // Broken site: SELECT recapture_count (exact match, no retry_count)
 106          if (trimmed.includes('recapture_count') && !trimmed.includes('retry_count')) {
 107            return mockRecaptureData;
 108          }
 109          // recordFailure: SELECT retry_count
 110          if (trimmed.includes('retry_count') && !trimmed.includes('recapture_count')) {
 111            return { retry_count: 0 };
 112          }
 113          // programmatic scorer: SELECT keyword FROM sites WHERE id = ?
 114          if (trimmed.includes('keyword') && !trimmed.includes('country_code')) {
 115            return { keyword: 'plumber' };
 116          }
 117          // incrementLowScoring: SELECT keyword, country_code
 118          if (trimmed.includes('keyword') && trimmed.includes('country_code')) {
 119            return mockKeywordData;
 120          }
 121          // getScoringStats: aggregate stats query
 122          if (trimmed.includes('total_sites') && trimmed.includes('avg_score')) {
 123            return {
 124              total_sites: 10,
 125              scored_sites: 8,
 126              low_score_sites: 5,
 127              avg_score: 70.5,
 128              min_score: 40,
 129              max_score: 95,
 130            };
 131          }
 132          return null;
 133        },
 134  
 135        // ---- INSERT / UPDATE -------------------------------------------
 136        run: (...args) => {
 137          // Auto-promote UPDATE (has rescored + assets_captured in same SQL): count changes, don't track
 138          const isAutoPromote =
 139            trimmed.includes("status = 'semantic_scored'") &&
 140            trimmed.includes("status = 'assets_captured'");
 141          if (isAutoPromote) {
 142            return { changes: mockSites.length, lastInsertRowid: 0 };
 143          }
 144          if (trimmed.startsWith('UPDATE sites SET') || trimmed.startsWith('UPDATE sites\n')) {
 145            dbCalls.updates.push({ sql: trimmed, args });
 146          }
 147          return { changes: 1, lastInsertRowid: 0 };
 148        },
 149      };
 150    }
 151  
 152    pragma() {
 153      return undefined;
 154    }
 155  
 156    exec() {
 157      return undefined;
 158    }
 159  
 160    transaction(fn) {
 161      return (...args) => fn(...args);
 162    }
 163  
 164    close() {
 165      this._closed = true;
 166    }
 167  }
 168  
 169  // Mock better-sqlite3 (kept for any direct usage, though scoring.js now uses db.js)
 170  mock.module('better-sqlite3', {
 171    defaultExport: MockDatabase,
 172  });
 173  
 174  // ---- Mock db.js (PostgreSQL layer) — mirrors MockDatabase logic as async functions ----
 175  // scoring.js imports { run, getOne, getAll, query, withTransaction } from '../utils/db.js'
 176  mock.module('../../src/utils/db.js', {
 177    namedExports: {
 178      getPool: () => ({}),
 179      closePool: async () => {},
 180      createDatabaseConnection: () => ({}),
 181      closeDatabaseConnection: async () => {},
 182  
 183      getAll: async (sql) => {
 184        const trimmed = sql.trim();
 185        // Main query: SELECT sites needing scoring
 186        if (trimmed.includes("status = 'assets_captured'") && trimmed.includes('score IS NULL')) {
 187          return mockSites;
 188        }
 189        // Grade distribution query
 190        if (trimmed.includes('grade IS NOT NULL') && trimmed.includes('GROUP BY grade')) {
 191          return mockGradeRows;
 192        }
 193        return [];
 194      },
 195  
 196      getOne: async (sql, params) => {
 197        const trimmed = sql.trim();
 198        // Site detail query inside scoreSite (has ssl_status and landing_page_url)
 199        if (trimmed.includes('screenshot_path') && trimmed.includes('ssl_status')) {
 200          const siteId = params && params[0];
 201          return mockSiteDetails[siteId] || null;
 202        }
 203        // Broken site: SELECT recapture_count
 204        if (trimmed.includes('recapture_count') && !trimmed.includes('retry_count')) {
 205          return mockRecaptureData;
 206        }
 207        // recordFailure: SELECT retry_count
 208        if (trimmed.includes('retry_count') && !trimmed.includes('recapture_count')) {
 209          return { retry_count: 0 };
 210        }
 211        // programmatic scorer: SELECT keyword FROM sites WHERE id = ?
 212        if (trimmed.includes('keyword') && !trimmed.includes('country_code')) {
 213          return { keyword: 'plumber' };
 214        }
 215        // incrementLowScoring: SELECT keyword, country_code
 216        if (trimmed.includes('keyword') && trimmed.includes('country_code')) {
 217          return mockKeywordData;
 218        }
 219        // getScoringStats: aggregate stats query
 220        if (trimmed.includes('total_sites') && trimmed.includes('avg_score')) {
 221          return {
 222            total_sites: 10,
 223            scored_sites: 8,
 224            low_score_sites: 5,
 225            avg_score: 70.5,
 226            min_score: 40,
 227            max_score: 95,
 228          };
 229        }
 230        return null;
 231      },
 232  
 233      run: async (sql, args) => {
 234        // Normalize PG params to SQLite-style for assertion matching
 235        const trimmed = sql.trim().replace(/\$\d+/g, '?');
 236        // Auto-promote UPDATE
 237        const isAutoPromote =
 238          trimmed.includes("status = 'semantic_scored'") &&
 239          trimmed.includes("status = 'assets_captured'");
 240        if (isAutoPromote) {
 241          return { changes: mockSites.length, lastInsertRowid: 0 };
 242        }
 243        if (trimmed.startsWith('UPDATE sites SET') || trimmed.startsWith('UPDATE sites\n')) {
 244          dbCalls.updates.push({ sql: trimmed, args: args || [] });
 245        }
 246        return { changes: 1, lastInsertRowid: 0 };
 247      },
 248  
 249      query: async (sql, args) => {
 250        const trimmed = sql.trim().replace(/\$\d+/g, '?');
 251        if (trimmed.startsWith('UPDATE sites SET') || trimmed.startsWith('UPDATE sites\n')) {
 252          dbCalls.updates.push({ sql: trimmed, args: args || [] });
 253          return { rows: [], rowCount: 1 };
 254        }
 255        return { rows: [], rowCount: 0 };
 256      },
 257  
 258      withTransaction: async (fn) => {
 259        const fakeClient = {
 260          query: async (sql, args) => {
 261            const trimmed = sql.trim().replace(/\$\d+/g, '?');
 262            if (trimmed.startsWith('UPDATE sites SET') || trimmed.startsWith('UPDATE sites\n')) {
 263              dbCalls.updates.push({ sql: trimmed, args: args || [] });
 264              return { rows: [], rowCount: 1 };
 265            }
 266            // Grade distribution
 267            if (trimmed.includes('grade IS NOT NULL') && trimmed.includes('GROUP BY grade')) {
 268              return { rows: mockGradeRows, rowCount: mockGradeRows.length };
 269            }
 270            // Keyword lookup inside transaction — return mockKeywordData only if non-null
 271            if (trimmed.includes('keyword') && trimmed.includes('country_code') && trimmed.includes('SELECT')) {
 272              if (mockKeywordData) {
 273                return { rows: [mockKeywordData], rowCount: 1 };
 274              }
 275              return { rows: [], rowCount: 0 };
 276            }
 277            return { rows: [], rowCount: 0 };
 278          },
 279        };
 280        return fn(fakeClient);
 281      },
 282    },
 283  });
 284  
 285  // Shared scoreWebsite mock — default returns a low-score result
 286  const mockScoreWebsite = mock.fn(async () => ({
 287    overall_calculation: {
 288      letter_grade: 'D',
 289      conversion_score: 65,
 290      is_business_directory: false,
 291      is_local_business: true,
 292      is_error_page: false,
 293      is_broken_site: false,
 294      city: 'Sydney',
 295      country_code: 'AU',
 296      state: 'NSW',
 297    },
 298  }));
 299  
 300  mock.module('../../src/score.js', {
 301    namedExports: {
 302      scoreWebsite: mockScoreWebsite,
 303      computeGrade: score => {
 304        if (score >= 97) return 'A+';
 305        if (score >= 90) return 'A';
 306        if (score >= 87) return 'B+';
 307        if (score >= 83) return 'B';
 308        if (score >= 80) return 'B-';
 309        if (score >= 60) return 'C';
 310        return 'F';
 311      },
 312      computeScoreFromFactors: mock.fn(() => 75),
 313      FACTOR_WEIGHTS: {
 314        headline: 0.15,
 315        value_proposition: 0.15,
 316        social_proof: 0.12,
 317        cta: 0.12,
 318        mobile: 0.1,
 319        page_speed: 0.1,
 320        trust: 0.1,
 321        contact: 0.08,
 322        above_fold: 0.08,
 323      },
 324    },
 325  });
 326  
 327  // Mock programmatic-scorer — returns a low-score result by default
 328  const mockScoreWebsiteProgrammatically = mock.fn(() => ({
 329    conversion_score: 60,
 330    letter_grade: 'D',
 331    is_error_page: false,
 332    is_broken_site: false,
 333    is_business_directory: false,
 334    is_local_business: true,
 335    is_law_firm: false,
 336    industry_classification: '',
 337    country_code: 'AU',
 338    city: null,
 339    state: null,
 340    factor_scores: {},
 341    contacts: null,
 342  }));
 343  mock.module('../../src/utils/programmatic-scorer.js', {
 344    namedExports: {
 345      scoreWebsiteProgrammatically: mockScoreWebsiteProgrammatically,
 346    },
 347  });
 348  
 349  // Mock loadScreenshot
 350  const mockLoadScreenshot = mock.fn(async () => null);
 351  mock.module('../../src/utils/screenshot-storage.js', {
 352    namedExports: {
 353      loadScreenshot: mockLoadScreenshot,
 354    },
 355  });
 356  
 357  // Mock site-filters — default: not blocked
 358  const mockCheckBlocklist = mock.fn(() => null);
 359  const mockClassifyIndustry = mock.fn(() => null);
 360  mock.module('../../src/utils/site-filters.js', {
 361    namedExports: {
 362      checkBlocklist: mockCheckBlocklist,
 363      classifyIndustry: mockClassifyIndustry,
 364    },
 365  });
 366  
 367  // Mock keyword-counters
 368  const mockIncrementLowScoring = mock.fn();
 369  mock.module('../../src/utils/keyword-counters.js', {
 370    namedExports: {
 371      incrementLowScoring: mockIncrementLowScoring,
 372      incrementAssetsScraped: mock.fn(),
 373    },
 374  });
 375  
 376  // Mock retry-handler
 377  const mockRecordFailure = mock.fn();
 378  const mockResetRetries = mock.fn();
 379  mock.module('../../src/utils/retry-handler.js', {
 380    namedExports: {
 381      recordFailure: mockRecordFailure,
 382      resetRetries: mockResetRetries,
 383    },
 384  });
 385  
 386  // Mock summary-generator
 387  mock.module('../../src/utils/summary-generator.js', {
 388    namedExports: {
 389      generateStageCompletion: mock.fn(),
 390      displayProgress: mock.fn(),
 391    },
 392  });
 393  
 394  // Mock error-handler — provide a real processBatch implementation so the stage works end-to-end
 395  const mockProcessBatch = mock.fn(async (items, processor, _opts) => {
 396    const results = [];
 397    const errors = [];
 398    for (let i = 0; i < items.length; i++) {
 399      try {
 400        const result = await processor(items[i], i);
 401        results.push(result);
 402      } catch (err) {
 403        errors.push(err);
 404      }
 405    }
 406    return { results, errors };
 407  });
 408  mock.module('../../src/utils/error-handler.js', {
 409    namedExports: {
 410      processBatch: mockProcessBatch,
 411      retryWithBackoff: mock.fn(async fn => fn()),
 412      isRetryableError: mock.fn(() => false),
 413      sleep: mock.fn(async () => {}),
 414      safeJsonParse: mock.fn(str => JSON.parse(str)),
 415      extractDomain: mock.fn(url => new URL(url).hostname),
 416    },
 417  });
 418  
 419  // Mock logger — silent
 420  class MockLogger {
 421    info() {}
 422    success() {}
 423    error() {}
 424    warn() {}
 425    debug() {}
 426  }
 427  mock.module('../../src/utils/logger.js', {
 428    defaultExport: MockLogger,
 429  });
 430  
 431  // Mock dotenv
 432  mock.module('dotenv', {
 433    namedExports: { config: mock.fn() },
 434    defaultExport: { config: mock.fn() },
 435  });
 436  
 437  // html-storage: readHtmlDom reads from filesystem — mock to return controlled HTML
 438  let mockHtmlDom = '<html></html>';
 439  mock.module('../../src/utils/html-storage.js', {
 440    namedExports: {
 441      readHtmlDom: mock.fn(() => mockHtmlDom),
 442      writeHtmlDom: mock.fn(),
 443      hasHtmlDom: mock.fn(() => true),
 444      deleteHtmlDom: mock.fn(),
 445      writeKeyPagesHtml: mock.fn(),
 446      readKeyPagesHtml: mock.fn(() => null),
 447      deleteKeyPagesHtml: mock.fn(),
 448      deleteAllHtml: mock.fn(),
 449      DATA_DIR: '/tmp/test-html-data',
 450    },
 451  });
 452  
 453  // score-storage and contacts-storage: write to filesystem — mock to no-op
 454  mock.module('../../src/utils/score-storage.js', {
 455    namedExports: {
 456      setScoreJson: mock.fn(),
 457      getScoreJson: mock.fn(() => null),
 458    },
 459  });
 460  
 461  mock.module('../../src/utils/contacts-storage.js', {
 462    namedExports: {
 463      setContactsJson: mock.fn(),
 464      getContactsJson: mock.fn(() => null),
 465    },
 466  });
 467  
 468  // ============================================================================
 469  // IMPORTS — must come AFTER all mock.module() calls
 470  // ============================================================================
 471  
 472  const { runScoringStage, getScoringStats } = await import('../../src/stages/scoring.js');
 473  const { setContactsJson: mockSetContactsJson } = await import('../../src/utils/contacts-storage.js');
 474  
 475  // ============================================================================
 476  // HELPERS
 477  // ============================================================================
 478  
 479  /** Build a minimal site row returned by the initial DB query. */
 480  function makeSiteRow(overrides = {}) {
 481    return {
 482      id: 1,
 483      domain: 'example.com',
 484      url: 'https://example.com',
 485      country_code: 'AU',
 486      ...overrides,
 487    };
 488  }
 489  
 490  /** Build the detailed site record returned by scoreSite's inner SELECT. */
 491  function makeSiteDetail(overrides = {}) {
 492    return {
 493      id: 1,
 494      url: 'https://example.com',
 495      screenshot_path: null,
 496      html_dom: '<html><body>Example</body></html>',
 497      ssl_status: null,
 498      http_headers: null,
 499      locale_data: null,
 500      ...overrides,
 501    };
 502  }
 503  
 504  /**
 505   * Find a DB update whose SQL contains a specific status literal
 506   * (e.g. "status = 'ignored'").
 507   * These statuses are embedded in the SQL string, NOT passed as parameters.
 508   */
 509  function findUpdateWithStatus(status) {
 510    return dbCalls.updates.find(u => u.sql.includes(`status = '${status}'`));
 511  }
 512  
 513  /**
 514   * Find the main "success" update for a site (the one that writes grade/score/status/city).
 515   * The success UPDATE has: grade=?, score=?, city=?, country_code=?, state=?, status=?, WHERE id=?
 516   * args indices: [0]=grade, [1]=score, [2]=city, [3]=country_code, [4]=state, [5]=status, [6]=id
 517   * Note: score_json/contacts_json blobs written to filesystem (setScoreJson/setContactsJson).
 518   */
 519  function findMainSuccessUpdate() {
 520    return dbCalls.updates.find(
 521      u => u.sql.includes('grade = ?') && u.sql.includes('score = ?') && u.sql.includes('scored_at')
 522    );
 523  }
 524  
 525  /** Reset all shared mutable state between tests. */
 526  function resetState() {
 527    mockSites = [];
 528    mockSiteDetails = {};
 529    mockGradeRows = [];
 530    mockRecaptureData = { recapture_count: 0 };
 531    mockKeywordData = null;
 532    dbCalls.updates = [];
 533  
 534    mockScoreWebsite.mock.resetCalls();
 535    mockScoreWebsiteProgrammatically.mock.resetCalls();
 536    mockLoadScreenshot.mock.resetCalls();
 537    mockCheckBlocklist.mock.resetCalls();
 538    mockIncrementLowScoring.mock.resetCalls();
 539    mockRecordFailure.mock.resetCalls();
 540    mockResetRetries.mock.resetCalls();
 541    mockProcessBatch.mock.resetCalls();
 542    mockSetContactsJson.mock.resetCalls();
 543  }
 544  
 545  /** Set env to a known baseline state before each test. */
 546  function resetEnv() {
 547    process.env.NODE_ENV = 'production'; // allow directory/local-business checks
 548    process.env.ENABLE_VISION = 'true';
 549    process.env.ENABLE_LLM_SCORING = 'true';
 550    delete process.env.LOW_SCORE_CUTOFF;
 551    delete process.env.SCORING_CONCURRENCY;
 552    // Remove split-DB paths so createDatabaseConnection doesn't validate non-existent files
 553    // (the NODE_ENV != 'test' guard in db.js would otherwise throw)
 554    delete process.env.OPS_DB_PATH;
 555    delete process.env.TEL_DB_PATH;
 556  }
 557  
 558  // ============================================================================
 559  // TESTS
 560  // ============================================================================
 561  
 562  // ---------------------------------------------------------------------------
 563  describe('runScoringStage — no sites', () => {
 564    beforeEach(() => {
 565      resetState();
 566      resetEnv();
 567    });
 568  
 569    test('returns early with 0 counts when no sites need scoring', async () => {
 570      mockSites = []; // DB returns empty list
 571  
 572      const result = await runScoringStage({ limit: 10 });
 573  
 574      assert.strictEqual(result.processed, 0);
 575      assert.strictEqual(result.succeeded, 0);
 576      assert.strictEqual(result.failed, 0);
 577      assert.strictEqual(result.skipped, 0);
 578      assert.ok(typeof result.duration === 'number');
 579  
 580      // scoreWebsite should never be called
 581      assert.strictEqual(mockScoreWebsite.mock.callCount(), 0);
 582    });
 583  });
 584  
 585  // ---------------------------------------------------------------------------
 586  describe('runScoringStage — blocklist filtering', () => {
 587    beforeEach(() => {
 588      resetState();
 589      resetEnv();
 590    });
 591  
 592    test('marks blocked sites as ignore via checkBlocklist', async () => {
 593      const site = makeSiteRow({ id: 1, domain: 'yelp.com' });
 594      mockSites = [site];
 595      mockSiteDetails[1] = makeSiteDetail({ id: 1, url: 'https://yelp.com' });
 596  
 597      // checkBlocklist returns a block reason
 598      mockCheckBlocklist.mock.mockImplementation(() => ({
 599        reason: 'Business directory: yelp.com',
 600      }));
 601  
 602      // scoreWebsite succeeds (the stage still processes the site through processBatch)
 603      mockScoreWebsite.mock.mockImplementation(async () => ({
 604        overall_calculation: {
 605          letter_grade: 'D-',
 606          conversion_score: 60,
 607          is_business_directory: false,
 608          is_local_business: true,
 609          is_error_page: false,
 610          is_broken_site: false,
 611        },
 612      }));
 613  
 614      await runScoringStage({ limit: 1 });
 615  
 616      // checkBlocklist called once for the site
 617      assert.strictEqual(mockCheckBlocklist.mock.callCount(), 1);
 618  
 619      // The blocklist UPDATE embeds status='ignore' directly in SQL:
 620      // "UPDATE sites SET status = 'ignore', error_message = ? WHERE id = ?"
 621      const ignoreUpdate = dbCalls.updates.find(
 622        u =>
 623          u.sql.includes("status = 'ignored'") &&
 624          u.args.some(a => typeof a === 'string' && a.includes('Business directory'))
 625      );
 626      assert.ok(ignoreUpdate, 'Expected an UPDATE with status=ignore from blocklist');
 627    });
 628  
 629    test('does not mark non-blocked sites as ignore via checkBlocklist', async () => {
 630      const site = makeSiteRow();
 631      mockSites = [site];
 632      mockSiteDetails[1] = makeSiteDetail();
 633  
 634      mockCheckBlocklist.mock.mockImplementation(() => null); // not blocked
 635  
 636      mockScoreWebsite.mock.mockImplementation(async () => ({
 637        overall_calculation: {
 638          letter_grade: 'D-',
 639          conversion_score: 60,
 640          is_business_directory: false,
 641          is_local_business: true,
 642          is_error_page: false,
 643          is_broken_site: false,
 644        },
 645      }));
 646  
 647      await runScoringStage({ limit: 1 });
 648  
 649      // No blocklist-style ignore update should exist
 650      const blocklistIgnore = dbCalls.updates.find(
 651        u =>
 652          u.sql.includes("status = 'ignored'") &&
 653          u.args.some(a => typeof a === 'string' && a.includes('Business directory'))
 654      );
 655      assert.ok(!blocklistIgnore, 'Should NOT have a blocklist ignore update for non-blocked site');
 656    });
 657  });
 658  
 659  // ---------------------------------------------------------------------------
 660  describe('runScoringStage — happy path scoring', () => {
 661    beforeEach(() => {
 662      resetState();
 663      resetEnv();
 664    });
 665  
 666    test('scores site, saves grade/score to DB, sets status to scored (low score)', async () => {
 667      const site = makeSiteRow();
 668      mockSites = [site];
 669      mockSiteDetails[1] = makeSiteDetail();
 670      mockCheckBlocklist.mock.mockImplementation(() => null);
 671  
 672      mockScoreWebsite.mock.mockImplementation(async () => ({
 673        overall_calculation: {
 674          letter_grade: 'D',
 675          conversion_score: 65,
 676          is_business_directory: false,
 677          is_local_business: true,
 678          is_error_page: false,
 679          is_broken_site: false,
 680          city: 'Melbourne',
 681          country_code: 'AU',
 682          state: 'VIC',
 683        },
 684      }));
 685  
 686      await runScoringStage({ limit: 1 });
 687  
 688      assert.strictEqual(mockScoreWebsite.mock.callCount(), 1);
 689  
 690      // The main success UPDATE writes grade, score, city, country_code, state, status
 691      const successUpdate = findMainSuccessUpdate();
 692      assert.ok(successUpdate, 'Expected a success UPDATE with grade/score fields');
 693  
 694      // args: [grade, score, city, countryCode, state, contactsJson, FS_SENTINEL, status, id]
 695      // scoreJson removed — now written to filesystem via setScoreJson()
 696      const { args } = successUpdate;
 697      assert.strictEqual(args[0], 'D'); // grade
 698      assert.strictEqual(args[1], 65); // score
 699      assert.strictEqual(args[2], 'Melbourne'); // city
 700      assert.strictEqual(args[3], 'AU'); // country_code
 701      assert.strictEqual(args[4], 'VIC'); // state
 702      assert.strictEqual(args[5], 'prog_scored'); // status
 703  
 704      // resetRetries should be called on success
 705      assert.strictEqual(mockResetRetries.mock.callCount(), 1);
 706    });
 707  
 708    test('calls scoreWebsite with correct site data including html and url', async () => {
 709      mockHtmlDom = '<html>test</html>';
 710      const site = makeSiteRow({ id: 1, url: 'https://example.com' });
 711      mockSites = [site];
 712      mockSiteDetails[1] = makeSiteDetail({
 713        screenshot_path: null,
 714      });
 715      mockCheckBlocklist.mock.mockImplementation(() => null);
 716  
 717      mockScoreWebsite.mock.mockImplementation(async () => ({
 718        overall_calculation: {
 719          letter_grade: 'C+',
 720          conversion_score: 78,
 721          is_business_directory: false,
 722          is_local_business: true,
 723          is_error_page: false,
 724          is_broken_site: false,
 725        },
 726      }));
 727  
 728      await runScoringStage({ limit: 1 });
 729  
 730      const callArgs = mockScoreWebsite.mock.calls[0].arguments;
 731      const siteData = callArgs[0];
 732  
 733      assert.strictEqual(siteData.html, '<html>test</html>');
 734      assert.strictEqual(siteData.url, 'https://example.com');
 735      assert.ok('screenshots' in siteData, 'siteData should have screenshots property');
 736    });
 737  
 738    test('loads screenshots from disk when screenshot_path is set', async () => {
 739      const site = makeSiteRow();
 740      mockSites = [site];
 741      mockSiteDetails[1] = makeSiteDetail({ screenshot_path: '/screenshots/1' });
 742      mockCheckBlocklist.mock.mockImplementation(() => null);
 743  
 744      const fakeScreenshot = Buffer.from('fake-image');
 745      mockLoadScreenshot.mock.mockImplementation(async () => fakeScreenshot);
 746  
 747      mockScoreWebsite.mock.mockImplementation(async () => ({
 748        overall_calculation: {
 749          letter_grade: 'C-',
 750          conversion_score: 70,
 751          is_business_directory: false,
 752          is_local_business: true,
 753          is_error_page: false,
 754          is_broken_site: false,
 755        },
 756      }));
 757  
 758      await runScoringStage({ limit: 1 });
 759  
 760      // loadScreenshot called twice: desktop_above + mobile_above
 761      assert.strictEqual(mockLoadScreenshot.mock.callCount(), 2);
 762    });
 763  
 764    test('does not load screenshots when screenshot_path is null', async () => {
 765      const site = makeSiteRow();
 766      mockSites = [site];
 767      mockSiteDetails[1] = makeSiteDetail({ screenshot_path: null });
 768      mockCheckBlocklist.mock.mockImplementation(() => null);
 769  
 770      mockScoreWebsite.mock.mockImplementation(async () => ({
 771        overall_calculation: {
 772          letter_grade: 'D-',
 773          conversion_score: 60,
 774          is_business_directory: false,
 775          is_local_business: true,
 776          is_error_page: false,
 777          is_broken_site: false,
 778        },
 779      }));
 780  
 781      await runScoringStage({ limit: 1 });
 782  
 783      assert.strictEqual(mockLoadScreenshot.mock.callCount(), 0);
 784    });
 785  });
 786  
 787  // ---------------------------------------------------------------------------
 788  describe('runScoringStage — score threshold / HIGH_SCORE', () => {
 789    beforeEach(() => {
 790      resetState();
 791      resetEnv();
 792    });
 793  
 794    test('sets status to high_score when score > 82 (default threshold)', async () => {
 795      const site = makeSiteRow();
 796      mockSites = [site];
 797      mockSiteDetails[1] = makeSiteDetail();
 798      mockCheckBlocklist.mock.mockImplementation(() => null);
 799  
 800      mockScoreWebsite.mock.mockImplementation(async () => ({
 801        overall_calculation: {
 802          letter_grade: 'A-',
 803          conversion_score: 90,
 804          is_business_directory: false,
 805          is_local_business: true,
 806          is_error_page: false,
 807          is_broken_site: false,
 808        },
 809      }));
 810  
 811      await runScoringStage({ limit: 1 });
 812  
 813      const successUpdate = findMainSuccessUpdate();
 814      assert.ok(successUpdate, 'Expected a success UPDATE');
 815      assert.strictEqual(successUpdate.args[5], 'high_score', 'status should be high_score');
 816    });
 817  
 818    test('sets status to scored when score is exactly 82 (at threshold, not above)', async () => {
 819      const site = makeSiteRow();
 820      mockSites = [site];
 821      mockSiteDetails[1] = makeSiteDetail();
 822      mockCheckBlocklist.mock.mockImplementation(() => null);
 823  
 824      mockScoreWebsite.mock.mockImplementation(async () => ({
 825        overall_calculation: {
 826          letter_grade: 'B-',
 827          conversion_score: 82,
 828          is_business_directory: false,
 829          is_local_business: true,
 830          is_error_page: false,
 831          is_broken_site: false,
 832        },
 833      }));
 834  
 835      await runScoringStage({ limit: 1 });
 836  
 837      // 82 is NOT > 82 so it should be 'scored'
 838      const successUpdate = findMainSuccessUpdate();
 839      assert.ok(successUpdate, 'Expected a success UPDATE');
 840      assert.strictEqual(
 841        successUpdate.args[5],
 842        'prog_scored',
 843        'status should be scored for score=82'
 844      );
 845    });
 846  
 847    test('sets status to scored when score is 1 (well below threshold)', async () => {
 848      const site = makeSiteRow();
 849      mockSites = [site];
 850      mockSiteDetails[1] = makeSiteDetail();
 851      mockCheckBlocklist.mock.mockImplementation(() => null);
 852  
 853      mockScoreWebsite.mock.mockImplementation(async () => ({
 854        overall_calculation: {
 855          letter_grade: 'F',
 856          conversion_score: 1,
 857          is_business_directory: false,
 858          is_local_business: true,
 859          is_error_page: false,
 860          is_broken_site: false,
 861        },
 862      }));
 863  
 864      await runScoringStage({ limit: 1 });
 865  
 866      const successUpdate = findMainSuccessUpdate();
 867      assert.ok(successUpdate, 'Expected a success UPDATE');
 868      assert.strictEqual(
 869        successUpdate.args[5],
 870        'prog_scored',
 871        'status should be prog_scored for very low score'
 872      );
 873    });
 874  
 875    test('respects LOW_SCORE_CUTOFF=90: score 88 is still scored (88 is NOT > 90)', async () => {
 876      process.env.LOW_SCORE_CUTOFF = '90';
 877  
 878      const site = makeSiteRow();
 879      mockSites = [site];
 880      mockSiteDetails[1] = makeSiteDetail();
 881      mockCheckBlocklist.mock.mockImplementation(() => null);
 882  
 883      mockScoreWebsite.mock.mockImplementation(async () => ({
 884        overall_calculation: {
 885          letter_grade: 'B+',
 886          conversion_score: 88,
 887          is_business_directory: false,
 888          is_local_business: true,
 889          is_error_page: false,
 890          is_broken_site: false,
 891        },
 892      }));
 893  
 894      await runScoringStage({ limit: 1 });
 895  
 896      const successUpdate = findMainSuccessUpdate();
 897      assert.ok(successUpdate, 'Expected a success UPDATE');
 898      assert.strictEqual(
 899        successUpdate.args[5],
 900        'prog_scored',
 901        'score 88 should be scored when cutoff=90'
 902      );
 903    });
 904  
 905    test('respects LOW_SCORE_CUTOFF=70: score 75 becomes high_score (75 > 70)', async () => {
 906      process.env.LOW_SCORE_CUTOFF = '70';
 907  
 908      const site = makeSiteRow();
 909      mockSites = [site];
 910      mockSiteDetails[1] = makeSiteDetail();
 911      mockCheckBlocklist.mock.mockImplementation(() => null);
 912  
 913      mockScoreWebsite.mock.mockImplementation(async () => ({
 914        overall_calculation: {
 915          letter_grade: 'C',
 916          conversion_score: 75,
 917          is_business_directory: false,
 918          is_local_business: true,
 919          is_error_page: false,
 920          is_broken_site: false,
 921        },
 922      }));
 923  
 924      await runScoringStage({ limit: 1 });
 925  
 926      const successUpdate = findMainSuccessUpdate();
 927      assert.ok(successUpdate, 'Expected a success UPDATE');
 928      assert.strictEqual(
 929        successUpdate.args[5],
 930        'high_score',
 931        'score 75 should be high_score when cutoff=70'
 932      );
 933    });
 934  });
 935  
 936  // ---------------------------------------------------------------------------
 937  describe('runScoringStage — ENABLE_VISION=false (HTML-only mode)', () => {
 938    beforeEach(() => {
 939      resetState();
 940      resetEnv();
 941      process.env.ENABLE_VISION = 'false';
 942      process.env.ENABLE_LLM_SCORING = 'false'; // prevent early orchestrator-mode return
 943    });
 944  
 945    test('sets status to rescored instead of scored in HTML-only mode (low score)', async () => {
 946      const site = makeSiteRow();
 947      mockSites = [site];
 948      mockSiteDetails[1] = makeSiteDetail();
 949      mockCheckBlocklist.mock.mockImplementation(() => null);
 950  
 951      mockScoreWebsite.mock.mockImplementation(async () => ({
 952        overall_calculation: {
 953          letter_grade: 'D-',
 954          conversion_score: 60,
 955          is_business_directory: false,
 956          is_local_business: true,
 957          is_error_page: false,
 958          is_broken_site: false,
 959        },
 960        contact_details: [{ email: 'owner@example.com' }],
 961      }));
 962  
 963      await runScoringStage({ limit: 1 });
 964  
 965      // Status must be 'rescored' in HTML-only mode (args[5])
 966      const successUpdate = findMainSuccessUpdate();
 967      assert.ok(successUpdate, 'Expected a success UPDATE');
 968      assert.strictEqual(
 969        successUpdate.args[5],
 970        'semantic_scored',
 971        'status should be semantic_scored in HTML-only mode'
 972      );
 973    });
 974  
 975    test('saves contacts_json (non-null) when ENABLE_VISION=false and contact_details present', async () => {
 976      const site = makeSiteRow();
 977      mockSites = [site];
 978      mockSiteDetails[1] = makeSiteDetail();
 979      mockCheckBlocklist.mock.mockImplementation(() => null);
 980  
 981      // In programmatic mode (ENABLE_LLM_SCORING=false), contacts come from the programmatic scorer
 982      mockScoreWebsiteProgrammatically.mock.mockImplementation(() => ({
 983        conversion_score: 40,
 984        letter_grade: 'F',
 985        is_error_page: false,
 986        is_broken_site: false,
 987        is_business_directory: false,
 988        is_local_business: true,
 989        is_law_firm: false,
 990        industry_classification: '',
 991        country_code: 'AU',
 992        city: null,
 993        state: null,
 994        factor_scores: {},
 995        contacts: {
 996          email_addresses: [{ email: 'contact@example.com', source: 'footer' }],
 997          phone_numbers: [],
 998          social_profiles: [],
 999          has_contact_form: false,
1000          key_pages: [],
1001        },
1002      }));
1003  
1004      await runScoringStage({ limit: 1 });
1005  
1006      // contacts written to filesystem via setContactsJson, not stored in DB
1007      assert.equal(mockSetContactsJson.mock.callCount(), 1, 'setContactsJson should be called once');
1008      const [calledSiteId, calledContacts] = mockSetContactsJson.mock.calls[0].arguments;
1009      assert.equal(calledSiteId, 1, 'setContactsJson siteId should match');
1010      assert.ok(calledContacts, 'contacts arg should be truthy');
1011      const parsedContacts = typeof calledContacts === 'string' ? JSON.parse(calledContacts) : calledContacts;
1012      assert.ok(parsedContacts && typeof parsedContacts === 'object', 'contacts arg should be a JSON object');
1013    });
1014  
1015    test('does not save contacts_json when ENABLE_VISION=true (contacts_json=null)', async () => {
1016      process.env.ENABLE_VISION = 'true'; // Override the beforeEach
1017  
1018      const site = makeSiteRow();
1019      mockSites = [site];
1020      mockSiteDetails[1] = makeSiteDetail();
1021      mockCheckBlocklist.mock.mockImplementation(() => null);
1022  
1023      mockScoreWebsite.mock.mockImplementation(async () => ({
1024        overall_calculation: {
1025          letter_grade: 'D',
1026          conversion_score: 65,
1027          is_business_directory: false,
1028          is_local_business: true,
1029          is_error_page: false,
1030          is_broken_site: false,
1031        },
1032        contact_details: [{ email: 'owner@example.com' }],
1033      }));
1034  
1035      await runScoringStage({ limit: 1 });
1036  
1037      // when vision is enabled, setContactsJson should NOT be called
1038      assert.equal(
1039        mockSetContactsJson.mock.callCount(),
1040        0,
1041        'setContactsJson should not be called when ENABLE_VISION=true'
1042      );
1043    });
1044  
1045    test('still sets high_score status in HTML-only mode when score > threshold', async () => {
1046      const site = makeSiteRow();
1047      mockSites = [site];
1048      mockSiteDetails[1] = makeSiteDetail();
1049      mockCheckBlocklist.mock.mockImplementation(() => null);
1050  
1051      // In programmatic mode (ENABLE_LLM_SCORING=false from beforeEach), use programmatic mock
1052      mockScoreWebsiteProgrammatically.mock.mockImplementation(() => ({
1053        conversion_score: 98,
1054        letter_grade: 'A+',
1055        is_error_page: false,
1056        is_broken_site: false,
1057        is_business_directory: false,
1058        is_local_business: true,
1059        is_law_firm: false,
1060        industry_classification: '',
1061        country_code: 'AU',
1062        city: null,
1063        state: null,
1064        factor_scores: {},
1065        contacts: null,
1066      }));
1067  
1068      await runScoringStage({ limit: 1 });
1069  
1070      // high_score takes priority over HTML-only 'rescored' path
1071      const successUpdate = findMainSuccessUpdate();
1072      assert.ok(successUpdate, 'Expected a success UPDATE');
1073      assert.strictEqual(
1074        successUpdate.args[5],
1075        'high_score',
1076        'high_score takes priority even in HTML-only mode'
1077      );
1078    });
1079  });
1080  
1081  // ---------------------------------------------------------------------------
describe('runScoringStage — business directory detection', () => {
  beforeEach(() => {
    resetState();
    resetEnv();
    process.env.NODE_ENV = 'production'; // enable directory/local checks
  });

  /**
   * Scores one non-blocklisted site whose mocked LLM result carries the given
   * grade, score and directory/local-business classification flags.
   */
  async function runWithClassification({ grade, score, isDirectory, isLocal }) {
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockScoreWebsite.mock.mockImplementation(async () => ({
      overall_calculation: {
        letter_grade: grade,
        conversion_score: score,
        is_business_directory: isDirectory,
        is_local_business: isLocal,
        is_error_page: false,
        is_broken_site: false,
      },
    }));

    await runScoringStage({ limit: 1 });
  }

  test('marks site as ignored when LLM detects business directory (is_business_directory=true)', async () => {
    await runWithClassification({ grade: 'F', score: 30, isDirectory: true, isLocal: true });

    // The directory UPDATE sets status='ignored'; args[0] carries the error message.
    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected an UPDATE setting status=ignored');
    assert.ok(
      ignoreUpdate.args[0].includes('Business directory'),
      'Error message should mention business directory'
    );
  });

  test('marks site as ignored when LLM detects non-local business (is_local_business=false)', async () => {
    await runWithClassification({ grade: 'B-', score: 80, isDirectory: false, isLocal: false });

    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected an UPDATE setting status=ignored for non-local business');
    assert.ok(
      ignoreUpdate.args[0].toLowerCase().includes('local'),
      'Error message should mention local business'
    );
  });

  test('does NOT mark site as ignored in test mode (NODE_ENV=test) for directory detection', async () => {
    process.env.NODE_ENV = 'test';

    await runWithClassification({ grade: 'F', score: 30, isDirectory: true, isLocal: false });

    // In test mode the directory/non-local checks are skipped — should score normally
    const successUpdate = findMainSuccessUpdate();
    assert.ok(successUpdate, 'Expected a normal success UPDATE in test mode');

    // No 'ignored' UPDATE whose string args mention directory/local detection
    const ignoreUpdate = dbCalls.updates.find(
      u =>
        u.sql.includes("status = 'ignored'") &&
        u.args.some(a => typeof a === 'string' && (a.includes('directory') || a.includes('local')))
    );
    assert.ok(!ignoreUpdate, 'Should NOT have a directory/local ignored UPDATE in test mode');
  });
});
1179  
1180  // ---------------------------------------------------------------------------
describe('runScoringStage — error page detection', () => {
  beforeEach(() => {
    resetState();
    resetEnv();
  });

  /**
   * Scores one non-blocklisted site whose mocked LLM result flags an error page
   * with the given error_type and error_description (grade and score are null).
   */
  async function runWithErrorPage(errorType, errorDescription) {
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockScoreWebsite.mock.mockImplementation(async () => ({
      overall_calculation: {
        letter_grade: null,
        conversion_score: null,
        is_business_directory: false,
        is_local_business: true,
        is_error_page: true,
        error_type: errorType,
        error_description: errorDescription,
        is_broken_site: false,
      },
    }));

    await runScoringStage({ limit: 1 });
  }

  test('marks site as ignored when is_error_page=true and errorType=404 (permanent)', async () => {
    await runWithErrorPage('404', 'Page not found');

    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected UPDATE setting status=ignored for 404 error');
    assert.ok(
      ignoreUpdate.args[0].includes('Page not found'),
      'Error message should come from error_description'
    );
  });

  test('marks site as ignored when errorType=403 (permanent)', async () => {
    await runWithErrorPage('403', 'Access forbidden');

    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected UPDATE setting status=ignored for 403 error');
    assert.ok(ignoreUpdate.args[0].includes('Access forbidden'));
  });

  test('marks site as ignored when errorType=410 (permanent), uses fallback message', async () => {
    // No description supplied: the stage is expected to substitute a fallback message.
    await runWithErrorPage('410', null);

    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected UPDATE setting status=ignored for 410 error');
    // The fallback is accepted if it names the code or calls the error permanent.
    assert.ok(
      ignoreUpdate.args[0].includes('410') || ignoreUpdate.args[0].includes('Permanent'),
      'Error message should reference 410 or Permanent error'
    );
  });

  test('keeps status=assets_captured for temporary 5xx error page', async () => {
    await runWithErrorPage('5xx', 'Server error');

    // Temporary error: UPDATE sets status = 'assets_captured' in SQL
    const temporaryUpdate = findUpdateWithStatus('assets_captured');
    assert.ok(temporaryUpdate, 'Expected UPDATE setting status=assets_captured for 5xx');
    assert.ok(
      temporaryUpdate.args[0].includes('Server error'),
      'Error description should be saved'
    );

    // Must NOT have an 'ignored' UPDATE
    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(!ignoreUpdate, 'Should NOT set status=ignored for temporary 5xx error');
  });

  test('keeps status=assets_captured for maintenance error page', async () => {
    await runWithErrorPage('maintenance', 'Under maintenance');

    const temporaryUpdate = findUpdateWithStatus('assets_captured');
    assert.ok(
      temporaryUpdate,
      'Expected UPDATE setting status=assets_captured for maintenance page'
    );
  });
});
1334  
1335  // ---------------------------------------------------------------------------
describe('runScoringStage — broken site detection', () => {
  beforeEach(() => {
    resetState();
    resetEnv();
  });

  /**
   * Scores one non-blocklisted site that the mocked LLM result flags as broken.
   *
   * @param {number} previousCount - value exposed as mockRecaptureData.recapture_count
   * @param {string[]} details - value returned as broken_site_details
   */
  async function runBrokenSite(previousCount, details) {
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockRecaptureData = { recapture_count: previousCount };
    mockScoreWebsite.mock.mockImplementation(async () => ({
      overall_calculation: {
        letter_grade: null,
        conversion_score: null,
        is_business_directory: false,
        is_local_business: true,
        is_error_page: false,
        is_broken_site: true,
        broken_site_details: details,
      },
    }));

    await runScoringStage({ limit: 1 });
  }

  /**
   * Finds the recapture UPDATE: the recorded UPDATE whose SQL contains both
   * "status = 'assets_captured'" and 'recapture_count'.
   */
  function findRecaptureUpdate() {
    return dbCalls.updates.find(
      u => u.sql.includes("status = 'assets_captured'") && u.sql.includes('recapture_count')
    );
  }

  test('schedules recapture for broken site (first attempt)', async () => {
    await runBrokenSite(0, ['JavaScript errors', 'Blank page']); // First attempt

    const brokenUpdate = findRecaptureUpdate();
    assert.ok(brokenUpdate, 'Expected a broken-site UPDATE with assets_captured + recapture_count');

    // args[0] = error message, args[1] = new recapture count
    assert.ok(
      brokenUpdate.args[0].includes('Broken site'),
      'Error message should mention broken site'
    );
    assert.strictEqual(brokenUpdate.args[1], 1, 'recapture_count should be 1 (0+1)');

    // Should NOT be marked ignored
    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(!ignoreUpdate, 'Should NOT mark as ignored on first broken-site attempt');
  });

  test('increments recapture_count on second broken site attempt (count 1→2)', async () => {
    await runBrokenSite(1, ['Layout broken']); // Previous attempt count = 1

    const brokenUpdate = findRecaptureUpdate();
    assert.ok(brokenUpdate, 'Expected a broken-site UPDATE');
    // args[1] = recaptureCount = 1+1 = 2
    assert.strictEqual(brokenUpdate.args[1], 2, 'recapture_count should be 2 on second attempt');
  });

  test('marks site as ignored when broken site exceeds max retries (recapture_count >= 3 → count=4)', async () => {
    // 3 previous attempts: the next one would be the 4th, which this test
    // expects to exceed the retry limit.
    await runBrokenSite(3, ['Persistent crash']);

    // When max exceeded: UPDATE sets status='ignored' with a "Max recapture" message
    const ignoreUpdate = findUpdateWithStatus('ignored');
    assert.ok(ignoreUpdate, 'Expected status=ignored when max recapture retries exceeded');
    assert.ok(
      ignoreUpdate.args[0].includes('Max recapture'),
      'Error message should mention Max recapture'
    );

    // Should NOT have a recapture UPDATE (assets_captured + recapture_count)
    const recaptureUpdate = findRecaptureUpdate();
    assert.ok(!recaptureUpdate, 'Should NOT schedule another recapture when max exceeded');
  });
});
1449  
1450  // ---------------------------------------------------------------------------
describe('runScoringStage — keyword counter', () => {
  beforeEach(() => {
    resetState();
    resetEnv();
  });

  /**
   * Runs the stage for one non-blocklisted site, exposing `keywordData` through
   * mockKeywordData and mocking an LLM result with the given grade and score.
   */
  async function scoreOneSite(keywordData, letterGrade, conversionScore) {
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockKeywordData = keywordData;

    const overall = {
      letter_grade: letterGrade,
      conversion_score: conversionScore,
      is_business_directory: false,
      is_local_business: true,
      is_error_page: false,
      is_broken_site: false,
    };
    mockScoreWebsite.mock.mockImplementation(async () => ({ overall_calculation: overall }));

    await runScoringStage({ limit: 1 });
  }

  test('increments low_scoring counter when site scores low and has keyword data', async () => {
    await scoreOneSite({ keyword: 'plumber near me', country_code: 'AU' }, 'F', 45);

    const incrementCalls = mockIncrementLowScoring.mock.callCount();
    assert.strictEqual(incrementCalls, 1);
  });

  test('does not increment low_scoring counter for high score site (score > threshold)', async () => {
    await scoreOneSite({ keyword: 'plumber near me', country_code: 'AU' }, 'A', 95);

    const incrementCalls = mockIncrementLowScoring.mock.callCount();
    assert.strictEqual(incrementCalls, 0);
  });

  test('does not increment low_scoring counter when keyword data is null', async () => {
    // The site carries no keyword, so there is nothing to count against.
    await scoreOneSite(null, 'F', 55);

    const incrementCalls = mockIncrementLowScoring.mock.callCount();
    assert.strictEqual(incrementCalls, 0);
  });
});
1526  
1527  // ---------------------------------------------------------------------------
describe('runScoringStage — error handling', () => {
  beforeEach(() => {
    resetState();
    resetEnv();
  });

  test('records failure via recordFailure when scoreWebsite throws', async () => {
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);

    // Every scoring attempt rejects.
    mockScoreWebsite.mock.mockImplementation(async () => {
      throw new Error('OpenRouter API timeout');
    });

    const { processed, failed, succeeded } = await runScoringStage({ limit: 1 });

    // The stage still resolves; the per-site error shows up in the counts.
    assert.strictEqual(processed, 1);
    assert.strictEqual(failed, 1);
    assert.strictEqual(succeeded, 0);

    // Exactly one recordFailure call; its first two arguments are the site ID
    // and the stage name.
    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    const [failedSiteId, failedStage] = mockRecordFailure.mock.calls[0].arguments;
    assert.strictEqual(failedSiteId, 1, 'siteId should be 1');
    assert.strictEqual(failedStage, 'scoring', 'stageName should be scoring');
  });

  test('returns correct counts for mixed success/failure across multiple sites', async () => {
    const fixtures = [
      { id: 1, url: 'https://site1.com' },
      { id: 2, url: 'https://site2.com' },
    ];
    mockSites = fixtures.map(fixture => makeSiteRow({ ...fixture }));
    for (const fixture of fixtures) {
      mockSiteDetails[fixture.id] = makeSiteDetail({ ...fixture });
    }
    mockCheckBlocklist.mock.mockImplementation(() => null);

    // site2 rejects, every other URL gets a passing result.
    mockScoreWebsite.mock.mockImplementation(async siteData => {
      const shouldFail = siteData.url === 'https://site2.com';
      if (!shouldFail) {
        return {
          overall_calculation: {
            letter_grade: 'D-',
            conversion_score: 60,
            is_business_directory: false,
            is_local_business: true,
            is_error_page: false,
            is_broken_site: false,
          },
        };
      }
      throw new Error('API error for site2');
    });

    const { processed, succeeded, failed } = await runScoringStage({ limit: 2 });

    assert.strictEqual(processed, 2);
    assert.strictEqual(succeeded, 1);
    assert.strictEqual(failed, 1);
  });
});
1590  
1591  // ---------------------------------------------------------------------------
describe('runScoringStage — return value structure', () => {
  /**
   * Builds the scorer payload for an ordinary site: every special-case flag
   * (directory, non-local, error page, broken site) is left in its neutral
   * position so only the grade and score vary between tests.
   */
  const scoredAs = (letter_grade, conversion_score) => ({
    overall_calculation: {
      letter_grade,
      conversion_score,
      is_business_directory: false,
      is_local_business: true,
      is_error_page: false,
      is_broken_site: false,
    },
  });

  beforeEach(() => {
    resetState();
    resetEnv();
  });

  test('returns all required fields in the stats object', async () => {
    // One default site, not blocklisted, graded D- by the mocked scorer.
    mockSites = [makeSiteRow()];
    mockSiteDetails[1] = makeSiteDetail();
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockGradeRows = [{ grade: 'D-', count: 1 }];
    mockScoreWebsite.mock.mockImplementation(async () => scoredAs('D-', 60));

    const stats = await runScoringStage({ limit: 1 });

    // Every key of the stats contract must be present on the returned object.
    const requiredFields = [
      'processed',
      'succeeded',
      'failed',
      'skipped',
      'duration',
      'gradeDistribution',
    ];
    for (const field of requiredFields) {
      assert.ok(field in stats, `Should have ${field} field`);
    }
    assert.ok(typeof stats.duration === 'number', 'Duration should be a number');
    assert.strictEqual(stats.gradeDistribution['D-'], 1);
  });

  test('returns correct counts for a fully successful run of 2 sites', async () => {
    // Two distinct sites, both of which the mocked scorer grades successfully.
    const targets = [
      { id: 1, url: 'https://site1.com' },
      { id: 2, url: 'https://site2.com' },
    ];
    mockSites = targets.map(target => makeSiteRow({ ...target }));
    for (const target of targets) {
      mockSiteDetails[target.id] = makeSiteDetail({ ...target });
    }
    mockCheckBlocklist.mock.mockImplementation(() => null);
    mockScoreWebsite.mock.mockImplementation(async () => scoredAs('C', 75));

    const { processed, succeeded, failed } = await runScoringStage({ limit: 2 });

    assert.strictEqual(processed, 2);
    assert.strictEqual(succeeded, 2);
    assert.strictEqual(failed, 0);
  });

  test('duration is positive number in milliseconds', async () => {
    // With no sites queued the stage can finish within the same millisecond,
    // so the check is deliberately "non-negative" rather than strictly positive.
    mockSites = [];

    const { duration } = await runScoringStage();

    assert.ok(typeof duration === 'number', 'duration should be number');
    assert.ok(duration >= 0, 'duration should be non-negative');
  });
});
1663  
1664  // ─── classifyIndustry truthy branch ─────────────────────────────────────────
1665  
describe('runScoringStage — classifyIndustry ignore', () => {
  // The file header documents the ignore UPDATE as `status = 'ignore'`; accept
  // the `'ignored'` spelling as well so the probe matches either form instead
  // of silently never matching the documented statement.
  const IGNORE_STATUS_SQL = /status\s*=\s*'ignored?'/;

  /**
   * Looks up the recorded ignore UPDATE for an industry rejection.
   *
   * @param {string} marker - Text expected inside one of the UPDATE's bound args.
   * @returns {object|undefined} The matching entry of `dbCalls.updates`, if any.
   */
  const findIgnoreUpdate = marker =>
    dbCalls.updates.find(
      u => IGNORE_STATUS_SQL.test(u.sql) && u.args.some(a => String(a).includes(marker))
    );

  beforeEach(() => {
    dbCalls.updates = [];
    mockClassifyIndustry.mock.resetCalls();
    // Pin the blocklist to "not blocked" (as the other suites do) so the outcome
    // does not depend on whatever implementation an earlier test left behind.
    mockCheckBlocklist.mock.mockImplementation(() => null);
  });

  test('marks site as ignore when classifyIndustry returns legal industry', async () => {
    mockSites = [makeSiteRow({ id: 1, domain: 'lawfirm.com' })];
    mockSiteDetails[1] = makeSiteDetail();

    // classifyIndustry returns legal industry
    mockClassifyIndustry.mock.mockImplementationOnce(() => ({
      type: 'legal',
      reason: 'domain contains "law"',
    }));

    // The returned stats are intentionally not inspected: the site may be
    // filtered out before batch processing, so `processed` may be 0.
    await runScoringStage({ limit: 1 });

    const ignoreUpdate = findIgnoreUpdate('Ignored');
    assert.ok(ignoreUpdate, 'Should have marked site as ignore for legal industry');
  });

  test('marks site as ignore when classifyIndustry returns regulated industry (non-legal)', async () => {
    mockSites = [makeSiteRow({ id: 2, domain: 'mypharmacy.com' })];
    mockSiteDetails[2] = makeSiteDetail({ id: 2 });

    // classifyIndustry returns a regulated, non-legal industry
    mockClassifyIndustry.mock.mockImplementationOnce(() => ({
      type: 'pharmaceutical',
      reason: 'domain contains "pharma"',
    }));

    await runScoringStage({ limit: 1 });

    const ignoreUpdate = findIgnoreUpdate('Regulated');
    assert.ok(ignoreUpdate, 'Should have marked site as ignore for regulated industry');
  });
});
1709  
1710  // ─── getScoringStats ─────────────────────────────────────────────────────────
1711  
describe('getScoringStats', () => {
  test('returns stats object with expected fields', async () => {
    // Grade rows the mocked DB hands back, keyed by grade for the checks below.
    const expectedCounts = { A: 2, B: 3, F: 5 };
    mockGradeRows = Object.entries(expectedCounts).map(([grade, count]) => ({ grade, count }));

    const stats = await getScoringStats();

    // Aggregate fields must be numeric and the distribution must be an object.
    assert.ok(typeof stats.total_sites === 'number', 'total_sites should be a number');
    assert.ok(typeof stats.avg_score === 'number', 'avg_score should be a number');
    assert.ok(typeof stats.gradeDistribution === 'object', 'gradeDistribution should be an object');

    // Each grade row must surface under its own key with its count.
    for (const [grade, count] of Object.entries(expectedCounts)) {
      assert.strictEqual(stats.gradeDistribution[grade], count);
    }
  });

  test('returns gradeDistribution as empty object when no grades exist', async () => {
    // With no grade rows the distribution must be an empty object.
    mockGradeRows = [];

    const { gradeDistribution } = await getScoringStats();

    assert.deepStrictEqual(gradeDistribution, {});
  });
});