/ tests / stages / assets-supplement.test.js
assets-supplement.test.js
  1  /**
  2   * Supplemental coverage tests for src/stages/assets.js
  3   *
  4   * These tests use createPgMock() with a real in-memory SQLite DB so that
  5   * withTransaction() resolves correctly (avoids the async-in-sync-transaction
  6   * infinite loop that the original assets.test.js had with MockDatabase).
  7   *
  8   * Coverage targets (lines currently < 50% covered):
  9   *   - runAssetsStage() HTML-only path: no candidates, blocklist, success, failure
 10   *   - runAssetsStage() vision path: no candidates, blocklist, success, failure
 11   *   - getAssetsStats() — SQL shape returned
 12   *   - backfillScreenshots() — empty and non-empty paths
 13   *
 14   * Run with:
 15   *   NODE_ENV=test LOGS_DIR=/tmp/test-logs DATABASE_PATH=/tmp/test-assets-supp.db \
 16   *   node --experimental-test-module-mocks --test tests/stages/assets-supplement.test.js
 17   */
 18  
 19  import { test, describe, mock, beforeEach, after } from 'node:test';
 20  import assert from 'node:assert/strict';
 21  import Database from 'better-sqlite3';
 22  import { createPgMock } from '../helpers/pg-mock.js';
 23  
 24  // ============================================================================
 25  // In-memory SQLite DB + pg-mock wiring — must happen BEFORE any mock.module()
 26  // that would shadow db.js inside the module under test
 27  // ============================================================================
 28  
 29  const db = new Database(':memory:');
 30  
 31  // Minimal schema for assets stage queries
 32  db.exec(`
 33    CREATE TABLE IF NOT EXISTS sites (
 34      id INTEGER PRIMARY KEY AUTOINCREMENT,
 35      domain TEXT NOT NULL,
 36      landing_page_url TEXT,
 37      keyword TEXT,
 38      status TEXT DEFAULT 'found',
 39      html_dom TEXT,
 40      screenshot_path TEXT,
 41      http_status_code INTEGER,
 42      ssl_status TEXT,
 43      http_headers TEXT,
 44      locale_data TEXT,
 45      language_code TEXT,
 46      perf_json TEXT,
 47      assets_captured_at TEXT,
 48      error_message TEXT,
 49      recapture_at TEXT,
 50      recapture_count INTEGER DEFAULT 0,
 51      retry_count INTEGER DEFAULT 0,
 52      last_retry_at TEXT,
 53      country_code TEXT,
 54      score REAL,
 55      grade TEXT,
 56      rescored_at TEXT,
 57      message_type TEXT,
 58      read_at TEXT,
 59      created_at TEXT DEFAULT (datetime('now')),
 60      updated_at TEXT DEFAULT (datetime('now'))
 61    );
 62  
 63    CREATE TABLE IF NOT EXISTS keywords (
 64      id INTEGER PRIMARY KEY AUTOINCREMENT,
 65      keyword TEXT NOT NULL,
 66      country_code TEXT NOT NULL,
 67      assets_scraped_count INTEGER DEFAULT 0,
 68      UNIQUE(keyword, country_code)
 69    );
 70  `);
 71  
 72  // ============================================================================
 73  // All mock.module() calls MUST come before any dynamic import of the module
 74  // under test (Node module mock cache is set at import time).
 75  // ============================================================================
 76  
 77  mock.module('../../src/utils/db.js', { namedExports: createPgMock(db) });
 78  
 79  mock.module('../../src/utils/logger.js', {
 80    defaultExport: class {
 81      info() {}
 82      warn() {}
 83      error() {}
 84      debug() {}
 85      success() {}
 86    },
 87  });
 88  
 89  // Mutable stubs — mutated per-test via resetStubs()
 90  let stubCaptureWebsiteResult = null;
 91  let stubCaptureWebsiteThrows = null;
 92  let stubHtmlCaptureResult = null;
 93  let stubDedupeStats = { sitesIgnored: 0, duplicateDomains: 0, crossBorder: 0 };
 94  let stubCheckBlocklistResult = null;
 95  let stubDetectErrorPageResult = { isErrorPage: false };
 96  let stubScreenshotsExist = { exists: true, missing: [] };
 97  
 98  const mockCaptureWebsite = mock.fn(async () => {
 99    if (stubCaptureWebsiteThrows) throw stubCaptureWebsiteThrows;
100    return (
101      stubCaptureWebsiteResult || {
102        html: '<html><body>Test</body></html>',
103        screenshots: {
104          desktop_above: Buffer.from('a'),
105          desktop_below: Buffer.from('b'),
106          mobile_above: Buffer.from('c'),
107        },
108        screenshotsUncropped: {
109          desktop_above: Buffer.from('ua'),
110          desktop_below: Buffer.from('ub'),
111          mobile_above: Buffer.from('uc'),
112        },
113        httpStatusCode: 200,
114        sslStatus: 'https',
115        httpHeaders: '{}',
116        localeData: '{}',
117        perfData: null,
118      }
119    );
120  });
121  
// Mock browser functions — launchBrowser / createStealthContext
// The HTML-only path uses these to create a shared browser context

/**
 * Fake navigation response. The status code is read from
 * stubHtmlCaptureResult when status() is called (200 when unset).
 */
function createFakeResponse() {
  return {
    status: () => stubHtmlCaptureResult?.httpStatusCode ?? 200,
    headers: () => ({}),
  };
}

/**
 * Fake browser page. Every method consults stubHtmlCaptureResult at call
 * time, so a test may set the stub after the page has been created.
 */
function createFakePage() {
  return {
    goto: async () => createFakeResponse(),
    content: async () => stubHtmlCaptureResult?.html ?? '<html><body>Valid page</body></html>',
    close: async () => {},
    evaluate: async () => {
      const rawLocale = stubHtmlCaptureResult?.localeData;
      if (rawLocale) {
        return JSON.parse(rawLocale);
      }
      return { htmlLang: 'en', hreflangs: [] };
    },
  };
}

mock.module('../../src/capture.js', {
  namedExports: {
    captureWebsite: mockCaptureWebsite,
    launchBrowser: mock.fn(async () => ({ close: async () => {} })),
    createStealthContext: mock.fn(async () => ({
      newPage: async () => createFakePage(),
      close: async () => {},
    })),
  },
});
145  
// Call-tracked stand-ins. Those that return a `stub*` variable read it at call
// time, so tests can change the outcome after the module under test is loaded.
const mockSaveScreenshots = mock.fn(async () => '/tmp/ss/1');
const mockCroppedScreenshotsExist = mock.fn(async () => stubScreenshotsExist);
const mockCheckBlocklist = mock.fn(() => stubCheckBlocklistResult);
const mockIncrementAssetsScraped = mock.fn(async () => {});
const mockDeduplicateSites = mock.fn(() => stubDedupeStats);
const mockDetectErrorPage = mock.fn(() => stubDetectErrorPageResult);
const mockRecordFailure = mock.fn(async () => {});
const mockResetRetries = mock.fn(async () => {});

// [module specifier, named exports] pairs, registered in the order listed.
const utilityModuleStubs = [
  [
    '../../src/utils/screenshot-storage.js',
    {
      saveScreenshots: mockSaveScreenshots,
      croppedScreenshotsExist: mockCroppedScreenshotsExist,
    },
  ],
  ['../../src/utils/site-filters.js', { checkBlocklist: mockCheckBlocklist }],
  ['../../src/utils/keyword-counters.js', { incrementAssetsScraped: mockIncrementAssetsScraped }],
  ['../../src/utils/dedupe-locale-aware.js', { deduplicateSites: mockDeduplicateSites }],
  ['../../src/utils/error-page-detector.js', { detectErrorPage: mockDetectErrorPage }],
  [
    '../../src/utils/retry-handler.js',
    { recordFailure: mockRecordFailure, resetRetries: mockResetRetries },
  ],
  // Fixed answers: concurrency of 1, Australia, English, silent summaries.
  ['../../src/utils/adaptive-concurrency.js', { getAdaptiveConcurrencyFast: mock.fn(() => 1) }],
  [
    '../../src/config/countries.js',
    { getCountryByCode: mock.fn(() => ({ code: 'AU', name: 'Australia' })) },
  ],
  ['../../src/utils/detect-language.js', { deriveLanguageCode: mock.fn(() => 'en') }],
  [
    '../../src/utils/summary-generator.js',
    { generateStageCompletion: mock.fn(), displayProgress: mock.fn() },
  ],
];

for (const [specifier, namedExports] of utilityModuleStubs) {
  mock.module(specifier, { namedExports });
}
197  
/**
 * processBatch stand-in: execute all items sequentially (same as real
 * behaviour for concurrency=1). A processor that throws is recorded in
 * `errors` and does not stop the remaining items.
 *
 * @param {Array} items - Work items, processed in index order.
 * @param {Function} processor - Called as processor(item, index).
 * @param {object} [_opts] - Accepted for signature compatibility; ignored.
 * @returns {Promise<{results: Array, errors: Array<{item: *, error: *}>}>}
 */
async function runBatchSequentially(items, processor, _opts) {
  const outcome = { results: [], errors: [] };
  let index = 0;
  while (index < items.length) {
    try {
      const value = await processor(items[index], index);
      outcome.results.push(value);
    } catch (error) {
      outcome.errors.push({ item: items[index], error });
    }
    index += 1;
  }
  return outcome;
}

mock.module('../../src/utils/error-handler.js', {
  namedExports: {
    processBatch: mock.fn(runBatchSequentially),
    // withTimeout hands back the given promise unchanged — no timer involved.
    withTimeout: mock.fn(async (pending) => pending),
  },
});
216  
// html-storage.js: only the write/delete mocks are kept in variables (they are
// reset by resetStubs and asserted on); the other exports are inline mocks.
const mockWriteHtmlDom = mock.fn(() => {});
const mockDeleteHtmlDom = mock.fn(() => {});

const htmlStorageExports = {
  writeHtmlDom: mockWriteHtmlDom,
  hasHtmlDom: mock.fn(() => false),
  deleteHtmlDom: mockDeleteHtmlDom,
  readHtmlDom: mock.fn(() => null),
  writeKeyPagesHtml: mock.fn(),
  readKeyPagesHtml: mock.fn(() => null),
  deleteKeyPagesHtml: mock.fn(),
  deleteAllHtml: mock.fn(),
  DATA_DIR: '/tmp/test-html-data',
};

mock.module('../../src/utils/html-storage.js', { namedExports: htmlStorageExports });
232  
233  // ============================================================================
234  // Import module under test — after all mock.module() calls
235  // ============================================================================
236  
// Loaded dynamically so the mock.module() registrations earlier in this file
// are in place before assets.js is evaluated.
const assetsStageModule = await import('../../src/stages/assets.js');
const { runAssetsStage, getAssetsStats, backfillScreenshots } = assetsStageModule;
239  
240  // ============================================================================
241  // Helpers
242  // ============================================================================
243  
/**
 * Puts every mutable stub back to its initial value and clears the recorded
 * calls of every tracked mock, so each test starts from the same state.
 */
function resetStubs() {
  // Stub values
  stubCaptureWebsiteResult = null;
  stubCaptureWebsiteThrows = null;
  stubHtmlCaptureResult = null;
  stubDedupeStats = { sitesIgnored: 0, duplicateDomains: 0, crossBorder: 0 };
  stubCheckBlocklistResult = null;
  stubDetectErrorPageResult = { isErrorPage: false };
  stubScreenshotsExist = { exists: true, missing: [] };

  // Call history
  const trackedMocks = [
    mockCaptureWebsite,
    mockSaveScreenshots,
    mockCroppedScreenshotsExist,
    mockCheckBlocklist,
    mockIncrementAssetsScraped,
    mockDeduplicateSites,
    mockDetectErrorPage,
    mockRecordFailure,
    mockResetRetries,
    mockWriteHtmlDom,
    mockDeleteHtmlDom,
  ];
  for (const tracked of trackedMocks) {
    tracked.mock.resetCalls();
  }
}
264  
/**
 * Inserts one row into `sites` and returns its id.
 *
 * @param {object} [overrides] - Column values replacing the defaults
 *   (example.com / plumber / status 'found' / country AU, everything else null).
 * @returns {number|bigint} The rowid of the inserted row.
 */
function insertSite(overrides = {}) {
  const {
    domain,
    landing_page_url,
    keyword,
    status,
    html_dom,
    screenshot_path,
    country_code,
    error_message,
  } = {
    domain: 'example.com',
    landing_page_url: 'https://example.com',
    keyword: 'plumber',
    status: 'found',
    html_dom: null,
    screenshot_path: null,
    country_code: 'AU',
    error_message: null,
    ...overrides,
  };

  const insertStatement = db.prepare(
    `INSERT INTO sites (domain, landing_page_url, keyword, status, html_dom, screenshot_path, country_code, error_message)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const { lastInsertRowid } = insertStatement.run(
    domain,
    landing_page_url,
    keyword,
    status,
    html_dom,
    screenshot_path,
    country_code,
    error_message
  );
  return lastInsertRowid;
}
285  
/** Empties both tables (`sites` first, then `keywords`) between tests. */
function clearSites() {
  const wipeStatements = ['DELETE FROM sites', 'DELETE FROM keywords'];
  for (const sql of wipeStatements) {
    db.prepare(sql).run();
  }
}
290  
// Close the shared in-memory database once all suites in this file are done.
after(function closeDatabase() {
  db.close();
});
294  
295  // ============================================================================
296  // Tests — HTML-only path (ENABLE_VISION=false)
297  // ============================================================================
298  
describe('assets.js — HTML-only path (ENABLE_VISION=false)', () => {
  // Reads the requested columns of a single site row.
  const readSite = (columns, id) =>
    db.prepare(`SELECT ${columns} FROM sites WHERE id = ?`).get(id);

  // Sets what the fake browser page reports for the next capture.
  const servePage = (html, httpStatusCode = 200, localeData = null) => {
    stubHtmlCaptureResult = { html, httpStatusCode, localeData };
  };

  // recordFailure(siteId, stage, err, fallbackStatus) — err is index 2
  const firstFailureMessage = () => mockRecordFailure.mock.calls[0].arguments[2].message;

  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('returns zero counts when no sites need HTML capture', async () => {
    // The sites table is empty here
    const result = await runAssetsStage({ limit: 10 });

    for (const counter of ['processed', 'succeeded', 'failed', 'skipped']) {
      assert.strictEqual(result[counter], 0);
    }
    assert.ok(typeof result.duration === 'number');
  });

  test('returns zero counts when all sites already have html_dom', async () => {
    insertSite({ html_dom: 'fs', status: 'assets_captured' });

    const { processed } = await runAssetsStage({ limit: 10 });

    assert.strictEqual(processed, 0);
  });

  test('marks blocklisted site as ignored and does not capture', async () => {
    const siteId = insertSite({ domain: 'yelp.com', country_code: 'US' });
    stubCheckBlocklistResult = { reason: 'Directory site: yelp.com' };

    await runAssetsStage({ limit: 10 });

    const { status, error_message: errorMessage } = readSite('status, error_message', siteId);
    assert.strictEqual(status, 'ignored');
    assert.ok(errorMessage.includes('Directory site'));
    assert.strictEqual(mockWriteHtmlDom.mock.callCount(), 0, 'should not capture blocked site');
  });

  test('successfully captures HTML and marks assets_captured', async () => {
    const siteId = insertSite({ domain: 'good.com', keyword: 'dentist', country_code: 'AU' });
    // A matching keyword row must exist so incrementAssetsScraped can be called
    db.prepare('INSERT OR IGNORE INTO keywords (keyword, country_code) VALUES (?, ?)').run('dentist', 'AU');
    servePage(
      '<html><body>Great dentist page</body></html>',
      200,
      '{"htmlLang":"en","hreflangs":[]}'
    );

    await runAssetsStage({ limit: 10 });

    assert.strictEqual(readSite('status', siteId).status, 'assets_captured');
    assert.strictEqual(mockWriteHtmlDom.mock.callCount(), 1, 'writeHtmlDom should be called');
    assert.strictEqual(mockResetRetries.mock.callCount(), 1, 'resetRetries should be called');
    assert.strictEqual(mockRecordFailure.mock.callCount(), 0, 'no failures expected');
  });

  test('calls recordFailure when HTML is empty', async () => {
    insertSite({ domain: 'empty.com', country_code: 'AU' });
    servePage('');

    await runAssetsStage({ limit: 10 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    assert.match(firstFailureMessage(), /HTML DOM capture failed/i);
  });

  test('calls recordFailure when HTTP status is 404', async () => {
    insertSite({ domain: 'notfound.com', country_code: 'AU' });
    servePage('<html><body>Not Found</body></html>', 404);

    await runAssetsStage({ limit: 10 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    assert.match(firstFailureMessage(), /HTTP 404/i);
  });

  test('schedules retry in 7 days when error page detected', async () => {
    const siteId = insertSite({ domain: 'errpage.com', country_code: 'AU' });
    servePage('<html><body>404 Page Not Found</body></html>');
    stubDetectErrorPageResult = {
      isErrorPage: true,
      indicator: '404 keyword',
      wordCount: 4,
    };

    await runAssetsStage({ limit: 10 });

    const { recapture_at: recaptureAt } = readSite('recapture_at', siteId);
    assert.ok(recaptureAt !== null, 'recapture_at should be set');
    assert.strictEqual(mockRecordFailure.mock.callCount(), 1, 'recordFailure called for error page');
  });

  test(
    'deduplicates sites before processing',
    { skip: 'deduplicateSites removed (DR-106): UNIQUE constraint prevents duplicates at insert time' },
    async () => {
      stubDedupeStats = { sitesIgnored: 5, duplicateDomains: 3, crossBorder: 1 };
      await runAssetsStage();
      assert.ok(mockDeduplicateSites.mock.callCount() > 0, 'deduplicateSites should be called');
    }
  );

  test('processes multiple sites and returns correct counts', async () => {
    for (const domain of ['site1.com', 'site2.com']) {
      insertSite({ domain, country_code: 'AU' });
    }
    servePage('<html><body>Content</body></html>');

    const result = await runAssetsStage();

    assert.strictEqual(result.processed, 2);
    for (const key of ['succeeded', 'failed', 'duration']) {
      assert.ok(key in result);
    }
  });
});
432  
433  // ============================================================================
434  // Tests — Vision-enabled path (ENABLE_VISION=true)
435  // ============================================================================
436  
describe('assets.js — Vision-enabled path (ENABLE_VISION=true)', () => {
  // Columns that mark a site as not yet captured.
  const uncaptured = { screenshot_path: null, html_dom: null };

  // Builds a captureWebsite() payload: HTTP 200 with all three screenshots in
  // both cropped and uncropped form, unless `overrides` replaces a field.
  const captureResult = (html, overrides = {}) => ({
    html,
    screenshots: {
      desktop_above: Buffer.from('a'),
      desktop_below: Buffer.from('b'),
      mobile_above: Buffer.from('c'),
    },
    screenshotsUncropped: {
      desktop_above: Buffer.from('ua'),
      desktop_below: Buffer.from('ub'),
      mobile_above: Buffer.from('uc'),
    },
    httpStatusCode: 200,
    sslStatus: 'https',
    httpHeaders: '{}',
    localeData: '{}',
    perfData: null,
    ...overrides,
  });

  // Reads the requested column of a single site row.
  const readColumn = (column, id) =>
    db.prepare(`SELECT ${column} FROM sites WHERE id = ?`).get(id)[column];

  // recordFailure(siteId, stage, err, fallbackStatus) — err is index 2
  const firstFailureMessage = () => mockRecordFailure.mock.calls[0].arguments[2].message;

  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'true';
  });

  test('returns zero counts when no candidate sites exist', async () => {
    const result = await runAssetsStage({ limit: 5 });

    for (const counter of ['processed', 'succeeded', 'failed']) {
      assert.strictEqual(result[counter], 0);
    }
    assert.ok(typeof result.duration === 'number');
  });

  test('captures screenshots and marks assets_captured on success', async () => {
    const siteId = insertSite({
      domain: 'vision.com',
      keyword: 'electrician',
      country_code: 'AU',
      ...uncaptured,
    });
    db.prepare('INSERT OR IGNORE INTO keywords (keyword, country_code) VALUES (?, ?)').run('electrician', 'AU');
    stubCaptureWebsiteResult = captureResult('<html><body>Electrician page</body></html>');
    stubScreenshotsExist = { exists: true, missing: [] };

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(readColumn('status', siteId), 'assets_captured');
    assert.strictEqual(mockCaptureWebsite.mock.callCount(), 1);
    assert.strictEqual(mockSaveScreenshots.mock.callCount(), 1);
    assert.strictEqual(mockResetRetries.mock.callCount(), 1);
    assert.strictEqual(mockRecordFailure.mock.callCount(), 0);
  });

  test('calls recordFailure when screenshot validation fails', async () => {
    insertSite({ domain: 'badss.com', country_code: 'AU', ...uncaptured });
    stubCaptureWebsiteResult = captureResult('<html><body>Content</body></html>');
    stubScreenshotsExist = { exists: false, missing: ['desktop_above', 'mobile_above'] };

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    assert.match(firstFailureMessage(), /Screenshot validation failed/i);
  });

  test('marks blocklisted site as ignored in vision mode', async () => {
    const siteId = insertSite({ domain: 'facebook.com', country_code: 'US', ...uncaptured });
    stubCheckBlocklistResult = { reason: 'Social media: facebook.com' };

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(readColumn('status', siteId), 'ignored');
    assert.strictEqual(mockCaptureWebsite.mock.callCount(), 0, 'should not capture blocked sites');
  });

  test('cleans up sites with invalid screenshot_path during candidate check', async () => {
    const siteId = insertSite({
      domain: 'stale.com',
      country_code: 'AU',
      screenshot_path: '/old/path',
      html_dom: null,
    });

    // croppedScreenshotsExist returns this same "missing" answer on every call:
    // once for the candidate check and again for post-capture validation.
    stubScreenshotsExist = { exists: false, missing: ['desktop_above'] };

    // The capture itself succeeds, but validation then fails (same stub), so
    // the site takes the "needs capture" path and ends in a validation failure.
    stubCaptureWebsiteResult = captureResult('<html><body>Content</body></html>');

    await runAssetsStage({ limit: 5 });

    // Cleanup logic should have reset the stale screenshot_path to NULL
    assert.strictEqual(
      readColumn('screenshot_path', siteId),
      null,
      'screenshot_path should be reset to NULL'
    );
  });

  test('calls recordFailure when HTTP status is error', async () => {
    insertSite({ domain: 'http-err.com', country_code: 'AU', ...uncaptured });
    stubCaptureWebsiteResult = captureResult('<html><body>Not Found</body></html>', {
      screenshots: null,
      screenshotsUncropped: null,
      httpStatusCode: 404,
    });

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    assert.match(firstFailureMessage(), /HTTP 404/i);
  });
});
603  
604  // ============================================================================
605  // Tests — getAssetsStats()
606  // ============================================================================
607  
describe('getAssetsStats()', () => {
  // Inserts one site row per fixture, in the order given.
  const seedSites = (...fixtures) => {
    for (const fixture of fixtures) {
      insertSite(fixture);
    }
  };

  beforeEach(() => {
    clearSites();
  });

  test('returns stats object with expected numeric fields', async () => {
    const stats = await getAssetsStats();

    assert.ok(stats !== null && typeof stats === 'object', 'should return an object');
    const expectedFields = [
      'total_sites',
      'sites_with_screenshots',
      'captured_sites',
      'pending_capture',
      'failed_capture',
    ];
    for (const field of expectedFields) {
      assert.ok(field in stats, `should have ${field}`);
    }
  });

  test('returns zero counts when no sites exist', async () => {
    const stats = await getAssetsStats();

    // Counts are coerced with Number() before comparison
    for (const field of ['total_sites', 'sites_with_screenshots', 'captured_sites']) {
      assert.strictEqual(Number(stats[field]), 0);
    }
  });

  test('counts sites_with_screenshots correctly', async () => {
    seedSites(
      { screenshot_path: '/path/to/ss', status: 'assets_captured' },
      { screenshot_path: null, status: 'found' },
      { screenshot_path: '/path/to/ss2', status: 'assets_captured' }
    );

    const stats = await getAssetsStats();

    assert.strictEqual(Number(stats.total_sites), 3);
    assert.strictEqual(Number(stats.sites_with_screenshots), 2);
    assert.strictEqual(Number(stats.captured_sites), 2);
  });

  test('counts pending_capture correctly', async () => {
    seedSites(
      { screenshot_path: null, status: 'found' },
      { screenshot_path: null, status: 'found' }
    );

    const { pending_capture: pendingCapture } = await getAssetsStats();

    assert.strictEqual(Number(pendingCapture), 2);
  });

  test('counts failed_capture correctly', async () => {
    seedSites(
      { screenshot_path: null, status: 'found', error_message: 'Capture failed' },
      { screenshot_path: null, status: 'found', error_message: null }
    );

    const { failed_capture: failedCapture } = await getAssetsStats();

    assert.strictEqual(Number(failedCapture), 1);
  });
});
657  
658  // ============================================================================
659  // Tests — backfillScreenshots()
660  // ============================================================================
661  
describe('backfillScreenshots()', () => {
  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('returns zero counts when no sites need backfill', async () => {
    const { processed, succeeded, failed } = await backfillScreenshots(10);

    assert.strictEqual(processed, 0);
    assert.strictEqual(succeeded, 0);
    assert.strictEqual(failed, 0);
  });

  test('delegates to runAssetsStage with correct limit', async () => {
    insertSite({ domain: 'backfill.com', status: 'found', screenshot_path: null });
    stubHtmlCaptureResult = {
      html: '<html><body>Backfill page</body></html>',
      httpStatusCode: 200,
      localeData: null,
    };

    const result = await backfillScreenshots(5);

    // Only the shape of the summary is checked here
    for (const key of ['processed', 'succeeded', 'failed']) {
      assert.ok(key in result, `should return ${key}`);
    }
  });

  test('uses default limit of 10', async () => {
    // The table is empty, so processed is 0 whatever the limit is
    const { processed } = await backfillScreenshots();

    assert.strictEqual(processed, 0);
  });
});
697  
698  // ============================================================================
699  // Tests — legacy flag deprecation warning (no throw)
700  // ============================================================================
701  
describe('assets.js — legacy flag handling', () => {
  /**
   * Runs `body` with `process.env[name]` set to `value`, then puts the
   * variable back to its prior state (deleted if it was unset before).
   *
   * The restore happens in `finally`, so a failing assertion or a throwing
   * stage cannot leak the legacy flag into later tests.
   *
   * @param {string} name - Environment variable to set.
   * @param {string} value - Value to assign while `body` runs.
   * @param {() => Promise<*>} body - Test body to execute.
   * @returns {Promise<*>} Whatever `body` resolves to.
   */
  const withEnvFlag = async (name, value, body) => {
    const previous = process.env[name];
    process.env[name] = value;
    try {
      return await body();
    } finally {
      if (previous === undefined) {
        // Assigning undefined would store the string 'undefined'; delete instead.
        delete process.env[name];
      } else {
        process.env[name] = previous;
      }
    }
  };

  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('does not throw when legacy ENABLE_SCREENSHOT_CAPTURE flag is set', async () => {
    await withEnvFlag('ENABLE_SCREENSHOT_CAPTURE', 'true', async () => {
      const result = await runAssetsStage();
      assert.strictEqual(result.processed, 0);
    });
  });

  test('does not throw when USE_COMPUTER_VISION_SCORING flag is set', async () => {
    await withEnvFlag('USE_COMPUTER_VISION_SCORING', 'true', async () => {
      const result = await runAssetsStage();
      assert.strictEqual(result.processed, 0);
    });
  });
});