// assets-supplement.test.js
/**
 * Supplemental coverage tests for src/stages/assets.js
 *
 * These tests use createPgMock() with a real in-memory SQLite DB so that
 * withTransaction() resolves correctly (avoids the async-in-sync-transaction
 * infinite loop that the original assets.test.js had with MockDatabase).
 *
 * Coverage targets (lines currently < 50% covered):
 * - runAssetsStage() HTML-only path: no candidates, blocklist, success, failure
 * - runAssetsStage() vision path: no candidates, blocklist, success, failure
 * - getAssetsStats() — SQL shape returned
 * - backfillScreenshots() — empty and non-empty paths
 *
 * Run with:
 *   NODE_ENV=test LOGS_DIR=/tmp/test-logs DATABASE_PATH=/tmp/test-assets-supp.db \
 *     node --experimental-test-module-mocks --test tests/stages/assets-supplement.test.js
 */

import { test, describe, mock, beforeEach, after } from 'node:test';
import assert from 'node:assert/strict';
import Database from 'better-sqlite3';
import { createPgMock } from '../helpers/pg-mock.js';

// ============================================================================
// In-memory SQLite DB + pg-mock wiring — must happen BEFORE any mock.module()
// that would shadow db.js inside the module under test
// ============================================================================

const db = new Database(':memory:');

// Minimal schema for assets stage queries
db.exec(`
  CREATE TABLE IF NOT EXISTS sites (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    domain TEXT NOT NULL,
    landing_page_url TEXT,
    keyword TEXT,
    status TEXT DEFAULT 'found',
    html_dom TEXT,
    screenshot_path TEXT,
    http_status_code INTEGER,
    ssl_status TEXT,
    http_headers TEXT,
    locale_data TEXT,
    language_code TEXT,
    perf_json TEXT,
    assets_captured_at TEXT,
    error_message TEXT,
    recapture_at TEXT,
    recapture_count INTEGER DEFAULT 0,
    retry_count INTEGER DEFAULT 0,
    last_retry_at TEXT,
    country_code TEXT,
    score REAL,
    grade TEXT,
    rescored_at TEXT,
    message_type TEXT,
    read_at TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS keywords (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    keyword TEXT NOT NULL,
    country_code TEXT NOT NULL,
    assets_scraped_count INTEGER DEFAULT 0,
    UNIQUE(keyword, country_code)
  );
`);

// ============================================================================
// All mock.module() calls MUST come before any dynamic import of the module
// under test (Node module mock cache is set at import time).
// ============================================================================

mock.module('../../src/utils/db.js', { namedExports: createPgMock(db) });

/** Logger stand-in whose methods all do nothing, keeping test output quiet. */
class NoopLogger {
  info() {}
  warn() {}
  error() {}
  debug() {}
  success() {}
}

mock.module('../../src/utils/logger.js', { defaultExport: NoopLogger });

// Mutable stubs — mutated per-test via resetStubs()
let stubCaptureWebsiteResult = null;
let stubCaptureWebsiteThrows = null;
let stubHtmlCaptureResult = null;
let stubDedupeStats = { sitesIgnored: 0, duplicateDomains: 0, crossBorder: 0 };
let stubCheckBlocklistResult = null;
let stubDetectErrorPageResult = { isErrorPage: false };
let stubScreenshotsExist = { exists: true, missing: [] };

/**
 * Builds the capture result returned by the captureWebsite mock when a test
 * has not supplied its own via stubCaptureWebsiteResult.
 *
 * @returns {object} A fresh successful capture result with three cropped and
 *   three uncropped screenshot buffers.
 */
function defaultCaptureResult() {
  return {
    html: '<html><body>Test</body></html>',
    screenshots: {
      desktop_above: Buffer.from('a'),
      desktop_below: Buffer.from('b'),
      mobile_above: Buffer.from('c'),
    },
    screenshotsUncropped: {
      desktop_above: Buffer.from('ua'),
      desktop_below: Buffer.from('ub'),
      mobile_above: Buffer.from('uc'),
    },
    httpStatusCode: 200,
    sslStatus: 'https',
    httpHeaders: '{}',
    localeData: '{}',
    perfData: null,
  };
}

// Throws stubCaptureWebsiteThrows when set; otherwise resolves to the
// per-test stub result, falling back to the default fixture.
const mockCaptureWebsite = mock.fn(async () => {
  if (stubCaptureWebsiteThrows) throw stubCaptureWebsiteThrows;
  return stubCaptureWebsiteResult ?? defaultCaptureResult();
});

/**
 * Builds a fake browser page whose responses are driven by
 * stubHtmlCaptureResult, with "valid page" defaults when the stub is unset.
 *
 * @returns {object} Page double exposing goto, content, evaluate and close.
 */
function createStubPage() {
  return {
    goto: async () => ({
      status: () => stubHtmlCaptureResult?.httpStatusCode ?? 200,
      headers: () => ({}),
    }),
    content: async () => stubHtmlCaptureResult?.html ?? '<html><body>Valid page</body></html>',
    close: async () => {},
    // localeData is stored on the stub as a JSON string, so parse it here.
    evaluate: async () =>
      stubHtmlCaptureResult?.localeData
        ? JSON.parse(stubHtmlCaptureResult.localeData)
        : { htmlLang: 'en', hreflangs: [] },
  };
}

// Mock browser functions — launchBrowser / createStealthContext
// The HTML-only path uses these to create a shared browser context
mock.module('../../src/capture.js', {
  namedExports: {
    captureWebsite: mockCaptureWebsite,
    launchBrowser: mock.fn(async () => ({ close: async () => {} })),
    createStealthContext: mock.fn(async () => ({
      newPage: async () => createStubPage(),
      close: async () => {},
    })),
  },
});

const mockSaveScreenshots = mock.fn(async () => '/tmp/ss/1');
const mockCroppedScreenshotsExist = mock.fn(async () => stubScreenshotsExist);

mock.module('../../src/utils/screenshot-storage.js', {
  namedExports: {
    saveScreenshots: mockSaveScreenshots,
    croppedScreenshotsExist: mockCroppedScreenshotsExist,
  },
});

const mockCheckBlocklist = mock.fn(() => stubCheckBlocklistResult);
mock.module('../../src/utils/site-filters.js', {
  namedExports: { checkBlocklist: mockCheckBlocklist },
});

const mockIncrementAssetsScraped = mock.fn(async () => {});
mock.module('../../src/utils/keyword-counters.js', {
  namedExports: { incrementAssetsScraped: mockIncrementAssetsScraped },
});

const mockDeduplicateSites = mock.fn(() => stubDedupeStats);
mock.module('../../src/utils/dedupe-locale-aware.js', {
  namedExports: { deduplicateSites: mockDeduplicateSites },
});

const mockDetectErrorPage = mock.fn(() => stubDetectErrorPageResult);
mock.module('../../src/utils/error-page-detector.js', {
  namedExports: { detectErrorPage: mockDetectErrorPage },
});

const mockRecordFailure = mock.fn(async () => {});
const mockResetRetries = mock.fn(async () => {});
mock.module('../../src/utils/retry-handler.js', {
  namedExports: { recordFailure: mockRecordFailure, resetRetries: mockResetRetries },
});

mock.module('../../src/utils/adaptive-concurrency.js', {
  namedExports: { getAdaptiveConcurrencyFast: mock.fn(() => 1) },
});

mock.module('../../src/config/countries.js', {
  namedExports: { getCountryByCode: mock.fn(() => ({ code: 'AU', name: 'Australia' })) },
});

mock.module('../../src/utils/detect-language.js', {
  namedExports: { deriveLanguageCode: mock.fn(() => 'en') },
});

mock.module('../../src/utils/summary-generator.js', {
  namedExports: { generateStageCompletion: mock.fn(), displayProgress: mock.fn() },
});

// processBatch: execute all items sequentially (same as real behaviour for concurrency=1)
mock.module('../../src/utils/error-handler.js', {
  namedExports: {
    processBatch: mock.fn(async (items, processor, _opts) => {
      const results = [];
      const errors = [];
      for (const [index, item] of items.entries()) {
        try {
          results.push(await processor(item, index));
        } catch (err) {
          // Collect rather than rethrow so one failing item does not stop the batch.
          errors.push({ item, error: err });
        }
      }
      return { results, errors };
    }),
    // Pass-through: the stub never enforces a timeout.
    withTimeout: mock.fn(async promise => promise),
  },
});

const mockWriteHtmlDom = mock.fn(() => {});
const mockDeleteHtmlDom = mock.fn(() => {});
mock.module('../../src/utils/html-storage.js', {
  namedExports: {
    writeHtmlDom: mockWriteHtmlDom,
    hasHtmlDom: mock.fn(() => false),
    deleteHtmlDom: mockDeleteHtmlDom,
    readHtmlDom: mock.fn(() => null),
    writeKeyPagesHtml: mock.fn(),
    readKeyPagesHtml: mock.fn(() => null),
    deleteKeyPagesHtml: mock.fn(),
    deleteAllHtml: mock.fn(),
    DATA_DIR: '/tmp/test-html-data',
  },
});
// ============================================================================
// Import module under test — after all mock.module() calls
// ============================================================================

const { runAssetsStage, getAssetsStats, backfillScreenshots } =
  await import('../../src/stages/assets.js');

// ============================================================================
// Helpers
// ============================================================================

/**
 * Restores every mutable stub to its default value and clears the recorded
 * calls of the mocks the tests assert on.
 */
function resetStubs() {
  stubCaptureWebsiteResult = null;
  stubCaptureWebsiteThrows = null;
  stubHtmlCaptureResult = null;
  stubDedupeStats = { sitesIgnored: 0, duplicateDomains: 0, crossBorder: 0 };
  stubCheckBlocklistResult = null;
  stubDetectErrorPageResult = { isErrorPage: false };
  stubScreenshotsExist = { exists: true, missing: [] };

  const trackedMocks = [
    mockCaptureWebsite,
    mockSaveScreenshots,
    mockCroppedScreenshotsExist,
    mockCheckBlocklist,
    mockIncrementAssetsScraped,
    mockDeduplicateSites,
    mockDetectErrorPage,
    mockRecordFailure,
    mockResetRetries,
    mockWriteHtmlDom,
    mockDeleteHtmlDom,
  ];
  for (const tracked of trackedMocks) {
    tracked.mock.resetCalls();
  }
}

/**
 * Inserts one row into the in-memory `sites` table.
 *
 * @param {object} [overrides] - Column values that replace the defaults
 *   (domain, landing_page_url, keyword, status, html_dom, screenshot_path,
 *   country_code, error_message).
 * @returns {number|bigint} Row id of the inserted site.
 */
function insertSite(overrides = {}) {
  const defaults = {
    domain: 'example.com',
    landing_page_url: 'https://example.com',
    keyword: 'plumber',
    status: 'found',
    html_dom: null,
    screenshot_path: null,
    country_code: 'AU',
    error_message: null,
  };
  const s = { ...defaults, ...overrides };
  const result = db
    .prepare(
      `INSERT INTO sites (domain, landing_page_url, keyword, status, html_dom, screenshot_path, country_code, error_message)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
    )
    .run(
      s.domain,
      s.landing_page_url,
      s.keyword,
      s.status,
      s.html_dom,
      s.screenshot_path,
      s.country_code,
      s.error_message
    );
  return result.lastInsertRowid;
}

/** Empties the `sites` and `keywords` tables so each test starts clean. */
function clearSites() {
  db.prepare('DELETE FROM sites').run();
  db.prepare('DELETE FROM keywords').run();
}

/**
 * Builds a captureWebsite() result for the vision-path tests.
 *
 * @param {object} [overrides] - Top-level fields that replace the defaults,
 *   e.g. `{ httpStatusCode: 404, screenshots: null }`.
 * @returns {object} A fresh result object; by default a successful HTTP 200
 *   capture with all cropped and uncropped screenshots present.
 */
function makeCaptureResult(overrides = {}) {
  return {
    html: '<html><body>Content</body></html>',
    screenshots: {
      desktop_above: Buffer.from('a'),
      desktop_below: Buffer.from('b'),
      mobile_above: Buffer.from('c'),
    },
    screenshotsUncropped: {
      desktop_above: Buffer.from('ua'),
      desktop_below: Buffer.from('ub'),
      mobile_above: Buffer.from('uc'),
    },
    httpStatusCode: 200,
    sslStatus: 'https',
    httpHeaders: '{}',
    localeData: '{}',
    perfData: null,
    ...overrides,
  };
}

after(() => {
  db.close();
});

// ============================================================================
// Tests — HTML-only path (ENABLE_VISION=false)
// ============================================================================

describe('assets.js — HTML-only path (ENABLE_VISION=false)', () => {
  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('returns zero counts when no sites need HTML capture', async () => {
    // No sites in DB at all
    const result = await runAssetsStage({ limit: 10 });
    assert.strictEqual(result.processed, 0);
    assert.strictEqual(result.succeeded, 0);
    assert.strictEqual(result.failed, 0);
    assert.strictEqual(result.skipped, 0);
    assert.strictEqual(typeof result.duration, 'number');
  });

  test('returns zero counts when all sites already have html_dom', async () => {
    insertSite({ html_dom: 'fs', status: 'assets_captured' });
    const result = await runAssetsStage({ limit: 10 });
    assert.strictEqual(result.processed, 0);
  });

  test('marks blocklisted site as ignored and does not capture', async () => {
    const id = insertSite({ domain: 'yelp.com', country_code: 'US' });
    stubCheckBlocklistResult = { reason: 'Directory site: yelp.com' };

    await runAssetsStage({ limit: 10 });

    const row = db.prepare('SELECT status, error_message FROM sites WHERE id = ?').get(id);
    assert.strictEqual(row.status, 'ignored');
    assert.ok(row.error_message.includes('Directory site'));
    assert.strictEqual(mockWriteHtmlDom.mock.callCount(), 0, 'should not capture blocked site');
  });

  test('successfully captures HTML and marks assets_captured', async () => {
    const id = insertSite({ domain: 'good.com', keyword: 'dentist', country_code: 'AU' });
    // Insert keyword row so incrementAssetsScraped can be called
    db.prepare('INSERT OR IGNORE INTO keywords (keyword, country_code) VALUES (?, ?)').run(
      'dentist',
      'AU'
    );

    stubHtmlCaptureResult = {
      html: '<html><body>Great dentist page</body></html>',
      httpStatusCode: 200,
      localeData: '{"htmlLang":"en","hreflangs":[]}',
    };

    await runAssetsStage({ limit: 10 });

    const row = db.prepare('SELECT status FROM sites WHERE id = ?').get(id);
    assert.strictEqual(row.status, 'assets_captured');
    assert.strictEqual(mockWriteHtmlDom.mock.callCount(), 1, 'writeHtmlDom should be called');
    assert.strictEqual(mockResetRetries.mock.callCount(), 1, 'resetRetries should be called');
    assert.strictEqual(mockRecordFailure.mock.callCount(), 0, 'no failures expected');
  });

  test('calls recordFailure when HTML is empty', async () => {
    insertSite({ domain: 'empty.com', country_code: 'AU' });

    stubHtmlCaptureResult = {
      html: '',
      httpStatusCode: 200,
      localeData: null,
    };

    await runAssetsStage({ limit: 10 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    // recordFailure(siteId, stage, err, fallbackStatus) — err is index 2
    const failArgs = mockRecordFailure.mock.calls[0].arguments;
    assert.match(failArgs[2].message, /HTML DOM capture failed/i);
  });

  test('calls recordFailure when HTTP status is 404', async () => {
    insertSite({ domain: 'notfound.com', country_code: 'AU' });

    stubHtmlCaptureResult = {
      html: '<html><body>Not Found</body></html>',
      httpStatusCode: 404,
      localeData: null,
    };

    await runAssetsStage({ limit: 10 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    const failArgs = mockRecordFailure.mock.calls[0].arguments;
    assert.match(failArgs[2].message, /HTTP 404/i);
  });

  test('schedules retry in 7 days when error page detected', async () => {
    const id = insertSite({ domain: 'errpage.com', country_code: 'AU' });

    stubHtmlCaptureResult = {
      html: '<html><body>404 Page Not Found</body></html>',
      httpStatusCode: 200,
      localeData: null,
    };
    stubDetectErrorPageResult = {
      isErrorPage: true,
      indicator: '404 keyword',
      wordCount: 4,
    };

    await runAssetsStage({ limit: 10 });

    const row = db.prepare('SELECT recapture_at FROM sites WHERE id = ?').get(id);
    assert.notStrictEqual(row.recapture_at, null, 'recapture_at should be set');
    assert.strictEqual(
      mockRecordFailure.mock.callCount(),
      1,
      'recordFailure called for error page'
    );
  });

  test(
    'deduplicates sites before processing',
    { skip: 'deduplicateSites removed (DR-106): UNIQUE constraint prevents duplicates at insert time' },
    async () => {
      stubDedupeStats = { sitesIgnored: 5, duplicateDomains: 3, crossBorder: 1 };
      await runAssetsStage();
      assert.ok(mockDeduplicateSites.mock.callCount() > 0, 'deduplicateSites should be called');
    }
  );

  test('processes multiple sites and returns correct counts', async () => {
    insertSite({ domain: 'site1.com', country_code: 'AU' });
    insertSite({ domain: 'site2.com', country_code: 'AU' });

    stubHtmlCaptureResult = {
      html: '<html><body>Content</body></html>',
      httpStatusCode: 200,
      localeData: null,
    };

    const result = await runAssetsStage();

    assert.strictEqual(result.processed, 2);
    assert.ok('succeeded' in result);
    assert.ok('failed' in result);
    assert.ok('duration' in result);
  });
});

// ============================================================================
// Tests — Vision-enabled path (ENABLE_VISION=true)
// ============================================================================

describe('assets.js — Vision-enabled path (ENABLE_VISION=true)', () => {
  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'true';
  });

  test('returns zero counts when no candidate sites exist', async () => {
    const result = await runAssetsStage({ limit: 5 });
    assert.strictEqual(result.processed, 0);
    assert.strictEqual(result.succeeded, 0);
    assert.strictEqual(result.failed, 0);
    assert.strictEqual(typeof result.duration, 'number');
  });

  test('captures screenshots and marks assets_captured on success', async () => {
    const id = insertSite({
      domain: 'vision.com',
      keyword: 'electrician',
      country_code: 'AU',
      screenshot_path: null,
      html_dom: null,
    });
    db.prepare('INSERT OR IGNORE INTO keywords (keyword, country_code) VALUES (?, ?)').run(
      'electrician',
      'AU'
    );

    stubCaptureWebsiteResult = makeCaptureResult({
      html: '<html><body>Electrician page</body></html>',
    });
    stubScreenshotsExist = { exists: true, missing: [] };

    await runAssetsStage({ limit: 1 });

    const row = db.prepare('SELECT status FROM sites WHERE id = ?').get(id);
    assert.strictEqual(row.status, 'assets_captured');
    assert.strictEqual(mockCaptureWebsite.mock.callCount(), 1);
    assert.strictEqual(mockSaveScreenshots.mock.callCount(), 1);
    assert.strictEqual(mockResetRetries.mock.callCount(), 1);
    assert.strictEqual(mockRecordFailure.mock.callCount(), 0);
  });

  test('calls recordFailure when screenshot validation fails', async () => {
    insertSite({ domain: 'badss.com', country_code: 'AU', screenshot_path: null, html_dom: null });

    stubCaptureWebsiteResult = makeCaptureResult();
    stubScreenshotsExist = { exists: false, missing: ['desktop_above', 'mobile_above'] };

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    // recordFailure(siteId, stage, err, fallbackStatus) — err is index 2
    const failArgs = mockRecordFailure.mock.calls[0].arguments;
    assert.match(failArgs[2].message, /Screenshot validation failed/i);
  });

  test('marks blocklisted site as ignored in vision mode', async () => {
    const id = insertSite({
      domain: 'facebook.com',
      country_code: 'US',
      screenshot_path: null,
      html_dom: null,
    });
    stubCheckBlocklistResult = { reason: 'Social media: facebook.com' };

    await runAssetsStage({ limit: 1 });

    const row = db.prepare('SELECT status FROM sites WHERE id = ?').get(id);
    assert.strictEqual(row.status, 'ignored');
    assert.strictEqual(mockCaptureWebsite.mock.callCount(), 0, 'should not capture blocked sites');
  });

  test('cleans up sites with invalid screenshot_path during candidate check', async () => {
    const id = insertSite({
      domain: 'stale.com',
      country_code: 'AU',
      screenshot_path: '/old/path',
      html_dom: null,
    });

    // croppedScreenshotsExist returns the same stub for every call, so both the
    // candidate check and the post-capture validation report screenshots as
    // missing: the site takes the "needs capture" path and then fails validation.
    stubScreenshotsExist = { exists: false, missing: ['desktop_above'] };

    // The capture itself succeeds with valid screenshots to keep the test deterministic.
    stubCaptureWebsiteResult = makeCaptureResult();

    await runAssetsStage({ limit: 5 });

    // The site's screenshot_path should have been reset to NULL (cleanup logic)
    const row = db.prepare('SELECT screenshot_path FROM sites WHERE id = ?').get(id);
    assert.strictEqual(row.screenshot_path, null, 'screenshot_path should be reset to NULL');
  });

  test('calls recordFailure when HTTP status is error', async () => {
    insertSite({
      domain: 'http-err.com',
      country_code: 'AU',
      screenshot_path: null,
      html_dom: null,
    });

    stubCaptureWebsiteResult = makeCaptureResult({
      html: '<html><body>Not Found</body></html>',
      screenshots: null,
      screenshotsUncropped: null,
      httpStatusCode: 404,
    });

    await runAssetsStage({ limit: 1 });

    assert.strictEqual(mockRecordFailure.mock.callCount(), 1);
    // recordFailure(siteId, stage, err, fallbackStatus) — err is index 2
    const failArgs = mockRecordFailure.mock.calls[0].arguments;
    assert.match(failArgs[2].message, /HTTP 404/i);
  });
});

// ============================================================================
// Tests — getAssetsStats()
// ============================================================================

describe('getAssetsStats()', () => {
  beforeEach(() => {
    clearSites();
  });

  test('returns stats object with expected numeric fields', async () => {
    const stats = await getAssetsStats();
    assert.ok(stats !== null && typeof stats === 'object', 'should return an object');

    const expectedFields = [
      'total_sites',
      'sites_with_screenshots',
      'captured_sites',
      'pending_capture',
      'failed_capture',
    ];
    for (const field of expectedFields) {
      assert.ok(field in stats, `should have ${field}`);
    }
  });

  test('returns zero counts when no sites exist', async () => {
    const stats = await getAssetsStats();
    // Number() normalises the counts in case the driver returns them as strings.
    assert.strictEqual(Number(stats.total_sites), 0);
    assert.strictEqual(Number(stats.sites_with_screenshots), 0);
    assert.strictEqual(Number(stats.captured_sites), 0);
  });

  test('counts sites_with_screenshots correctly', async () => {
    insertSite({ screenshot_path: '/path/to/ss', status: 'assets_captured' });
    insertSite({ screenshot_path: null, status: 'found' });
    insertSite({ screenshot_path: '/path/to/ss2', status: 'assets_captured' });

    const stats = await getAssetsStats();
    assert.strictEqual(Number(stats.total_sites), 3);
    assert.strictEqual(Number(stats.sites_with_screenshots), 2);
    assert.strictEqual(Number(stats.captured_sites), 2);
  });

  test('counts pending_capture correctly', async () => {
    insertSite({ screenshot_path: null, status: 'found' });
    insertSite({ screenshot_path: null, status: 'found' });

    const stats = await getAssetsStats();
    assert.strictEqual(Number(stats.pending_capture), 2);
  });

  test('counts failed_capture correctly', async () => {
    insertSite({ screenshot_path: null, status: 'found', error_message: 'Capture failed' });
    insertSite({ screenshot_path: null, status: 'found', error_message: null });

    const stats = await getAssetsStats();
    assert.strictEqual(Number(stats.failed_capture), 1);
  });
});

// ============================================================================
// Tests — backfillScreenshots()
// ============================================================================

describe('backfillScreenshots()', () => {
  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('returns zero counts when no sites need backfill', async () => {
    const result = await backfillScreenshots(10);
    assert.strictEqual(result.processed, 0);
    assert.strictEqual(result.succeeded, 0);
    assert.strictEqual(result.failed, 0);
  });

  test('delegates to runAssetsStage with correct limit', async () => {
    insertSite({ domain: 'backfill.com', status: 'found', screenshot_path: null });
    stubHtmlCaptureResult = {
      html: '<html><body>Backfill page</body></html>',
      httpStatusCode: 200,
      localeData: null,
    };

    const result = await backfillScreenshots(5);

    // NOTE(review): only the result shape is checked here; the limit passed
    // through to runAssetsStage is not observable from this test.
    assert.ok('processed' in result, 'should return processed');
    assert.ok('succeeded' in result, 'should return succeeded');
    assert.ok('failed' in result, 'should return failed');
  });

  test('uses default limit of 10', async () => {
    const result = await backfillScreenshots();
    // No sites in DB, so processed = 0 regardless of limit
    assert.strictEqual(result.processed, 0);
  });
});

// ============================================================================
// Tests — legacy flag deprecation warning (no throw)
// ============================================================================

/**
 * Runs `fn` with `process.env[name]` set to `value`, then puts the variable
 * back the way it was — even when `fn` throws — so a failing assertion cannot
 * leak the flag into later tests.
 *
 * @param {string} name - Environment variable to set.
 * @param {string} value - Value to assign for the duration of `fn`.
 * @param {() => Promise<unknown>} fn - Async body to run with the flag set.
 * @returns {Promise<unknown>} Whatever `fn` resolves to.
 */
async function withEnvFlag(name, value, fn) {
  const previous = process.env[name];
  process.env[name] = value;
  try {
    return await fn();
  } finally {
    if (previous === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = previous;
    }
  }
}

describe('assets.js — legacy flag handling', () => {
  beforeEach(() => {
    resetStubs();
    clearSites();
    process.env.ENABLE_VISION = 'false';
  });

  test('does not throw when legacy ENABLE_SCREENSHOT_CAPTURE flag is set', async () => {
    await withEnvFlag('ENABLE_SCREENSHOT_CAPTURE', 'true', async () => {
      const result = await runAssetsStage();
      assert.strictEqual(result.processed, 0);
    });
  });

  test('does not throw when USE_COMPUTER_VISION_SCORING flag is set', async () => {
    await withEnvFlag('USE_COMPUTER_VISION_SCORING', 'true', async () => {
      const result = await runAssetsStage();
      assert.strictEqual(result.processed, 0);
    });
  });
});