process.test.js
/**
 * Test suite for src/process.js.
 *
 * Areas exercised:
 * - main(): direct URL processing path
 * - main(): keyword pipeline path (populateKeywords, getKeywordsToScrape, processKeyword)
 * - processSingleUrl(): domain extraction, site creation
 * - processSite(): existing-site skip, HTTP error path, success path
 * - processKeyword(): scrape error, site processing, remaining limit
 *
 * Every external dependency is replaced through mock.module().
 *
 * NOTE: must be run with --experimental-test-module-mocks
 */

import { test, describe, mock, before, after } from 'node:test';
import assert from 'node:assert/strict';
import RealDatabase from 'better-sqlite3';
import { join } from 'path';
import { tmpdir } from 'os';
import { mkdirSync, rmSync } from 'fs';
import { createLazyPgMock } from './helpers/pg-mock.js';

// ── Mutable stubs ─────────────────────────────────────────────────────────────
// The module mocks forward to these bindings, so a test changes a dependency's
// behaviour simply by reassigning the matching variable.

let scrapeSERPFn = async () => [];

let captureWebsiteFn = async () => ({
  html: '<html>test</html>',
  httpStatusCode: 200,
  screenshots: { desktop_above: null, desktop_below: null, mobile_above: null },
  screenshotsUncropped: { desktop_above: null, desktop_below: null, mobile_above: null },
});

let scoreWebsiteFn = async () => ({
  overall_calculation: { conversion_score: 75 },
  resubmitted: false,
});

let extractGradeFn = () => 'C';

let saveScreenshotsFn = async () => 'screenshots/test';

let generateKeywordCombinationsFn = () => [
  {
    keyword: 'plumber sydney',
    countryCode: 'AU',
    googleDomain: 'google.com.au',
    searchVolume: 100,
    priority: 1,
  },
  {
    keyword: 'electrician sydney',
    countryCode: 'AU',
    googleDomain: 'google.com.au',
    searchVolume: 80,
    priority: 2,
  },
];

let upsertKeywordFn = () => {};
let incrementProcessedCountFn = () => {};
let incrementLowScoringCountFn = () => {};
// Never reassigned by any test in this file, hence const.
const incrementReworkLowScoringCountFn = () => {};

// SQLite handle the pg-mock resolves lazily — point it at a database before
// each test that touches the db layer.
let activeDb = null;

// Minimal schema for the two tables these tests read and write.
const TABLES_SQL = `
  CREATE TABLE IF NOT EXISTS sites (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    domain TEXT NOT NULL,
    landing_page_url TEXT,
    keyword TEXT,
    html_dom TEXT,
    http_status_code INTEGER,
    score_json TEXT,
    score REAL,
    grade TEXT,
    status TEXT DEFAULT 'found',
    screenshot_path TEXT,
    processing_status TEXT,
    error_log TEXT,
    scored_at TEXT,
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    rescored_at DATETIME
  );
  CREATE TABLE IF NOT EXISTS keywords (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    keyword TEXT NOT NULL,
    country_code TEXT,
    google_domain TEXT,
    search_volume INTEGER,
    priority INTEGER DEFAULT 0,
    last_scraped_at TEXT,
    zenrows_count INTEGER DEFAULT 0
  );
`;

// ── Mock modules BEFORE import ────────────────────────────────────────────────

// db.js: route processSite/processSingleUrl queries to the current activeDb.
mock.module('../src/utils/db.js', {
  namedExports: createLazyPgMock(() => activeDb),
});

// Logger: silent stand-in so test output stays clean.
mock.module('../src/utils/logger.js', {
  defaultExport: class {
    info() {}
    warn() {}
    error() {}
    success() {}
    debug() {}
  },
});

// Filesystem storage helpers (process.js writes HTML and score JSON to disk).
mock.module('../src/utils/html-storage.js', {
  namedExports: {
    writeHtmlDom: () => {},
    readHtmlDom: () => null,
    hasHtmlDom: () => false,
    deleteHtmlDom: () => {},
    writeKeyPagesHtml: () => {},
    readKeyPagesHtml: () => null,
    deleteKeyPagesHtml: () => {},
  },
});

mock.module('../src/utils/score-storage.js', {
  namedExports: {
    setScoreJson: () => {},
    getScoreJson: () => null,
    deleteScoreJson: () => {},
  },
});

mock.module('../src/scrape.js', {
  namedExports: {
    scrapeSERP: (...args) => scrapeSERPFn(...args),
  },
});

// Each mock below forwards to a mutable stub so individual tests can swap behaviour.
mock.module('../src/capture.js', {
  namedExports: {
    captureWebsite: (...args) => captureWebsiteFn(...args),
  },
});

mock.module('../src/score.js', {
  namedExports: {
    scoreWebsite: (...args) => scoreWebsiteFn(...args),
    extractGrade: (...args) => extractGradeFn(...args),
  },
});

mock.module('../src/utils/screenshot-storage.js', {
  namedExports: {
    saveScreenshots: (...args) => saveScreenshotsFn(...args),
  },
});

mock.module('../src/utils/keyword-manager.js', {
  namedExports: {
    generateKeywordCombinations: (...args) => generateKeywordCombinationsFn(...args),
    upsertKeyword: (...args) => upsertKeywordFn(...args),
    incrementProcessedCount: (...args) => incrementProcessedCountFn(...args),
    incrementLowScoringCount: (...args) => incrementLowScoringCountFn(...args),
    incrementReworkLowScoringCount: (...args) => incrementReworkLowScoringCountFn(...args),
  },
});

mock.module('../src/utils/load-env.js', {
  namedExports: {},
});

// ── DB setup ──────────────────────────────────────────────────────────────────

let tmpDir;
let dbPath;

before(() => {
  tmpDir = join(tmpdir(), `process-test-${Date.now()}`);
  mkdirSync(tmpDir, { recursive: true });
  dbPath = join(tmpDir, 'test.db');
  process.env.DATABASE_PATH = dbPath;
});

after(() => {
  delete process.env.DATABASE_PATH;
  rmSync(tmpDir, { recursive: true, force: true });
});

// Import AFTER mocks
const { processSingleUrl, processSite, main } = await import('../src/process.js');

// ── Helpers ───────────────────────────────────────────────────────────────────

/**
 * Create a fresh in-memory SQLite database with the test schema applied.
 * @returns {object} open better-sqlite3 database handle
 */
function createTestDb() {
  const db = new RealDatabase(':memory:');
  db.exec(TABLES_SQL);
  return db;
}

/**
 * Run `run(db)` against a fresh test database that is installed as `activeDb`.
 * Cleanup lives in `finally` so a failing test cannot leave an open handle or
 * a stale `activeDb` behind for the tests that follow.
 * @param {(db: object) => Promise<unknown>} run - test body
 * @returns {Promise<unknown>} whatever `run` resolves to
 */
async function withTestDb(run) {
  const db = createTestDb();
  activeDb = db;
  try {
    return await run(db);
  } finally {
    activeDb = null;
    db.close();
  }
}

/** @returns {object} screenshot set with every slot empty */
function blankScreenshots() {
  return { desktop_above: null, desktop_below: null, mobile_above: null };
}

/**
 * Build a captureWebsite() result: HTTP 200, empty page, no screenshots,
 * with any field replaceable through `overrides`.
 * @param {object} [overrides] - fields to replace on the default result
 * @returns {object} capture result
 */
function captureResult(overrides = {}) {
  return {
    html: '<html></html>',
    httpStatusCode: 200,
    screenshots: blankScreenshots(),
    screenshotsUncropped: blankScreenshots(),
    ...overrides,
  };
}

/** Make captureWebsite resolve to a fresh `captureResult(overrides)` on every call. */
function stubCapture(overrides = {}) {
  captureWebsiteFn = async () => captureResult(overrides);
}

/** Make captureWebsite report an HTTP failure with an empty body and no screenshots. */
function stubHttpError(httpStatusCode) {
  stubCapture({ html: '', httpStatusCode, screenshots: {}, screenshotsUncropped: {} });
}

/**
 * Stub the scoring stage: scoreWebsite, extractGrade and saveScreenshots.
 * @param {object} options
 * @param {number} options.score - conversion_score reported by scoreWebsite
 * @param {string|null} options.grade - value returned by extractGrade
 * @param {string} options.screenshotPath - value resolved by saveScreenshots
 * @param {boolean} [options.resubmitted=false] - resubmitted flag on the score result
 */
function stubScoring({ score, grade, screenshotPath, resubmitted = false }) {
  scoreWebsiteFn = async () => ({
    overall_calculation: { conversion_score: score },
    resubmitted,
  });
  extractGradeFn = () => grade;
  saveScreenshotsFn = async () => screenshotPath;
}

/** Build one AU keyword combination as returned by generateKeywordCombinations. */
function keywordCombo(keyword, searchVolume = 50, priority = 1) {
  return {
    keyword,
    countryCode: 'AU',
    googleDomain: 'google.com.au',
    searchVolume,
    priority,
  };
}

/** Build one SERP result entry for `domain`. */
function serpSite(domain) {
  return { url: `https://${domain}`, domain };
}

/** Build SERP results named `<keyword-slug>-<suffix>.com`, one per suffix. */
function serpSitesFor(keyword, suffixes) {
  const slug = keyword.replace(/ /g, '-');
  return suffixes.map(suffix => serpSite(`${slug}-${suffix}.com`));
}

/**
 * Replace incrementProcessedCount with a counting stub.
 * @returns {{ count: number }} live counter updated on every call
 */
function trackProcessedCount() {
  const tracker = { count: 0 };
  incrementProcessedCountFn = () => {
    tracker.count += 1;
  };
  return tracker;
}

/** Turn a grade such as "B-" or "C+" into a hostname-safe token ("Bm", "Cp"). */
function gradeSlug(grade) {
  return grade.replace('+', 'p').replace('-', 'm');
}

// ── Tests ─────────────────────────────────────────────────────────────────────

describe('processSingleUrl', () => {
  test('inserts site record for a new domain', async () => {
    await withTestDb(async db => {
      stubCapture({ html: '<html>test</html>' });
      stubScoring({ score: 65, grade: 'D', screenshotPath: 'screenshots/123' });

      await processSingleUrl('https://example.com/page');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'example.com'").get();

      assert.ok(site, 'site should be inserted');
      assert.equal(site.keyword, 'manual', 'keyword should be "manual"');
      assert.equal(site.grade, 'D');
    });
  });

  test('skips domain that already exists in DB', async () => {
    await withTestDb(async db => {
      db.prepare("INSERT INTO sites (domain, status) VALUES ('existing.com', 'prog_scored')").run();

      let captureCalled = false;
      captureWebsiteFn = async () => {
        captureCalled = true;
        return { html: '', httpStatusCode: 200, screenshots: {}, screenshotsUncropped: {} };
      };

      await processSingleUrl('https://www.existing.com/');

      assert.equal(captureCalled, false, 'capture should not be called for existing site');
    });
  });
});

describe('processSite — HTTP error path', () => {
  test('stores site with failed status when HTTP 404', async () => {
    await withTestDb(async db => {
      stubHttpError(404);

      await processSite({ url: 'https://notfound.com', domain: 'notfound.com' }, 'test-keyword');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'notfound.com'").get();

      assert.ok(site, 'site should be stored even on HTTP error');
      assert.equal(site.processing_status, 'failed');
      assert.ok(site.error_log?.includes('404'), `error_log should mention 404: ${site.error_log}`);
    });
  });

  test('stores site with failed status when HTTP 500', async () => {
    await withTestDb(async db => {
      stubHttpError(500);

      await processSite({ url: 'https://error.com', domain: 'error.com' }, 'test');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'error.com'").get();

      assert.equal(site.processing_status, 'failed');
    });
  });
});

describe('processSite — success path', () => {
  test('inserts scored site with grade and screenshot_path', async () => {
    await withTestDb(async db => {
      stubCapture({
        html: '<html><head><title>Test</title></head></html>',
        screenshots: { desktop_above: Buffer.from('img'), desktop_below: null, mobile_above: null },
      });
      stubScoring({ score: 82, grade: 'B', screenshotPath: 'screenshots/101' });

      await processSite({ url: 'https://good.com', domain: 'good.com' }, 'plumber');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'good.com'").get();

      assert.ok(site, 'site should be inserted');
      assert.equal(site.grade, 'B');
      assert.equal(site.screenshot_path, 'screenshots/101');
      assert.equal(site.status, 'prog_scored');
    });
  });

  test('increments low scoring count for D grade', async () => {
    await withTestDb(async () => {
      stubCapture();
      stubScoring({ score: 30, grade: 'D', screenshotPath: 'screenshots/102' });

      // Smoke test only: checks the call does not reject. The increment itself is
      // asserted for other low grades in the 'low grade variants' suite.
      await assert.doesNotReject(() =>
        processSite({ url: 'https://poor.com', domain: 'poor.com' }, 'test')
      );
    });
  });

  test('handles resubmitted flag for rework counting', async () => {
    await withTestDb(async () => {
      stubCapture();
      // resubmitted: true is meant to exercise the incrementReworkLowScoringCount branch.
      stubScoring({
        score: 20,
        grade: 'F',
        screenshotPath: 'screenshots/103',
        resubmitted: true,
      });

      await assert.doesNotReject(() =>
        processSite({ url: 'https://fail.com', domain: 'fail.com' }, 'test')
      );
    });
  });

  test('handles null grade gracefully', async () => {
    await withTestDb(async () => {
      stubCapture();
      // Set by hand: this score result deliberately has no `resubmitted` key.
      scoreWebsiteFn = async () => ({ overall_calculation: null });
      extractGradeFn = () => null;
      saveScreenshotsFn = async () => 'screenshots/104';

      await assert.doesNotReject(() =>
        processSite({ url: 'https://nullgrade.com', domain: 'nullgrade.com' }, 'test')
      );
    });
  });
});

// ── main() — URL direct-processing path (covers lines 34-46) ────────────────

describe('main — URL path', () => {
  test('processes a single URL when url argument provided', async () => {
    await withTestDb(async () => {
      let capturedDomain = null;
      captureWebsiteFn = async url => {
        capturedDomain = new URL(url).hostname.replace(/^www\./, '');
        return captureResult({ html: '<html>main-url-test</html>' });
      };
      stubScoring({ score: 55, grade: 'C-', screenshotPath: 'screenshots/main-url' });

      await main(1, 'https://mainurl.com');

      // The capture stub records the URL it was handed, which shows the
      // single-URL path reached the capture stage with the right site.
      assert.equal(capturedDomain, 'mainurl.com', 'should have captured mainurl.com');
    });
  });
});

// ── main() — keyword pipeline path (covers lines 48-76, 85-124, 130-159, 167-179) ──

describe('main — keyword pipeline path', () => {
  // Make upsertKeyword actually write rows so getKeywordsToScrape can find them.
  // upsertKeyword is called as (keyword, data), without a db argument.
  function makeRealUpsert() {
    upsertKeywordFn = async (keyword, data) => {
      const db = activeDb;
      if (!db) return;

      // Simplified version of the real upsertKeyword: insert when missing,
      // otherwise only record scrape bookkeeping when it is supplied.
      const existing = db.prepare('SELECT id FROM keywords WHERE keyword = ?').get(keyword);
      if (!existing) {
        db.prepare(
          'INSERT INTO keywords (keyword, country_code, google_domain, search_volume, priority) VALUES (?, ?, ?, ?, ?)'
        ).run(
          keyword,
          data.country_code || null,
          data.google_domain || null,
          data.search_volume || 0,
          data.priority || 0
        );
      } else if (data.zenrows_count !== undefined) {
        db.prepare(
          'UPDATE keywords SET zenrows_count = ?, last_scraped_at = ? WHERE keyword = ?'
        ).run(data.zenrows_count, data.last_scraped_at || null, keyword);
      }
    };
  }

  test('populates keywords, scrapes, and processes sites', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      const processed = trackProcessedCount();

      // scrapeSERP returns 2 sites for the keyword
      scrapeSERPFn = async keyword => serpSitesFor(keyword, ['1', '2']);
      stubCapture({ html: '<html>pipeline</html>' });
      stubScoring({ score: 70, grade: 'B-', screenshotPath: 'screenshots/pipeline' });
      generateKeywordCombinationsFn = () => [keywordCombo('test kw')];

      await main(10);

      // processKeyword should have scraped and processed sites
      assert.ok(processed.count > 0, 'incrementProcessedCount should have been called');
    });
  });

  test('respects site limit and stops processing', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      const processed = trackProcessedCount();

      // Return 5 sites per keyword
      scrapeSERPFn = async () =>
        ['a.com', 'b.com', 'c.com', 'd.com', 'e.com'].map(domain => serpSite(domain));
      stubCapture();
      stubScoring({ score: 60, grade: 'C', screenshotPath: 'screenshots/limit' });
      generateKeywordCombinationsFn = () => [keywordCombo('limit kw')];

      // Limit to 2 sites
      await main(2);

      assert.equal(processed.count, 2, 'should only process 2 sites (the limit)');
    });
  });

  test('handles scrapeSERP failure gracefully (processKeyword catch)', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      incrementProcessedCountFn = () => {};

      // scrapeSERP throws
      scrapeSERPFn = async () => {
        throw new Error('ZenRows API down');
      };
      generateKeywordCombinationsFn = () => [keywordCombo('fail kw')];

      // Should not throw — processKeyword catches scrape errors
      await assert.doesNotReject(() => main(10));
    });
  });

  test('handles individual site failure in processKeyword loop', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      const processed = trackProcessedCount();

      scrapeSERPFn = async () => [
        serpSite('ok1.com'),
        serpSite('broken.com'),
        serpSite('ok2.com'),
      ];
      captureWebsiteFn = async url => {
        if (url === 'https://broken.com') {
          throw new Error('capture failed');
        }
        return captureResult();
      };
      stubScoring({ score: 70, grade: 'B', screenshotPath: 'screenshots/mixed' });
      generateKeywordCombinationsFn = () => [keywordCombo('mixed kw')];

      await main(10);

      // ok1.com and ok2.com processed, broken.com skipped
      assert.equal(processed.count, 2, 'should process 2 sites, skipping the broken one');
    });
  });

  test('stops processing keywords when site limit is reached (lines 60-63)', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      const processed = trackProcessedCount();

      stubCapture();
      stubScoring({ score: 60, grade: 'C', screenshotPath: 'screenshots/limit-break' });

      // 3 keywords, limit=30 -> ceil(30/10)=3 keywords fetched from DB.
      // kw1 returns 10 sites (total=10). kw2 returns 25 sites (remainingLimit=20,
      // processes 20, total=30). Loop iterates to kw3: if (30>=30) -> break (lines 60-63).
      generateKeywordCombinationsFn = () => [
        keywordCombo('brk-a', 50, 1),
        keywordCombo('brk-b', 40, 2),
        keywordCombo('brk-c', 30, 3),
      ];
      scrapeSERPFn = async keyword => {
        const count = keyword === 'brk-a' ? 10 : 25;
        return Array.from({ length: count }, (_, i) => serpSite(`${keyword}-${i}.com`));
      };

      await main(30);

      assert.equal(processed.count, 30, 'should stop at limit, skipping kw3');
    });
  });

  test('processes multiple keywords and stops at site limit', async () => {
    await withTestDb(async () => {
      makeRealUpsert();
      const processed = trackProcessedCount();

      // Each keyword returns 3 sites
      scrapeSERPFn = async keyword => serpSitesFor(keyword, ['a', 'b', 'c']);
      stubCapture();
      stubScoring({ score: 60, grade: 'C', screenshotPath: 'screenshots/multi' });
      generateKeywordCombinationsFn = () => [
        keywordCombo('multi one', 50, 1),
        keywordCombo('multi two', 40, 2),
      ];

      // Math.ceil(limit/10) keywords are fetched, so limit must be >= 11 to get both.
      // With limit=20: kw1 -> 3 sites, kw2 -> 3 sites, total 6 < 20, so the limit is
      // never hit and both keywords are processed in full.
      await main(20);

      // 2 keywords * 3 sites each = 6 total
      assert.equal(processed.count, 6, 'should process all 6 sites across 2 keywords');
    });
  });
});

// ── main() — error path (covers lines 70-76) ────────────────────────────────

describe('main — error handling', () => {
  test('calls process.exit(1) on pipeline failure', async () => {
    await withTestDb(async () => {
      let exitCode = null;
      const originalExit = process.exit;
      process.exit = code => {
        exitCode = code;
      };

      // Make populateKeywords fail by having generateKeywordCombinations throw
      generateKeywordCombinationsFn = () => {
        throw new Error('combos failed');
      };

      try {
        await main(10);
      } catch {
        // Deliberately ignored: the stubbed process.exit does not halt execution,
        // so main() may or may not reject afterwards. Only the exit code matters.
      } finally {
        // Always restore, otherwise a stubbed process.exit would leak into later tests.
        process.exit = originalExit;
      }

      assert.equal(exitCode, 1, 'process.exit should be called with code 1');
    });
  });
});

// ── populateKeywords — existing keyword path (covers lines 130-159) ──────────

describe('main — populateKeywords with existing keywords', () => {
  test('skips keywords that already exist in DB (existingKeywords++ path)', async () => {
    await withTestDb(async db => {
      const upsertCalls = [];
      upsertKeywordFn = (keyword, data) => {
        upsertCalls.push({ keyword, data });
      };
      incrementProcessedCountFn = () => {};

      // Return no sites so we don't proceed past keyword population
      scrapeSERPFn = async () => [];
      generateKeywordCombinationsFn = () => [keywordCombo('existing kw'), keywordCombo('new kw')];

      // Pre-insert 'existing kw' into the DB before main() runs
      db.prepare("INSERT INTO keywords (keyword, country_code) VALUES ('existing kw', 'AU')").run();

      await main(10);

      // Only 'new kw' should trigger upsertKeyword; 'existing kw' was pre-inserted
      // (covers the existingKeywords++ path, lines 152-153)
      const kwUpserts = upsertCalls.filter(c => c.data && c.data.country_code);
      assert.equal(kwUpserts.length, 1, 'only the new keyword should be upserted');
      assert.equal(kwUpserts[0].keyword, 'new kw');
    });
  });
});

// ── processSite — low-grade variants (B-, C+, etc.) ──────────────────────────

describe('processSite — low grade variants', () => {
  for (const grade of ['B-', 'C+', 'C', 'C-', 'D+', 'D-', 'F']) {
    test(`increments low scoring for grade ${grade}`, async () => {
      await withTestDb(async () => {
        let lowCalled = false;
        incrementLowScoringCountFn = () => {
          lowCalled = true;
        };
        stubCapture();
        stubScoring({ score: 30, grade, screenshotPath: 'screenshots/grade' });

        const domain = `grade-${gradeSlug(grade)}.com`;
        await processSite({ url: `https://${domain}`, domain }, 'test');

        assert.ok(lowCalled, `incrementLowScoringCount should be called for grade ${grade}`);
      });
    });
  }

  test('does NOT increment low scoring for high grades (A, A-, B+, B)', async () => {
    for (const grade of ['A', 'A-', 'B+', 'B']) {
      // Fresh database and flag per grade so one iteration cannot mask another.
      await withTestDb(async () => {
        let lowCalled = false;
        incrementLowScoringCountFn = () => {
          lowCalled = true;
        };
        stubCapture();
        stubScoring({ score: 90, grade, screenshotPath: 'screenshots/high' });

        const domain = `high-${gradeSlug(grade)}.com`;
        await processSite({ url: `https://${domain}`, domain }, 'test');

        assert.equal(
          lowCalled,
          false,
          `incrementLowScoringCount should NOT be called for grade ${grade}`
        );
      });
    }
  });
});

// ── processSite — HTTP status edge cases ─────────────────────────────────────

describe('processSite — HTTP status edge cases', () => {
  test('treats status 399 as success (not >= 400)', async () => {
    await withTestDb(async db => {
      stubCapture({ html: '<html>redirect</html>', httpStatusCode: 399 });
      stubScoring({ score: 70, grade: 'B', screenshotPath: 'screenshots/399' });

      await processSite({ url: 'https://redirect.com', domain: 'redirect.com' }, 'test');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'redirect.com'").get();

      assert.equal(site.status, 'prog_scored', 'should be prog_scored, not failed');
      assert.equal(site.processing_status, null, 'processing_status should not be failed');
    });
  });

  test('treats status 400 as HTTP error', async () => {
    await withTestDb(async db => {
      stubHttpError(400);

      await processSite({ url: 'https://bad400.com', domain: 'bad400.com' }, 'test');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'bad400.com'").get();

      assert.equal(site.processing_status, 'failed');
    });
  });

  test('handles null httpStatusCode (no error)', async () => {
    await withTestDb(async db => {
      stubCapture({ httpStatusCode: null });
      stubScoring({ score: 50, grade: 'C', screenshotPath: 'screenshots/null-status' });

      await processSite({ url: 'https://nullstatus.com', domain: 'nullstatus.com' }, 'test');
      const site = db.prepare("SELECT * FROM sites WHERE domain = 'nullstatus.com'").get();

      assert.equal(site.status, 'prog_scored', 'null httpStatusCode should proceed to scoring');
    });
  });
});