serps.test.js
/**
 * Unit Tests for SERPs Stage
 *
 * Exercises runSerpsStage() and getSerpsStats() from src/stages/serps.js
 *
 * Every external dependency is replaced through Node.js 22+ mock.module().
 * ALL mock.module() calls MUST be registered before any dynamic import.
 *
 * Run with:
 *   NODE_ENV=test LOGS_DIR=/tmp/test-logs DATABASE_PATH=/tmp/test-sites.db \
 *   node --experimental-test-module-mocks --test tests/stages/serps.test.js
 */

import { describe, test, mock, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert';
import Database from 'better-sqlite3';
import { createLazyPgMock } from '../helpers/pg-mock.js';
import { readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const projectRoot = join(__dirname, '..', '..');

// ─── Test DB factory ─────────────────────────────────────────────────────────
// A fresh in-memory DB is built for every test and published through
// `currentTestDb`, which is what the mocked constructor below returns to
// `runSerpsStage()`.
//
// IMPORTANT: serps.js calls `db.close()` in a finally block. Letting that reach
// the real DB would break the assertions that run afterwards, so the DB is
// wrapped in a Proxy whose close() does nothing.

let currentTestDb = null;

/**
 * Create an in-memory SQLite database loaded with db/schema.sql and wrap it in
 * a Proxy that turns close() into a no-op.
 *
 * @returns {object} Proxy around the better-sqlite3 Database instance.
 */
function buildTestDb() {
  const realDb = new Database(':memory:');
  realDb.exec(readFileSync(join(projectRoot, 'db/schema.sql'), 'utf-8'));

  const handler = {
    get(target, prop) {
      switch (prop) {
        case 'close':
          // Swallow close() so the stage's finally-block leaves the DB usable.
          return () => {};
        case 'open':
          return target.open;
        default: {
          const member = target[prop];
          if (typeof member !== 'function') {
            return member;
          }
          // Methods must run against the real instance, not the Proxy.
          return member.bind(target);
        }
      }
    },
  };

  return new Proxy(realDb, handler);
}

// ─── Mock: better-sqlite3 ─────────────────────────────────────────────────────
// Constructing the mock yields whatever `currentTestDb` references at that moment.
class DatabaseMock {
  constructor() {
    return currentTestDb;
  }
}

mock.module('better-sqlite3', { defaultExport: DatabaseMock });

// ─── Mock: db.js (PostgreSQL layer) — lazily delegates to currentTestDb ───────
// serps.js imports { run, getOne, getAll, query } from '../utils/db.js'.
// The getter passed to createLazyPgMock is evaluated lazily, so each test sees
// its own isolated in-memory DB.
const lazyPgExports = createLazyPgMock(() => currentTestDb);

mock.module('../../src/utils/db.js', { namedExports: lazyPgExports });

// ─── Mock: scrape.js ─────────────────────────────────────────────────────────
const scrapeSERPMock = mock.fn();

mock.module('../../src/scrape.js', {
  namedExports: { scrapeSERP: scrapeSERPMock },
});

// ─── Mock: logger ─────────────────────────────────────────────────────────────
// Silent logger: every level is a no-op method.
mock.module('../../src/utils/logger.js', {
  defaultExport: class MockLogger {
    info() {}

    warn() {}

    error() {}

    success() {}

    debug() {}
  },
});

// ─── Mock: summary-generator ─────────────────────────────────────────────────
mock.module('../../src/utils/summary-generator.js', {
  namedExports: {
    generateStageCompletion: mock.fn(),
    displayProgress: mock.fn(),
  },
});

// ─── Mock: error-handler — use the REAL processBatch ─────────────────────────
// Re-export the real implementation so concurrency and error collection work.
const realErrorHandler = await import('../../src/utils/error-handler.js');

mock.module('../../src/utils/error-handler.js', {
  namedExports: {
    processBatch: realErrorHandler.processBatch,
    retryWithBackoff: realErrorHandler.retryWithBackoff,
    isRetryableError: realErrorHandler.isRetryableError,
    sleep: realErrorHandler.sleep,
    safeJsonParse: realErrorHandler.safeJsonParse,
    extractDomain: realErrorHandler.extractDomain,
    withTimeout: realErrorHandler.withTimeout,
  },
});

// ─── Mock: site-filters ───────────────────────────────────────────────────────
const checkBlocklistMock = mock.fn();

mock.module('../../src/utils/site-filters.js', {
  namedExports: {
    checkBlocklist: checkBlocklistMock,
    DIRECTORY_DOMAINS: [],
    SOCIAL_DOMAINS: [],
  },
});

// ─── Mock: tld-detector ───────────────────────────────────────────────────────
const detectCountryFromTLDMock = mock.fn();

mock.module('../../src/utils/tld-detector.js', {
  namedExports: {
    detectCountryFromTLD: detectCountryFromTLDMock,
  },
});

// ─── Mock: dotenv ────────────────────────────────────────────────────────────
mock.module('dotenv', {
  namedExports: {
    config: mock.fn(),
  },
});

// ─── NOW import the module under test ────────────────────────────────────────
const { runSerpsStage, getSerpsStats } = await import('../../src/stages/serps.js');

// ─── Helpers ──────────────────────────────────────────────────────────────────

/**
 * Insert one row into the `keywords` table.
 *
 * @param {object} db - Test database handle (must expose `prepare`).
 * @param {object} [overrides] - Column values replacing the defaults below.
 * @returns {number|bigint} Row id of the inserted keyword.
 */
function insertKeyword(db, overrides = {}) {
  const kw = {
    keyword: overrides.keyword ?? 'plumber sydney',
    country_code: overrides.country_code ?? 'AU',
    google_domain: overrides.google_domain ?? 'google.com.au',
    status: overrides.status ?? 'active',
    priority: overrides.priority ?? 1,
    search_count: overrides.search_count ?? 0,
    zenrows_count: overrides.zenrows_count ?? 0,
    last_searched_at: overrides.last_searched_at ?? null,
  };

  const stmt = db.prepare(`
    INSERT INTO keywords (keyword, country_code, google_domain, status, priority,
                          search_count, zenrows_count, last_searched_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const result = stmt.run(
    kw.keyword,
    kw.country_code,
    kw.google_domain,
    kw.status,
    kw.priority,
    kw.search_count,
    kw.zenrows_count,
    kw.last_searched_at
  );
  return result.lastInsertRowid;
}

/**
 * Insert one row into the `sites` table. The landing page URL is always
 * `https://<domain>`; country_code and google_domain are fixed to
 * 'AU' / 'google.com.au'.
 *
 * @param {object} db - Test database handle (must expose `prepare`).
 * @param {object} site
 * @param {string} site.domain - Domain to store.
 * @param {string} [site.keyword='kw'] - Keyword the site is attributed to.
 * @param {string} [site.status='found'] - Value for the status column.
 */
function insertSite(db, { domain, keyword = 'kw', status = 'found' }) {
  db.prepare(
    `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
     VALUES (?, ?, ?, ?, 'AU', 'google.com.au')`
  ).run(domain, `https://${domain}`, keyword, status);
}

/**
 * Build the object shape these tests feed back from the scrapeSERP mock.
 *
 * @param {string} keyword - Keyword echoed into metadata.
 * @param {string} [countryCode='AU'] - Country code echoed into metadata.
 * @param {string} [googleDomain='google.com.au'] - Google domain echoed into metadata.
 * @param {Array<{url: string, title: string}>|null} [urls=null] - Result rows;
 *   when null, two default plumbing sites are used.
 * @returns {{results: Array, metadata: object}}
 */
function makeScrapeResult(
  keyword,
  countryCode = 'AU',
  googleDomain = 'google.com.au',
  urls = null
) {
  const defaultUrls = [
    { url: 'https://acme-plumbing.com.au', title: 'Acme Plumbing Sydney' },
    { url: 'https://bestplumber.com', title: 'Best Plumber Co' },
  ];
  return {
    results: urls ?? defaultUrls,
    metadata: { keyword, countryCode, googleDomain, currency: 'AUD' },
  };
}

// ─── Test Suite: runSerpsStage() ──────────────────────────────────────────────

describe('SERPs Stage — runSerpsStage()', () => {
  beforeEach(() => {
    currentTestDb = buildTestDb();
    scrapeSERPMock.mock.resetCalls();
    checkBlocklistMock.mock.resetCalls();
    detectCountryFromTLDMock.mock.resetCalls();

    // Default: low-confidence TLD → fall back to google_domain country
    detectCountryFromTLDMock.mock.mockImplementation(() => ({
      countryCode: null,
      confidence: 'low',
      source: 'unknown',
    }));

    // Default: nothing is blocklisted
    checkBlocklistMock.mock.mockImplementation(() => null);
  });

  afterEach(() => {
    // The handle returned by buildTestDb() has close() stubbed out as a no-op,
    // so nothing can be closed from here. The :memory: DB is ephemeral; just
    // drop the reference so the next test cannot accidentally reuse it.
    currentTestDb = null;
  });

  // ── No-op: no keywords ──────────────────────────────────────────────────────

  test('returns zero stats when no active keywords exist', async () => {
    const result = await runSerpsStage({});

    assert.strictEqual(result.processed, 0);
    assert.strictEqual(result.succeeded, 0);
    assert.strictEqual(result.failed, 0);
    assert.strictEqual(result.skipped, 0);
    assert.strictEqual(typeof result.duration, 'number', 'duration should be a number');
  });

  // ── Happy path: single keyword ───────────────────────────────────────────────

  test('scrapes results for a single active keyword and inserts sites', async () => {
    insertKeyword(currentTestDb, { keyword: 'plumber sydney', country_code: 'AU' });

    scrapeSERPMock.mock.mockImplementation(async keyword =>
      makeScrapeResult(keyword, 'AU', 'google.com.au')
    );

    const result = await runSerpsStage({});

    assert.strictEqual(result.processed, 1, 'should process 1 keyword');
    assert.strictEqual(result.succeeded, 1, 'should succeed for 1 keyword');
    assert.strictEqual(result.failed, 0);
    assert.strictEqual(result.newSites, 2, 'should add 2 new sites');

    // Verify sites were inserted in the DB
    const sites = currentTestDb.prepare('SELECT * FROM sites').all();
    assert.strictEqual(sites.length, 2);
    assert.ok(
      sites.some(s => s.landing_page_url === 'https://acme-plumbing.com.au'),
      'first site URL should be stored'
    );
    assert.ok(
      sites.every(s => s.status === 'found'),
      'all sites should have status = found'
    );
    assert.ok(
      sites.every(s => s.keyword === 'plumber sydney'),
      'all sites should carry the correct keyword'
    );
  });

  // ── Keyword metric updates ───────────────────────────────────────────────────

  test('increments keyword search_count and zenrows_count after successful scrape', async () => {
    const kwId = insertKeyword(currentTestDb, {
      keyword: 'electrician brisbane',
      search_count: 3,
      zenrows_count: 7,
    });

    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('electrician brisbane'));

    await runSerpsStage({});

    const kw = currentTestDb.prepare('SELECT * FROM keywords WHERE id = ?').get(kwId);
    assert.strictEqual(kw.search_count, 4, 'search_count should increment by 1');
    // zenrows_count increments by the number of NEW sites inserted (2)
    assert.strictEqual(kw.zenrows_count, 9, 'zenrows_count should increment by 2');
    assert.ok(kw.last_searched_at, 'last_searched_at should be set');
  });

  // ── Zero-result keyword → inactive ─────────────────────────────────────────

  test('marks keyword inactive when scrape returns 0 results', async () => {
    const kwId = insertKeyword(currentTestDb, { keyword: 'obscure keyword xyz' });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [],
      metadata: {
        keyword: 'obscure keyword xyz',
        countryCode: 'AU',
        googleDomain: 'google.com.au',
      },
    }));

    const result = await runSerpsStage({});

    assert.strictEqual(result.markedInactive, 1);
    assert.strictEqual(result.newSites, 0);

    const kw = currentTestDb.prepare('SELECT * FROM keywords WHERE id = ?').get(kwId);
    assert.strictEqual(kw.status, 'inactive', 'keyword status should be inactive');
    assert.ok(kw.last_searched_at, 'last_searched_at should be updated');
  });

  // ── resultsPerKeyword threshold ─────────────────────────────────────────────

  test('skips keywords that already have enough sites (resultsPerKeyword threshold)', async () => {
    insertKeyword(currentTestDb, { keyword: 'painter melbourne' });

    // Insert 5 existing sites for this keyword
    for (let i = 0; i < 5; i++) {
      insertSite(currentTestDb, { domain: `existing${i}.com.au`, keyword: 'painter melbourne' });
    }

    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('painter melbourne'));

    // With resultsPerKeyword=5, keyword already has 5 sites → skip
    const result = await runSerpsStage({ resultsPerKeyword: 5 });

    assert.strictEqual(result.processed, 0, 'keyword with 5 sites should be skipped');
    assert.strictEqual(scrapeSERPMock.mock.calls.length, 0, 'scrapeSERP should not be called');
  });

  test('processes a keyword that has fewer sites than the threshold', async () => {
    insertKeyword(currentTestDb, { keyword: 'painter melbourne' });

    // Insert only 4 sites — under the threshold of 5
    for (let i = 0; i < 4; i++) {
      insertSite(currentTestDb, { domain: `existing${i}.com.au`, keyword: 'painter melbourne' });
    }

    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('painter melbourne'));

    const result = await runSerpsStage({ resultsPerKeyword: 5 });

    assert.strictEqual(result.processed, 1, 'keyword with 4 sites should still be processed');
    assert.strictEqual(scrapeSERPMock.mock.calls.length, 1);
  });

  // ── limit option ────────────────────────────────────────────────────────────

  test('respects the limit option to cap keywords processed', async () => {
    insertKeyword(currentTestDb, { keyword: 'plumber sydney', priority: 3 });
    insertKeyword(currentTestDb, { keyword: 'electrician sydney', priority: 2 });
    insertKeyword(currentTestDb, { keyword: 'painter sydney', priority: 1 });

    scrapeSERPMock.mock.mockImplementation(async keyword => makeScrapeResult(keyword));

    const result = await runSerpsStage({ limit: 2 });

    assert.strictEqual(result.processed, 2, 'only 2 keywords should be processed with limit=2');
    assert.strictEqual(scrapeSERPMock.mock.calls.length, 2);
  });

  // ── Blocklist filtering ─────────────────────────────────────────────────────

  test('marks blocklisted domains as ignored with error_message set', async () => {
    insertKeyword(currentTestDb, { keyword: 'plumber sydney' });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [
        { url: 'https://yelp.com/biz/some-plumber', title: 'Yelp Listing' },
        { url: 'https://legitimate-plumber.com.au', title: 'Legit Plumber' },
      ],
      metadata: { keyword: 'plumber sydney', countryCode: 'AU', googleDomain: 'google.com.au' },
    }));

    checkBlocklistMock.mock.mockImplementation(domain => {
      if (domain === 'yelp.com') {
        return { reason: 'Directory domain: yelp.com' };
      }
      return null;
    });

    const result = await runSerpsStage({});

    assert.strictEqual(result.succeeded, 1, 'keyword should succeed despite blocklist hit');

    const allSites = currentTestDb.prepare('SELECT * FROM sites ORDER BY domain').all();
    assert.strictEqual(allSites.length, 2, 'both sites should be in DB');

    const yelpSite = allSites.find(s => s.domain === 'yelp.com');
    assert.ok(yelpSite, 'yelp.com should be in DB');
    assert.strictEqual(yelpSite.status, 'ignored', 'yelp.com should have status=ignored');
    assert.ok(
      yelpSite.error_message.includes('Directory domain'),
      'error_message should contain blocklist reason'
    );

    const legitSite = allSites.find(s => s.domain === 'legitimate-plumber.com.au');
    assert.ok(legitSite, 'legitimate site should be in DB');
    assert.strictEqual(legitSite.status, 'found', 'legitimate site should keep status=found');
  });

  test('checkBlocklist is called for each newly inserted domain', async () => {
    insertKeyword(currentTestDb, { keyword: 'plumber sydney' });

    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('plumber sydney'));

    await runSerpsStage({});

    // 2 results → 2 domains inserted → checkBlocklist called twice
    assert.strictEqual(
      checkBlocklistMock.mock.calls.length,
      2,
      'checkBlocklist should be called once per inserted domain'
    );
  });

  // ── TLD country detection ───────────────────────────────────────────────────

  test('uses TLD-detected country code when TLD confidence is high', async () => {
    insertKeyword(currentTestDb, {
      keyword: 'plumber sydney',
      country_code: 'US',
      google_domain: 'google.com',
    });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [{ url: 'https://example.com.au', title: 'AU Plumber' }],
      metadata: { keyword: 'plumber sydney', countryCode: 'US', googleDomain: 'google.com' },
    }));

    detectCountryFromTLDMock.mock.mockImplementation(domain => {
      if (domain.endsWith('.com.au')) {
        return { countryCode: 'AU', confidence: 'high', source: 'cctld' };
      }
      return { countryCode: null, confidence: 'low', source: 'unknown' };
    });

    await runSerpsStage({});

    const site = currentTestDb
      .prepare('SELECT * FROM sites WHERE domain = ?')
      .get('example.com.au');
    assert.ok(site, 'site should be inserted');
    assert.strictEqual(
      site.country_code,
      'AU',
      'high-confidence TLD should override google_domain country'
    );
  });

  test('falls back to google_domain country when TLD confidence is low', async () => {
    insertKeyword(currentTestDb, {
      keyword: 'plumber new york',
      country_code: 'US',
      google_domain: 'google.com',
    });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [{ url: 'https://nyplumber.com', title: 'NY Plumber' }],
      metadata: { keyword: 'plumber new york', countryCode: 'US', googleDomain: 'google.com' },
    }));

    // Low confidence (default mock), so should use google_domain country
    await runSerpsStage({});

    const site = currentTestDb.prepare('SELECT * FROM sites WHERE domain = ?').get('nyplumber.com');
    assert.ok(site, 'site should be inserted');
    assert.strictEqual(site.country_code, 'US', 'should fall back to google_domain country');
  });

  // ── Multiple keywords ────────────────────────────────────────────────────────

  test('aggregates stats correctly across multiple keywords', async () => {
    insertKeyword(currentTestDb, { keyword: 'keyword one', priority: 2 });
    insertKeyword(currentTestDb, { keyword: 'keyword two', priority: 1 });

    scrapeSERPMock.mock.mockImplementation(async keyword => ({
      results: [{ url: `https://${keyword.replace(/ /g, '-')}.com`, title: keyword }],
      metadata: { keyword, countryCode: 'US', googleDomain: 'google.com' },
    }));

    const result = await runSerpsStage({});

    assert.strictEqual(result.processed, 2);
    assert.strictEqual(result.succeeded, 2);
    assert.strictEqual(result.newSites, 2, 'each keyword adds 1 site');
    assert.strictEqual(result.failed, 0);
  });

  // ── Error handling ──────────────────────────────────────────────────────────

  test('records failed keyword in stats.failed when scrape throws', async () => {
    insertKeyword(currentTestDb, { keyword: 'failing keyword' });

    scrapeSERPMock.mock.mockImplementation(async () => {
      throw new Error('ZenRows quota exceeded');
    });

    const result = await runSerpsStage({});

    assert.strictEqual(result.failed, 1, 'failed count should be 1');
    assert.strictEqual(result.succeeded, 0);

    const sites = currentTestDb.prepare('SELECT * FROM sites').all();
    assert.strictEqual(sites.length, 0, 'no sites should be inserted on scrape failure');
  });

  test('handles mix of succeeding and failing keywords correctly', async () => {
    insertKeyword(currentTestDb, { keyword: 'good keyword', priority: 2 });
    insertKeyword(currentTestDb, { keyword: 'bad keyword', priority: 1 });

    scrapeSERPMock.mock.mockImplementation(async keyword => {
      if (keyword === 'bad keyword') throw new Error('API error');
      return makeScrapeResult(keyword);
    });

    const result = await runSerpsStage({});

    assert.strictEqual(result.processed, 2);
    assert.strictEqual(result.succeeded, 1);
    assert.strictEqual(result.failed, 1);
    // Only the good keyword inserts sites (2 URLs)
    assert.strictEqual(result.newSites, 2, 'only good keyword should add sites');
  });

  // ── google_domain stored on sites ──────────────────────────────────────────

  test('stores google_domain on inserted sites', async () => {
    insertKeyword(currentTestDb, {
      keyword: 'locksmith london',
      country_code: 'UK',
      google_domain: 'google.co.uk',
    });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [{ url: 'https://londonlocksmith.co.uk', title: 'London Locksmith' }],
      metadata: { keyword: 'locksmith london', countryCode: 'UK', googleDomain: 'google.co.uk' },
    }));

    await runSerpsStage({});

    const site = currentTestDb
      .prepare('SELECT * FROM sites WHERE domain = ?')
      .get('londonlocksmith.co.uk');
    assert.ok(site, 'site should be inserted');
    assert.strictEqual(site.google_domain, 'google.co.uk', 'google_domain should be stored');
  });

  // ── Domain extraction ───────────────────────────────────────────────────────

  test('strips www prefix when extracting domain from URL', async () => {
    insertKeyword(currentTestDb, { keyword: 'plumber sydney' });

    scrapeSERPMock.mock.mockImplementation(async () => ({
      results: [{ url: 'https://www.example-plumbing.com', title: 'Example Plumbing' }],
      metadata: { keyword: 'plumber sydney', countryCode: 'AU', googleDomain: 'google.com.au' },
    }));

    await runSerpsStage({});

    const site = currentTestDb
      .prepare('SELECT * FROM sites WHERE domain = ?')
      .get('example-plumbing.com');
    assert.ok(site, 'domain should be stored without www');
    assert.strictEqual(site.domain, 'example-plumbing.com');
  });

  // ── Inactive keywords are skipped ──────────────────────────────────────────

  test('does not process inactive keywords', async () => {
    insertKeyword(currentTestDb, { keyword: 'inactive kw', status: 'inactive' });

    const result = await runSerpsStage({});

    assert.strictEqual(result.processed, 0, 'inactive keyword should not be processed');
    assert.strictEqual(scrapeSERPMock.mock.calls.length, 0);
  });

  // ── Duration field ──────────────────────────────────────────────────────────

  test('returns a numeric duration field', async () => {
    insertKeyword(currentTestDb, { keyword: 'any keyword' });
    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('any keyword'));

    const result = await runSerpsStage({});

    assert.strictEqual(typeof result.duration, 'number');
    assert.ok(result.duration >= 0);
  });

  // ── scrapeSERP called with correct args ─────────────────────────────────────

  test('calls scrapeSERP with keyword, resultsPerKeyword, and country_code', async () => {
    insertKeyword(currentTestDb, {
      keyword: 'builder perth',
      country_code: 'AU',
      google_domain: 'google.com.au',
    });

    scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('builder perth'));

    await runSerpsStage({ resultsPerKeyword: 15 });

    assert.strictEqual(scrapeSERPMock.mock.calls.length, 1);
    const [keyword, limit, countryCode] = scrapeSERPMock.mock.calls[0].arguments;
    assert.strictEqual(keyword, 'builder perth');
    assert.strictEqual(limit, 15);
    assert.strictEqual(countryCode, 'AU');
  });
});

// ─── Test Suite: getSerpsStats() ─────────────────────────────────────────────

describe('SERPs Stage — getSerpsStats()', () => {
  beforeEach(() => {
    currentTestDb = buildTestDb();
  });

  afterEach(() => {
    currentTestDb = null;
  });

  test('returns zero counts for an empty database', async () => {
    const stats = await getSerpsStats();

    assert.strictEqual(stats.total_keywords, 0);
    assert.strictEqual(stats.total_sites, 0);
    assert.strictEqual(stats.found_sites, 0);
    assert.strictEqual(stats.scored_sites, 0);
  });

  test('counts total_sites correctly', async () => {
    for (let i = 0; i < 3; i++) {
      insertSite(currentTestDb, { domain: `site${i}.com` });
    }

    const stats = await getSerpsStats();

    assert.strictEqual(stats.total_sites, 3);
  });

  test('counts found_sites vs scored_sites correctly by status', async () => {
    const rows = [
      ['a.com', 'found'],
      ['b.com', 'found'],
      ['c.com', 'prog_scored'],
      ['d.com', 'enriched'],
    ];

    for (const [domain, status] of rows) {
      insertSite(currentTestDb, { domain, status });
    }

    const stats = await getSerpsStats();

    assert.strictEqual(stats.total_sites, 4);
    assert.strictEqual(stats.found_sites, 2);
    assert.strictEqual(stats.scored_sites, 1);
  });

  test('counts distinct keywords (same keyword appearing on multiple sites)', async () => {
    const rows = [
      ['site1.com', 'plumber sydney'],
      ['site2.com', 'plumber sydney'],
      ['site3.com', 'electrician sydney'],
    ];

    for (const [domain, keyword] of rows) {
      insertSite(currentTestDb, { domain, keyword });
    }

    const stats = await getSerpsStats();

    assert.strictEqual(stats.total_keywords, 2, 'should deduplicate keyword count');
    assert.strictEqual(stats.total_sites, 3);
  });

  test('returns object with all required fields', async () => {
    const stats = await getSerpsStats();

    assert.ok('total_keywords' in stats);
    assert.ok('total_sites' in stats);
    assert.ok('found_sites' in stats);
    assert.ok('scored_sites' in stats);
  });
});