/ tests / stages / serps.test.js
serps.test.js
/**
 * Unit Tests for SERPs Stage
 *
 * Tests runSerpsStage() and getSerpsStats() from src/stages/serps.js
 *
 * Uses Node.js 22+ mock.module() to mock all external dependencies.
 * Every mock.module() call MUST be registered before the module under test
 * (src/stages/serps.js) is dynamically imported: references to a module that
 * were obtained before it was mocked are not affected by the mock.
 *
 * Run with:
 *   NODE_ENV=test LOGS_DIR=/tmp/test-logs DATABASE_PATH=/tmp/test-sites.db \
 *   node --experimental-test-module-mocks --test tests/stages/serps.test.js
 */
 13  
 14  import { describe, test, mock, beforeEach, afterEach } from 'node:test';
 15  import assert from 'node:assert';
 16  import Database from 'better-sqlite3';
 17  import { createLazyPgMock } from '../helpers/pg-mock.js';
 18  import { readFileSync } from 'fs';
 19  import { join, dirname } from 'path';
 20  import { fileURLToPath } from 'url';
 21  
 22  const __filename = fileURLToPath(import.meta.url);
 23  const __dirname = dirname(__filename);
 24  const projectRoot = join(__dirname, '../..');
 25  
 26  // ─── Test DB factory ─────────────────────────────────────────────────────────
 27  // Each test creates a fresh in-memory DB. We expose it through `currentTestDb`
 28  // so the mock constructor can hand it to `runSerpsStage()`.
 29  //
 30  // IMPORTANT: serps.js calls `db.close()` in a finally block. If we let that
 31  // close the real test DB the test assertions break afterwards. We wrap the DB
 32  // in a Proxy that makes close() a no-op, while keeping it open for assertions.
 33  
 34  let currentTestDb = null;
 35  
 36  function buildTestDb() {
 37    const db = new Database(':memory:');
 38    const schema = readFileSync(join(projectRoot, 'db/schema.sql'), 'utf-8');
 39    db.exec(schema);
 40  
 41    // Proxy: intercept close() so serps.js's finally-block doesn't close our
 42    // test DB before we can make assertions on it.
 43    return new Proxy(db, {
 44      get(target, prop) {
 45        if (prop === 'close') {
 46          return () => {}; // no-op — we close manually in afterEach
 47        }
 48        if (prop === 'open') {
 49          return target.open;
 50        }
 51        const value = target[prop];
 52        return typeof value === 'function' ? value.bind(target) : value;
 53      },
 54    });
 55  }
 56  
 57  // ─── Mock: better-sqlite3 ─────────────────────────────────────────────────────
 58  // The mock constructor returns whatever `currentTestDb` points to at call time.
 59  class DatabaseMock {
 60    constructor() {
 61      return currentTestDb;
 62    }
 63  }
 64  
 65  mock.module('better-sqlite3', {
 66    defaultExport: DatabaseMock,
 67  });
 68  
 69  // ─── Mock: db.js (PostgreSQL layer) — lazily delegates to currentTestDb ───────
 70  // serps.js imports { run, getOne, getAll, query } from '../utils/db.js'
 71  // We use createLazyPgMock so each test gets its own isolated in-memory DB.
 72  mock.module('../../src/utils/db.js', {
 73    namedExports: createLazyPgMock(() => currentTestDb),
 74  });
 75  
 76  // ─── Mock: scrape.js ─────────────────────────────────────────────────────────
 77  const scrapeSERPMock = mock.fn();
 78  
 79  mock.module('../../src/scrape.js', {
 80    namedExports: {
 81      scrapeSERP: scrapeSERPMock,
 82    },
 83  });
 84  
 85  // ─── Mock: logger ─────────────────────────────────────────────────────────────
 86  mock.module('../../src/utils/logger.js', {
 87    defaultExport: class MockLogger {
 88      info() {}
 89      warn() {}
 90      error() {}
 91      success() {}
 92      debug() {}
 93    },
 94  });
 95  
 96  // ─── Mock: summary-generator ─────────────────────────────────────────────────
 97  mock.module('../../src/utils/summary-generator.js', {
 98    namedExports: {
 99      generateStageCompletion: mock.fn(),
100      displayProgress: mock.fn(),
101    },
102  });
103  
104  // ─── Mock: error-handler — use the REAL processBatch ─────────────────────────
105  // Re-export the real implementation so concurrency and error collection work.
// Load the real module first, then register a mock that re-exports the real
// functions by name. The stage therefore runs the genuine batching and retry
// helpers.
const realErrorHandler = await import('../../src/utils/error-handler.js');

const passthroughExportNames = [
  'processBatch',
  'retryWithBackoff',
  'isRetryableError',
  'sleep',
  'safeJsonParse',
  'extractDomain',
  'withTimeout',
];

mock.module('../../src/utils/error-handler.js', {
  namedExports: Object.fromEntries(
    passthroughExportNames.map(exportName => [exportName, realErrorHandler[exportName]])
  ),
});
119  
120  // ─── Mock: site-filters ───────────────────────────────────────────────────────
// Blocklist lookups are controlled per test; the domain lists are empty.
const checkBlocklistMock = mock.fn();
const siteFilterExports = {
  checkBlocklist: checkBlocklistMock,
  DIRECTORY_DOMAINS: [],
  SOCIAL_DOMAINS: [],
};

mock.module('../../src/utils/site-filters.js', { namedExports: siteFilterExports });

// ─── Mock: tld-detector ───────────────────────────────────────────────────────
// Country detection is controlled per test as well.
const detectCountryFromTLDMock = mock.fn();
const tldDetectorExports = { detectCountryFromTLD: detectCountryFromTLDMock };

mock.module('../../src/utils/tld-detector.js', { namedExports: tldDetectorExports });

// ─── Mock: dotenv ────────────────────────────────────────────────────────────
// config() becomes a mock function, so no .env file is read.
mock.module('dotenv', { namedExports: { config: mock.fn() } });

// ─── NOW import the module under test ────────────────────────────────────────
// This import must stay below every mock.module() call above so that the
// stage resolves its dependencies to the mocks.
const serpsStage = await import('../../src/stages/serps.js');
const { runSerpsStage, getSerpsStats } = serpsStage;
149  
150  // ─── Helpers ──────────────────────────────────────────────────────────────────
151  
/**
 * Insert one row into the `keywords` table and return its row id.
 *
 * Every column can be overridden. A column that is omitted, `undefined` or
 * `null` in `overrides` falls back to the fixture default listed below.
 *
 * @param {object} db - Handle exposing `prepare(sql)`; the prepared statement
 *   must expose `run(...params)`.
 * @param {object|null} [overrides] - Column values keyed by column name.
 *   `null` is treated the same as an empty object.
 * @returns {*} The `lastInsertRowid` reported by the statement's `run()`.
 */
function insertKeyword(db, overrides = {}) {
  // Key order matches the column order of the INSERT statement below.
  const defaults = {
    keyword: 'plumber sydney',
    country_code: 'AU',
    google_domain: 'google.com.au',
    status: 'active',
    priority: 1,
    search_count: 0,
    zenrows_count: 0,
    last_searched_at: null,
  };

  // The default parameter only covers `undefined`; tolerate an explicit null.
  const supplied = overrides ?? {};
  // `??` (not `||`) so legitimate falsy overrides such as 0 are kept.
  const params = Object.keys(defaults).map(column => supplied[column] ?? defaults[column]);

  const stmt = db.prepare(`
    INSERT INTO keywords (keyword, country_code, google_domain, status, priority,
                          search_count, zenrows_count, last_searched_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const { lastInsertRowid } = stmt.run(...params);
  return lastInsertRowid;
}
181  
/**
 * Build a fake scrapeSERP() response.
 *
 * @param {string} keyword - Keyword echoed back in `metadata.keyword`.
 * @param {string} [countryCode='AU'] - Value for `metadata.countryCode`.
 * @param {string} [googleDomain='google.com.au'] - Value for `metadata.googleDomain`.
 * @param {Array<{url: string, title: string}>|null} [urls=null] - Result
 *   entries. `null`/`undefined` selects two built-in plumbing results; an
 *   empty array is kept as an empty result list.
 * @param {string} [currency='AUD'] - Value for `metadata.currency`. Previously
 *   fixed to 'AUD'; now overridable so non-AU fixtures can stay consistent.
 * @returns {{results: Array, metadata: object}} Object with `results` and `metadata`.
 */
function makeScrapeResult(
  keyword,
  countryCode = 'AU',
  googleDomain = 'google.com.au',
  urls = null,
  currency = 'AUD'
) {
  // Built fresh on every call so callers never share (or mutate) one array.
  const defaultUrls = [
    { url: 'https://acme-plumbing.com.au', title: 'Acme Plumbing Sydney' },
    { url: 'https://bestplumber.com', title: 'Best Plumber Co' },
  ];

  return {
    // `??` keeps an explicitly empty array instead of replacing it.
    results: urls ?? defaultUrls,
    metadata: { keyword, countryCode, googleDomain, currency },
  };
}
197  
198  // ─── Test Suite: runSerpsStage() ──────────────────────────────────────────────
199  
200  describe('SERPs Stage — runSerpsStage()', () => {
201    beforeEach(() => {
202      currentTestDb = buildTestDb();
203      scrapeSERPMock.mock.resetCalls();
204      checkBlocklistMock.mock.resetCalls();
205      detectCountryFromTLDMock.mock.resetCalls();
206  
207      // Default: low-confidence TLD → fall back to google_domain country
208      detectCountryFromTLDMock.mock.mockImplementation(() => ({
209        countryCode: null,
210        confidence: 'low',
211        source: 'unknown',
212      }));
213  
214      // Default: nothing is blocklisted
215      checkBlocklistMock.mock.mockImplementation(() => null);
216    });
217  
218    afterEach(() => {
219      // Close the underlying real DB (the Proxy's close() is a no-op)
220      const realDb = Object.getPrototypeOf(currentTestDb.constructor).call ? null : currentTestDb;
221      // We need to reach the real Database instance to call close
222      // Since close() is proxied as no-op, call it on the actual target
223      // We kept the real db around via the Proxy — close it safely by
224      // accessing internal open state: just suppress errors.
225      try {
226        // Try to finalize any leftover prepared statements
227        if (currentTestDb) {
228          // Manually close by re-opening the target db
229          // The proxy forwards everything except close(), so we use a workaround:
230          // re-create and discard (the :memory: db is ephemeral anyway)
231          const sentinel = currentTestDb.pragma('user_version');
232        }
233      } catch {
234        // Already closed or errored — fine for in-memory test db
235      }
236      currentTestDb = null;
237    });
238  
239    // ── No-op: no keywords ──────────────────────────────────────────────────────
240  
241    test('returns zero stats when no active keywords exist', async () => {
242      const result = await runSerpsStage({});
243  
244      assert.strictEqual(result.processed, 0);
245      assert.strictEqual(result.succeeded, 0);
246      assert.strictEqual(result.failed, 0);
247      assert.strictEqual(result.skipped, 0);
248      assert.ok(typeof result.duration === 'number', 'duration should be a number');
249    });
250  
251    // ── Happy path: single keyword ───────────────────────────────────────────────
252  
253    test('scrapes results for a single active keyword and inserts sites', async () => {
254      insertKeyword(currentTestDb, { keyword: 'plumber sydney', country_code: 'AU' });
255  
256      scrapeSERPMock.mock.mockImplementation(async keyword =>
257        makeScrapeResult(keyword, 'AU', 'google.com.au')
258      );
259  
260      const result = await runSerpsStage({});
261  
262      assert.strictEqual(result.processed, 1, 'should process 1 keyword');
263      assert.strictEqual(result.succeeded, 1, 'should succeed for 1 keyword');
264      assert.strictEqual(result.failed, 0);
265      assert.strictEqual(result.newSites, 2, 'should add 2 new sites');
266  
267      // Verify sites were inserted in the DB
268      const sites = currentTestDb.prepare('SELECT * FROM sites').all();
269      assert.strictEqual(sites.length, 2);
270      assert.ok(
271        sites.some(s => s.landing_page_url === 'https://acme-plumbing.com.au'),
272        'first site URL should be stored'
273      );
274      assert.ok(
275        sites.every(s => s.status === 'found'),
276        'all sites should have status = found'
277      );
278      assert.ok(
279        sites.every(s => s.keyword === 'plumber sydney'),
280        'all sites should carry the correct keyword'
281      );
282    });
283  
284    // ── Keyword metric updates ───────────────────────────────────────────────────
285  
286    test('increments keyword search_count and zenrows_count after successful scrape', async () => {
287      const kwId = insertKeyword(currentTestDb, {
288        keyword: 'electrician brisbane',
289        search_count: 3,
290        zenrows_count: 7,
291      });
292  
293      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('electrician brisbane'));
294  
295      await runSerpsStage({});
296  
297      const kw = currentTestDb.prepare('SELECT * FROM keywords WHERE id = ?').get(kwId);
298      assert.strictEqual(kw.search_count, 4, 'search_count should increment by 1');
299      // zenrows_count increments by the number of NEW sites inserted (2)
300      assert.strictEqual(kw.zenrows_count, 9, 'zenrows_count should increment by 2');
301      assert.ok(kw.last_searched_at, 'last_searched_at should be set');
302    });
303  
304    // ── Zero-result keyword → inactive ─────────────────────────────────────────
305  
306    test('marks keyword inactive when scrape returns 0 results', async () => {
307      const kwId = insertKeyword(currentTestDb, { keyword: 'obscure keyword xyz' });
308  
309      scrapeSERPMock.mock.mockImplementation(async () => ({
310        results: [],
311        metadata: {
312          keyword: 'obscure keyword xyz',
313          countryCode: 'AU',
314          googleDomain: 'google.com.au',
315        },
316      }));
317  
318      const result = await runSerpsStage({});
319  
320      assert.strictEqual(result.markedInactive, 1);
321      assert.strictEqual(result.newSites, 0);
322  
323      const kw = currentTestDb.prepare('SELECT * FROM keywords WHERE id = ?').get(kwId);
324      assert.strictEqual(kw.status, 'inactive', 'keyword status should be inactive');
325      assert.ok(kw.last_searched_at, 'last_searched_at should be updated');
326    });
327  
328    // ── resultsPerKeyword threshold ─────────────────────────────────────────────
329  
330    test('skips keywords that already have enough sites (resultsPerKeyword threshold)', async () => {
331      insertKeyword(currentTestDb, { keyword: 'painter melbourne' });
332  
333      // Insert 5 existing sites for this keyword
334      for (let i = 0; i < 5; i++) {
335        currentTestDb
336          .prepare(
337            `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
338             VALUES (?, ?, 'painter melbourne', 'found', 'AU', 'google.com.au')`
339          )
340          .run(`existing${i}.com.au`, `https://existing${i}.com.au`);
341      }
342  
343      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('painter melbourne'));
344  
345      // With resultsPerKeyword=5, keyword already has 5 sites → skip
346      const result = await runSerpsStage({ resultsPerKeyword: 5 });
347  
348      assert.strictEqual(result.processed, 0, 'keyword with 5 sites should be skipped');
349      assert.strictEqual(scrapeSERPMock.mock.calls.length, 0, 'scrapeSERP should not be called');
350    });
351  
352    test('processes a keyword that has fewer sites than the threshold', async () => {
353      insertKeyword(currentTestDb, { keyword: 'painter melbourne' });
354  
355      // Insert only 4 sites — under the threshold of 5
356      for (let i = 0; i < 4; i++) {
357        currentTestDb
358          .prepare(
359            `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
360             VALUES (?, ?, 'painter melbourne', 'found', 'AU', 'google.com.au')`
361          )
362          .run(`existing${i}.com.au`, `https://existing${i}.com.au`);
363      }
364  
365      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('painter melbourne'));
366  
367      const result = await runSerpsStage({ resultsPerKeyword: 5 });
368  
369      assert.strictEqual(result.processed, 1, 'keyword with 4 sites should still be processed');
370      assert.strictEqual(scrapeSERPMock.mock.calls.length, 1);
371    });
372  
373    // ── limit option ────────────────────────────────────────────────────────────
374  
375    test('respects the limit option to cap keywords processed', async () => {
376      insertKeyword(currentTestDb, { keyword: 'plumber sydney', priority: 3 });
377      insertKeyword(currentTestDb, { keyword: 'electrician sydney', priority: 2 });
378      insertKeyword(currentTestDb, { keyword: 'painter sydney', priority: 1 });
379  
380      scrapeSERPMock.mock.mockImplementation(async keyword => makeScrapeResult(keyword));
381  
382      const result = await runSerpsStage({ limit: 2 });
383  
384      assert.strictEqual(result.processed, 2, 'only 2 keywords should be processed with limit=2');
385      assert.strictEqual(scrapeSERPMock.mock.calls.length, 2);
386    });
387  
388    // ── Blocklist filtering ─────────────────────────────────────────────────────
389  
390    test('marks blocklisted domains as ignored with error_message set', async () => {
391      insertKeyword(currentTestDb, { keyword: 'plumber sydney' });
392  
393      scrapeSERPMock.mock.mockImplementation(async () => ({
394        results: [
395          { url: 'https://yelp.com/biz/some-plumber', title: 'Yelp Listing' },
396          { url: 'https://legitimate-plumber.com.au', title: 'Legit Plumber' },
397        ],
398        metadata: { keyword: 'plumber sydney', countryCode: 'AU', googleDomain: 'google.com.au' },
399      }));
400  
401      checkBlocklistMock.mock.mockImplementation(domain => {
402        if (domain === 'yelp.com') {
403          return { reason: 'Directory domain: yelp.com' };
404        }
405        return null;
406      });
407  
408      const result = await runSerpsStage({});
409  
410      assert.strictEqual(result.succeeded, 1, 'keyword should succeed despite blocklist hit');
411  
412      const allSites = currentTestDb.prepare('SELECT * FROM sites ORDER BY domain').all();
413      assert.strictEqual(allSites.length, 2, 'both sites should be in DB');
414  
415      const yelpSite = allSites.find(s => s.domain === 'yelp.com');
416      assert.ok(yelpSite, 'yelp.com should be in DB');
417      assert.strictEqual(yelpSite.status, 'ignored', 'yelp.com should have status=ignored');
418      assert.ok(
419        yelpSite.error_message.includes('Directory domain'),
420        'error_message should contain blocklist reason'
421      );
422  
423      const legitSite = allSites.find(s => s.domain === 'legitimate-plumber.com.au');
424      assert.ok(legitSite, 'legitimate site should be in DB');
425      assert.strictEqual(legitSite.status, 'found', 'legitimate site should keep status=found');
426    });
427  
428    test('checkBlocklist is called for each newly inserted domain', async () => {
429      insertKeyword(currentTestDb, { keyword: 'plumber sydney' });
430  
431      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('plumber sydney'));
432  
433      await runSerpsStage({});
434  
435      // 2 results → 2 domains inserted → checkBlocklist called twice
436      assert.strictEqual(
437        checkBlocklistMock.mock.calls.length,
438        2,
439        'checkBlocklist should be called once per inserted domain'
440      );
441    });
442  
443    // ── TLD country detection ───────────────────────────────────────────────────
444  
445    test('uses TLD-detected country code when TLD confidence is high', async () => {
446      insertKeyword(currentTestDb, {
447        keyword: 'plumber sydney',
448        country_code: 'US',
449        google_domain: 'google.com',
450      });
451  
452      scrapeSERPMock.mock.mockImplementation(async () => ({
453        results: [{ url: 'https://example.com.au', title: 'AU Plumber' }],
454        metadata: { keyword: 'plumber sydney', countryCode: 'US', googleDomain: 'google.com' },
455      }));
456  
457      detectCountryFromTLDMock.mock.mockImplementation(domain => {
458        if (domain.endsWith('.com.au')) {
459          return { countryCode: 'AU', confidence: 'high', source: 'cctld' };
460        }
461        return { countryCode: null, confidence: 'low', source: 'unknown' };
462      });
463  
464      await runSerpsStage({});
465  
466      const site = currentTestDb
467        .prepare('SELECT * FROM sites WHERE domain = ?')
468        .get('example.com.au');
469      assert.ok(site, 'site should be inserted');
470      assert.strictEqual(
471        site.country_code,
472        'AU',
473        'high-confidence TLD should override google_domain country'
474      );
475    });
476  
477    test('falls back to google_domain country when TLD confidence is low', async () => {
478      insertKeyword(currentTestDb, {
479        keyword: 'plumber new york',
480        country_code: 'US',
481        google_domain: 'google.com',
482      });
483  
484      scrapeSERPMock.mock.mockImplementation(async () => ({
485        results: [{ url: 'https://nyplumber.com', title: 'NY Plumber' }],
486        metadata: { keyword: 'plumber new york', countryCode: 'US', googleDomain: 'google.com' },
487      }));
488  
489      // Low confidence (default mock), so should use google_domain country
490      await runSerpsStage({});
491  
492      const site = currentTestDb.prepare('SELECT * FROM sites WHERE domain = ?').get('nyplumber.com');
493      assert.ok(site, 'site should be inserted');
494      assert.strictEqual(site.country_code, 'US', 'should fall back to google_domain country');
495    });
496  
497    // ── Multiple keywords ────────────────────────────────────────────────────────
498  
499    test('aggregates stats correctly across multiple keywords', async () => {
500      insertKeyword(currentTestDb, { keyword: 'keyword one', priority: 2 });
501      insertKeyword(currentTestDb, { keyword: 'keyword two', priority: 1 });
502  
503      scrapeSERPMock.mock.mockImplementation(async keyword => ({
504        results: [{ url: `https://${keyword.replace(/ /g, '-')}.com`, title: keyword }],
505        metadata: { keyword, countryCode: 'US', googleDomain: 'google.com' },
506      }));
507  
508      const result = await runSerpsStage({});
509  
510      assert.strictEqual(result.processed, 2);
511      assert.strictEqual(result.succeeded, 2);
512      assert.strictEqual(result.newSites, 2, 'each keyword adds 1 site');
513      assert.strictEqual(result.failed, 0);
514    });
515  
516    // ── Error handling ──────────────────────────────────────────────────────────
517  
518    test('records failed keyword in stats.failed when scrape throws', async () => {
519      insertKeyword(currentTestDb, { keyword: 'failing keyword' });
520  
521      scrapeSERPMock.mock.mockImplementation(async () => {
522        throw new Error('ZenRows quota exceeded');
523      });
524  
525      const result = await runSerpsStage({});
526  
527      assert.strictEqual(result.failed, 1, 'failed count should be 1');
528      assert.strictEqual(result.succeeded, 0);
529  
530      const sites = currentTestDb.prepare('SELECT * FROM sites').all();
531      assert.strictEqual(sites.length, 0, 'no sites should be inserted on scrape failure');
532    });
533  
534    test('handles mix of succeeding and failing keywords correctly', async () => {
535      insertKeyword(currentTestDb, { keyword: 'good keyword', priority: 2 });
536      insertKeyword(currentTestDb, { keyword: 'bad keyword', priority: 1 });
537  
538      scrapeSERPMock.mock.mockImplementation(async keyword => {
539        if (keyword === 'bad keyword') throw new Error('API error');
540        return makeScrapeResult(keyword);
541      });
542  
543      const result = await runSerpsStage({});
544  
545      assert.strictEqual(result.processed, 2);
546      assert.strictEqual(result.succeeded, 1);
547      assert.strictEqual(result.failed, 1);
548      // Only the good keyword inserts sites (2 URLs)
549      assert.strictEqual(result.newSites, 2, 'only good keyword should add sites');
550    });
551  
552    // ── google_domain stored on sites ──────────────────────────────────────────
553  
554    test('stores google_domain on inserted sites', async () => {
555      insertKeyword(currentTestDb, {
556        keyword: 'locksmith london',
557        country_code: 'UK',
558        google_domain: 'google.co.uk',
559      });
560  
561      scrapeSERPMock.mock.mockImplementation(async () => ({
562        results: [{ url: 'https://londonlocksmith.co.uk', title: 'London Locksmith' }],
563        metadata: { keyword: 'locksmith london', countryCode: 'UK', googleDomain: 'google.co.uk' },
564      }));
565  
566      await runSerpsStage({});
567  
568      const site = currentTestDb
569        .prepare('SELECT * FROM sites WHERE domain = ?')
570        .get('londonlocksmith.co.uk');
571      assert.ok(site, 'site should be inserted');
572      assert.strictEqual(site.google_domain, 'google.co.uk', 'google_domain should be stored');
573    });
574  
575    // ── Domain extraction ───────────────────────────────────────────────────────
576  
577    test('strips www prefix when extracting domain from URL', async () => {
578      insertKeyword(currentTestDb, { keyword: 'plumber sydney' });
579  
580      scrapeSERPMock.mock.mockImplementation(async () => ({
581        results: [{ url: 'https://www.example-plumbing.com', title: 'Example Plumbing' }],
582        metadata: { keyword: 'plumber sydney', countryCode: 'AU', googleDomain: 'google.com.au' },
583      }));
584  
585      await runSerpsStage({});
586  
587      const site = currentTestDb
588        .prepare('SELECT * FROM sites WHERE domain = ?')
589        .get('example-plumbing.com');
590      assert.ok(site, 'domain should be stored without www');
591      assert.strictEqual(site.domain, 'example-plumbing.com');
592    });
593  
594    // ── Inactive keywords are skipped ──────────────────────────────────────────
595  
596    test('does not process inactive keywords', async () => {
597      insertKeyword(currentTestDb, { keyword: 'inactive kw', status: 'inactive' });
598  
599      const result = await runSerpsStage({});
600  
601      assert.strictEqual(result.processed, 0, 'inactive keyword should not be processed');
602      assert.strictEqual(scrapeSERPMock.mock.calls.length, 0);
603    });
604  
605    // ── Duration field ──────────────────────────────────────────────────────────
606  
607    test('returns a numeric duration field', async () => {
608      insertKeyword(currentTestDb, { keyword: 'any keyword' });
609      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('any keyword'));
610  
611      const result = await runSerpsStage({});
612  
613      assert.ok(typeof result.duration === 'number');
614      assert.ok(result.duration >= 0);
615    });
616  
617    // ── scrapeSERP called with correct args ─────────────────────────────────────
618  
619    test('calls scrapeSERP with keyword, resultsPerKeyword, and country_code', async () => {
620      insertKeyword(currentTestDb, {
621        keyword: 'builder perth',
622        country_code: 'AU',
623        google_domain: 'google.com.au',
624      });
625  
626      scrapeSERPMock.mock.mockImplementation(async () => makeScrapeResult('builder perth'));
627  
628      await runSerpsStage({ resultsPerKeyword: 15 });
629  
630      assert.strictEqual(scrapeSERPMock.mock.calls.length, 1);
631      const [keyword, limit, countryCode] = scrapeSERPMock.mock.calls[0].arguments;
632      assert.strictEqual(keyword, 'builder perth');
633      assert.strictEqual(limit, 15);
634      assert.strictEqual(countryCode, 'AU');
635    });
636  });
637  
638  // ─── Test Suite: getSerpsStats() ─────────────────────────────────────────────
639  
640  describe('SERPs Stage — getSerpsStats()', () => {
641    beforeEach(() => {
642      currentTestDb = buildTestDb();
643    });
644  
645    afterEach(() => {
646      currentTestDb = null;
647    });
648  
649    test('returns zero counts for an empty database', async () => {
650      const stats = await getSerpsStats();
651  
652      assert.strictEqual(stats.total_keywords, 0);
653      assert.strictEqual(stats.total_sites, 0);
654      assert.strictEqual(stats.found_sites, 0);
655      assert.strictEqual(stats.scored_sites, 0);
656    });
657  
658    test('counts total_sites correctly', async () => {
659      for (let i = 0; i < 3; i++) {
660        currentTestDb
661          .prepare(
662            `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
663             VALUES (?, ?, 'kw', 'found', 'AU', 'google.com.au')`
664          )
665          .run(`site${i}.com`, `https://site${i}.com`);
666      }
667  
668      const stats = await getSerpsStats();
669  
670      assert.strictEqual(stats.total_sites, 3);
671    });
672  
673    test('counts found_sites vs scored_sites correctly by status', async () => {
674      const rows = [
675        ['a.com', 'found'],
676        ['b.com', 'found'],
677        ['c.com', 'prog_scored'],
678        ['d.com', 'enriched'],
679      ];
680  
681      for (const [domain, status] of rows) {
682        currentTestDb
683          .prepare(
684            `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
685             VALUES (?, ?, 'kw', ?, 'AU', 'google.com.au')`
686          )
687          .run(domain, `https://${domain}`, status);
688      }
689  
690      const stats = await getSerpsStats();
691  
692      assert.strictEqual(stats.total_sites, 4);
693      assert.strictEqual(stats.found_sites, 2);
694      assert.strictEqual(stats.scored_sites, 1);
695    });
696  
697    test('counts distinct keywords (same keyword appearing on multiple sites)', async () => {
698      const rows = [
699        ['site1.com', 'plumber sydney'],
700        ['site2.com', 'plumber sydney'],
701        ['site3.com', 'electrician sydney'],
702      ];
703  
704      for (const [domain, keyword] of rows) {
705        currentTestDb
706          .prepare(
707            `INSERT INTO sites (domain, landing_page_url, keyword, status, country_code, google_domain)
708             VALUES (?, ?, ?, 'found', 'AU', 'google.com.au')`
709          )
710          .run(domain, `https://${domain}`, keyword);
711      }
712  
713      const stats = await getSerpsStats();
714  
715      assert.strictEqual(stats.total_keywords, 2, 'should deduplicate keyword count');
716      assert.strictEqual(stats.total_sites, 3);
717    });
718  
719    test('returns object with all required fields', async () => {
720      const stats = await getSerpsStats();
721  
722      assert.ok('total_keywords' in stats);
723      assert.ok('total_sites' in stats);
724      assert.ok('found_sites' in stats);
725      assert.ok('scored_sites' in stats);
726    });
727  });