/ tests / utils / keyword-filter.test.js
keyword-filter.test.js
  1  /**
  2   * Tests for Keyword Filter Module
  3   */
  4  
  5  import { test, describe, mock, beforeEach } from 'node:test';
  6  import assert from 'node:assert';
  7  
  8  // Mock dependencies before importing
  9  let mockCallLLM;
 10  
 11  mock.module('../../src/utils/llm-provider.js', {
 12    namedExports: {
 13      callLLM: (...args) => mockCallLLM(...args),
 14    },
 15  });
 16  
 17  mock.module('../../src/utils/logger.js', {
 18    defaultExport: class {
 19      info() {}
 20      warn() {}
 21      error() {}
 22      success() {}
 23      debug() {}
 24    },
 25  });
 26  
 27  const { deterministicFilter, filterKeywordsPreOverview, rankKeywordsPostOverview } =
 28    await import('../../src/utils/keyword-filter.js');
 29  
 30  describe('Keyword Filter', () => {
 31    beforeEach(() => {
 32      mockCallLLM = async () => ({
 33        content: JSON.stringify({ filtered_keywords: [], removed: [] }),
 34      });
 35    });
 36  
 37    describe('deterministicFilter', () => {
 38      test('keeps generic service keywords', () => {
 39        const { kept, removed } = deterministicFilter(['plumber', 'electrician', 'landscaping']);
 40        assert.deepStrictEqual(kept, ['plumber', 'electrician', 'landscaping']);
 41        assert.strictEqual(removed.length, 0);
 42      });
 43  
 44      test('removes job-related keywords', () => {
 45        const { kept, removed } = deterministicFilter([
 46          'plumber salary',
 47          'electrician jobs',
 48          'carpenter apprenticeship',
 49        ]);
 50        assert.strictEqual(kept.length, 0);
 51        assert.strictEqual(removed.length, 3);
 52        assert.ok(removed.every(r => r.reason === 'job'));
 53      });
 54  
 55      test('removes education-related keywords', () => {
 56        const { kept, removed } = deterministicFilter([
 57          'plumber course',
 58          'electrical training',
 59          'welding certification',
 60        ]);
 61        assert.strictEqual(kept.length, 0);
 62        assert.strictEqual(removed.length, 3);
 63        assert.ok(removed.every(r => r.reason === 'education'));
 64      });
 65  
 66      test('removes product-related keywords', () => {
 67        const { kept, removed } = deterministicFilter([
 68          'plumbing supplies',
 69          'electrical tools',
 70          'fence parts',
 71        ]);
 72        assert.strictEqual(kept.length, 0);
 73        assert.strictEqual(removed.length, 3);
 74        assert.ok(removed.every(r => r.reason === 'products'));
 75      });
 76  
 77      test('removes entertainment-related keywords', () => {
 78        const { kept, removed } = deterministicFilter(['band hire', 'movie tickets']);
 79        // "band hire" has "hire" as positive token → should be kept
 80        // "movie tickets" has no positive token → removed
 81        assert.ok(removed.some(r => r.keyword === 'movie tickets'));
 82        assert.ok(removed.every(r => r.reason === 'entertainment'));
 83      });
 84  
 85      test('removes informational keywords', () => {
 86        const { kept, removed } = deterministicFilter([
 87          'how to do plumbing', // "how to" informational, no positive tokens
 88          'plumbing tutorial', // "tutorial" informational, no positive tokens
 89          'diy fence', // "diy" informational, no positive tokens
 90        ]);
 91        assert.strictEqual(kept.length, 0);
 92        assert.strictEqual(removed.length, 3);
 93      });
 94  
 95      test('always removes near me keywords (hard filter)', () => {
 96        const { kept, removed } = deterministicFilter([
 97          'plumber near me',
 98          'electrician nearby',
 99          'landscaper close to me',
100        ]);
101        assert.strictEqual(kept.length, 0);
102        assert.strictEqual(removed.length, 3);
103        assert.ok(removed.every(r => r.reason === 'nearme'));
104      });
105  
106      test('keeps keywords with both negative and positive tokens', () => {
107        // "plumber tools repair" has "tools" (products) but also "repair" (positive)
108        const { kept, removed } = deterministicFilter(['plumber tools repair']);
109        assert.strictEqual(kept.length, 1);
110        assert.strictEqual(removed.length, 0);
111      });
112  
113      test('keeps service+action keywords', () => {
114        const { kept } = deterministicFilter([
115          'emergency plumber',
116          'hvac repair',
117          'tree removal',
118          'roof installation',
119        ]);
120        assert.strictEqual(kept.length, 4);
121      });
122  
123      test('handles empty input', () => {
124        const { kept, removed } = deterministicFilter([]);
125        assert.deepStrictEqual(kept, []);
126        assert.deepStrictEqual(removed, []);
127      });
128  
129      test('is case insensitive', () => {
130        const { removed: lower } = deterministicFilter(['plumber salary']);
131        const { removed: upper } = deterministicFilter(['PLUMBER SALARY']);
132        assert.strictEqual(lower.length, 1);
133        assert.strictEqual(upper.length, 1);
134      });
135    });
136  
137    describe('filterKeywordsPreOverview', () => {
138      test('applies deterministic filter first', async () => {
139        const result = await filterKeywordsPreOverview(
140          ['plumber', 'plumber salary', 'electrician', 'carpenter jobs'],
141          'en',
142          80,
143          null
144        );
145  
146        assert.ok(result.filtered_keywords.includes('plumber'));
147        assert.ok(result.filtered_keywords.includes('electrician'));
148        assert.ok(!result.filtered_keywords.includes('plumber salary'));
149        assert.ok(!result.filtered_keywords.includes('carpenter jobs'));
150      });
151  
152      test('skips LLM filter when keywords are within maxOutput', async () => {
153        let llmCalled = false;
154        mockCallLLM = async () => {
155          llmCalled = true;
156          return { content: '{"filtered_keywords":[],"removed":[]}' };
157        };
158  
159        await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 80, null);
160        assert.strictEqual(llmCalled, false, 'LLM should not be called when keywords < maxOutput');
161      });
162  
163      test('calls LLM filter when keywords exceed maxOutput', async () => {
164        let llmCalled = false;
165        mockCallLLM = async () => {
166          llmCalled = true;
167          return {
168            content: JSON.stringify({
169              filtered_keywords: ['plumber'],
170              removed: [{ keyword: 'electrician', reason: 'other' }],
171            }),
172          };
173        };
174  
175        // Set maxOutput to 1 to force LLM call
176        const result = await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 1, null);
177        assert.strictEqual(llmCalled, true);
178        assert.deepStrictEqual(result.filtered_keywords, ['plumber']);
179      });
180  
181      test('handles LLM failure gracefully', async () => {
182        mockCallLLM = async () => {
183          throw new Error('API error');
184        };
185  
186        // Force LLM call by setting maxOutput low
187        const result = await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 1, null);
188  
189        // Should fall back to keeping all keywords
190        assert.ok(result.filtered_keywords.includes('plumber'));
191        assert.ok(result.filtered_keywords.includes('electrician'));
192      });
193  
194      test('returns combined removed from all filters', async () => {
195        const result = await filterKeywordsPreOverview(
196          ['plumber', 'plumber near me', 'electrician salary'],
197          'en',
198          80,
199          null
200        );
201  
202        assert.strictEqual(result.removed.length, 2);
203        assert.ok(result.removed.some(r => r.reason === 'nearme'));
204        assert.ok(result.removed.some(r => r.reason === 'job'));
205      });
206  
207      test('filters place-specific keywords when countryCode provided', async () => {
208        // AU regions file exists at data/au/regions-final-filtered.csv
209        // and contains entries like "parramatta", "sydney"
210        const result = await filterKeywordsPreOverview(
211          ['plumber', 'plumber parramatta', 'electrician sydney', 'hvac repair'],
212          'en',
213          80,
214          'au'
215        );
216  
217        // Generic keywords should be kept
218        assert.ok(result.filtered_keywords.includes('plumber'), 'should keep generic keyword');
219        assert.ok(result.filtered_keywords.includes('hvac repair'), 'should keep non-place keyword');
220        // Place-specific keywords should be removed
221        assert.ok(
222          !result.filtered_keywords.includes('plumber parramatta'),
223          'should filter parramatta'
224        );
225        assert.ok(!result.filtered_keywords.includes('electrician sydney'), 'should filter sydney');
226        // Removed list should include place-specific reasons
227        assert.ok(
228          result.removed.some(r => r.reason && r.reason.includes('place-specific')),
229          'removed should include place-specific entry'
230        );
231      });
232  
233      test('skips place filter when no regions file for country', async () => {
234        // Use a country code with no regions file (e.g., 'xx')
235        const result = await filterKeywordsPreOverview(
236          ['plumber sydney', 'electrician melbourne'],
237          'en',
238          80,
239          'xx'
240        );
241  
242        // Without regions file, nothing is filtered by place
243        assert.ok(result.filtered_keywords.includes('plumber sydney'));
244        assert.ok(result.filtered_keywords.includes('electrician melbourne'));
245      });
246    });
247  
248    describe('rankKeywordsPostOverview', () => {
249      test('ranks keywords using LLM', async () => {
250        mockCallLLM = async () => ({
251          content: JSON.stringify({
252            ranked: [
253              { keyword: 'emergency plumber', score: 95, rank: 1, explanation: 'High intent' },
254              { keyword: 'plumber', score: 80, rank: 2, explanation: 'Generic' },
255            ],
256          }),
257        });
258  
259        const data = [
260          { keyword: 'plumber', search_volume: 1000, competition: 0.5, cpc: 2.5 },
261          { keyword: 'emergency plumber', search_volume: 500, competition: 0.3, cpc: 5.0 },
262        ];
263  
264        const result = await rankKeywordsPostOverview(data, 'en', 50);
265        assert.strictEqual(result.length, 2);
266        assert.strictEqual(result[0].keyword, 'emergency plumber');
267        assert.strictEqual(result[0].rank, 1);
268      });
269  
270      test('falls back to volume sorting on LLM failure', async () => {
271        mockCallLLM = async () => {
272          throw new Error('API error');
273        };
274  
275        const data = [
276          { keyword: 'plumber', search_volume: 500 },
277          { keyword: 'electrician', search_volume: 1000 },
278        ];
279  
280        const result = await rankKeywordsPostOverview(data, 'en', 50);
281        assert.strictEqual(result[0].keyword, 'electrician'); // higher volume first
282        assert.strictEqual(result[0].explanation, 'Fallback: sorted by volume');
283      });
284  
285      test('respects topN limit', async () => {
286        mockCallLLM = async () => ({
287          content: JSON.stringify({
288            ranked: [
289              { keyword: 'a', score: 90, rank: 1 },
290              { keyword: 'b', score: 80, rank: 2 },
291              { keyword: 'c', score: 70, rank: 3 },
292            ],
293          }),
294        });
295  
296        const data = [
297          { keyword: 'a', search_volume: 100 },
298          { keyword: 'b', search_volume: 200 },
299          { keyword: 'c', search_volume: 300 },
300        ];
301  
302        const result = await rankKeywordsPostOverview(data, 'en', 2);
303        assert.strictEqual(result.length, 2);
304      });
305  
306      test('handles empty input', async () => {
307        const result = await rankKeywordsPostOverview([], 'en', 50);
308        assert.deepStrictEqual(result, []);
309      });
310  
311      test('strips markdown code fences from LLM response', async () => {
312        mockCallLLM = async () => ({
313          content:
314            '```json\n{"ranked":[{"keyword":"plumber","score":90,"rank":1,"explanation":"test"}]}\n```',
315        });
316  
317        const data = [{ keyword: 'plumber', search_volume: 100 }];
318        const result = await rankKeywordsPostOverview(data, 'en', 50);
319        assert.strictEqual(result[0].keyword, 'plumber');
320      });
321    });
322  });