/ tests / utils / keyword-filter-deterministic.test.js
keyword-filter-deterministic.test.js
  1  /**
  2   * Tests for keyword-filter.js - deterministicFilter (pure function, no mocking needed)
  3   *
  4   * The existing keyword-filter.test.js is broken due to mock.module not being
  5   * available in this Node.js runtime. This file covers the pure deterministicFilter
  6   * function directly.
  7   */
  8  import { test, describe } from 'node:test';
  9  import assert from 'node:assert/strict';
 10  import { deterministicFilter } from '../../src/utils/keyword-filter.js';
 11  
 12  describe('deterministicFilter - service keywords', () => {
 13    test('keeps generic service keywords', () => {
 14      const { kept, removed } = deterministicFilter(['plumber', 'electrician', 'landscaping']);
 15      assert.deepEqual(kept, ['plumber', 'electrician', 'landscaping']);
 16      assert.equal(removed.length, 0);
 17    });
 18  
 19    test('keeps service+action keywords', () => {
 20      const { kept } = deterministicFilter([
 21        'emergency plumber',
 22        'hvac repair',
 23        'tree removal',
 24        'roof installation',
 25      ]);
 26      assert.equal(kept.length, 4);
 27    });
 28  
 29    test('handles empty input', () => {
 30      const { kept, removed } = deterministicFilter([]);
 31      assert.deepEqual(kept, []);
 32      assert.deepEqual(removed, []);
 33    });
 34  
 35    test('is case insensitive', () => {
 36      const { removed: lower } = deterministicFilter(['plumber salary']);
 37      const { removed: upper } = deterministicFilter(['PLUMBER SALARY']);
 38      assert.equal(lower.length, 1);
 39      assert.equal(upper.length, 1);
 40    });
 41  });
 42  
 43  describe('deterministicFilter - job keywords', () => {
 44    test('removes salary keywords', () => {
 45      const { removed } = deterministicFilter(['plumber salary']);
 46      assert.equal(removed.length, 1);
 47      assert.equal(removed[0].reason, 'job');
 48    });
 49  
 50    test('removes jobs keywords', () => {
 51      const { removed } = deterministicFilter(['electrician jobs']);
 52      assert.equal(removed[0].reason, 'job');
 53    });
 54  
 55    test('removes apprenticeship keywords', () => {
 56      const { removed } = deterministicFilter(['carpenter apprenticeship']);
 57      assert.equal(removed[0].reason, 'job');
 58    });
 59  
 60    test('removes hiring/recruitment keywords', () => {
 61      const { removed } = deterministicFilter(['plumber hiring', 'recruitment agency']);
 62      assert.equal(removed.length, 2);
 63      assert.ok(removed.every(r => r.reason === 'job'));
 64    });
 65  
 66    test('removes vacancy keywords', () => {
 67      const { removed } = deterministicFilter(['plumber vacancy']);
 68      assert.equal(removed[0].reason, 'job');
 69    });
 70  });
 71  
 72  describe('deterministicFilter - education keywords', () => {
 73    test('removes course keywords', () => {
 74      const { removed } = deterministicFilter(['plumber course']);
 75      assert.equal(removed[0].reason, 'education');
 76    });
 77  
 78    test('removes training keywords', () => {
 79      const { removed } = deterministicFilter(['electrical training']);
 80      assert.equal(removed[0].reason, 'education');
 81    });
 82  
 83    test('removes certification keywords', () => {
 84      const { removed } = deterministicFilter(['welding certification']);
 85      assert.equal(removed[0].reason, 'education');
 86    });
 87  
 88    test('removes degree keywords', () => {
 89      const { removed } = deterministicFilter(['plumbing degree']);
 90      assert.equal(removed[0].reason, 'education');
 91    });
 92  
 93    test('removes school/university keywords', () => {
 94      const { removed } = deterministicFilter(['trade school', 'university course']);
 95      assert.ok(removed.every(r => r.reason === 'education'));
 96    });
 97  });
 98  
 99  describe('deterministicFilter - products keywords', () => {
100    test('removes supplies keywords', () => {
101      const { removed } = deterministicFilter(['plumbing supplies']);
102      assert.equal(removed[0].reason, 'products');
103    });
104  
105    test('removes tools keywords', () => {
106      const { removed } = deterministicFilter(['electrical tools']);
107      assert.equal(removed[0].reason, 'products');
108    });
109  
110    test('removes parts keywords', () => {
111      const { removed } = deterministicFilter(['fence parts']);
112      assert.equal(removed[0].reason, 'products');
113    });
114  
115    test('removes equipment keywords', () => {
116      const { removed } = deterministicFilter(['plumbing equipment']);
117      assert.equal(removed[0].reason, 'products');
118    });
119  });
120  
// Entertainment phrases are expected to be removed with reason
// 'entertainment', unless a positive token is also present.
describe('deterministicFilter - entertainment keywords', () => {
  test('removes movie/concert/show/lyrics keywords', () => {
    const entertainmentPhrases = ['movie tickets', 'concert show', 'song lyrics'];
    const dropped = deterministicFilter(entertainmentPhrases).removed;

    assert.equal(dropped.length, 3);
    assert.ok(dropped.every(entry => entry.reason === 'entertainment'));
  });

  test('keeps "band hire" (has positive token "hire")', () => {
    // "band" counts as entertainment, but the positive token "hire" keeps the phrase.
    const survivors = deterministicFilter(['band hire']).kept;
    assert.equal(survivors.length, 1);
  });
});
134  
// Research-intent phrases (how to, tutorial, diy, guide, meaning, definition)
// are expected to be removed with reason 'informational'.
//
// Each test asserts the number of removed entries before inspecting reasons,
// so a filter that removes nothing fails with a clear count mismatch instead
// of a TypeError on `removed[0]` or a silent pass.
describe('deterministicFilter - informational keywords', () => {
  test('removes "how to" keywords without positive tokens', () => {
    const { removed } = deterministicFilter(['how to do plumbing']);
    assert.equal(removed.length, 1);
    assert.equal(removed[0].reason, 'informational');
  });

  test('removes tutorial keywords', () => {
    const { removed } = deterministicFilter(['plumbing tutorial']);
    assert.equal(removed.length, 1);
    assert.equal(removed[0].reason, 'informational');
  });

  test('removes diy keywords', () => {
    const { removed } = deterministicFilter(['diy fence']);
    assert.equal(removed.length, 1);
    assert.equal(removed[0].reason, 'informational');
  });

  test('removes guide keywords', () => {
    const { removed } = deterministicFilter(['guide to plumbing']);
    assert.equal(removed.length, 1);
    assert.equal(removed[0].reason, 'informational');
  });

  test('removes meaning/definition keywords', () => {
    const { removed } = deterministicFilter(['plumber meaning', 'what is a definition']);
    // `every` is vacuously true on an empty array, so the count must be
    // checked first; otherwise this test passes when nothing is removed.
    assert.equal(removed.length, 2);
    assert.ok(removed.every(r => r.reason === 'informational'));
  });
});
161  
// Proximity phrases ("near me", "nearby", "close to me", "around me",
// "in my area") are expected to be removed with reason 'nearme'.
describe('deterministicFilter - near me keywords (hard filter)', () => {
  test('always removes "near me" even with positive tokens', () => {
    const proximityPhrases = [
      'plumber near me',
      'electrician nearby',
      'landscaper close to me',
      'repair service near me',
    ];
    const dropped = deterministicFilter(proximityPhrases).removed;

    assert.equal(dropped.length, 4);
    assert.ok(dropped.every(entry => entry.reason === 'nearme'));
  });

  test('"around me" is removed', () => {
    const dropped = deterministicFilter(['service around me']).removed;
    assert.equal(dropped[0].reason, 'nearme');
  });

  test('"in my area" is removed', () => {
    const dropped = deterministicFilter(['plumber in my area']).removed;
    assert.equal(dropped[0].reason, 'nearme');
  });
});
184  
// A positive token is expected to rescue a phrase that also contains a
// negative token, except for the "near me" hard filter.
describe('deterministicFilter - positive token override', () => {
  test('keeps keyword with negative+positive tokens (except near me)', () => {
    // "tools" is a products term, while "repair" is a positive token.
    const outcome = deterministicFilter(['plumber tools repair']);

    assert.equal(outcome.kept.length, 1);
    assert.equal(outcome.removed.length, 0);
  });

  test('keeps "buy" keyword with "emergency" positive token', () => {
    const survivors = deterministicFilter(['emergency buy service']).kept;
    assert.equal(survivors.length, 1);
  });

  test('positive token does NOT save near-me keywords (hard filter)', () => {
    // "repair" is present, but "near me" is a hard filter and still wins.
    const dropped = deterministicFilter(['plumber near me repair']).removed;

    assert.equal(dropped.length, 1);
    assert.equal(dropped[0].reason, 'nearme');
  });
});
205  
// End-to-end check on a list mixing kept phrases with one phrase from each
// removal category exercised here.
describe('deterministicFilter - multiple keywords mixed', () => {
  test('processes a realistic mixed list', () => {
    const outcome = deterministicFilter([
      'plumber', // keep
      'emergency plumber', // keep
      'plumber salary', // remove: job
      'plumber course', // remove: education
      'plumbing supplies', // remove: products
      'plumber near me', // remove: nearme
      'how to plumb', // remove: informational
    ]);

    // Two survivors, five removals.
    assert.equal(outcome.kept.length, 2);
    assert.equal(outcome.removed.length, 5);

    // The survivors are exactly the two service phrases.
    assert.ok(outcome.kept.includes('plumber'));
    assert.ok(outcome.kept.includes('emergency plumber'));
  });
});