keyword-filter.test.js
/**
 * Tests for Keyword Filter Module
 *
 * Exercises the three functions imported from `src/utils/keyword-filter.js`:
 *   - deterministicFilter        (synchronous, called with an array of keyword strings)
 *   - filterKeywordsPreOverview  (async, may call the LLM provider)
 *   - rankKeywordsPostOverview   (async, calls the LLM provider)
 *
 * The LLM provider and the logger are replaced with mocks so that no test
 * performs a real LLM call or writes log output.
 *
 * NOTE: `mock.module` is an experimental `node:test` API; the test runner has
 * to be started with `--experimental-test-module-mocks` for it to be available.
 */

import { test, describe, mock, beforeEach } from 'node:test';
import assert from 'node:assert';

// Mock dependencies before importing.
// `mockCallLLM` is reassigned by individual tests; the mocked `callLLM` looks it
// up at call time, so swapping the variable changes the behaviour the module
// under test observes without re-registering the mock.
let mockCallLLM;

mock.module('../../src/utils/llm-provider.js', {
  namedExports: {
    callLLM: (...args) => mockCallLLM(...args),
  },
});

// No-op logger: every method used for logging is stubbed out.
mock.module('../../src/utils/logger.js', {
  defaultExport: class {
    info() {}
    warn() {}
    error() {}
    success() {}
    debug() {}
  },
});

// Dynamic import so the module under test is loaded only after the mocks above
// have been registered.
const { deterministicFilter, filterKeywordsPreOverview, rankKeywordsPostOverview } =
  await import('../../src/utils/keyword-filter.js');

describe('Keyword Filter', () => {
  // Reset the LLM stub before every test so a stub installed by one test
  // cannot leak into the next one.
  beforeEach(() => {
    mockCallLLM = async () => ({
      content: JSON.stringify({ filtered_keywords: [], removed: [] }),
    });
  });

  describe('deterministicFilter', () => {
    test('keeps generic service keywords', () => {
      const { kept, removed } = deterministicFilter(['plumber', 'electrician', 'landscaping']);
      assert.deepStrictEqual(kept, ['plumber', 'electrician', 'landscaping']);
      assert.strictEqual(removed.length, 0);
    });

    test('removes job-related keywords', () => {
      const { kept, removed } = deterministicFilter([
        'plumber salary',
        'electrician jobs',
        'carpenter apprenticeship',
      ]);
      assert.strictEqual(kept.length, 0);
      assert.strictEqual(removed.length, 3);
      assert.ok(removed.every(r => r.reason === 'job'));
    });

    test('removes education-related keywords', () => {
      const { kept, removed } = deterministicFilter([
        'plumber course',
        'electrical training',
        'welding certification',
      ]);
      assert.strictEqual(kept.length, 0);
      assert.strictEqual(removed.length, 3);
      assert.ok(removed.every(r => r.reason === 'education'));
    });

    test('removes product-related keywords', () => {
      const { kept, removed } = deterministicFilter([
        'plumbing supplies',
        'electrical tools',
        'fence parts',
      ]);
      assert.strictEqual(kept.length, 0);
      assert.strictEqual(removed.length, 3);
      assert.ok(removed.every(r => r.reason === 'products'));
    });

    test('removes entertainment-related keywords', () => {
      const { kept, removed } = deterministicFilter(['band hire', 'movie tickets']);
      // "band hire" has "hire" as positive token → should be kept.
      // Asserted explicitly: without this check the test would still pass if
      // "band hire" were removed with reason "entertainment".
      assert.ok(kept.includes('band hire'), 'positive token "hire" should keep "band hire"');
      // "movie tickets" has no positive token → removed
      assert.ok(removed.some(r => r.keyword === 'movie tickets'));
      assert.ok(removed.every(r => r.reason === 'entertainment'));
    });

    test('removes informational keywords', () => {
      const { kept, removed } = deterministicFilter([
        'how to do plumbing', // "how to" informational, no positive tokens
        'plumbing tutorial', // "tutorial" informational, no positive tokens
        'diy fence', // "diy" informational, no positive tokens
      ]);
      assert.strictEqual(kept.length, 0);
      assert.strictEqual(removed.length, 3);
    });

    test('always removes near me keywords (hard filter)', () => {
      const { kept, removed } = deterministicFilter([
        'plumber near me',
        'electrician nearby',
        'landscaper close to me',
      ]);
      assert.strictEqual(kept.length, 0);
      assert.strictEqual(removed.length, 3);
      assert.ok(removed.every(r => r.reason === 'nearme'));
    });

    test('keeps keywords with both negative and positive tokens', () => {
      // "plumber tools repair" has "tools" (products) but also "repair" (positive)
      const { kept, removed } = deterministicFilter(['plumber tools repair']);
      assert.strictEqual(kept.length, 1);
      assert.strictEqual(removed.length, 0);
    });

    test('keeps service+action keywords', () => {
      const { kept } = deterministicFilter([
        'emergency plumber',
        'hvac repair',
        'tree removal',
        'roof installation',
      ]);
      assert.strictEqual(kept.length, 4);
    });

    test('handles empty input', () => {
      const { kept, removed } = deterministicFilter([]);
      assert.deepStrictEqual(kept, []);
      assert.deepStrictEqual(removed, []);
    });

    test('is case insensitive', () => {
      const { removed: lower } = deterministicFilter(['plumber salary']);
      const { removed: upper } = deterministicFilter(['PLUMBER SALARY']);
      assert.strictEqual(lower.length, 1);
      assert.strictEqual(upper.length, 1);
    });
  });

  describe('filterKeywordsPreOverview', () => {
    test('applies deterministic filter first', async () => {
      const result = await filterKeywordsPreOverview(
        ['plumber', 'plumber salary', 'electrician', 'carpenter jobs'],
        'en',
        80,
        null
      );

      assert.ok(result.filtered_keywords.includes('plumber'));
      assert.ok(result.filtered_keywords.includes('electrician'));
      assert.ok(!result.filtered_keywords.includes('plumber salary'));
      assert.ok(!result.filtered_keywords.includes('carpenter jobs'));
    });

    test('skips LLM filter when keywords are within maxOutput', async () => {
      // Flag flipped by the stub so the test can observe whether it was invoked.
      let llmCalled = false;
      mockCallLLM = async () => {
        llmCalled = true;
        return { content: '{"filtered_keywords":[],"removed":[]}' };
      };

      await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 80, null);
      assert.strictEqual(llmCalled, false, 'LLM should not be called when keywords < maxOutput');
    });

    test('calls LLM filter when keywords exceed maxOutput', async () => {
      let llmCalled = false;
      mockCallLLM = async () => {
        llmCalled = true;
        return {
          content: JSON.stringify({
            filtered_keywords: ['plumber'],
            removed: [{ keyword: 'electrician', reason: 'other' }],
          }),
        };
      };

      // Set maxOutput to 1 to force LLM call
      const result = await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 1, null);
      assert.strictEqual(llmCalled, true);
      assert.deepStrictEqual(result.filtered_keywords, ['plumber']);
    });

    test('handles LLM failure gracefully', async () => {
      mockCallLLM = async () => {
        throw new Error('API error');
      };

      // Force LLM call by setting maxOutput low
      const result = await filterKeywordsPreOverview(['plumber', 'electrician'], 'en', 1, null);

      // Should fall back to keeping all keywords
      assert.ok(result.filtered_keywords.includes('plumber'));
      assert.ok(result.filtered_keywords.includes('electrician'));
    });

    test('returns combined removed from all filters', async () => {
      const result = await filterKeywordsPreOverview(
        ['plumber', 'plumber near me', 'electrician salary'],
        'en',
        80,
        null
      );

      assert.strictEqual(result.removed.length, 2);
      assert.ok(result.removed.some(r => r.reason === 'nearme'));
      assert.ok(result.removed.some(r => r.reason === 'job'));
    });

    test('filters place-specific keywords when countryCode provided', async () => {
      // AU regions file exists at data/au/regions-final-filtered.csv
      // and contains entries like "parramatta", "sydney"
      const result = await filterKeywordsPreOverview(
        ['plumber', 'plumber parramatta', 'electrician sydney', 'hvac repair'],
        'en',
        80,
        'au'
      );

      // Generic keywords should be kept
      assert.ok(result.filtered_keywords.includes('plumber'), 'should keep generic keyword');
      assert.ok(result.filtered_keywords.includes('hvac repair'), 'should keep non-place keyword');
      // Place-specific keywords should be removed
      assert.ok(
        !result.filtered_keywords.includes('plumber parramatta'),
        'should filter parramatta'
      );
      assert.ok(!result.filtered_keywords.includes('electrician sydney'), 'should filter sydney');
      // Removed list should include place-specific reasons
      assert.ok(
        result.removed.some(r => r.reason && r.reason.includes('place-specific')),
        'removed should include place-specific entry'
      );
    });

    test('skips place filter when no regions file for country', async () => {
      // Use a country code with no regions file (e.g., 'xx')
      const result = await filterKeywordsPreOverview(
        ['plumber sydney', 'electrician melbourne'],
        'en',
        80,
        'xx'
      );

      // Without regions file, nothing is filtered by place
      assert.ok(result.filtered_keywords.includes('plumber sydney'));
      assert.ok(result.filtered_keywords.includes('electrician melbourne'));
    });
  });

  describe('rankKeywordsPostOverview', () => {
    test('ranks keywords using LLM', async () => {
      // The stubbed ranking deliberately orders the keywords differently from
      // the input data so the assertions can tell the LLM order was applied.
      mockCallLLM = async () => ({
        content: JSON.stringify({
          ranked: [
            { keyword: 'emergency plumber', score: 95, rank: 1, explanation: 'High intent' },
            { keyword: 'plumber', score: 80, rank: 2, explanation: 'Generic' },
          ],
        }),
      });

      const data = [
        { keyword: 'plumber', search_volume: 1000, competition: 0.5, cpc: 2.5 },
        { keyword: 'emergency plumber', search_volume: 500, competition: 0.3, cpc: 5.0 },
      ];

      const result = await rankKeywordsPostOverview(data, 'en', 50);
      assert.strictEqual(result.length, 2);
      assert.strictEqual(result[0].keyword, 'emergency plumber');
      assert.strictEqual(result[0].rank, 1);
    });

    test('falls back to volume sorting on LLM failure', async () => {
      mockCallLLM = async () => {
        throw new Error('API error');
      };

      const data = [
        { keyword: 'plumber', search_volume: 500 },
        { keyword: 'electrician', search_volume: 1000 },
      ];

      const result = await rankKeywordsPostOverview(data, 'en', 50);
      assert.strictEqual(result[0].keyword, 'electrician'); // higher volume first
      assert.strictEqual(result[0].explanation, 'Fallback: sorted by volume');
    });

    test('respects topN limit', async () => {
      mockCallLLM = async () => ({
        content: JSON.stringify({
          ranked: [
            { keyword: 'a', score: 90, rank: 1 },
            { keyword: 'b', score: 80, rank: 2 },
            { keyword: 'c', score: 70, rank: 3 },
          ],
        }),
      });

      const data = [
        { keyword: 'a', search_volume: 100 },
        { keyword: 'b', search_volume: 200 },
        { keyword: 'c', search_volume: 300 },
      ];

      // Three ranked entries come back from the stub; topN = 2 must cap the result.
      const result = await rankKeywordsPostOverview(data, 'en', 2);
      assert.strictEqual(result.length, 2);
    });

    test('handles empty input', async () => {
      const result = await rankKeywordsPostOverview([], 'en', 50);
      assert.deepStrictEqual(result, []);
    });

    test('strips markdown code fences from LLM response', async () => {
      // The stub wraps the JSON payload in a ```json … ``` fence.
      mockCallLLM = async () => ({
        content:
          '```json\n{"ranked":[{"keyword":"plumber","score":90,"rank":1,"explanation":"test"}]}\n```',
      });

      const data = [{ keyword: 'plumber', search_volume: 100 }];
      const result = await rankKeywordsPostOverview(data, 'en', 50);
      assert.strictEqual(result[0].keyword, 'plumber');
    });
  });
});