// site-filters.test.js
/**
 * Tests for site-filters module
 *
 * This first section covers the static blocklists: social-media and
 * business-directory detection through `checkBlocklist`, plus sanity checks
 * on the exported `DIRECTORY_DOMAINS` / `SOCIAL_MEDIA_DOMAINS` lists.
 */

import { test, describe } from 'node:test';
import assert from 'node:assert';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import {
  checkBlocklist,
  DIRECTORY_DOMAINS,
  SOCIAL_MEDIA_DOMAINS,
  DEMO_EMAIL_DOMAINS,
  loadFranchiseDomains,
  isGovernmentDomain,
  isEducationDomain,
  isDemoEmail,
  isGovernmentEmail,
  isEducationEmail,
} from '../../src/utils/site-filters.js';

// ES modules have no built-in __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

test('checkBlocklist should detect social media domains', () => {
  ['facebook.com', 'twitter.com', 'linkedin.com', 'instagram.com', 'x.com'].forEach((host) => {
    const verdict = checkBlocklist(host);
    assert.ok(verdict, `${host} should be detected`);

    const { blocklist, reason } = verdict;
    assert.strictEqual(blocklist, 'social_media');
    assert.strictEqual(reason, 'Ignored: Social media platform');
  });
});

test('checkBlocklist should detect business directory domains', () => {
  ['yelp.com', 'yellowpages.com', 'craigslist.com', 'thumbtack.com', 'angi.com'].forEach(
    (host) => {
      const verdict = checkBlocklist(host);
      assert.ok(verdict, `${host} should be detected`);

      const { blocklist, reason } = verdict;
      assert.strictEqual(blocklist, 'directory');
      assert.strictEqual(reason, 'Ignored: Business directory');
    }
  );
});

test('checkBlocklist should handle case-insensitive matching', () => {
  // Mixed-case spellings of hosts that are expected to be blocked.
  const mixedCaseHosts = ['YELP.COM', 'YeLp.CoM', 'Facebook.COM', 'TWITTER.com'];

  mixedCaseHosts.forEach((host) => {
    assert.ok(checkBlocklist(host), `${host} should be detected (case-insensitive)`);
  });
});

test('checkBlocklist should handle subdomain matching', () => {
  // Each entry is a subdomain of a host that is expected to be blocked.
  const subdomainHosts = ['www.yelp.com', 'au.yelp.com', 'm.facebook.com', 'business.linkedin.com'];

  subdomainHosts.forEach((host) => {
    assert.ok(checkBlocklist(host), `${host} should be detected (with subdomain)`);
  });
});

test('checkBlocklist should return null for normal business sites', () => {
  const ordinaryHosts = [
    'example.com',
    'myplumber.com',
    'joes-pizza.com',
    'dentist-office.co.uk',
    'law-firm.com',
  ];

  ordinaryHosts.forEach((host) => {
    assert.strictEqual(checkBlocklist(host), null, `${host} should not be blocked`);
  });
});

test('checkBlocklist should handle null/undefined input', () => {
  // Missing or empty input must yield null.
  for (const emptyInput of [null, undefined, '']) {
    assert.strictEqual(checkBlocklist(emptyInput), null);
  }
});

test('DIRECTORY_DOMAINS should include major directories', () => {
  const mustHave = [
    'yelp.com',
    'yellowpages.com',
    'craigslist.com',
    'craigslist.org',
    'thumbtack.com',
    'angi.com',
    'homeadvisor.com',
    'zillow.com',
    'tripadvisor.com',
  ];

  mustHave.forEach((host) => {
    assert.ok(DIRECTORY_DOMAINS.includes(host), `DIRECTORY_DOMAINS should include ${host}`);
  });
});

test('SOCIAL_MEDIA_DOMAINS should include major platforms', () => {
  const mustHave = [
    'facebook.com',
    'instagram.com',
    'twitter.com',
    'x.com',
    'linkedin.com',
    'youtube.com',
    'tiktok.com',
    'pinterest.com',
  ];

  mustHave.forEach((host) => {
    assert.ok(SOCIAL_MEDIA_DOMAINS.includes(host), `SOCIAL_MEDIA_DOMAINS should include ${host}`);
  });
});

test('checkBlocklist should prioritize social media over directory', () => {
  // LinkedIn is in both lists, but should return social_media
  const verdict = checkBlocklist('linkedin.com');
  assert.ok(verdict);
  assert.strictEqual(verdict.blocklist, 'social_media');
});

// ===== Franchise Detection Tests =====
// These tests pass a country code as the second argument to checkBlocklist and
// expect a result whose `blocklist` is 'franchise' for known franchise brands.

test('checkBlocklist should detect US franchises', () => {
  // Mr. Rooter franchise
  const mrRooter = checkBlocklist('mrrooter.com', 'us');
  assert.ok(mrRooter);
  assert.strictEqual(mrRooter.blocklist, 'franchise');
  assert.strictEqual(mrRooter.reason, 'Ignored: Home service franchise');

  // With subdomain
  const mrRooterSubdomain = checkBlocklist('austin.mrrooter.com', 'us');
  assert.ok(mrRooterSubdomain);
  assert.strictEqual(mrRooterSubdomain.blocklist, 'franchise');

  // Molly Maid franchise
  const mollyMaid = checkBlocklist('mollymaid.com', 'us');
  assert.ok(mollyMaid);
  assert.strictEqual(mollyMaid.blocklist, 'franchise');
});

test('checkBlocklist should detect AU franchises', () => {
  // Jim's Mowing
  const jimsMowing = checkBlocklist('jimsmowing.com.au', 'au');
  assert.ok(jimsMowing);
  assert.strictEqual(jimsMowing.blocklist, 'franchise');

  // Jim's Cleaning
  const jimsCleaning = checkBlocklist('jimscleaning.com.au', 'au');
  assert.ok(jimsCleaning);
  assert.strictEqual(jimsCleaning.blocklist, 'franchise');
});

test('checkBlocklist should not detect franchises without country code', () => {
  // Should return null when no country code provided
  const result = checkBlocklist('mrrooter.com');
  assert.strictEqual(result, null);
});

test('checkBlocklist should handle franchise domain normalization', () => {
  // With hyphens
  const hyphenated = checkBlocklist('mr-rooter.com', 'us');
  assert.ok(hyphenated);
  assert.strictEqual(hyphenated.blocklist, 'franchise');

  // With dots
  const dotted = checkBlocklist('mr.rooter.com', 'us');
  assert.ok(dotted);
  assert.strictEqual(dotted.blocklist, 'franchise');
});

test('checkBlocklist should not block independent businesses', () => {
  // Should not block non-franchise plumber
  assert.strictEqual(checkBlocklist('acmeplumbing.com', 'us'), null);

  // Should not block non-franchise cleaner
  assert.strictEqual(checkBlocklist('smithcleaning.com.au', 'au'), null);
});

// ===== loadFranchiseDomains Tests =====

describe('loadFranchiseDomains', () => {
  test('returns empty array for null country code', () => {
    const result = loadFranchiseDomains(null);
    assert.deepStrictEqual(result, []);
  });

  test('returns empty array for undefined country code', () => {
    const result = loadFranchiseDomains(undefined);
    assert.deepStrictEqual(result, []);
  });

  test('returns empty array for non-existent country code', () => {
    const result = loadFranchiseDomains('zz');
    assert.deepStrictEqual(result, []);
  });

  test('caches franchise lists (second call returns same reference)', () => {
    const result1 = loadFranchiseDomains('us');
    const result2 = loadFranchiseDomains('us');
    // strictEqual on arrays compares identity, so this passes only for the same reference.
    assert.strictEqual(result1, result2);
  });

  test('normalizes brand names to domain-friendly format', () => {
    // Write to data/franchises/ at repo root (where site-filters.js reads from)
    const projectRoot = path.join(__dirname, '../..');
    const testFile = path.join(projectRoot, 'data/franchises/testload99.txt');

    // Fixture creation happens inside the try so the finally block removes the
    // file even when setup or an assertion fails part-way through.
    try {
      // `recursive: true` makes mkdirSync succeed when the directory already
      // exists, so no separate existence check is needed.
      fs.mkdirSync(path.dirname(testFile), { recursive: true });

      fs.writeFileSync(
        testFile,
        `# Test franchise list
Mr. Rooter
Jim's Mowing
CertaPro Painters

# Another franchise
The UPS Store`
      );

      const result = loadFranchiseDomains('testload99');

      assert.ok(result.includes('mrrooter'));
      assert.ok(result.includes('jimsmowing'));
      assert.ok(result.includes('certapropainters'));
      assert.ok(result.includes('theupsstore'));
      // Lines starting with '#' in the fixture must not appear in the result.
      assert.strictEqual(result.filter((f) => f.startsWith('#')).length, 0);
    } finally {
      // `force: true` ignores a missing file, so cleanup is safe even if the
      // fixture was never written.
      fs.rmSync(testFile, { force: true });
    }
  });
});

// ===== Government Domain Tests =====

describe('isGovernmentDomain', () => {
  test('returns false for invalid inputs', () => {
    assert.strictEqual(isGovernmentDomain(null), false);
    assert.strictEqual(isGovernmentDomain(undefined), false);
    assert.strictEqual(isGovernmentDomain(''), false);
    assert.strictEqual(isGovernmentDomain(123), false);
  });

  test('detects US federal .gov domains', () => {
    assert.strictEqual(isGovernmentDomain('whitehouse.gov'), true);
    assert.strictEqual(isGovernmentDomain('nasa.gov'), true);
  });

  test('detects country-specific .gov domains', () => {
    assert.strictEqual(isGovernmentDomain('example.gov.au'), true);
    assert.strictEqual(isGovernmentDomain('example.gov.uk'), true);
    assert.strictEqual(isGovernmentDomain('example.gov.in'), true);
  });

  test('detects Canadian .gc.ca domains', () => {
    assert.strictEqual(isGovernmentDomain('example.gc.ca'), true);
  });

  test('detects New Zealand .govt.nz domains', () => {
    assert.strictEqual(isGovernmentDomain('example.govt.nz'), true);
  });

  test('detects Spanish .gob domains', () => {
    assert.strictEqual(isGovernmentDomain('example.gob.mx'), true);
    assert.strictEqual(isGovernmentDomain('example.gob.es'), true);
  });

  test('detects French .gouv domains', () => {
    assert.strictEqual(isGovernmentDomain('example.gouv.fr'), true);
  });

  test('detects Asian .go domains', () => {
    assert.strictEqual(isGovernmentDomain('example.go.jp'), true);
    assert.strictEqual(isGovernmentDomain('example.go.kr'), true);
  });

  test('detects Brazilian .gov.br domains', () => {
    assert.strictEqual(isGovernmentDomain('example.gov.br'), true);
  });

  test('detects US military .mil domains', () => {
    assert.strictEqual(isGovernmentDomain('army.mil'), true);
  });

  test('returns false for non-government domains', () => {
    assert.strictEqual(isGovernmentDomain('example.com'), false);
    // "government" as a name, not as a suffix, must not match.
    assert.strictEqual(isGovernmentDomain('government.com'), false);
  });

  test('is case insensitive', () => {
    assert.strictEqual(isGovernmentDomain('EXAMPLE.GOV'), true);
  });

  test('handles whitespace', () => {
    assert.strictEqual(isGovernmentDomain(' example.gov '), true);
  });
});

// ===== Education Domain Tests =====

describe('isEducationDomain', () => {
  test('returns false for invalid inputs', () => {
    assert.strictEqual(isEducationDomain(null), false);
    assert.strictEqual(isEducationDomain(undefined), false);
    assert.strictEqual(isEducationDomain(''), false);
    assert.strictEqual(isEducationDomain(123), false);
  });

  test('detects US .edu domains', () => {
    assert.strictEqual(isEducationDomain('harvard.edu'), true);
    assert.strictEqual(isEducationDomain('mit.edu'), true);
  });

  test('detects country-specific .edu domains', () => {
    assert.strictEqual(isEducationDomain('example.edu.au'), true);
    assert.strictEqual(isEducationDomain('example.edu.uk'), true);
  });

  test('detects .ac academic domains', () => {
    assert.strictEqual(isEducationDomain('example.ac.uk'), true);
    assert.strictEqual(isEducationDomain('example.ac.jp'), true);
    assert.strictEqual(isEducationDomain('example.ac.nz'), true);
  });

  test('returns false for non-education domains', () => {
    assert.strictEqual(isEducationDomain('example.com'), false);
    // "education" as a name, not as a suffix, must not match.
    assert.strictEqual(isEducationDomain('education.com'), false);
  });

  test('is case insensitive', () => {
    assert.strictEqual(isEducationDomain('EXAMPLE.EDU'), true);
  });

  test('handles whitespace', () => {
    assert.strictEqual(isEducationDomain(' example.edu '), true);
  });
});

// ===== Demo Email Tests =====

describe('isDemoEmail', () => {
  test('returns false for invalid inputs', () => {
    assert.strictEqual(isDemoEmail(null), false);
    assert.strictEqual(isDemoEmail(undefined), false);
    assert.strictEqual(isDemoEmail(''), false);
    assert.strictEqual(isDemoEmail(123), false);
  });

  test('detects exact demo domain matches', () => {
    assert.strictEqual(isDemoEmail('user@example.com'), true);
    assert.strictEqual(isDemoEmail('user@test.com'), true);
    assert.strictEqual(isDemoEmail('user@mailinator.com'), true);
  });

  test('detects subdomain demo emails', () => {
    assert.strictEqual(isDemoEmail('user@subdomain.example.com'), true);
    assert.strictEqual(isDemoEmail('user@api.test.com'), true);
  });

  test('returns false for emails without @ symbol', () => {
    assert.strictEqual(isDemoEmail('notanemail'), false);
  });

  test('returns false for legitimate email domains', () => {
    assert.strictEqual(isDemoEmail('user@gmail.com'), false);
    assert.strictEqual(isDemoEmail('user@company.com'), false);
  });

  test('is case insensitive', () => {
    assert.strictEqual(isDemoEmail('USER@EXAMPLE.COM'), true);
  });

  test('handles whitespace', () => {
    assert.strictEqual(isDemoEmail(' user@example.com '), true);
  });

  test('detects throwaway email services', () => {
    assert.strictEqual(isDemoEmail('user@10minutemail.com'), true);
    assert.strictEqual(isDemoEmail('user@guerrillamail.com'), true);
    assert.strictEqual(isDemoEmail('user@throwaway.email'), true);
  });
});

// ===== Government Email Tests =====

describe('isGovernmentEmail', () => {
  test('returns false for invalid inputs', () => {
    assert.strictEqual(isGovernmentEmail(null), false);
    assert.strictEqual(isGovernmentEmail(undefined), false);
    assert.strictEqual(isGovernmentEmail(''), false);
    assert.strictEqual(isGovernmentEmail(123), false);
  });

  test('detects government email addresses', () => {
    assert.strictEqual(isGovernmentEmail('user@whitehouse.gov'), true);
    assert.strictEqual(isGovernmentEmail('user@example.gov.au'), true);
    assert.strictEqual(isGovernmentEmail('user@example.gc.ca'), true);
  });

  test('returns false for emails without @ symbol', () => {
    // A government-looking suffix alone is not enough; the input must be an email.
    assert.strictEqual(isGovernmentEmail('notanemail.gov'), false);
  });

  test('returns false for non-government emails', () => {
    assert.strictEqual(isGovernmentEmail('user@example.com'), false);
    assert.strictEqual(isGovernmentEmail('user@government.com'), false);
  });

  test('is case insensitive', () => {
    assert.strictEqual(isGovernmentEmail('USER@EXAMPLE.GOV'), true);
  });

  test('handles whitespace', () => {
    assert.strictEqual(isGovernmentEmail(' user@example.gov '), true);
  });
});

// ===== Education Email Tests =====

describe('isEducationEmail', () => {
  test('returns false for invalid inputs', () => {
    assert.strictEqual(isEducationEmail(null), false);
    assert.strictEqual(isEducationEmail(undefined), false);
    assert.strictEqual(isEducationEmail(''), false);
    assert.strictEqual(isEducationEmail(123), false);
  });

  test('detects education email addresses', () => {
    assert.strictEqual(isEducationEmail('student@harvard.edu'), true);
    assert.strictEqual(isEducationEmail('student@example.edu.au'), true);
    assert.strictEqual(isEducationEmail('student@example.ac.uk'), true);
  });

  test('returns false for emails without @ symbol', () => {
    // An education-looking suffix alone is not enough; the input must be an email.
    assert.strictEqual(isEducationEmail('notanemail.edu'), false);
  });

  test('returns false for non-education emails', () => {
    assert.strictEqual(isEducationEmail('user@example.com'), false);
    assert.strictEqual(isEducationEmail('user@education.com'), false);
  });

  test('is case insensitive', () => {
    assert.strictEqual(isEducationEmail('STUDENT@EXAMPLE.EDU'), true);
  });

  test('handles whitespace', () => {
    assert.strictEqual(isEducationEmail(' student@example.edu '), true);
  });
});

// ===== Government/Education Blocking Integration =====
// checkBlocklist is called here without a country code and is still expected
// to flag government and education hosts.

describe('checkBlocklist - government and education domains', () => {
  test('blocks government domains', () => {
    const result = checkBlocklist('example.gov');
    assert.ok(result);
    assert.strictEqual(result.reason, 'Ignored: Government domain');
    assert.strictEqual(result.blocklist, 'government');
  });

  test('blocks education domains', () => {
    const result = checkBlocklist('example.edu');
    assert.ok(result);
    assert.strictEqual(result.reason, 'Ignored: Education domain');
    assert.strictEqual(result.blocklist, 'education');
  });

  test('blocks country-specific government domains', () => {
    assert.ok(checkBlocklist('example.gov.au'));
    assert.ok(checkBlocklist('example.gc.ca'));
    assert.ok(checkBlocklist('example.gob.mx'));
  });

  test('blocks country-specific education domains', () => {
    assert.ok(checkBlocklist('example.edu.au'));
    assert.ok(checkBlocklist('example.ac.uk'));
  });
});