// site-filters.test.js
/**
 * Tests for src/utils/site-filters.js
 *
 * Covers all exported members:
 * - DIRECTORY_DOMAINS
 * - checkBlocklist
 * - isGovernmentDomain, isEducationDomain, isNonCommercialDomain
 * - isDemoEmail, isGovernmentEmail, isEducationEmail
 * - classifyIndustry
 * - loadFranchiseDomains (directly, and indirectly via checkBlocklist)
 *
 * Conventions used by these tests (as exercised below):
 * - checkBlocklist returns `null` when a domain is allowed and a non-null
 *   value (carrying a `reason` for directory domains) when it is blocked.
 * - The is*Domain / is*Email predicates return strict booleans and return
 *   `false` for null / non-string / malformed input.
 * - classifyIndustry returns `null` or an object with `type` and `reason`.
 */

import { test, describe } from 'node:test';
import assert from 'node:assert/strict';

import {
  DIRECTORY_DOMAINS,
  checkBlocklist,
  isGovernmentDomain,
  isEducationDomain,
  isNonCommercialDomain,
  isDemoEmail,
  isGovernmentEmail,
  isEducationEmail,
  classifyIndustry,
  loadFranchiseDomains,
} from '../../src/utils/site-filters.js';

// ─── DIRECTORY_DOMAINS constant ─────────────────────────────────────────────

describe('DIRECTORY_DOMAINS', () => {
  test('is a non-empty array', () => {
    assert.ok(Array.isArray(DIRECTORY_DOMAINS), 'should be an array');
    assert.ok(DIRECTORY_DOMAINS.length > 0, 'should have entries');
  });

  test('contains known directory domains', () => {
    assert.ok(DIRECTORY_DOMAINS.includes('yelp.com'), 'should include yelp.com');
    assert.ok(DIRECTORY_DOMAINS.includes('yellowpages.com'), 'should include yellowpages.com');
  });
});

// ─── checkBlocklist ─────────────────────────────────────────────────────────

describe('checkBlocklist', () => {
  test('returns null for legitimate business domain', () => {
    const result = checkBlocklist('smithplumbing.com.au', 'AU');
    assert.strictEqual(result, null);
  });

  test('blocks known directory domain', () => {
    const result = checkBlocklist('yelp.com');
    assert.ok(result !== null, 'should block yelp.com');
    assert.ok(result.reason, 'should have a reason');
  });

  test('blocks social media domain', () => {
    const result = checkBlocklist('facebook.com');
    assert.ok(result !== null, 'should block facebook.com');
  });

  test('blocks government domain via gov TLD', () => {
    const result = checkBlocklist('agency.gov');
    assert.ok(result !== null, 'should block .gov domain');
  });

  test('blocks government domain via countryspecific gov TLD', () => {
    const result = checkBlocklist('transport.gov.au');
    assert.ok(result !== null, 'should block .gov.au domain');
  });

  test('blocks education domain', () => {
    const result = checkBlocklist('sydney.edu.au');
    assert.ok(result !== null, 'should block .edu.au domain');
  });

  test('blocks non-commercial .org domain', () => {
    const result = checkBlocklist('charity.org');
    assert.ok(result !== null, 'should block .org domain');
  });

  test('handles null/undefined input gracefully', () => {
    assert.strictEqual(checkBlocklist(null), null);
    assert.strictEqual(checkBlocklist(undefined), null);
    assert.strictEqual(checkBlocklist(''), null);
  });

  test('handles subdomain of blocked directory domain', () => {
    // e.g., 'local.yelp.com' — may or may not be blocked depending on whether
    // the implementation uses exact-match or suffix/contains matching.
    // Calling it directly is the "does not throw" check: a throw fails the test.
    const result = checkBlocklist('local.yelp.com');
    // Either outcome is acceptable, but the return shape must still be valid:
    // `null` (allowed) or a non-null block-result object. The previous
    // `result === null || result !== null` assertion could never fail.
    assert.ok(
      result === null || typeof result === 'object',
      'should return null or a block result object'
    );
  });

  test('blocks known franchise domain (AU)', () => {
    // mcdonalds is a franchise — checkBlocklist should block it if the AU
    // franchise list exists and contains it. The outcome depends on the
    // franchise list content, so only the return shape is asserted here.
    // Calling it directly is the "does not throw" check: a throw fails the test.
    const result = checkBlocklist('mcdonalds.com.au', 'AU');
    assert.ok(
      result === null || typeof result === 'object',
      'should return null or a block result object'
    );
  });
});

// ─── isGovernmentDomain ──────────────────────────────────────────────────────

describe('isGovernmentDomain', () => {
  test('returns false for null/undefined', () => {
    assert.equal(isGovernmentDomain(null), false);
    assert.equal(isGovernmentDomain(undefined), false);
    // Non-string input is also rejected rather than throwing.
    assert.equal(isGovernmentDomain(42), false);
  });

  test('returns true for .gov (US federal)', () => {
    assert.equal(isGovernmentDomain('whitehouse.gov'), true);
  });

  test('returns true for .gov.au (Australia)', () => {
    assert.equal(isGovernmentDomain('ato.gov.au'), true);
  });

  test('returns true for .gov.uk', () => {
    assert.equal(isGovernmentDomain('hmrc.gov.uk'), true);
  });

  test('returns true for .gc.ca (Canada)', () => {
    assert.equal(isGovernmentDomain('cra.gc.ca'), true);
  });

  test('returns true for .govt.nz (New Zealand)', () => {
    assert.equal(isGovernmentDomain('inland-revenue.govt.nz'), true);
  });

  test('returns true for .gob.mx (Mexico)', () => {
    assert.equal(isGovernmentDomain('sat.gob.mx'), true);
  });

  test('returns true for .gouv.fr (France)', () => {
    assert.equal(isGovernmentDomain('impots.gouv.fr'), true);
  });

  test('returns true for .go.jp (Japan)', () => {
    assert.equal(isGovernmentDomain('mof.go.jp'), true);
  });

  test('returns true for .gov.br (Brazil)', () => {
    assert.equal(isGovernmentDomain('fazenda.gov.br'), true);
  });

  test('returns true for .mil (US military)', () => {
    assert.equal(isGovernmentDomain('army.mil'), true);
  });

  test('returns false for commercial domain', () => {
    assert.equal(isGovernmentDomain('example.com'), false);
    assert.equal(isGovernmentDomain('mysite.com.au'), false);
  });
});

// ─── isEducationDomain ───────────────────────────────────────────────────────

describe('isEducationDomain', () => {
  test('returns false for null', () => {
    assert.equal(isEducationDomain(null), false);
  });

  test('returns true for .edu', () => {
    assert.equal(isEducationDomain('mit.edu'), true);
  });

  test('returns true for .edu.au', () => {
    assert.equal(isEducationDomain('sydney.edu.au'), true);
  });

  test('returns true for .ac.uk', () => {
    assert.equal(isEducationDomain('ox.ac.uk'), true);
  });

  test('returns true for .ac.nz', () => {
    assert.equal(isEducationDomain('auckland.ac.nz'), true);
  });

  test('returns false for commercial domain', () => {
    // 'educate' contains 'edu' but the TLD is commercial.
    assert.equal(isEducationDomain('educate.com'), false);
  });
});

// ─── isNonCommercialDomain ───────────────────────────────────────────────────

describe('isNonCommercialDomain', () => {
  test('returns false for null', () => {
    assert.equal(isNonCommercialDomain(null), false);
  });

  test('returns true for .org', () => {
    assert.equal(isNonCommercialDomain('redcross.org'), true);
  });

  test('returns true for .org.au', () => {
    assert.equal(isNonCommercialDomain('redcross.org.au'), true);
  });

  test('returns true for .asn.au', () => {
    assert.equal(isNonCommercialDomain('myclub.asn.au'), true);
  });

  test('returns true for .ngo', () => {
    assert.equal(isNonCommercialDomain('greenpeace.ngo'), true);
  });

  test('returns true for .charity', () => {
    assert.equal(isNonCommercialDomain('shelter.charity'), true);
  });

  test('returns true for .foundation', () => {
    assert.equal(isNonCommercialDomain('gates.foundation'), true);
  });

  test('returns false for commercial domain', () => {
    // 'organic' starts with 'org' but the TLD is commercial.
    assert.equal(isNonCommercialDomain('organic.com'), false);
  });
});

// ─── isDemoEmail ─────────────────────────────────────────────────────────────

describe('isDemoEmail', () => {
  test('returns false for null', () => {
    assert.equal(isDemoEmail(null), false);
  });

  test('returns false for email with no @ sign', () => {
    assert.equal(isDemoEmail('notanemail'), false);
  });

  test('returns true for example.com email', () => {
    assert.equal(isDemoEmail('test@example.com'), true);
  });

  test('returns true for test.com email', () => {
    assert.equal(isDemoEmail('user@test.com'), true);
  });

  test('returns false for legitimate business email', () => {
    assert.equal(isDemoEmail('info@smithplumbing.com.au'), false);
  });
});

// ─── isGovernmentEmail ───────────────────────────────────────────────────────

describe('isGovernmentEmail', () => {
  test('returns false for null', () => {
    assert.equal(isGovernmentEmail(null), false);
  });

  test('returns true for .gov email', () => {
    assert.equal(isGovernmentEmail('info@agency.gov'), true);
  });

  test('returns true for .gov.au email', () => {
    assert.equal(isGovernmentEmail('contact@health.gov.au'), true);
  });

  test('returns false for legitimate email', () => {
    assert.equal(isGovernmentEmail('info@example.com'), false);
  });

  test('returns false for email with no @', () => {
    assert.equal(isGovernmentEmail('noemail'), false);
  });
});

// ─── isEducationEmail ────────────────────────────────────────────────────────

describe('isEducationEmail', () => {
  test('returns false for null', () => {
    assert.equal(isEducationEmail(null), false);
  });

  test('returns true for .edu email', () => {
    assert.equal(isEducationEmail('professor@mit.edu'), true);
  });

  test('returns true for .ac.uk email', () => {
    assert.equal(isEducationEmail('researcher@ox.ac.uk'), true);
  });

  test('returns false for commercial email', () => {
    assert.equal(isEducationEmail('info@educate.com'), false);
  });
});

// ─── classifyIndustry ────────────────────────────────────────────────────────

describe('classifyIndustry', () => {
  test('returns null for null/undefined', () => {
    assert.strictEqual(classifyIndustry(null), null);
    assert.strictEqual(classifyIndustry(undefined), null);
  });

  test('returns null for non-string', () => {
    assert.strictEqual(classifyIndustry(42), null);
  });

  test('detects legal industry from domain keyword', () => {
    const result = classifyIndustry('smithsolicitors.com.au');
    assert.ok(result !== null, 'should detect solicitors');
    assert.equal(result.type, 'legal');
  });

  test('detects healthcare industry from domain keyword', () => {
    const result = classifyIndustry('sydneydental.com.au');
    assert.ok(result !== null, 'should detect dental');
    assert.equal(result.type, 'healthcare');
  });

  test('detects financial industry from domain keyword', () => {
    const result = classifyIndustry('bestaccounting.com.au');
    assert.ok(result !== null, 'should detect accounting');
    assert.equal(result.type, 'financial');
  });

  test('returns null for generic business domain', () => {
    const result = classifyIndustry('smithplumbing.com.au');
    assert.strictEqual(result, null, 'plumbing should not match any industry');
  });

  test('includes reason in result', () => {
    const result = classifyIndustry('londonlawyer.co.uk');
    assert.ok(result !== null);
    assert.ok(result.reason, 'should include reason string');
    assert.ok(result.reason.includes('lawyer'), 'reason should mention the keyword');
  });
});

// ─── loadFranchiseDomains ────────────────────────────────────────────────────

describe('loadFranchiseDomains', () => {
  test('returns array for known country with franchise file', () => {
    const result = loadFranchiseDomains('AU');
    assert.ok(Array.isArray(result), 'should return array');
  });

  test('returns empty array for unknown country code', () => {
    const result = loadFranchiseDomains('XX');
    assert.ok(Array.isArray(result), 'should return array');
    assert.equal(result.length, 0, 'should be empty for unknown country');
  });

  test('caches results (same reference on second call)', () => {
    // Reference identity (not deep equality) is what proves the cache is hit.
    const r1 = loadFranchiseDomains('AU');
    const r2 = loadFranchiseDomains('AU');
    assert.strictEqual(r1, r2, 'should return same cached reference');
  });
});