/**
 * GDPR Verification Unit Tests (gdpr-verification.test.js)
 *
 * Exercises the helpers exported by src/utils/gdpr-verification.js:
 * free-provider detection, company type / keyword search in HTML,
 * key page name lookup, and single / batch e-mail verification.
 */

import { test } from 'node:test';
import assert from 'node:assert';
import {
  isFreeEmailProvider,
  searchCompanyTypes,
  searchCompanyKeywords,
  getKeyPageNames,
  verifyCompanyEmail,
  batchVerifyEmails,
} from '../../src/utils/gdpr-verification.js';
import { getCountryByCode } from '../../src/config/countries.js';

// Test free email provider detection
test('isFreeEmailProvider: correctly identifies free email providers', () => {
  // [input, expected] pairs, kept in the order the scenarios were originally checked.
  const cases = [
    // International providers
    ['user@gmail.com', true],
    ['user@yahoo.com', true],
    ['user@outlook.com', true],
    ['user@hotmail.com', true],

    // German providers
    ['user@gmx.de', true],
    ['user@web.de', true],
    ['user@t-online.de', true],

    // UK providers
    ['user@btinternet.com', true],
    ['user@virginmedia.com', true],

    // Company domains (not free providers)
    ['info@acme-gmbh.de', false],
    ['contact@example.co.uk', false],
    ['sales@business.com', false],

    // Edge cases
    ['', false],
    ['invalid-email', false],
    [null, false],
    [undefined, false],

    // Case insensitive
    ['USER@GMAIL.COM', true],
    ['User@GMX.DE', true],
  ];

  for (const [input, expected] of cases) {
    // The message names the input because a loop hides which row failed.
    assert.strictEqual(
      isFreeEmailProvider(input),
      expected,
      `isFreeEmailProvider(${JSON.stringify(input)}) should be ${expected}`,
    );
  }
});

// Test company type search
test('searchCompanyTypes: finds company types in HTML', () => {
  const deCountry = getCountryByCode('DE');
  const ukCountry = getCountryByCode('UK');

  // Snippets that must yield a hit containing the given legal form.
  const positiveCases = [
    // German company types
    {
      country: deCountry,
      html: '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>',
      expectedType: 'GmbH',
    },
    {
      country: deCountry,
      html: '<p>Example Firma AG - Registered in Munich</p>',
      expectedType: 'AG',
    },
    // UK company types
    {
      country: ukCountry,
      html: '<footer>Acme Solutions Ltd | Company No. 12345678</footer>',
      expectedType: 'Ltd',
    },
    {
      country: ukCountry,
      html: '<p>Example Business Limited - Registered in England</p>',
      expectedType: 'Limited',
    },
  ];

  for (const { country, html, expectedType } of positiveCases) {
    const result = searchCompanyTypes(html, country);
    assert.strictEqual(result.found, true, `expected a company type in: ${html}`);
    assert.ok(
      result.matches.includes(expectedType),
      `expected matches to include "${expectedType}" for: ${html}`,
    );
  }

  // No company type found
  const noCompanyHtml = '<p>Welcome to our website! Contact us today.</p>';
  const noResult = searchCompanyTypes(noCompanyHtml, deCountry);
  assert.strictEqual(noResult.found, false);
  assert.strictEqual(noResult.matches.length, 0);

  // Case insensitive
  const caseHtml = '<footer>acme solutions gmbh</footer>';
  const caseResult = searchCompanyTypes(caseHtml, deCountry);
  assert.strictEqual(caseResult.found, true);

  // Word boundaries (should not match partial words)
  const partialHtml = '<p>ExampleGmbH without spaces</p>';
  const partialResult = searchCompanyTypes(partialHtml, deCountry);
  assert.strictEqual(partialResult.found, false);
});

// Test company keyword search
test('searchCompanyKeywords: finds registration keywords in HTML', () => {
  const deCountry = getCountryByCode('DE');
  const ukCountry = getCountryByCode('UK');

  // Snippets that must yield a hit containing every listed keyword.
  const positiveCases = [
    // German keywords
    {
      country: deCountry,
      html: '<footer>Handelsregister: HRB 12345 | USt-IdNr: DE123456789</footer>',
      expectedKeywords: ['Handelsregister', 'USt-IdNr'],
    },
    {
      country: deCountry,
      html: '<p>Geschäftsführer: Max Mustermann | Registergericht: München</p>',
      expectedKeywords: ['Geschäftsführer', 'Registergericht'],
    },
    // UK keywords: the HTML uses different capitalisation than the expected match entries.
    {
      country: ukCountry,
      html: '<footer>Company Number: 12345678 | VAT Number: GB123456789</footer>',
      expectedKeywords: ['Company number', 'VAT number'],
    },
  ];

  for (const { country, html, expectedKeywords } of positiveCases) {
    const result = searchCompanyKeywords(html, country);
    assert.strictEqual(result.found, true, `expected registration keywords in: ${html}`);
    for (const keyword of expectedKeywords) {
      assert.ok(
        result.matches.includes(keyword),
        `expected matches to include "${keyword}" for: ${html}`,
      );
    }
  }

  // No keywords found
  const noKeywordHtml = '<p>Welcome to our website! Contact us today.</p>';
  const noResult = searchCompanyKeywords(noKeywordHtml, deCountry);
  assert.strictEqual(noResult.found, false);
  assert.strictEqual(noResult.matches.length, 0);

  // Case insensitive
  const caseHtml = '<footer>handelsregister: hrb 12345</footer>';
  const caseResult = searchCompanyKeywords(caseHtml, deCountry);
  assert.strictEqual(caseResult.found, true);
});

// Test key page names retrieval
test('getKeyPageNames: returns country-specific key pages', () => {
  // GDPR countries have key page names
  const dePages = getKeyPageNames('DE');
  assert.ok(dePages.includes('impressum'));
  assert.ok(dePages.includes('kontakt'));

  const ukPages = getKeyPageNames('UK');
  assert.ok(ukPages.includes('about'));
  assert.ok(ukPages.includes('contact'));

  // Non-GDPR countries return empty array
  const usPages = getKeyPageNames('US');
  assert.ok(Array.isArray(usPages));
  assert.strictEqual(usPages.length, 0);
});

// Test full company email verification
test('verifyCompanyEmail: complete verification flow', () => {
  // Test 1: Non-GDPR country (US) - should pass without verification
  const usResult = verifyCompanyEmail({
    email: 'info@example.com',
    html: '<p>Simple website</p>',
    countryCode: 'US',
    domain: 'example.com',
  });
  assert.strictEqual(usResult.isVerified, true);
  assert.strictEqual(usResult.confidence, 'n/a');
  assert.ok(usResult.reason.includes('not required'));

  // Test 2: GDPR country with free email - should FAIL
  // (the page itself carries company markers, yet the free mailbox is expected to fail)
  const freeEmailResult = verifyCompanyEmail({
    email: 'contact@gmail.com',
    html: '<footer>Acme GmbH | Handelsregister: HRB 12345</footer>',
    countryCode: 'DE',
    domain: 'acme.de',
  });
  assert.strictEqual(freeEmailResult.isVerified, false);
  assert.strictEqual(freeEmailResult.confidence, 'high');
  assert.ok(freeEmailResult.reason.includes('Free email provider'));

  // Test 3: Company type found - should PASS with high confidence
  const companyTypeResult = verifyCompanyEmail({
    email: 'info@acme.de',
    html: '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>',
    countryCode: 'DE',
    domain: 'acme.de',
  });
  assert.strictEqual(companyTypeResult.isVerified, true);
  assert.strictEqual(companyTypeResult.confidence, 'high');
  assert.ok(companyTypeResult.reason.includes('Company type found'));
  assert.ok(companyTypeResult.matches.includes('GmbH'));

  // Test 4: Company keywords found - should PASS with medium confidence
  const keywordResult = verifyCompanyEmail({
    email: 'info@business.de',
    html: '<footer>Handelsregister: HRB 12345 | USt-IdNr: DE123456789</footer>',
    countryCode: 'DE',
    domain: 'business.de',
  });
  assert.strictEqual(keywordResult.isVerified, true);
  assert.strictEqual(keywordResult.confidence, 'medium');
  assert.ok(keywordResult.reason.includes('registration keywords'));

  // Test 5: Unable to verify - should FAIL with uncertain confidence
  const uncertainResult = verifyCompanyEmail({
    email: 'info@mystery.de',
    html: '<p>Welcome to our website! Contact us today.</p>',
    countryCode: 'DE',
    domain: 'mystery.de',
  });
  assert.strictEqual(uncertainResult.isVerified, false);
  assert.strictEqual(uncertainResult.confidence, 'uncertain');
  assert.ok(uncertainResult.reason.includes('Unable to verify'));

  // Test 6: UK company with Ltd
  const ukResult = verifyCompanyEmail({
    email: 'info@acme.co.uk',
    html: '<footer>Acme Solutions Ltd | Company No. 12345678</footer>',
    countryCode: 'UK',
    domain: 'acme.co.uk',
  });
  assert.strictEqual(ukResult.isVerified, true);
  assert.strictEqual(ukResult.confidence, 'high');
  assert.ok(ukResult.matches.includes('Ltd'));
});

// Test batch verification
test('batchVerifyEmails: verifies multiple emails', () => {
  const html = '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>';
  const emails = [
    'info@acme.de', // Company email - should PASS
    'contact@gmail.com', // Free email - should FAIL
    'sales@acme.de', // Company email - should PASS
  ];

  const results = batchVerifyEmails({
    emails,
    html,
    countryCode: 'DE',
    domain: 'acme.de',
  });

  assert.strictEqual(results.length, 3);

  // Results are checked by index, so they are expected in input order.
  // First email: company email - PASS
  assert.strictEqual(results[0].isVerified, true);
  assert.strictEqual(results[0].confidence, 'high');

  // Second email: free email - FAIL
  assert.strictEqual(results[1].isVerified, false);
  assert.strictEqual(results[1].confidence, 'high');
  assert.ok(results[1].reason.includes('Free email provider'));

  // Third email: company email - PASS
  assert.strictEqual(results[2].isVerified, true);
  assert.strictEqual(results[2].confidence, 'high');

  // Edge cases: an empty list and a null list are both expected to give an empty result.
  const emptyResults = batchVerifyEmails({
    emails: [],
    html,
    countryCode: 'DE',
    domain: 'test.de',
  });
  assert.strictEqual(emptyResults.length, 0);

  const nullResults = batchVerifyEmails({
    emails: null,
    html,
    countryCode: 'DE',
    domain: 'test.de',
  });
  assert.strictEqual(nullResults.length, 0);
});