// tests/utils/gdpr-verification.test.js
/**
 * GDPR Verification Tests
 *
 * Tests for isFreeEmailProvider(), searchCompanyTypes(), searchCompanyKeywords(),
 * verifyCompanyEmail(), and batchVerifyEmails().
 * The functions under test are pure; getCountryByCode is supplied to them
 * through a module-level mock of the countries config.
 */
  8  
  9  import { test, describe, mock, beforeEach } from 'node:test';
 10  import assert from 'node:assert/strict';
 11  
 12  // ─── Mock countries config ─────────────────────────────────────────────────────
 13  
 14  mock.module('../../src/config/countries.js', {
 15    namedExports: {
 16      getCountryByCode: code => {
 17        const countries = {
 18          DE: {
 19            requiresGDPRCheck: true,
 20            companyTypes: ['GmbH', 'AG', 'e.V.', 'OHG'],
 21            companyKeywords: ['Handelsregister', 'USt-IdNr', 'Steuernummer'],
 22            keyPageNames: ['Impressum', 'Datenschutz'],
 23          },
 24          US: {
 25            requiresGDPRCheck: false,
 26            companyTypes: ['Inc', 'LLC', 'Corp'],
 27            companyKeywords: ['EIN', 'Federal Tax ID'],
 28            keyPageNames: ['About', 'Contact'],
 29          },
 30          UK: {
 31            requiresGDPRCheck: true,
 32            companyTypes: ['Ltd', 'PLC', 'LLP'],
 33            companyKeywords: ['Company No', 'Registered in England'],
 34            keyPageNames: ['About', 'Contact'],
 35          },
 36        };
 37        return (
 38          countries[code] || {
 39            requiresGDPRCheck: false,
 40            companyTypes: [],
 41            companyKeywords: [],
 42            keyPageNames: [],
 43          }
 44        );
 45      },
 46    },
 47  });
 48  
 49  const {
 50    isFreeEmailProvider,
 51    searchCompanyTypes,
 52    searchCompanyKeywords,
 53    verifyCompanyEmail,
 54    batchVerifyEmails,
 55  } = await import('../../src/utils/gdpr-verification.js');
 56  
 57  // ─── isFreeEmailProvider ──────────────────────────────────────────────────────
 58  
 59  describe('isFreeEmailProvider', () => {
 60    test('returns true for gmail.com', () => {
 61      assert.equal(isFreeEmailProvider('user@gmail.com'), true);
 62    });
 63  
 64    test('returns true for yahoo.com', () => {
 65      assert.equal(isFreeEmailProvider('user@yahoo.com'), true);
 66    });
 67  
 68    test('returns true for outlook.com', () => {
 69      assert.equal(isFreeEmailProvider('user@outlook.com'), true);
 70    });
 71  
 72    test('returns true for German free providers', () => {
 73      assert.equal(isFreeEmailProvider('user@gmx.de'), true);
 74      assert.equal(isFreeEmailProvider('user@web.de'), true);
 75      assert.equal(isFreeEmailProvider('user@t-online.de'), true);
 76    });
 77  
 78    test('returns true for French free providers', () => {
 79      assert.equal(isFreeEmailProvider('user@laposte.net'), true);
 80      assert.equal(isFreeEmailProvider('user@orange.fr'), true);
 81    });
 82  
 83    test('returns true for UK free providers', () => {
 84      assert.equal(isFreeEmailProvider('user@btinternet.com'), true);
 85      assert.equal(isFreeEmailProvider('user@virginmedia.com'), true);
 86    });
 87  
 88    test('returns false for company domain email', () => {
 89      assert.equal(isFreeEmailProvider('john@example.com'), false);
 90      assert.equal(isFreeEmailProvider('info@acme-gmbh.de'), false);
 91      assert.equal(isFreeEmailProvider('hello@mybusiness.co.uk'), false);
 92    });
 93  
 94    test('returns false for null/undefined/empty', () => {
 95      assert.equal(isFreeEmailProvider(null), false);
 96      assert.equal(isFreeEmailProvider(undefined), false);
 97      assert.equal(isFreeEmailProvider(''), false);
 98      assert.equal(isFreeEmailProvider(42), false);
 99    });
100  
101    test('returns false for email without domain', () => {
102      assert.equal(isFreeEmailProvider('no-at-sign'), false);
103      assert.equal(isFreeEmailProvider('@'), false);
104    });
105  
106    test('case-insensitive domain matching', () => {
107      assert.equal(isFreeEmailProvider('user@GMAIL.COM'), true);
108      assert.equal(isFreeEmailProvider('user@Gmail.Com'), true);
109    });
110  });
111  
112  // ─── searchCompanyTypes ────────────────────────────────────────────────────────
113  
describe('searchCompanyTypes', () => {
  // Country config carrying only the companyTypes list used by these tests.
  const germanConfig = { companyTypes: ['GmbH', 'AG', 'e.V.', 'OHG'] };

  /** Runs searchCompanyTypes against the German fixture config. */
  const scan = html => searchCompanyTypes(html, germanConfig);

  test('finds GmbH in HTML', () => {
    const { found, matches } = scan('<p>Example GmbH</p>');
    assert.equal(found, true);
    assert.ok(matches.includes('GmbH'));
  });

  test('finds multiple company types', () => {
    const { found, matches } = scan('<p>Test AG und Example OHG</p>');
    assert.equal(found, true);
    assert.ok(matches.length >= 2);
  });

  test('returns false when no company type found', () => {
    const { found, matches } = scan('<p>Just a regular website</p>');
    assert.equal(found, false);
    assert.equal(matches.length, 0);
  });

  test('returns false for null/empty HTML', () => {
    for (const html of [null, '']) {
      assert.equal(scan(html).found, false);
    }
  });

  test('returns false when country has no companyTypes', () => {
    const configWithoutTypes = { companyTypes: null };
    const { found } = searchCompanyTypes('<p>Some GmbH company</p>', configWithoutTypes);
    assert.equal(found, false);
  });

  test('uses word boundaries (no partial matches)', () => {
    // "GmbH" immediately followed by more letters must not count as a hit.
    const { found } = scan('<p>GmbHsomething</p>');
    assert.equal(found, false);
  });

  test('case-insensitive matching', () => {
    const { found } = scan('<p>example gmbh</p>');
    assert.equal(found, true);
  });
});
158  
159  // ─── searchCompanyKeywords ─────────────────────────────────────────────────────
160  
describe('searchCompanyKeywords', () => {
  // Country config carrying only the companyKeywords list used by these tests.
  const germanConfig = { companyKeywords: ['Handelsregister', 'USt-IdNr', 'Steuernummer'] };

  /** Runs searchCompanyKeywords against the German fixture config. */
  const scan = html => searchCompanyKeywords(html, germanConfig);

  test('finds Handelsregister in HTML', () => {
    const { found, matches } = scan('<p>Handelsregister: HRB 12345</p>');
    assert.equal(found, true);
    assert.ok(matches.includes('Handelsregister'));
  });

  test('returns false when no keyword found', () => {
    const { found } = scan('<p>Normal website</p>');
    assert.equal(found, false);
  });

  test('returns false for null/empty HTML', () => {
    for (const html of [null, '']) {
      assert.equal(scan(html).found, false);
    }
  });

  test('returns false when country has no companyKeywords', () => {
    const configWithoutKeywords = { companyKeywords: null };
    const { found } = searchCompanyKeywords('<p>Handelsregister</p>', configWithoutKeywords);
    assert.equal(found, false);
  });

  test('case-insensitive matching', () => {
    const { found } = scan('<p>HANDELSREGISTER: HRB 999</p>');
    assert.equal(found, true);
  });

  test('finds USt-IdNr in HTML', () => {
    const { found, matches } = scan('<p>USt-IdNr DE123456789</p>');
    assert.equal(found, true);
    assert.ok(matches.includes('USt-IdNr'));
  });
});
198  
199  // ─── verifyCompanyEmail ────────────────────────────────────────────────────────
200  
describe('verifyCompanyEmail', () => {
  test('passes immediately when GDPR not required (US)', () => {
    // A free-provider address that would be rejected if the check applied.
    const input = {
      email: 'user@gmail.com',
      html: '<p>no company info</p>',
      countryCode: 'US',
      domain: 'example.com',
    };
    const { isVerified, confidence, reason } = verifyCompanyEmail(input);
    assert.equal(isVerified, true);
    assert.equal(confidence, 'n/a');
    assert.ok(reason.includes('not required'));
  });

  test('fails for free email provider in GDPR country', () => {
    const input = {
      email: 'user@gmail.com',
      html: '<p>Example GmbH</p>',
      countryCode: 'DE',
      domain: 'example.de',
    };
    const { isVerified, confidence, reason } = verifyCompanyEmail(input);
    assert.equal(isVerified, false);
    assert.equal(confidence, 'high');
    assert.ok(reason.includes('Free email'));
  });

  test('passes with high confidence when company type found', () => {
    const input = {
      email: 'info@example-gmbh.de',
      html: '<p>Example GmbH, registered in Germany</p>',
      countryCode: 'DE',
      domain: 'example-gmbh.de',
    };
    const { isVerified, confidence, reason } = verifyCompanyEmail(input);
    assert.equal(isVerified, true);
    assert.equal(confidence, 'high');
    assert.ok(reason.includes('GmbH'));
  });

  test('passes with medium confidence when keyword found (no company type)', () => {
    const input = {
      email: 'info@example.de',
      html: '<p>Handelsregister: HRB 12345, contact us</p>',
      countryCode: 'DE',
      domain: 'example.de',
    };
    const { isVerified, confidence, reason } = verifyCompanyEmail(input);
    assert.equal(isVerified, true);
    assert.equal(confidence, 'medium');
    assert.ok(reason.includes('Handelsregister'));
  });

  test('returns uncertain when unable to verify', () => {
    // The page carries neither a company type nor a company keyword.
    const input = {
      email: 'info@example.de',
      html: '<p>Welcome to our website</p>',
      countryCode: 'DE',
      domain: 'example.de',
    };
    const { isVerified, confidence } = verifyCompanyEmail(input);
    assert.equal(isVerified, false);
    assert.equal(confidence, 'uncertain');
  });

  test('passes for UK Ltd company', () => {
    const input = {
      email: 'info@company.co.uk',
      html: '<p>Company Ltd, Company No 12345678</p>',
      countryCode: 'UK',
      domain: 'company.co.uk',
    };
    const { isVerified } = verifyCompanyEmail(input);
    assert.equal(isVerified, true);
  });
});
271  
272  // ─── batchVerifyEmails ────────────────────────────────────────────────────────
273  
describe('batchVerifyEmails', () => {
  test('returns empty array for null/non-array emails', () => {
    // Every non-array `emails` value is expected to yield an empty result list.
    for (const emails of [null, undefined, 'not-array']) {
      assert.deepEqual(batchVerifyEmails({ emails, html: '', countryCode: 'DE' }), []);
    }
  });

  test('returns one result per email', () => {
    const request = {
      emails: ['a@gmail.com', 'b@company.de'],
      html: '<p>Example GmbH</p>',
      countryCode: 'DE',
      domain: 'test.de',
    };
    const outcomes = batchVerifyEmails(request);
    assert.equal(outcomes.length, 2);
  });

  test('first email (free provider) fails, second (company domain) passes via company type', () => {
    const request = {
      emails: ['user@gmail.com', 'info@example.de'],
      html: '<p>Example GmbH</p>',
      countryCode: 'DE',
      domain: 'example.de',
    };
    const outcomes = batchVerifyEmails(request);
    const freeProviderOutcome = outcomes[0];
    const companyOutcome = outcomes[1];
    assert.equal(freeProviderOutcome.isVerified, false);
    assert.equal(companyOutcome.isVerified, true);
  });

  test('returns empty array for empty emails array', () => {
    const request = { emails: [], html: '<p>test</p>', countryCode: 'US' };
    assert.deepEqual(batchVerifyEmails(request), []);
  });
});
304    });
305  });