/ tests / pipeline / gdpr-verification.test.js
gdpr-verification.test.js
  1  /**
  2   * GDPR Verification Unit Tests
  3   */
  4  
  5  import { test } from 'node:test';
  6  import assert from 'node:assert';
  7  import {
  8    isFreeEmailProvider,
  9    searchCompanyTypes,
 10    searchCompanyKeywords,
 11    getKeyPageNames,
 12    verifyCompanyEmail,
 13    batchVerifyEmails,
 14  } from '../../src/utils/gdpr-verification.js';
 15  import { getCountryByCode } from '../../src/config/countries.js';
 16  
 17  // Test free email provider detection
 18  test('isFreeEmailProvider: correctly identifies free email providers', () => {
 19    // International providers
 20    assert.strictEqual(isFreeEmailProvider('user@gmail.com'), true);
 21    assert.strictEqual(isFreeEmailProvider('user@yahoo.com'), true);
 22    assert.strictEqual(isFreeEmailProvider('user@outlook.com'), true);
 23    assert.strictEqual(isFreeEmailProvider('user@hotmail.com'), true);
 24  
 25    // German providers
 26    assert.strictEqual(isFreeEmailProvider('user@gmx.de'), true);
 27    assert.strictEqual(isFreeEmailProvider('user@web.de'), true);
 28    assert.strictEqual(isFreeEmailProvider('user@t-online.de'), true);
 29  
 30    // UK providers
 31    assert.strictEqual(isFreeEmailProvider('user@btinternet.com'), true);
 32    assert.strictEqual(isFreeEmailProvider('user@virginmedia.com'), true);
 33  
 34    // Company domains (not free providers)
 35    assert.strictEqual(isFreeEmailProvider('info@acme-gmbh.de'), false);
 36    assert.strictEqual(isFreeEmailProvider('contact@example.co.uk'), false);
 37    assert.strictEqual(isFreeEmailProvider('sales@business.com'), false);
 38  
 39    // Edge cases
 40    assert.strictEqual(isFreeEmailProvider(''), false);
 41    assert.strictEqual(isFreeEmailProvider('invalid-email'), false);
 42    assert.strictEqual(isFreeEmailProvider(null), false);
 43    assert.strictEqual(isFreeEmailProvider(undefined), false);
 44  
 45    // Case insensitive
 46    assert.strictEqual(isFreeEmailProvider('USER@GMAIL.COM'), true);
 47    assert.strictEqual(isFreeEmailProvider('User@GMX.DE'), true);
 48  });
 49  
 50  // Test company type search
 51  test('searchCompanyTypes: finds company types in HTML', () => {
 52    const deCountry = getCountryByCode('DE');
 53    const ukCountry = getCountryByCode('UK');
 54  
 55    // German company types
 56    const deHtml1 = '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>';
 57    const deResult1 = searchCompanyTypes(deHtml1, deCountry);
 58    assert.strictEqual(deResult1.found, true);
 59    assert.ok(deResult1.matches.includes('GmbH'));
 60  
 61    const deHtml2 = '<p>Example Firma AG - Registered in Munich</p>';
 62    const deResult2 = searchCompanyTypes(deHtml2, deCountry);
 63    assert.strictEqual(deResult2.found, true);
 64    assert.ok(deResult2.matches.includes('AG'));
 65  
 66    // UK company types
 67    const ukHtml1 = '<footer>Acme Solutions Ltd | Company No. 12345678</footer>';
 68    const ukResult1 = searchCompanyTypes(ukHtml1, ukCountry);
 69    assert.strictEqual(ukResult1.found, true);
 70    assert.ok(ukResult1.matches.includes('Ltd'));
 71  
 72    const ukHtml2 = '<p>Example Business Limited - Registered in England</p>';
 73    const ukResult2 = searchCompanyTypes(ukHtml2, ukCountry);
 74    assert.strictEqual(ukResult2.found, true);
 75    assert.ok(ukResult2.matches.includes('Limited'));
 76  
 77    // No company type found
 78    const noCompanyHtml = '<p>Welcome to our website! Contact us today.</p>';
 79    const noResult = searchCompanyTypes(noCompanyHtml, deCountry);
 80    assert.strictEqual(noResult.found, false);
 81    assert.strictEqual(noResult.matches.length, 0);
 82  
 83    // Case insensitive
 84    const caseHtml = '<footer>acme solutions gmbh</footer>';
 85    const caseResult = searchCompanyTypes(caseHtml, deCountry);
 86    assert.strictEqual(caseResult.found, true);
 87  
 88    // Word boundaries (should not match partial words)
 89    const partialHtml = '<p>ExampleGmbH without spaces</p>';
 90    const partialResult = searchCompanyTypes(partialHtml, deCountry);
 91    assert.strictEqual(partialResult.found, false);
 92  });
 93  
 94  // Test company keyword search
 95  test('searchCompanyKeywords: finds registration keywords in HTML', () => {
 96    const deCountry = getCountryByCode('DE');
 97    const ukCountry = getCountryByCode('UK');
 98  
 99    // German keywords
100    const deHtml1 = '<footer>Handelsregister: HRB 12345 | USt-IdNr: DE123456789</footer>';
101    const deResult1 = searchCompanyKeywords(deHtml1, deCountry);
102    assert.strictEqual(deResult1.found, true);
103    assert.ok(deResult1.matches.includes('Handelsregister'));
104    assert.ok(deResult1.matches.includes('USt-IdNr'));
105  
106    const deHtml2 = '<p>Geschäftsführer: Max Mustermann | Registergericht: München</p>';
107    const deResult2 = searchCompanyKeywords(deHtml2, deCountry);
108    assert.strictEqual(deResult2.found, true);
109    assert.ok(deResult2.matches.includes('Geschäftsführer'));
110    assert.ok(deResult2.matches.includes('Registergericht'));
111  
112    // UK keywords
113    const ukHtml1 = '<footer>Company Number: 12345678 | VAT Number: GB123456789</footer>';
114    const ukResult1 = searchCompanyKeywords(ukHtml1, ukCountry);
115    assert.strictEqual(ukResult1.found, true);
116    assert.ok(ukResult1.matches.includes('Company number'));
117    assert.ok(ukResult1.matches.includes('VAT number'));
118  
119    // No keywords found
120    const noKeywordHtml = '<p>Welcome to our website! Contact us today.</p>';
121    const noResult = searchCompanyKeywords(noKeywordHtml, deCountry);
122    assert.strictEqual(noResult.found, false);
123    assert.strictEqual(noResult.matches.length, 0);
124  
125    // Case insensitive
126    const caseHtml = '<footer>handelsregister: hrb 12345</footer>';
127    const caseResult = searchCompanyKeywords(caseHtml, deCountry);
128    assert.strictEqual(caseResult.found, true);
129  });
130  
131  // Test key page names retrieval
132  test('getKeyPageNames: returns country-specific key pages', () => {
133    // GDPR countries have key page names
134    const dePages = getKeyPageNames('DE');
135    assert.ok(dePages.includes('impressum'));
136    assert.ok(dePages.includes('kontakt'));
137  
138    const ukPages = getKeyPageNames('UK');
139    assert.ok(ukPages.includes('about'));
140    assert.ok(ukPages.includes('contact'));
141  
142    // Non-GDPR countries return empty array
143    const usPages = getKeyPageNames('US');
144    assert.ok(Array.isArray(usPages));
145    assert.strictEqual(usPages.length, 0);
146  });
147  
148  // Test full company email verification
149  test('verifyCompanyEmail: complete verification flow', () => {
150    // Test 1: Non-GDPR country (US) - should pass without verification
151    const usResult = verifyCompanyEmail({
152      email: 'info@example.com',
153      html: '<p>Simple website</p>',
154      countryCode: 'US',
155      domain: 'example.com',
156    });
157    assert.strictEqual(usResult.isVerified, true);
158    assert.strictEqual(usResult.confidence, 'n/a');
159    assert.ok(usResult.reason.includes('not required'));
160  
161    // Test 2: GDPR country with free email - should FAIL
162    const freeEmailResult = verifyCompanyEmail({
163      email: 'contact@gmail.com',
164      html: '<footer>Acme GmbH | Handelsregister: HRB 12345</footer>',
165      countryCode: 'DE',
166      domain: 'acme.de',
167    });
168    assert.strictEqual(freeEmailResult.isVerified, false);
169    assert.strictEqual(freeEmailResult.confidence, 'high');
170    assert.ok(freeEmailResult.reason.includes('Free email provider'));
171  
172    // Test 3: Company type found - should PASS with high confidence
173    const companyTypeResult = verifyCompanyEmail({
174      email: 'info@acme.de',
175      html: '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>',
176      countryCode: 'DE',
177      domain: 'acme.de',
178    });
179    assert.strictEqual(companyTypeResult.isVerified, true);
180    assert.strictEqual(companyTypeResult.confidence, 'high');
181    assert.ok(companyTypeResult.reason.includes('Company type found'));
182    assert.ok(companyTypeResult.matches.includes('GmbH'));
183  
184    // Test 4: Company keywords found - should PASS with medium confidence
185    const keywordResult = verifyCompanyEmail({
186      email: 'info@business.de',
187      html: '<footer>Handelsregister: HRB 12345 | USt-IdNr: DE123456789</footer>',
188      countryCode: 'DE',
189      domain: 'business.de',
190    });
191    assert.strictEqual(keywordResult.isVerified, true);
192    assert.strictEqual(keywordResult.confidence, 'medium');
193    assert.ok(keywordResult.reason.includes('registration keywords'));
194  
195    // Test 5: Unable to verify - should FAIL with uncertain confidence
196    const uncertainResult = verifyCompanyEmail({
197      email: 'info@mystery.de',
198      html: '<p>Welcome to our website! Contact us today.</p>',
199      countryCode: 'DE',
200      domain: 'mystery.de',
201    });
202    assert.strictEqual(uncertainResult.isVerified, false);
203    assert.strictEqual(uncertainResult.confidence, 'uncertain');
204    assert.ok(uncertainResult.reason.includes('Unable to verify'));
205  
206    // Test 6: UK company with Ltd
207    const ukResult = verifyCompanyEmail({
208      email: 'info@acme.co.uk',
209      html: '<footer>Acme Solutions Ltd | Company No. 12345678</footer>',
210      countryCode: 'UK',
211      domain: 'acme.co.uk',
212    });
213    assert.strictEqual(ukResult.isVerified, true);
214    assert.strictEqual(ukResult.confidence, 'high');
215    assert.ok(ukResult.matches.includes('Ltd'));
216  });
217  
218  // Test batch verification
219  test('batchVerifyEmails: verifies multiple emails', () => {
220    const html = '<footer>Acme Solutions GmbH | Handelsregister: HRB 12345</footer>';
221    const emails = [
222      'info@acme.de', // Company email - should PASS
223      'contact@gmail.com', // Free email - should FAIL
224      'sales@acme.de', // Company email - should PASS
225    ];
226  
227    const results = batchVerifyEmails({
228      emails,
229      html,
230      countryCode: 'DE',
231      domain: 'acme.de',
232    });
233  
234    assert.strictEqual(results.length, 3);
235  
236    // First email: company email - PASS
237    assert.strictEqual(results[0].isVerified, true);
238    assert.strictEqual(results[0].confidence, 'high');
239  
240    // Second email: free email - FAIL
241    assert.strictEqual(results[1].isVerified, false);
242    assert.strictEqual(results[1].confidence, 'high');
243    assert.ok(results[1].reason.includes('Free email provider'));
244  
245    // Third email: company email - PASS
246    assert.strictEqual(results[2].isVerified, true);
247    assert.strictEqual(results[2].confidence, 'high');
248  
249    // Edge cases
250    const emptyResults = batchVerifyEmails({
251      emails: [],
252      html,
253      countryCode: 'DE',
254      domain: 'test.de',
255    });
256    assert.strictEqual(emptyResults.length, 0);
257  
258    const nullResults = batchVerifyEmails({
259      emails: null,
260      html,
261      countryCode: 'DE',
262      domain: 'test.de',
263    });
264    assert.strictEqual(nullResults.length, 0);
265  });