contact-repair.test.js
/**
 * Tests for Contact Repair Module
 *
 * Every collaborator imported by `contact-repair.js` (LLM provider, contact
 * validator, error handler, logger) is replaced through `mock.module()` before
 * the module under test is loaded. The LLM and validator stubs forward to
 * reassignable module-level functions so each test can swap in its own
 * behaviour; `beforeEach` resets them to permissive defaults.
 *
 * NOTE: `mock.module()` is an experimental `node:test` API. Run this file with
 * `node --test --experimental-test-module-mocks`.
 */

import { test, describe, mock, beforeEach } from 'node:test';
import assert from 'node:assert';

// Mock dependencies before importing
let mockCallLLM;
let mockValidatePhone;
let mockValidateEmail;

mock.module('../../src/utils/llm-provider.js', {
  namedExports: {
    callLLM: (...args) => mockCallLLM(...args),
  },
});

mock.module('../../src/utils/contact-validator.js', {
  namedExports: {
    validatePhone: (...args) => mockValidatePhone(...args),
    validateEmail: (...args) => mockValidateEmail(...args),
  },
});

mock.module('../../src/utils/error-handler.js', {
  namedExports: {
    // Stand-in parser: malformed JSON yields null instead of throwing.
    safeJsonParse: str => {
      try {
        return JSON.parse(str);
      } catch {
        return null;
      }
    },
  },
});

mock.module('../../src/utils/logger.js', {
  // Silent logger so test output stays clean.
  defaultExport: class {
    info() {}
    warn() {}
    error() {}
    success() {}
    debug() {}
  },
});

// Must be a dynamic import: a static import would be hoisted above the
// mock.module() calls and load the real dependencies.
const { validateAndRepairContacts } = await import('../../src/utils/contact-repair.js');

/**
 * Builds a `callLLM` stub that resolves with `payload` serialised as JSON.
 *
 * @param {unknown} payload - Value to place (stringified) in `content`.
 * @returns {() => Promise<{ content: string }>} Stub for `mockCallLLM`.
 */
const llmReplyingWith = payload => async () => ({ content: JSON.stringify(payload) });

/**
 * Builds a phone-validator stub that rejects exactly one value.
 *
 * @param {string} badNumber - The only number reported as invalid.
 * @param {string} reason - Reason attached to the rejection.
 * @returns {(number: string) => { valid: boolean, reason?: string }} Stub for `mockValidatePhone`.
 */
const rejectingOnly = (badNumber, reason) => number =>
  number === badNumber ? { valid: false, reason } : { valid: true };

/**
 * Installs a `callLLM` stub that remembers the `messages` of the most recent
 * call and replies with the given raw `content` string.
 *
 * @param {string} content - Raw string returned as the LLM response content.
 * @returns {{ messages: unknown }} Holder whose `messages` stays `undefined`
 *   until the stub has been invoked.
 */
function captureLLMMessages(content) {
  const captured = { messages: undefined };
  mockCallLLM = async opts => {
    captured.messages = opts.messages;
    return { content };
  };
  return captured;
}

/**
 * Extracts the multi-part content of the second message sent to the LLM,
 * failing with a readable assertion (rather than a TypeError) when the LLM was
 * never called or the content is not an array of parts.
 *
 * NOTE(review): index 1 is assumed to be the user message — confirm against
 * the prompt layout in contact-repair.js.
 *
 * @param {{ messages: unknown }} captured - Holder from `captureLLMMessages`.
 * @returns {Array<object>} The content parts of `messages[1]`.
 */
function userContentOf(captured) {
  assert.ok(captured.messages, 'Expected the repair step to call the LLM');
  const userContent = captured.messages[1].content;
  assert.ok(Array.isArray(userContent));
  return userContent;
}

describe('Contact Repair', () => {
  beforeEach(() => {
    // Default: all contacts valid
    mockValidatePhone = () => ({ valid: true });
    mockValidateEmail = () => ({ valid: true });
    mockCallLLM = async () => ({ content: '{}' });
  });

  describe('classifySource (internal, tested via behavior)', () => {
    test('handles image source contacts by calling LLM with image data', async () => {
      mockValidatePhone = () => ({
        valid: false,
        reason: 'Invalid format',
      });

      const captured = captureLLMMessages(
        JSON.stringify({
          phone_numbers: [{ number: '+61412345678', source: 'above_fold.jpg' }],
        })
      );

      const contacts = {
        phone_numbers: [{ number: 'bad-number', source: 'above_fold.jpg' }],
      };

      await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '',
        screenshots: { 'above_fold.jpg': 'base64data' },
      });

      // Should have sent the image in the LLM call
      const userContent = userContentOf(captured);
      const hasImage = userContent.some(c => c.type === 'image_url');
      assert.ok(hasImage, 'Should include image in repair call');
    });

    test('handles xpath source contacts by including HTML context', async () => {
      mockValidatePhone = () => ({ valid: false, reason: 'Invalid' });

      const captured = captureLLMMessages('{}');

      const contacts = {
        phone_numbers: [{ number: 'bad', source: '//div[@class="phone"]/text()' }],
      };

      await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '<div class="phone">0412345678</div>',
        screenshots: {},
      });

      const userContent = userContentOf(captured);
      const htmlPart = userContent.find(c => c.type === 'text' && c.text.includes('website_html'));
      assert.ok(htmlPart, 'Should include HTML context for xpath sources');
      const xpathPart = userContent.find(c => c.type === 'text' && c.text.includes('XPaths'));
      assert.ok(xpathPart, 'Should list XPaths to check');
    });
  });

  describe('validateAndRepairContacts', () => {
    test('returns contacts unchanged when all valid', async () => {
      const contacts = {
        phone_numbers: [{ number: '+61412345678' }],
        email_addresses: [{ email: 'test@example.com' }],
      };

      const result = await validateAndRepairContacts(contacts, { countryCode: 'AU' });
      assert.deepStrictEqual(result, contacts);
    });

    test('returns null/undefined input unchanged', async () => {
      assert.strictEqual(await validateAndRepairContacts(null), null);
      assert.strictEqual(await validateAndRepairContacts(undefined), undefined);
    });

    test('returns non-object input unchanged', async () => {
      assert.strictEqual(await validateAndRepairContacts('string'), 'string');
    });

    test('repairs invalid phone numbers via LLM', async () => {
      mockValidatePhone = rejectingOnly('bad-number', 'Invalid format');
      mockCallLLM = llmReplyingWith({
        phone_numbers: [{ number: '+61412345678' }],
      });

      const contacts = {
        phone_numbers: [
          { number: '+61400000001' }, // valid
          { number: 'bad-number' }, // invalid → repair
        ],
      };

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '<html></html>',
        screenshots: {},
      });

      // Should keep valid number and add repaired one
      assert.ok(result.phone_numbers.some(p => p.number === '+61400000001'));
      assert.ok(result.phone_numbers.some(p => p.number === '+61412345678'));
      // Invalid number should be removed
      assert.ok(!result.phone_numbers.some(p => p.number === 'bad-number'));
    });

    test('repairs invalid email addresses via LLM', async () => {
      const contacts = {
        email_addresses: [
          { email: 'valid@example.com' },
          { email: 'not-an-email' }, // invalid
        ],
      };

      mockCallLLM = llmReplyingWith({
        email_addresses: [{ email: 'fixed@example.com' }],
      });

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '<html></html>',
        screenshots: {},
      });

      assert.ok(result.email_addresses.some(e => e.email === 'valid@example.com'));
      assert.ok(result.email_addresses.some(e => e.email === 'fixed@example.com'));
      assert.ok(!result.email_addresses.some(e => e.email === 'not-an-email'));
    });

    test('drops invalid contacts when LLM repair fails', async () => {
      mockValidatePhone = () => ({ valid: false, reason: 'Invalid' });
      mockCallLLM = async () => {
        throw new Error('LLM unavailable');
      };

      const contacts = {
        phone_numbers: [{ number: 'invalid-phone' }],
        email_addresses: [],
      };

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '<html></html>',
        screenshots: {},
      });

      // Invalid phone should be dropped (not kept)
      assert.strictEqual(result.phone_numbers.length, 0);
    });

    test('drops invalid emails when LLM repair returns null', async () => {
      mockCallLLM = async () => ({ content: 'null' });

      const contacts = {
        email_addresses: [{ email: '' }], // empty email = invalid
      };

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
      });

      assert.strictEqual(result.email_addresses.length, 0);
    });

    test('handles contacts with missing phone_numbers array', async () => {
      const contacts = { email_addresses: [{ email: 'test@example.com' }] };
      const result = await validateAndRepairContacts(contacts, { countryCode: 'AU' });
      assert.deepStrictEqual(result, contacts);
    });

    test('handles contacts with missing email_addresses array', async () => {
      const contacts = { phone_numbers: [{ number: '+61412345678' }] };
      const result = await validateAndRepairContacts(contacts, { countryCode: 'AU' });
      assert.deepStrictEqual(result, contacts);
    });

    test('validates email format with regex', async () => {
      // mockValidateEmail is deliberately left at its permissive default, so
      // these addresses can only be rejected by the module's own format check.
      // NOTE(review): presumably a regex inside contact-repair.js — confirm.
      const contacts = {
        email_addresses: [
          { email: 'missing-at-sign.com' },
          { email: '@no-local-part.com' },
          { email: 'test@.com' }, // dot-only domain
        ],
      };

      // LLM returns null → safeJsonParse returns null → triggers drop path (else branch)
      mockCallLLM = async () => ({ content: 'null' });

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '<html></html>',
        screenshots: {},
      });

      // All invalid emails should be dropped (LLM returned null, no repair)
      assert.strictEqual(result.email_addresses.length, 0);
    });

    test('handles mixed valid and invalid contacts', async () => {
      mockValidatePhone = rejectingOnly('bad', 'Invalid');
      mockCallLLM = llmReplyingWith({
        phone_numbers: [{ number: '+61412999999' }],
      });

      const contacts = {
        phone_numbers: [
          { number: '+61412345678' }, // valid
          { number: 'bad' }, // invalid → repaired
        ],
        email_addresses: [{ email: 'ok@example.com' }],
      };

      const result = await validateAndRepairContacts(contacts, {
        countryCode: 'AU',
        html: '',
        screenshots: {},
      });

      assert.strictEqual(result.phone_numbers.length, 2);
      assert.ok(result.phone_numbers.some(p => p.number === '+61412345678'));
      assert.ok(result.phone_numbers.some(p => p.number === '+61412999999'));
      assert.strictEqual(result.email_addresses.length, 1);
    });
  });
});