test-social-extraction.js
#!/usr/bin/env node
/**
 * Integration test: run social contact extractor against real social profile URLs
 * from existing contacts data.
 *
 * Flow:
 *   1. Scan the JSON files in data/contacts and collect up to a few sample
 *      profile URLs per platform.
 *   2. Run the extractor on the YouTube samples without a browser.
 *   3. Launch the stealth browser and run the extractor on the remaining
 *      platforms, always closing the browser afterwards.
 *
 * Usage: node scripts/test-social-extraction.js
 */

import '../src/utils/load-env.js';
import { readdirSync, readFileSync } from 'fs';
import { join } from 'path';
import { extractFromSocialProfiles } from '../src/utils/social-contact-extractor.js';
import { launchStealthBrowser } from '../src/utils/stealth-browser.js';

const contactsDir = join(import.meta.dirname, '../data/contacts');

/** Maximum number of sample URLs collected per platform. */
const MAX_SAMPLES_PER_PLATFORM = 3;

/**
 * Ordered matching rules. A URL goes to the first rule whose pattern matches
 * AND whose bucket still has room, so a URL can fall through to a later rule
 * when an earlier bucket is already full.
 */
const PLATFORM_RULES = [
  {
    platform: 'youtube',
    label: 'YouTube',
    matches: (url) => url.includes('youtube.com/channel'),
  },
  {
    platform: 'facebook',
    label: 'Facebook',
    // Personal profiles and groups are excluded from the Facebook samples.
    matches: (url) =>
      url.includes('facebook.com/') && !url.includes('profile.php') && !url.includes('/groups/'),
  },
  {
    platform: 'linkedin',
    label: 'LinkedIn',
    matches: (url) => url.includes('linkedin.com/company'),
  },
  {
    platform: 'yelp',
    label: 'Yelp',
    matches: (url) => url.includes('yelp.com/biz'),
  },
  {
    platform: 'instagram',
    label: 'Instagram',
    // Individual posts and reels are excluded from the Instagram samples.
    matches: (url) =>
      url.includes('instagram.com/') && !url.includes('/p/') && !url.includes('/reel/'),
  },
];

// Collect sample URLs per platform
const samples = { youtube: [], facebook: [], linkedin: [], yelp: [], instagram: [] };

/**
 * Resolve the URL of a `social_profiles` entry, which may be either a plain
 * URL string or an object carrying a `url` property.
 *
 * @param {unknown} entry - Raw entry read from a contacts file.
 * @returns {string|null} The URL, or null when the entry has no usable URL.
 */
function resolveProfileUrl(entry) {
  const url = typeof entry === 'string' ? entry : entry?.url;
  return typeof url === 'string' && url !== '' ? url : null;
}

/**
 * Add one profile entry to the matching sample bucket, if any rule accepts it.
 *
 * @param {unknown} entry - Raw `social_profiles` entry.
 * @param {string} file - Name of the contacts file the entry came from.
 */
function addSample(entry, file) {
  const url = resolveProfileUrl(entry);
  // Skip only the malformed entry instead of abandoning the rest of the file.
  if (url === null) return;

  const rule = PLATFORM_RULES.find(
    (candidate) =>
      candidate.matches(url) && samples[candidate.platform].length < MAX_SAMPLES_PER_PLATFORM,
  );
  if (!rule) return;

  samples[rule.platform].push({ url, label: entry.label || rule.label, siteId: file });
}

/**
 * Run the extractor on a single sample and print the outcome. Failures are
 * reported and swallowed so that one bad URL does not abort the whole run.
 *
 * @param {{url: string, label: string, siteId: string}} sp - Sample profile.
 * @param {object|null} browser - Browser handle passed through to the
 *   extractor, or null for the browser-less pass.
 */
async function testProfile(sp, browser) {
  console.log(`\nTesting: ${sp.url}`);
  try {
    const result = await extractFromSocialProfiles([sp], 'test', browser);
    console.log('Result:', JSON.stringify(result, null, 2));
  } catch (err) {
    console.error(`Error: ${err?.message ?? err}`);
  }
}

const files = readdirSync(contactsDir).filter((f) => f.endsWith('.json'));
for (const file of files) {
  // Stop once we have enough samples
  const allFull = Object.values(samples).every((arr) => arr.length >= MAX_SAMPLES_PER_PLATFORM);
  if (allFull) break;

  try {
    const data = JSON.parse(readFileSync(join(contactsDir, file), 'utf8'));
    const profiles = Array.isArray(data?.social_profiles) ? data.social_profiles : [];
    for (const sp of profiles) {
      addSample(sp, file);
    }
  } catch (err) {
    // Best effort: an unreadable or malformed file is skipped, but visibly.
    console.warn(`Skipping ${file}: ${err?.message ?? err}`);
  }
}

console.log('Collected samples:');
for (const [platform, urls] of Object.entries(samples)) {
  console.log(` ${platform}: ${urls.length} URLs`);
  for (const u of urls) console.log(` ${u.url} (from ${u.siteId})`);
}

// Test YouTube first (no browser needed)
console.log('\n=== YOUTUBE (raw HTTP) ===');
for (const sp of samples.youtube) {
  await testProfile(sp, null);
}

// Launch browser for remaining platforms
console.log('\n=== Launching stealth browser ===');
const browser = await launchStealthBrowser({ headless: true, stealthLevel: 'standard' });

try {
  for (const platform of ['linkedin', 'facebook', 'yelp', 'instagram']) {
    console.log(`\n=== ${platform.toUpperCase()} (Playwright stealth) ===`);
    for (const sp of samples[platform]) {
      await testProfile(sp, browser);
    }
  }
} finally {
  // Always release the browser, even if a platform loop throws unexpectedly.
  await browser.close();
  console.log('\nBrowser closed. Done.');
}