/**
 * stealth-browser.js
 *
 * Stealth Browser Utility
 * Centralized bot-detection avoidance for Playwright
 */

import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import UserAgent from 'user-agents';
import Logger from './logger.js';
import {
  existsSync,
  mkdirSync,
  readFileSync,
  writeFileSync,
  readdirSync,
  symlinkSync,
  rmSync,
} from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { execSync } from 'child_process';

const __dirname = dirname(fileURLToPath(import.meta.url));

chromium.use(stealth());

const logger = new Logger('StealthBrowser');

/**
 * Chromium switches shared by every launcher in this module.
 * Feature toggles are NOT listed here: Chromium keeps only the last occurrence
 * of a given switch, so `--disable-features` must be emitted exactly once per
 * launch (see disableFeaturesArg).
 */
const SHARED_CHROMIUM_ARGS = Object.freeze([
  '--disable-blink-features=AutomationControlled',
  '--disable-dev-shm-usage',
  '--disable-setuid-sandbox',
  '--disable-web-security',
  '--no-first-run',
  '--start-maximized',
  // Resource limits to prevent system freeze
  '--disable-gpu',
  '--disable-software-rasterizer',
  '--disable-background-networking',
  '--disable-background-timer-throttling',
  '--disable-backgrounding-occluded-windows',
  '--disable-breakpad',
  '--disable-ipc-flooding-protection',
  '--disable-renderer-backgrounding',
  '--js-flags=--max-old-space-size=256',
]);

/** Blink/Chromium features disabled by every launcher. */
const BASE_DISABLED_FEATURES = Object.freeze([
  'IsolateOrigins',
  'site-per-process',
  'BlockInsecurePrivateNetworkRequests',
]);

/**
 * Build the single `--disable-features` switch for a launch.
 * @param {string[]} features - Feature names to disable
 * @returns {string} Command-line switch
 */
function disableFeaturesArg(features) {
  return `--disable-features=${features.join(',')}`;
}

/**
 * Resolve locale-related defaults from the environment.
 * @returns {{acceptLanguage: string, locale: string, timezoneId: string}}
 */
function resolveLocaleDefaults() {
  const acceptLanguage = process.env.ACCEPT_LANGUAGE || 'en-AU,en;q=0.9';
  return {
    acceptLanguage,
    // Extract locale from ACCEPT_LANGUAGE (e.g., "en-AU,en;q=0.9" -> "en-AU")
    locale: acceptLanguage.split(',')[0],
    timezoneId: process.env.TIMEZONE || 'Australia/Sydney',
  };
}

/**
 * Temp directory holding this process's key-injected copy of the NopeCHA extension.
 * @returns {string} Absolute path (unique per PID)
 */
function nopechaTempDir() {
  return `/tmp/nopecha-ext-${process.pid}`;
}

/**
 * Read and parse a JSON file.
 * @param {string} filePath - File to read
 * @returns {*} Parsed JSON value
 * @throws {Error} If the file cannot be read or is not valid JSON
 */
function readJson(filePath) {
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  return JSON.parse(readFileSync(filePath, 'utf-8'));
}

/**
 * Serialize a value as pretty-printed JSON and write it to disk.
 * @param {string} filePath - File to write
 * @param {*} data - JSON-serializable value
 */
function writeJson(filePath, data) {
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  writeFileSync(filePath, JSON.stringify(data, null, 2));
}

/**
 * Auto-detect chromium path on NixOS
 * @returns {string|null} Path to chromium executable or null
 */
function detectChromiumPath() {
  try {
    // stderr is ignored so a missing binary does not leak `which` noise to the parent's stderr
    const chromiumPath = execSync('which chromium', {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    // eslint-disable-next-line security/detect-non-literal-fs-filename -- Safe: path from `which` command
    if (chromiumPath && existsSync(chromiumPath)) {
      logger.debug(`Auto-detected chromium at: ${chromiumPath}`);
      return chromiumPath;
    }
  } catch {
    logger.debug('Chromium not found in PATH, using Playwright bundled browser');
  }
  return null;
}

/**
 * Generate a random modern user agent using user-agents package
 * @returns {string} User agent string
 */
function getRandomUserAgent() {
  const userAgent = new UserAgent({ deviceCategory: 'desktop' });
  return userAgent.toString();
}

/**
 * Launch Chromium through playwright-extra with the stealth plugin enabled.
 * @param {Object} [options]
 * @param {boolean} [options.headless=true]
 * @param {number} [options.slowMo=0]
 * @param {string} [options.stealthLevel='minimal'] - Currently only logged
 * @param {boolean} [options.devtools=false]
 * @returns {Promise<Browser>} Launched browser
 */
export async function launchStealthBrowser(options = {}) {
  const {
    headless = true,
    slowMo = 0,
    stealthLevel = 'minimal', // Default to minimal (fastest, least intrusive)
    devtools = false,
  } = options;

  const launchOptions = {
    headless,
    slowMo,
    devtools,
    args: [
      ...SHARED_CHROMIUM_ARGS,
      // One merged switch: separate --disable-features flags override each other
      disableFeaturesArg([...BASE_DISABLED_FEATURES, 'TranslateUI', 'BlinkGenPropertyTrees']),
      '--disable-component-extensions-with-background-pages',
      '--disable-extensions',
      '--renderer-process-limit=1',
    ],
  };

  // Use explicit path from env, or auto-detect on NixOS
  const browserPath = process.env.CHROMIUM_PATH || detectChromiumPath();
  if (browserPath) {
    logger.info(`Using chromium at: ${browserPath}`);
    launchOptions.executablePath = browserPath;
  }

  logger.info(`Launching browser with stealth level: ${stealthLevel}`);
  const browser = await chromium.launch(launchOptions);
  return browser;
}

/**
 * Create a fresh context with a random desktop user agent.
 * @param {Browser} browser - Playwright browser instance
 * @param {Object} [options] - Optional viewport / locale / timezoneId overrides
 * @returns {Promise<BrowserContext>}
 */
// eslint-disable-next-line require-await -- browser.newContext() returns a promise
export async function createStealthContext(browser, options = {}) {
  const defaults = resolveLocaleDefaults();

  const {
    viewport = { width: 1440, height: 900 },
    locale = defaults.locale,
    timezoneId = defaults.timezoneId,
  } = options;

  return browser.newContext({
    userAgent: getRandomUserAgent(),
    viewport,
    locale,
    timezoneId,
    permissions: [],
    extraHTTPHeaders: { 'Accept-Language': defaults.acceptLanguage },
  });
}

/**
 * No-op stub kept for call-site compatibility.
 * CAPTCHA solving is now handled by the NopeCHA extension when NOPECHA_API_KEY is set.
 */
export async function configureNopeCHA(_context) {}

/**
 * Prepare a writable copy of the NopeCHA extension with the API key injected.
 * Returns null if NOPECHA_API_KEY is not set or extension dir is missing.
 *
 * We write the key into a /tmp copy so we never dirty the tracked extension source.
 * @returns {string|null} Path to the prepared extension directory, or null
 */
function prepareNopeCHAExtension() {
  const apiKey = process.env.NOPECHA_API_KEY || process.env.NOPECHA_API_KEY_2;
  if (!apiKey) return null;

  const srcDir = join(__dirname, '../../extensions/nopecha');
  if (!existsSync(srcDir)) {
    logger.warn('NopeCHA extension not found at extensions/nopecha — CAPTCHA auto-solve disabled');
    return null;
  }

  // Write to /tmp so each run gets a fresh copy with the correct key
  const destDir = nopechaTempDir();
  if (!existsSync(destDir)) {
    // Owner-only: the copy will contain the API key
    mkdirSync(destDir, { recursive: true, mode: 0o700 });
    // Shallow copy all extension files
    for (const entry of readdirSync(srcDir, { withFileTypes: true })) {
      const srcPath = join(srcDir, entry.name);
      const destPath = join(destDir, entry.name);
      if (entry.isDirectory()) {
        // Symlink subdirs — they're read-only content, no need to copy
        symlinkSync(srcPath, destPath);
      } else {
        writeFileSync(destPath, readFileSync(srcPath));
      }
    }
  }

  // Inject API key into manifest.json in the temp copy
  const manifestPath = join(destDir, 'manifest.json');
  try {
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
    // The background script reads the key from chrome.storage.local { settings: { key } }.
    // We seed it with a one-shot content script registered in the manifest.
    if (!manifest._nopecha_key_injected) {
      manifest.content_scripts = manifest.content_scripts || [];
      // Inject a one-shot seed script on all URLs that sets the key in storage
      manifest.content_scripts.push({
        matches: ['<all_urls>'],
        js: ['__key_seed__.js'],
        run_at: 'document_start',
        all_frames: false,
      });
      manifest._nopecha_key_injected = true;
      writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
      // Write the seed script.
      // The background service worker caches settings into a Promise at startup.
      // Writing to chrome.storage.local alone won't update the live cache — we must
      // also send a settings::update message so the background reloads the key.
      // Message format: [nonce, action, settingsObject] (see background.js _t handler).
      writeFileSync(
        join(destDir, '__key_seed__.js'),
        `(function() {
  var k = ${JSON.stringify(apiKey)};
  chrome.storage.local.get('settings', function(r) {
    var s = Object.assign({}, r.settings || {});
    if (s.key) return;
    s.key = k;
    chrome.storage.local.set({ settings: s }, function() {
      // Notify the background service worker to update its live settings cache.
      // Format: [nonce, action, settingsObject]. Ignore errors (worker may be sleeping).
      try {
        chrome.runtime.sendMessage([null, 'settings::update', { key: k }], function() {
          void chrome.runtime.lastError;
        });
      } catch (_) {}
    });
  });
})();`,
        { mode: 0o600 }
      );
    }
  } catch (e) {
    logger.warn(`Could not inject NopeCHA key into manifest: ${e.message}`);
    return null;
  }

  logger.info(`NopeCHA extension prepared at ${destDir} (key: ${apiKey.substring(0, 8)}...)`);
  return destDir;
}

/**
 * Launch a browser context with Chrome extensions loaded (for CAPTCHA solving).
 * Uses launchPersistentContext — required by Playwright for extension support.
 *
 * Returns { context, close, hasNopeCHA } where close() shuts down the browser
 * and removes the temp directories created for this launch.
 * Use instead of launchStealthBrowser + createStealthContext when extensions needed.
 * @param {Object} [options]
 * @param {boolean} [options.headless=false]
 * @param {boolean} [options.devtools=false]
 * @param {string} [options.stealthLevel='minimal'] - 'aggressive' adds --disable-site-isolation-trials
 * @param {number} [options.slowMo=0]
 * @returns {Promise<{context: BrowserContext, close: Function, hasNopeCHA: boolean}>}
 * @throws Rethrows any launch error after cleaning up temp directories
 */
export async function launchWithExtensions(options = {}) {
  const { headless = false, devtools = false, stealthLevel = 'minimal', slowMo = 0 } = options;

  const nopechaDir = prepareNopeCHAExtension();

  // Build args — same base set as launchStealthBrowser but WITHOUT --disable-extensions
  const args = [
    ...SHARED_CHROMIUM_ARGS,
    disableFeaturesArg(BASE_DISABLED_FEATURES),
    '--renderer-process-limit=4', // allow more for multi-tab
  ];

  if (nopechaDir) {
    args.push(`--disable-extensions-except=${nopechaDir}`);
    args.push(`--load-extension=${nopechaDir}`);
    logger.info('NopeCHA CAPTCHA solver extension loaded');
  }

  if (stealthLevel === 'aggressive') {
    // --disable-web-security is already part of the shared set
    args.push('--disable-site-isolation-trials');
  }

  const browserPath = process.env.CHROMIUM_PATH || detectChromiumPath();
  const userDataDir = `/tmp/pw-ext-profile-${process.pid}`;
  mkdirSync(userDataDir, { recursive: true });

  const { acceptLanguage, locale, timezoneId } = resolveLocaleDefaults();

  // Best-effort removal of everything this launch put in /tmp. The NopeCHA dir is
  // removed unconditionally because a failed key injection leaves it behind.
  const removeTempDirs = () => {
    for (const dir of [userDataDir, nopechaTempDir()]) {
      try {
        rmSync(dir, { recursive: true, force: true });
      } catch {
        /* ok */
      }
    }
  };

  let context;
  try {
    // launchPersistentContext is required for extensions in Playwright.
    // Use the plain playwright chromium (not playwright-extra) — extra plugins
    // require a Browser object which launchPersistentContext doesn't return.
    // It returns a BrowserContext directly (no separate browser object).
    const { chromium: plainChromium } = await import('playwright');
    context = await plainChromium.launchPersistentContext(userDataDir, {
      headless,
      devtools,
      slowMo,
      args,
      executablePath: browserPath || undefined,
      userAgent: getRandomUserAgent(),
      viewport: { width: 1440, height: 900 },
      locale,
      timezoneId,
      permissions: [],
      extraHTTPHeaders: { 'Accept-Language': acceptLanguage },
      chromiumSandbox: true,
      // Suppress Playwright's own copies of flags we supply explicitly above
      // to prevent "unsupported version of flag" errors from duplicate args
      ignoreDefaultArgs: [
        '--enable-automation',
        '--disable-extensions',
        '--disable-dev-shm-usage',
        '--disable-background-networking',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-breakpad',
        '--disable-ipc-flooding-protection',
        '--disable-renderer-backgrounding',
        '--no-first-run',
      ],
    });

    // Hide automation signals — launchPersistentContext uses plain chromium (no stealth plugin),
    // so we must manually override navigator.webdriver and remove Chrome automation markers.
    await context.addInitScript(() => {
      /* eslint-disable no-undef */
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      // Restore a minimal window.chrome object (Chromium headful has it; automation strips it)
      if (!window.chrome) {
        window.chrome = { runtime: {} };
      }
      // Remove cdc_ variables injected by ChromeDriver
      const cdcKeys = Object.keys(window).filter(k => k.startsWith('cdc_'));
      for (const k of cdcKeys) {
        try {
          delete window[k];
        } catch {
          /* ok */
        }
      }
      /* eslint-enable no-undef */
    });
  } catch (error) {
    // Do not leak the browser process or the temp dirs when setup fails
    if (context) {
      await context.close().catch(() => {});
    }
    removeTempDirs();
    throw error;
  }

  logger.info(`Launched browser with extensions (userDataDir: ${userDataDir})`);

  return {
    context,
    close: async () => {
      await context.close().catch(() => {});
      removeTempDirs();
    },
    hasNopeCHA: !!nopechaDir,
  };
}

/**
 * Sleep for a random whole number of milliseconds in [min, max].
 * @param {number} [min=100]
 * @param {number} [max=500]
 * @returns {Promise<void>}
 */
export async function randomDelay(min = 100, max = 500) {
  const delay = Math.floor(Math.random() * (max - min + 1)) + min;
  await new Promise(resolve => setTimeout(resolve, delay));
}

/**
 * Sample points along a cubic Bezier curve between two screen positions.
 * Control points are randomized so successive paths differ.
 * @param {number} x1 - Start x
 * @param {number} y1 - Start y
 * @param {number} x2 - End x
 * @param {number} y2 - End y
 * @param {number} [numPoints=4] - Number of segments (numPoints + 1 points are returned)
 * @returns {Array<{x: number, y: number}>} Rounded waypoints including both endpoints
 */
function generateBezierWaypoints(x1, y1, x2, y2, numPoints = 4) {
  const points = [];
  const cp1x = x1 + (x2 - x1) * (0.2 + Math.random() * 0.3);
  const cp1y = y1 + (y2 - y1) * (0.2 + Math.random() * 0.3);
  const cp2x = x1 + (x2 - x1) * (0.5 + Math.random() * 0.3);
  const cp2y = y1 + (y2 - y1) * (0.5 + Math.random() * 0.3);

  for (let i = 0; i <= numPoints; i++) {
    const t = i / numPoints;
    const mt = 1 - t;
    const x = mt ** 3 * x1 + 3 * mt ** 2 * t * cp1x + 3 * mt * t ** 2 * cp2x + t ** 3 * x2;
    const y = mt ** 3 * y1 + 3 * mt ** 2 * t * cp1y + 3 * mt * t ** 2 * cp2y + t ** 3 * y2;
    points.push({ x: Math.round(x), y: Math.round(y) });
  }
  return points;
}

/**
 * Move the mouse to a target along a randomized curved path.
 * The start position is read from window.mouseX / window.mouseY and falls back
 * to (100, 100); nothing in this module sets those globals.
 * @param {Page} page - Playwright page instance
 * @param {number} targetX
 * @param {number} targetY
 */
export async function humanMouseMove(page, targetX, targetY) {
  /* eslint-disable no-undef -- window is available in browser context (page.evaluate) */
  const current = await page.evaluate(() => ({ x: window.mouseX || 100, y: window.mouseY || 100 }));
  /* eslint-enable no-undef */

  const waypoints = generateBezierWaypoints(
    current.x,
    current.y,
    targetX,
    targetY,
    3 + Math.floor(Math.random() * 3)
  );

  for (const point of waypoints) {
    await page.mouse.move(point.x, point.y);
    await new Promise(resolve => setTimeout(resolve, 5 + Math.random() * 10));
  }
}

/**
 * Scroll the page down, optionally with an eased animation.
 * Resolves after the animation has finished (plus a short random settle delay).
 * @param {Page} page - Playwright page instance
 * @param {Object} [options]
 * @param {'viewport'|'short'|number|string} [options.distance='viewport'] - Pixels, or a preset
 * @param {boolean} [options.smooth=true]
 */
export async function humanScroll(page, options = {}) {
  const { distance = 'viewport', smooth = true } = options;

  /* eslint-disable no-undef -- window is available in browser context (page.evaluate) */
  let scrollAmount;
  if (distance === 'viewport') {
    scrollAmount = await page.evaluate(() => window.innerHeight);
  } else if (distance === 'short') {
    scrollAmount = 300;
  } else {
    scrollAmount = Number.parseInt(distance, 10) || 300;
  }

  await page.evaluate(
    ({ amount, smooth }) => {
      if (!smooth) {
        // Use fallback if site overrides window.scrollBy
        try {
          window.scrollBy(0, amount);
        } catch {
          // Fallback: directly set scroll position
          const currentY = document.documentElement.scrollTop || document.body.scrollTop;
          const targetY = currentY + amount;
          document.documentElement.scrollTop = targetY;
          document.body.scrollTop = targetY;
        }
        return undefined;
      }

      const start = window.pageYOffset;
      const startTime = Date.now();
      const duration = 500 + Math.random() * 500;
      const easeInOutQuad = t => (t < 0.5 ? 2 * t * t : -1 + (4 - 2 * t) * t);

      // Returning a promise makes page.evaluate wait for the animation to complete
      return new Promise(resolve => {
        // Safety net: requestAnimationFrame can be paused; never leave evaluate() hanging
        setTimeout(resolve, duration + 250);

        function step() {
          const elapsed = Date.now() - startTime;
          const progress = Math.min(elapsed / duration, 1);
          const targetY = start + amount * easeInOutQuad(progress);
          // Use fallback if site overrides window.scrollTo
          try {
            window.scrollTo(0, targetY);
          } catch {
            // Fallback: directly set scroll position
            document.documentElement.scrollTop = targetY;
            document.body.scrollTop = targetY;
          }
          if (progress < 1) {
            requestAnimationFrame(step);
          } else {
            resolve();
          }
        }
        step();
      });
    },
    { amount: scrollAmount, smooth }
  );
  /* eslint-enable no-undef */

  await randomDelay(200, 400);
}

/**
 * Move the mouse to a random point near the centre of the first matching element, then click it.
 * @param {Page} page - Playwright page instance
 * @param {string} selector - Selector for the element to click
 */
export async function humanClick(page, selector) {
  const element = page.locator(selector).first();
  const box = await element.boundingBox();

  if (box) {
    const targetX = box.x + box.width / 2 + (Math.random() - 0.5) * box.width * 0.3;
    const targetY = box.y + box.height / 2 + (Math.random() - 0.5) * box.height * 0.3;
    await humanMouseMove(page, targetX, targetY);
  }

  await randomDelay(10, 60);
  await element.click();
  await randomDelay(50, 200);
}

/**
 * Click a field and enter text into it.
 * @param {Page} page - Playwright page instance
 * @param {string} selector - Selector for the input element
 * @param {string} text - Text to enter
 */
export async function humanType(page, selector, text) {
  const element = page.locator(selector).first();

  await humanClick(page, selector);

  if (text.length > 50) {
    // Long text (proposals, messages): fill() is instant and indistinguishable from paste.
    // Most form frameworks only check keystroke timing on short credential-like fields.
    await element.fill(text);
    // Brief settle so React/Vue onChange handlers fire before we move on
    await randomDelay(80, 150);
  } else {
    // Short text (name, email, phone): type char-by-char to simulate real keystrokes
    await element.pressSequentially(text, { delay: 30 + Math.random() * 50 });
    await randomDelay(80, 200);
  }
}

const SOCIAL_DOMAINS = Object.freeze([
  'twitter.com',
  'x.com',
  'linkedin.com',
  'facebook.com',
  'fb.com',
  'instagram.com',
  'youtube.com',
  'tiktok.com',
  'reddit.com',
]);

/**
 * Check whether a URL points at a known social media site (or one of its subdomains).
 * @param {string} url - Absolute URL
 * @returns {boolean} False for unparseable URLs
 */
export function isSocialMediaUrl(url) {
  try {
    const hostname = new URL(url).hostname.toLowerCase();
    // Match on a label boundary: a plain substring test would treat
    // "netflix.com" or "x.com.evil.io" as "x.com".
    return SOCIAL_DOMAINS.some(
      domain => hostname === domain || hostname.endsWith(`.${domain}`)
    );
  } catch {
    return false;
  }
}

/**
 * Wait for Cloudflare Turnstile challenge to complete
 * @param {Page} page - Playwright page instance
 * @param {Object} options - Wait options
 * @returns {Promise<boolean>} True if challenge passed, false if still blocking
 */
export async function waitForCloudflare(page, options = {}) {
  const { timeout = 30000, checkInterval = 1000 } = options;
  const startTime = Date.now();

  logger.debug('Waiting for potential Cloudflare/Turnstile challenge...');

  // Wait for network idle to give Turnstile time to load — timeout here is NOT a CF block,
  // it just means the page has persistent network activity (analytics, chat widgets, etc.)
  try {
    await page.waitForLoadState('networkidle', { timeout: timeout / 2 });
  } catch {
    // Non-idle network is normal for pages with chat widgets or analytics — keep going
    logger.debug('Network did not reach idle state, proceeding with CF indicator check');
  }

  // Additional delay to let Turnstile process
  await randomDelay(2000, 4000);

  // Check for actual Cloudflare/Turnstile blocking indicators
  try {
    while (Date.now() - startTime < timeout) {
      /* eslint-disable no-undef -- document is available in browser context */
      const isBlocked = await page.evaluate(() => {
        const bodyText = document.body?.innerText?.toLowerCase() || '';
        const title = document.title?.toLowerCase() || '';

        // Check for common Cloudflare blocking messages
        const blockIndicators = [
          'checking your browser',
          'verifying you are human',
          'just a moment',
          'enable javascript and cookies',
          'attention required',
          'cloudflare',
        ];

        return blockIndicators.some(
          indicator => bodyText.includes(indicator) || title.includes(indicator)
        );
      });
      /* eslint-enable no-undef */

      if (!isBlocked) {
        logger.debug('Cloudflare/Turnstile challenge passed or not present');
        return true;
      }

      logger.debug('Cloudflare/Turnstile challenge detected, waiting...');
      await new Promise(resolve => setTimeout(resolve, checkInterval));
    }

    logger.warn('Cloudflare/Turnstile challenge did not resolve within timeout');
    return false;
  } catch (error) {
    logger.warn(`Error checking for Cloudflare indicators: ${error.message}`);
    // On evaluation error, assume page is accessible rather than false-positive blocking
    return true;
  }
}

// --- Persistent Browser Profiles ---

const PROFILES_DIR = process.env.BROWSER_PROFILES_DIR || './.browser-profiles';

/**
 * Get the directory path for a platform profile
 * @param {string} platform - Platform name (e.g., 'x', 'linkedin')
 * @param {string} profileName - Profile name (e.g., 'profile-1')
 * @returns {string} Full path to profile directory
 */
function getProfileDir(platform, profileName) {
  return join(PROFILES_DIR, platform, profileName);
}

/**
 * Save browser profile (cookies, localStorage, sessionStorage) to disk
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @param {string} profileName - Profile name
 * @param {Object} extraMetadata - Additional metadata to save (e.g., username)
 */
export async function saveProfile(page, platform, profileName, extraMetadata = {}) {
  const profileDir = getProfileDir(platform, profileName);
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  mkdirSync(profileDir, { recursive: true });

  // Save cookies
  const cookies = await page.context().cookies();
  writeJson(join(profileDir, 'cookies.json'), cookies);

  // Save localStorage and sessionStorage
  const storageData = await page.evaluate(() => {
    /* eslint-disable no-undef, security/detect-object-injection -- browser context: safe key access from storage API */
    const local = {};
    for (let i = 0; i < localStorage.length; i++) {
      const key = localStorage.key(i);
      local[key] = localStorage.getItem(key);
    }
    const session = {};
    for (let i = 0; i < sessionStorage.length; i++) {
      const key = sessionStorage.key(i);
      session[key] = sessionStorage.getItem(key);
    }
    return { localStorage: local, sessionStorage: session };
    /* eslint-enable no-undef, security/detect-object-injection */
  });

  writeJson(join(profileDir, 'localStorage.json'), storageData.localStorage);
  writeJson(join(profileDir, 'sessionStorage.json'), storageData.sessionStorage);

  // Save/update metadata
  const metadataPath = join(profileDir, 'metadata.json');
  let existingMetadata = {};
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  if (existsSync(metadataPath)) {
    try {
      existingMetadata = readJson(metadataPath);
    } catch {
      // Corrupted metadata, start fresh
    }
  }

  const now = new Date().toISOString();
  const metadata = {
    ...existingMetadata,
    ...extraMetadata,
    platform,
    profileName,
    created_at: existingMetadata.created_at || now,
    last_used_at: now,
  };

  writeJson(metadataPath, metadata);

  logger.info('Profile saved', { platform, profileName });
}

/**
 * Load a saved browser profile into a page (cookies only; see restoreStorage for storage)
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name
 * @param {string} profileName - Profile name
 * @returns {Promise<boolean>} True if the cookies were installed into the page's context
 */
export async function loadProfile(page, platform, profileName) {
  const profileDir = getProfileDir(platform, profileName);
  const cookiesPath = join(profileDir, 'cookies.json');

  // eslint-disable-next-line security/detect-non-literal-fs-filename
  if (!existsSync(cookiesPath)) {
    logger.debug('No saved profile found', { platform, profileName });
    return false;
  }

  try {
    // Load cookies
    const cookies = readJson(cookiesPath);
    await page.context().addCookies(cookies);
  } catch (error) {
    logger.warn('Failed to load profile', { platform, profileName, error: error.message });
    return false;
  }

  // Update last_used_at in metadata. This is best-effort: the cookies are already
  // installed, so a corrupted metadata file must not be reported as a failed load.
  const metadataPath = join(profileDir, 'metadata.json');
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  if (existsSync(metadataPath)) {
    try {
      const metadata = readJson(metadataPath);
      metadata.last_used_at = new Date().toISOString();
      writeJson(metadataPath, metadata);
    } catch (error) {
      logger.debug('Could not update profile metadata', {
        platform,
        profileName,
        error: error.message,
      });
    }
  }

  logger.info('Profile loaded', { platform, profileName });
  return true;
}

/**
 * Restore localStorage and sessionStorage after page navigation
 * Must be called AFTER page.goto() since storage is domain-specific
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name
 * @param {string} profileName - Profile name
 */
export async function restoreStorage(page, platform, profileName) {
  const profileDir = getProfileDir(platform, profileName);

  try {
    const localPath = join(profileDir, 'localStorage.json');
    const sessionPath = join(profileDir, 'sessionStorage.json');

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (existsSync(localPath)) {
      const localData = readJson(localPath);
      await page.evaluate(data => {
        /* eslint-disable no-undef */
        for (const [key, value] of Object.entries(data)) {
          localStorage.setItem(key, value);
        }
        /* eslint-enable no-undef */
      }, localData);
    }

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (existsSync(sessionPath)) {
      const sessionData = readJson(sessionPath);
      await page.evaluate(data => {
        /* eslint-disable no-undef */
        for (const [key, value] of Object.entries(data)) {
          sessionStorage.setItem(key, value);
        }
        /* eslint-enable no-undef */
      }, sessionData);
    }
  } catch (error) {
    logger.debug('Could not restore storage', { platform, profileName, error: error.message });
  }
}

/**
 * List all profiles for a platform with metadata
 * Profile directories without a metadata.json are skipped.
 * @param {string} platform - Platform name ('x', 'linkedin', or null for all)
 * @returns {Array<Object>} Array of profile metadata objects
 */
export function listProfiles(platform = null) {
  const profiles = [];
  const platforms = platform ? [platform] : ['x', 'linkedin'];

  for (const plat of platforms) {
    const platDir = join(PROFILES_DIR, plat);
    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (!existsSync(platDir)) continue;

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    const dirs = readdirSync(platDir, { withFileTypes: true })
      .filter(d => d.isDirectory())
      .map(d => d.name);

    for (const dir of dirs) {
      const metadataPath = join(platDir, dir, 'metadata.json');
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      if (existsSync(metadataPath)) {
        try {
          profiles.push(readJson(metadataPath));
        } catch {
          profiles.push({ platform: plat, profileName: dir, error: 'corrupted metadata' });
        }
      }
    }
  }

  return profiles;
}

/**
 * Get the next profile to use (LRU strategy)
 * If fewer profiles exist than configured count, creates a new profile name.
 * Otherwise returns the least recently used profile.
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @returns {string} Profile name to use
 */
export function getNextProfile(platform) {
  const countEnvVar = platform === 'x' ? 'X_PROFILE_COUNT' : 'LINKEDIN_PROFILE_COUNT';
  // eslint-disable-next-line security/detect-object-injection -- safe: ternary-selected constant key
  const configured = Number.parseInt(process.env[countEnvVar] || '3', 10);
  // A non-numeric or non-positive setting would otherwise leave no profile to return
  const maxProfiles = Number.isNaN(configured) ? 3 : Math.max(1, configured);

  const existing = listProfiles(platform);

  // If fewer profiles than configured, create a new one
  if (existing.length < maxProfiles) {
    // Take the first free slot: counting alone collides with an existing name
    // when profiles are non-contiguous (e.g. profile-1 was deleted).
    const taken = new Set(existing.map(profile => profile.profileName));
    let slot = 1;
    while (taken.has(`profile-${slot}`)) {
      slot += 1;
    }
    const newName = `profile-${slot}`;
    logger.info('Creating new profile slot', {
      platform,
      profileName: newName,
      existing: existing.length,
      max: maxProfiles,
    });
    return newName;
  }

  // Missing or unparseable timestamps count as "never used" so they sort first
  const lastUsedMs = profile => {
    const time = profile.last_used_at ? new Date(profile.last_used_at).getTime() : 0;
    return Number.isNaN(time) ? 0 : time;
  };

  // Sort by last_used_at ascending (oldest first) for LRU
  existing.sort((a, b) => lastUsedMs(a) - lastUsedMs(b));

  const selected = existing[0].profileName;
  logger.info('Selected LRU profile', {
    platform,
    profileName: selected,
    lastUsed: existing[0].last_used_at,
  });
  return selected;
}

/**
 * Create a persistent browser context with profile support.
 * Does NOT override userAgent - uses Chromium's default (stealth plugin strips markers).
 * @param {Browser} browser - Playwright browser instance
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @param {string} profileName - Profile name
 * @param {Object} options - Context options (viewport, locale, etc.)
 * @returns {Object} { context, page, profileLoaded }
 */
export async function createPersistentContext(browser, platform, profileName, options = {}) {
  const defaults = resolveLocaleDefaults();

  const {
    viewport = null, // null = use window size (for headed mode)
    locale = defaults.locale,
    timezoneId = defaults.timezoneId,
  } = options;

  // Create context WITHOUT userAgent override - let Chromium use its default
  const context = await browser.newContext({
    viewport,
    locale,
    timezoneId,
    permissions: [],
    extraHTTPHeaders: { 'Accept-Language': defaults.acceptLanguage },
  });

  try {
    const page = await context.newPage();

    // Load saved profile (cookies only - storage must be restored after navigation)
    const profileLoaded = await loadProfile(page, platform, profileName);

    return { context, page, profileLoaded };
  } catch (error) {
    // The caller never receives the context on failure, so close it here
    await context.close().catch(() => {});
    throw error;
  }
}

export default {
  launchStealthBrowser,
  launchWithExtensions,
  createStealthContext,
  configureNopeCHA,
  randomDelay,
  humanMouseMove,
  humanScroll,
  humanClick,
  humanType,
  isSocialMediaUrl,
  waitForCloudflare,
  saveProfile,
  loadProfile,
  restoreStorage,
  listProfiles,
  getNextProfile,
  createPersistentContext,
};