// stealth-browser.js
/**
 * Stealth Browser Utility
 * Centralized bot-detection avoidance for Playwright
 */

import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import UserAgent from 'user-agents';
import Logger from './logger.js';
import {
  existsSync,
  mkdirSync,
  readFileSync,
  writeFileSync,
  readdirSync,
  symlinkSync,
  rmSync,
} from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { execSync } from 'child_process';

const __dirname = dirname(fileURLToPath(import.meta.url));

chromium.use(stealth());

const logger = new Logger('StealthBrowser');

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms - Delay in milliseconds
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Build a single `--disable-features` switch.
 * Chromium keeps only the LAST occurrence of a repeated switch, so all features that must be
 * disabled have to travel in one comma-separated flag.
 * @param {string[]} features - Chromium feature names
 * @returns {string} The command-line switch
 */
function buildDisableFeaturesFlag(features) {
  return `--disable-features=${features.join(',')}`;
}

/**
 * Auto-detect chromium path on NixOS
 * @returns {string|null} Path to chromium executable or null
 */
function detectChromiumPath() {
  try {
    const chromiumPath = execSync('which chromium', {
      encoding: 'utf-8',
      // Keep `which` diagnostics out of our own stderr; a miss is an expected outcome.
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    // eslint-disable-next-line security/detect-non-literal-fs-filename -- Safe: path from `which` command
    if (chromiumPath && existsSync(chromiumPath)) {
      logger.debug(`Auto-detected chromium at: ${chromiumPath}`);
      return chromiumPath;
    }
  } catch {
    logger.debug('Chromium not found in PATH, using Playwright bundled browser');
  }
  return null;
}

/**
 * Generate a random modern user agent using user-agents package
 * @returns {string} User agent string
 */
function getRandomUserAgent() {
  const userAgent = new UserAgent({ deviceCategory: 'desktop' });
  return userAgent.toString();
}

/**
 * Launch a Chromium browser through playwright-extra with the stealth plugin applied.
 * The executable comes from CHROMIUM_PATH, then `which chromium`, then Playwright's default.
 * @param {Object} [options] - Launch options
 * @param {boolean} [options.headless=true] - Run without a visible window
 * @param {number} [options.slowMo=0] - Playwright slowMo value in milliseconds
 * @param {string} [options.stealthLevel='minimal'] - Currently only written to the log
 * @param {boolean} [options.devtools=false] - Open devtools on launch
 * @returns {Promise<Browser>} The launched browser
 */
export async function launchStealthBrowser(options = {}) {
  const {
    headless = true,
    slowMo = 0,
    stealthLevel = 'minimal', // Default to minimal (fastest, least intrusive)
    devtools = false,
  } = options;

  const launchOptions = {
    headless,
    slowMo,
    devtools,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--disable-dev-shm-usage',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      // One merged switch: separate --disable-features flags would override each other.
      buildDisableFeaturesFlag([
        'IsolateOrigins',
        'site-per-process',
        'BlockInsecurePrivateNetworkRequests',
        'TranslateUI',
        'BlinkGenPropertyTrees',
      ]),
      '--disable-web-security',
      '--no-first-run',
      '--start-maximized',
      // Resource limits to prevent system freeze
      '--disable-gpu',
      '--disable-software-rasterizer',
      '--disable-background-networking',
      '--disable-background-timer-throttling',
      '--disable-backgrounding-occluded-windows',
      '--disable-breakpad',
      '--disable-component-extensions-with-background-pages',
      '--disable-extensions',
      '--disable-ipc-flooding-protection',
      '--disable-renderer-backgrounding',
      '--renderer-process-limit=1',
      '--js-flags=--max-old-space-size=256',
    ],
  };

  // Use explicit path from env, or auto-detect on NixOS
  const browserPath = process.env.CHROMIUM_PATH || detectChromiumPath();
  if (browserPath) {
    logger.info(`Using chromium at: ${browserPath}`);
    launchOptions.executablePath = browserPath;
  }

  logger.info(`Launching browser with stealth level: ${stealthLevel}`);
  const browser = await chromium.launch(launchOptions);
  return browser;
}

/**
 * Create a browser context with a random desktop user agent and locale/timezone defaults.
 * Defaults come from the TIMEZONE and ACCEPT_LANGUAGE environment variables.
 * @param {Browser} browser - Playwright browser instance
 * @param {Object} [options] - Context options
 * @param {Object} [options.viewport={ width: 1440, height: 900 }] - Viewport size
 * @param {string} [options.locale] - Locale; defaults to the first ACCEPT_LANGUAGE entry
 * @param {string} [options.timezoneId] - Timezone; defaults to TIMEZONE or 'Australia/Sydney'
 * @returns {Promise<BrowserContext>} The new context
 */
// eslint-disable-next-line require-await -- browser.newContext() returns a promise
export async function createStealthContext(browser, options = {}) {
  const defaultTimezone = process.env.TIMEZONE || 'Australia/Sydney';
  const acceptLanguage = process.env.ACCEPT_LANGUAGE || 'en-AU,en;q=0.9';
  // Extract locale from ACCEPT_LANGUAGE (e.g., "en-AU,en;q=0.9" -> "en-AU")
  const defaultLocale = acceptLanguage.split(',')[0];

  const {
    viewport = { width: 1440, height: 900 },
    locale = defaultLocale,
    timezoneId = defaultTimezone,
  } = options;

  return browser.newContext({
    userAgent: getRandomUserAgent(),
    viewport,
    locale,
    timezoneId,
    permissions: [],
    extraHTTPHeaders: { 'Accept-Language': acceptLanguage },
  });
}

/**
 * No-op stub kept for call-site compatibility.
 * CAPTCHA solving is now handled by the NopeCHA extension when NOPECHA_API_KEY is set.
 */
export async function configureNopeCHA(_context) {}

/**
 * Path of the per-process temporary copy of the NopeCHA extension.
 * @returns {string} Directory path under /tmp keyed by the current PID
 */
function getNopeCHATempDir() {
  return `/tmp/nopecha-ext-${process.pid}`;
}

/**
 * Prepare a writable copy of the NopeCHA extension with the API key injected.
 * Returns null if NOPECHA_API_KEY is not set or extension dir is missing.
 *
 * We write the key into a /tmp copy so we never dirty the tracked extension source.
 * @returns {string|null} Path of the prepared extension directory, or null when unavailable
 */
function prepareNopeCHAExtension() {
  const apiKey = process.env.NOPECHA_API_KEY || process.env.NOPECHA_API_KEY_2;
  if (!apiKey) return null;

  const srcDir = join(__dirname, '../../extensions/nopecha');
  if (!existsSync(srcDir)) {
    logger.warn('NopeCHA extension not found at extensions/nopecha — CAPTCHA auto-solve disabled');
    return null;
  }

  const destDir = getNopeCHATempDir();
  if (!existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
    // Shallow copy all extension files
    for (const entry of readdirSync(srcDir, { withFileTypes: true })) {
      const srcPath = join(srcDir, entry.name);
      const destPath = join(destDir, entry.name);
      if (entry.isDirectory()) {
        // Symlink subdirs — they're read-only content, no need to copy
        symlinkSync(srcPath, destPath);
      } else {
        writeFileSync(destPath, readFileSync(srcPath));
      }
    }
  }

  // Inject API key into the temp copy. The background script reads the key from
  // chrome.storage.local { settings: { key } }, so we register a one-shot content script
  // that seeds that storage entry.
  const manifestPath = join(destDir, 'manifest.json');
  try {
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
    if (!manifest._nopecha_key_injected) {
      manifest.content_scripts = manifest.content_scripts || [];
      // Inject a one-shot seed script on all URLs that sets the key in storage
      manifest.content_scripts.push({
        matches: ['<all_urls>'],
        js: ['__key_seed__.js'],
        run_at: 'document_start',
        all_frames: false,
      });
      manifest._nopecha_key_injected = true;
      writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
    }

    // Always rewrite the seed script: the temp dir is keyed by PID and may survive from an
    // earlier run (PID reuse), in which case it would otherwise keep a stale key.
    writeFileSync(
      join(destDir, '__key_seed__.js'),
      `chrome.storage.local.get('settings', function(r) {
  var s = r.settings || {};
  if (!s.key) { s.key = ${JSON.stringify(apiKey)}; chrome.storage.local.set({ settings: s }); }
});`
    );
  } catch (e) {
    logger.warn(`Could not inject NopeCHA key into manifest: ${e.message}`);
    return null;
  }

  logger.info(`NopeCHA extension prepared at ${destDir} (key: ${apiKey.substring(0, 8)}...)`);
  return destDir;
}

/**
 * Launch a browser context with Chrome extensions loaded (for CAPTCHA solving).
 * Uses launchPersistentContext — required by Playwright for extension support.
 *
 * Returns { context, close, hasNopeCHA } where close() shuts down the browser and removes the
 * temporary profile and extension directories.
 * Use instead of launchStealthBrowser + createStealthContext when extensions needed.
 * @param {Object} [options] - Launch options
 * @param {boolean} [options.headless=false] - Run without a visible window
 * @param {boolean} [options.devtools=false] - Open devtools on launch
 * @param {string} [options.stealthLevel='minimal'] - 'aggressive' adds extra isolation flags
 * @returns {Promise<{context: BrowserContext, close: Function, hasNopeCHA: boolean}>}
 * @throws Rethrows any launch error after removing the temporary directories
 */
export async function launchWithExtensions(options = {}) {
  const { headless = false, devtools = false, stealthLevel = 'minimal' } = options;

  const nopechaDir = prepareNopeCHAExtension();

  // Build args — same base set as launchStealthBrowser but WITHOUT --disable-extensions
  const args = [
    '--disable-blink-features=AutomationControlled',
    '--disable-dev-shm-usage',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    // One merged switch: separate --disable-features flags would override each other.
    buildDisableFeaturesFlag([
      'IsolateOrigins',
      'site-per-process',
      'BlockInsecurePrivateNetworkRequests',
    ]),
    '--disable-web-security',
    '--no-first-run',
    '--start-maximized',
    '--disable-gpu',
    '--disable-software-rasterizer',
    '--disable-background-networking',
    '--disable-background-timer-throttling',
    '--disable-backgrounding-occluded-windows',
    '--disable-breakpad',
    '--disable-ipc-flooding-protection',
    '--disable-renderer-backgrounding',
    '--renderer-process-limit=4', // allow more for multi-tab
    '--js-flags=--max-old-space-size=256',
  ];

  if (nopechaDir) {
    args.push(`--disable-extensions-except=${nopechaDir}`);
    args.push(`--load-extension=${nopechaDir}`);
    logger.info('NopeCHA CAPTCHA solver extension loaded');
  }

  if (stealthLevel === 'aggressive') {
    // --disable-web-security is already part of the base set above
    args.push('--disable-site-isolation-trials');
  }

  const browserPath = process.env.CHROMIUM_PATH || detectChromiumPath();
  const userDataDir = `/tmp/pw-ext-profile-${process.pid}`;
  mkdirSync(userDataDir, { recursive: true });

  // Best-effort removal of everything this launch put under /tmp
  const removeTempDirs = () => {
    for (const dir of [userDataDir, getNopeCHATempDir()]) {
      try {
        rmSync(dir, { recursive: true, force: true });
      } catch {
        /* ok */
      }
    }
  };

  const defaultTimezone = process.env.TIMEZONE || 'Australia/Sydney';
  const acceptLanguage = process.env.ACCEPT_LANGUAGE || 'en-AU,en;q=0.9';
  const locale = acceptLanguage.split(',')[0];

  let context;
  try {
    // launchPersistentContext is required for extensions in Playwright.
    // Use the plain playwright chromium (not playwright-extra) — extra plugins
    // require a Browser object which launchPersistentContext doesn't return.
    // It returns a BrowserContext directly (no separate browser object).
    const { chromium: plainChromium } = await import('playwright');
    context = await plainChromium.launchPersistentContext(userDataDir, {
      headless,
      devtools,
      slowMo: 0,
      args,
      executablePath: browserPath || undefined,
      userAgent: getRandomUserAgent(),
      viewport: { width: 1440, height: 900 },
      locale,
      timezoneId: defaultTimezone,
      permissions: [],
      extraHTTPHeaders: { 'Accept-Language': acceptLanguage },
      chromiumSandbox: false,
      // Suppress Playwright's own copies of flags we supply explicitly above
      // to prevent "unsupported version of flag" errors from duplicate args
      ignoreDefaultArgs: [
        '--enable-automation',
        '--disable-extensions',
        '--disable-dev-shm-usage',
        '--disable-background-networking',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-breakpad',
        '--disable-ipc-flooding-protection',
        '--disable-renderer-backgrounding',
        '--no-first-run',
      ],
    });
  } catch (error) {
    // Do not leave the profile / extension copies behind when the launch itself fails
    removeTempDirs();
    throw error;
  }

  logger.info(`Launched browser with extensions (userDataDir: ${userDataDir})`);

  return {
    context,
    close: async () => {
      // Closing is best-effort: the context may already be gone
      await context.close().catch(() => {});
      removeTempDirs();
    },
    hasNopeCHA: !!nopechaDir,
  };
}

/**
 * Wait for a random duration between `min` and `max` milliseconds (both inclusive).
 * @param {number} [min=100] - Minimum delay in milliseconds
 * @param {number} [max=500] - Maximum delay in milliseconds
 * @returns {Promise<void>}
 */
export async function randomDelay(min = 100, max = 500) {
  const delay = Math.floor(Math.random() * (max - min + 1)) + min;
  await sleep(delay);
}

/**
 * Sample a cubic Bezier curve from (x1, y1) to (x2, y2) with randomised control points.
 * @param {number} x1 - Start x
 * @param {number} y1 - Start y
 * @param {number} x2 - End x
 * @param {number} y2 - End y
 * @param {number} [numPoints=4] - Number of segments; numPoints + 1 points are returned
 * @returns {Array<{x: number, y: number}>} Rounded points including both endpoints
 */
function generateBezierWaypoints(x1, y1, x2, y2, numPoints = 4) {
  const points = [];
  const cp1x = x1 + (x2 - x1) * (0.2 + Math.random() * 0.3);
  const cp1y = y1 + (y2 - y1) * (0.2 + Math.random() * 0.3);
  const cp2x = x1 + (x2 - x1) * (0.5 + Math.random() * 0.3);
  const cp2y = y1 + (y2 - y1) * (0.5 + Math.random() * 0.3);

  for (let i = 0; i <= numPoints; i++) {
    const t = i / numPoints;
    const mt = 1 - t;
    const x = mt ** 3 * x1 + 3 * mt ** 2 * t * cp1x + 3 * mt * t ** 2 * cp2x + t ** 3 * x2;
    const y = mt ** 3 * y1 + 3 * mt ** 2 * t * cp1y + 3 * mt * t ** 2 * cp2y + t ** 3 * y2;
    points.push({ x: Math.round(x), y: Math.round(y) });
  }
  return points;
}

/**
 * Move the mouse to a target along a randomised curve with short pauses between steps.
 * The start point is read from window.mouseX / window.mouseY, falling back to (100, 100).
 * @param {Page} page - Playwright page instance
 * @param {number} targetX - Destination x coordinate
 * @param {number} targetY - Destination y coordinate
 * @returns {Promise<void>}
 */
export async function humanMouseMove(page, targetX, targetY) {
  /* eslint-disable no-undef -- window is available in browser context (page.evaluate) */
  const current = await page.evaluate(() => ({ x: window.mouseX || 100, y: window.mouseY || 100 }));
  /* eslint-enable no-undef */

  const waypoints = generateBezierWaypoints(
    current.x,
    current.y,
    targetX,
    targetY,
    3 + Math.floor(Math.random() * 3)
  );

  for (const point of waypoints) {
    await page.mouse.move(point.x, point.y);
    await sleep(5 + Math.random() * 10);
  }
}

/**
 * Scroll the page down, optionally with an eased animation.
 * @param {Page} page - Playwright page instance
 * @param {Object} [options] - Scroll options
 * @param {string|number} [options.distance='viewport'] - 'viewport' (one window height),
 *   'short' (300px) or a pixel amount; unparsable or zero values fall back to 300
 * @param {boolean} [options.smooth=true] - Animate the scroll over 500-1000ms
 * @returns {Promise<void>} Resolves after a 200-400ms pause; a smooth animation may still be running
 */
export async function humanScroll(page, options = {}) {
  const { distance = 'viewport', smooth = true } = options;

  /* eslint-disable no-undef -- window is available in browser context (page.evaluate) */
  let scrollAmount;
  if (distance === 'viewport') {
    scrollAmount = await page.evaluate(() => window.innerHeight);
  } else if (distance === 'short') {
    scrollAmount = 300;
  } else {
    scrollAmount = Number.parseInt(distance, 10) || 300;
  }

  await page.evaluate(
    ({ amount, smooth }) => {
      if (!smooth) {
        // Use fallback if site overrides window.scrollBy
        try {
          window.scrollBy(0, amount);
        } catch {
          // Fallback: directly set scroll position
          const currentY = document.documentElement.scrollTop || document.body.scrollTop;
          const targetY = currentY + amount;
          document.documentElement.scrollTop = targetY;
          document.body.scrollTop = targetY;
        }
        return;
      }

      const start = window.pageYOffset;
      const startTime = Date.now();
      const duration = 500 + Math.random() * 500;

      function easeInOutQuad(t) {
        return t < 0.5 ? 2 * t * t : -1 + (4 - 2 * t) * t;
      }

      function scroll() {
        const elapsed = Date.now() - startTime;
        const progress = Math.min(elapsed / duration, 1);
        const eased = easeInOutQuad(progress);
        const targetY = start + amount * eased;
        // Use fallback if site overrides window.scrollTo
        try {
          window.scrollTo(0, targetY);
        } catch {
          // Fallback: directly set scroll position
          document.documentElement.scrollTop = targetY;
          document.body.scrollTop = targetY;
        }
        if (progress < 1) requestAnimationFrame(scroll);
      }
      scroll();
    },
    { amount: scrollAmount, smooth }
  );
  /* eslint-enable no-undef */

  await randomDelay(200, 400);
}

/**
 * Move the mouse near the centre of the first element matching `selector`, then click it.
 * When the element has no bounding box the mouse move is skipped and only the click happens.
 * @param {Page} page - Playwright page instance
 * @param {string} selector - Selector of the element to click
 * @returns {Promise<void>}
 */
export async function humanClick(page, selector) {
  const element = page.locator(selector).first();
  const box = await element.boundingBox();

  if (box) {
    // Aim within the middle 30% of the element rather than the exact centre
    const targetX = box.x + box.width / 2 + (Math.random() - 0.5) * box.width * 0.3;
    const targetY = box.y + box.height / 2 + (Math.random() - 0.5) * box.height * 0.3;
    await humanMouseMove(page, targetX, targetY);
  }

  await randomDelay(10, 60);
  await element.click();
  await randomDelay(50, 200);
}

/**
 * Click the first element matching `selector` and enter `text` into it.
 * Text longer than 50 characters is filled in one step; shorter text is typed per character.
 * @param {Page} page - Playwright page instance
 * @param {string} selector - Selector of the input element
 * @param {string} text - Text to enter
 * @returns {Promise<void>}
 */
export async function humanType(page, selector, text) {
  const element = page.locator(selector).first();

  await humanClick(page, selector);

  if (text.length > 50) {
    // Long text (proposals, messages): fill() is instant and indistinguishable from paste.
    // Most form frameworks only check keystroke timing on short credential-like fields.
    await element.fill(text);
    // Brief settle so React/Vue onChange handlers fire before we move on
    await randomDelay(80, 150);
  } else {
    // Short text (name, email, phone): type char-by-char to simulate real keystrokes
    await element.pressSequentially(text, { delay: 30 + Math.random() * 50 });
    await randomDelay(80, 200);
  }
}

/**
 * Tell whether a URL points at one of the known social media sites.
 * A host matches when it equals a listed domain or is a subdomain of it, so
 * "mobile.twitter.com" matches while "netflix.com" does not match "x.com".
 * @param {string} url - Absolute URL to test
 * @returns {boolean} True for a social media host; false otherwise or when the URL is invalid
 */
export function isSocialMediaUrl(url) {
  const socialDomains = [
    'twitter.com',
    'x.com',
    'linkedin.com',
    'facebook.com',
    'fb.com',
    'instagram.com',
    'youtube.com',
    'tiktok.com',
    'reddit.com',
  ];

  try {
    const hostname = new URL(url).hostname.toLowerCase();
    return socialDomains.some(domain => hostname === domain || hostname.endsWith(`.${domain}`));
  } catch {
    return false;
  }
}

/**
 * Wait for Cloudflare Turnstile challenge to complete
 * @param {Page} page - Playwright page instance
 * @param {Object} options - Wait options
 * @param {number} [options.timeout=30000] - Overall budget in milliseconds
 * @param {number} [options.checkInterval=1000] - Pause between indicator checks in milliseconds
 * @returns {Promise<boolean>} True if challenge passed, false if still blocking
 */
export async function waitForCloudflare(page, options = {}) {
  const { timeout = 30000, checkInterval = 1000 } = options;
  const startTime = Date.now();

  logger.debug('Waiting for potential Cloudflare/Turnstile challenge...');

  // Wait for network idle to give Turnstile time to load — timeout here is NOT a CF block,
  // it just means the page has persistent network activity (analytics, chat widgets, etc.)
  try {
    await page.waitForLoadState('networkidle', { timeout: timeout / 2 });
  } catch {
    // Non-idle network is normal for pages with chat widgets or analytics — keep going
    logger.debug('Network did not reach idle state, proceeding with CF indicator check');
  }

  // Additional delay to let Turnstile process
  await randomDelay(2000, 4000);

  // Check for actual Cloudflare/Turnstile blocking indicators
  try {
    // The page is inspected at least once, even if the waits above used up the whole budget;
    // otherwise a short timeout would report "blocked" without ever looking at the page.
    for (;;) {
      /* eslint-disable no-undef -- document is available in browser context */
      const isBlocked = await page.evaluate(() => {
        const bodyText = document.body?.innerText?.toLowerCase() || '';
        const title = document.title?.toLowerCase() || '';

        // Check for common Cloudflare blocking messages
        const blockIndicators = [
          'checking your browser',
          'verifying you are human',
          'just a moment',
          'enable javascript and cookies',
          'attention required',
          'cloudflare',
        ];

        return blockIndicators.some(
          indicator => bodyText.includes(indicator) || title.includes(indicator)
        );
      });
      /* eslint-enable no-undef */

      if (!isBlocked) {
        logger.debug('Cloudflare/Turnstile challenge passed or not present');
        return true;
      }

      logger.debug('Cloudflare/Turnstile challenge detected, waiting...');
      await sleep(checkInterval);

      if (Date.now() - startTime >= timeout) break;
    }

    logger.warn('Cloudflare/Turnstile challenge did not resolve within timeout');
    return false;
  } catch (error) {
    logger.warn(`Error checking for Cloudflare indicators: ${error.message}`);
    // On evaluation error, assume page is accessible rather than false-positive blocking
    return true;
  }
}

// --- Persistent Browser Profiles ---

const PROFILES_DIR = process.env.BROWSER_PROFILES_DIR || './.browser-profiles';

/**
 * Get the directory path for a platform profile
 * @param {string} platform - Platform name (e.g., 'x', 'linkedin')
 * @param {string} profileName - Profile name (e.g., 'profile-1')
 * @returns {string} Full path to profile directory
 */
function getProfileDir(platform, profileName) {
  return join(PROFILES_DIR, platform, profileName);
}

/**
 * Save browser profile (cookies, localStorage, sessionStorage) to disk
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @param {string} profileName - Profile name
 * @param {Object} extraMetadata - Additional metadata to save (e.g., username)
 * @returns {Promise<void>}
 */
export async function saveProfile(page, platform, profileName, extraMetadata = {}) {
  const profileDir = getProfileDir(platform, profileName);
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  mkdirSync(profileDir, { recursive: true });

  // Save cookies
  const cookies = await page.context().cookies();
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  writeFileSync(join(profileDir, 'cookies.json'), JSON.stringify(cookies, null, 2));

  // Save localStorage and sessionStorage
  const storageData = await page.evaluate(() => {
    /* eslint-disable no-undef, security/detect-object-injection -- browser context: safe key access from storage API */
    const local = {};
    for (let i = 0; i < localStorage.length; i++) {
      const key = localStorage.key(i);
      local[key] = localStorage.getItem(key);
    }
    const session = {};
    for (let i = 0; i < sessionStorage.length; i++) {
      const key = sessionStorage.key(i);
      session[key] = sessionStorage.getItem(key);
    }
    return { localStorage: local, sessionStorage: session };
    /* eslint-enable no-undef, security/detect-object-injection */
  });

  // eslint-disable-next-line security/detect-non-literal-fs-filename
  writeFileSync(
    join(profileDir, 'localStorage.json'),
    JSON.stringify(storageData.localStorage, null, 2)
  );
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  writeFileSync(
    join(profileDir, 'sessionStorage.json'),
    JSON.stringify(storageData.sessionStorage, null, 2)
  );

  // Save/update metadata
  const metadataPath = join(profileDir, 'metadata.json');
  let existingMetadata = {};
  // eslint-disable-next-line security/detect-non-literal-fs-filename
  if (existsSync(metadataPath)) {
    try {
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      existingMetadata = JSON.parse(readFileSync(metadataPath, 'utf-8'));
    } catch {
      // Corrupted metadata, start fresh
    }
  }

  const metadata = {
    ...existingMetadata,
    ...extraMetadata,
    platform,
    profileName,
    created_at: existingMetadata.created_at || new Date().toISOString(),
    last_used_at: new Date().toISOString(),
  };

  // eslint-disable-next-line security/detect-non-literal-fs-filename
  writeFileSync(metadataPath, JSON.stringify(metadata, null, 2));

  logger.info('Profile saved', { platform, profileName });
}

/**
 * Load a saved browser profile into a page (cookies + storage)
 * Only cookies are applied here; use restoreStorage() after navigation for web storage.
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name
 * @param {string} profileName - Profile name
 * @returns {Promise<boolean>} True if profile was loaded successfully
 */
export async function loadProfile(page, platform, profileName) {
  const profileDir = getProfileDir(platform, profileName);
  const cookiesPath = join(profileDir, 'cookies.json');

  // eslint-disable-next-line security/detect-non-literal-fs-filename
  if (!existsSync(cookiesPath)) {
    logger.debug('No saved profile found', { platform, profileName });
    return false;
  }

  try {
    // Load cookies
    // eslint-disable-next-line security/detect-non-literal-fs-filename
    const cookies = JSON.parse(readFileSync(cookiesPath, 'utf-8'));
    await page.context().addCookies(cookies);
  } catch (error) {
    logger.warn('Failed to load profile', { platform, profileName, error: error.message });
    return false;
  }

  // Update last_used_at in metadata. This is bookkeeping only: the cookies are already in
  // the context, so a corrupted metadata file must not turn the load into a failure.
  try {
    const metadataPath = join(profileDir, 'metadata.json');
    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (existsSync(metadataPath)) {
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      const metadata = JSON.parse(readFileSync(metadataPath, 'utf-8'));
      metadata.last_used_at = new Date().toISOString();
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      writeFileSync(metadataPath, JSON.stringify(metadata, null, 2));
    }
  } catch (error) {
    logger.warn('Could not update profile metadata', {
      platform,
      profileName,
      error: error.message,
    });
  }

  logger.info('Profile loaded', { platform, profileName });
  return true;
}

/**
 * Restore localStorage and sessionStorage after page navigation
 * Must be called AFTER page.goto() since storage is domain-specific
 * Failures are logged at debug level and never thrown.
 * @param {Page} page - Playwright page instance
 * @param {string} platform - Platform name
 * @param {string} profileName - Profile name
 * @returns {Promise<void>}
 */
export async function restoreStorage(page, platform, profileName) {
  const profileDir = getProfileDir(platform, profileName);

  try {
    const localPath = join(profileDir, 'localStorage.json');
    const sessionPath = join(profileDir, 'sessionStorage.json');

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (existsSync(localPath)) {
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      const localData = JSON.parse(readFileSync(localPath, 'utf-8'));
      await page.evaluate(data => {
        /* eslint-disable no-undef */
        for (const [key, value] of Object.entries(data)) {
          localStorage.setItem(key, value);
        }
        /* eslint-enable no-undef */
      }, localData);
    }

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (existsSync(sessionPath)) {
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      const sessionData = JSON.parse(readFileSync(sessionPath, 'utf-8'));
      await page.evaluate(data => {
        /* eslint-disable no-undef */
        for (const [key, value] of Object.entries(data)) {
          sessionStorage.setItem(key, value);
        }
        /* eslint-enable no-undef */
      }, sessionData);
    }
  } catch (error) {
    logger.debug('Could not restore storage', { platform, profileName, error: error.message });
  }
}

/**
 * List all profiles for a platform with metadata
 * Only profile directories containing a metadata.json file are reported.
 * @param {string} platform - Platform name ('x', 'linkedin', or null for all)
 * @returns {Array<Object>} Array of profile metadata objects
 */
export function listProfiles(platform = null) {
  const profiles = [];
  const platforms = platform ? [platform] : ['x', 'linkedin'];

  for (const plat of platforms) {
    const platDir = join(PROFILES_DIR, plat);
    // eslint-disable-next-line security/detect-non-literal-fs-filename
    if (!existsSync(platDir)) continue;

    // eslint-disable-next-line security/detect-non-literal-fs-filename
    const dirs = readdirSync(platDir, { withFileTypes: true })
      .filter(d => d.isDirectory())
      .map(d => d.name);

    for (const dir of dirs) {
      const metadataPath = join(platDir, dir, 'metadata.json');
      // eslint-disable-next-line security/detect-non-literal-fs-filename
      if (existsSync(metadataPath)) {
        try {
          // eslint-disable-next-line security/detect-non-literal-fs-filename
          const metadata = JSON.parse(readFileSync(metadataPath, 'utf-8'));
          profiles.push(metadata);
        } catch {
          profiles.push({ platform: plat, profileName: dir, error: 'corrupted metadata' });
        }
      }
    }
  }

  return profiles;
}

/**
 * Get the next profile to use (LRU strategy)
 * If fewer profiles exist than configured count, creates a new profile name.
 * Otherwise returns the least recently used profile.
 * The count comes from X_PROFILE_COUNT for 'x' and LINKEDIN_PROFILE_COUNT for any other
 * platform; missing, non-numeric or non-positive values fall back to 3.
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @returns {string} Profile name to use
 */
export function getNextProfile(platform) {
  const DEFAULT_PROFILE_COUNT = 3;
  const countEnvVar = platform === 'x' ? 'X_PROFILE_COUNT' : 'LINKEDIN_PROFILE_COUNT';
  const parsedCount = Number.parseInt(process.env[countEnvVar] || '', 10); // eslint-disable-line security/detect-object-injection -- safe: ternary-selected constant key
  // Guard against NaN / 0 / negatives: they would skip slot creation and then index an
  // empty profile list below.
  const maxProfiles =
    Number.isInteger(parsedCount) && parsedCount > 0 ? parsedCount : DEFAULT_PROFILE_COUNT;

  const existing = listProfiles(platform);

  // If fewer profiles than configured, create a new one
  if (existing.length < maxProfiles) {
    // Pick the first free slot instead of length + 1, which collides with an existing
    // profile when the numbering has gaps (e.g. only "profile-2" is left on disk).
    const takenNames = new Set(existing.map(profile => profile.profileName));
    let slot = existing.length + 1;
    while (takenNames.has(`profile-${slot}`)) slot++;
    const newName = `profile-${slot}`;
    logger.info('Creating new profile slot', {
      platform,
      profileName: newName,
      existing: existing.length,
      max: maxProfiles,
    });
    return newName;
  }

  // Sort by last_used_at ascending (oldest first) for LRU
  existing.sort((a, b) => {
    const aTime = a.last_used_at ? new Date(a.last_used_at).getTime() : 0;
    const bTime = b.last_used_at ? new Date(b.last_used_at).getTime() : 0;
    return aTime - bTime;
  });

  const selected = existing[0].profileName;
  logger.info('Selected LRU profile', {
    platform,
    profileName: selected,
    lastUsed: existing[0].last_used_at,
  });
  return selected;
}

/**
 * Create a persistent browser context with profile support.
 * Does NOT override userAgent - uses Chromium's default (stealth plugin strips markers).
 * @param {Browser} browser - Playwright browser instance
 * @param {string} platform - Platform name ('x' or 'linkedin')
 * @param {string} profileName - Profile name
 * @param {Object} options - Context options (viewport, locale, etc.)
 * @returns {Promise<Object>} { context, page, profileLoaded }
 */
export async function createPersistentContext(browser, platform, profileName, options = {}) {
  const defaultTimezone = process.env.TIMEZONE || 'Australia/Sydney';
  const acceptLanguage = process.env.ACCEPT_LANGUAGE || 'en-AU,en;q=0.9';
  const defaultLocale = acceptLanguage.split(',')[0];

  const {
    viewport = null, // null = use window size (for headed mode)
    locale = defaultLocale,
    timezoneId = defaultTimezone,
  } = options;

  // Create context WITHOUT userAgent override - let Chromium use its default
  const context = await browser.newContext({
    viewport,
    locale,
    timezoneId,
    permissions: [],
    extraHTTPHeaders: { 'Accept-Language': acceptLanguage },
  });

  const page = await context.newPage();

  // Load saved profile (cookies only - storage must be restored after navigation)
  const profileLoaded = await loadProfile(page, platform, profileName);

  return { context, page, profileLoaded };
}

export default {
  launchStealthBrowser,
  createStealthContext,
  configureNopeCHA,
  launchWithExtensions,
  randomDelay,
  humanMouseMove,
  humanScroll,
  humanClick,
  humanType,
  isSocialMediaUrl,
  waitForCloudflare,
  saveProfile,
  loadProfile,
  restoreStorage,
  listProfiles,
  getNextProfile,
  createPersistentContext,
};