/ src / browser / stealth.ts
stealth.ts
  1  /**
  2   * Stealth anti-detection module.
  3   *
  4   * Generates JS code that patches browser globals to hide automation
  5   * fingerprints (e.g. navigator.webdriver, missing chrome object, empty
  6   * plugin list). Injected before page scripts run so that websites cannot
  7   * detect CDP / extension-based control.
  8   *
  9   * Inspired by puppeteer-extra-plugin-stealth.
 10   */
 11  
 12  /**
 13   * Return a self-contained JS string that, when evaluated in a page context,
 14   * applies all stealth patches. Safe to call multiple times — the guard flag
 15   * ensures patches are applied only once.
 16   *
 17   * The generated string is pure static (no dynamic parameters), so we cache
 18   * it after the first call to avoid re-building ~350 lines on every goto().
 19   */
 20  let _cachedStealthJs: string | undefined;
 21  
 22  export function generateStealthJs(): string {
 23    if (_cachedStealthJs !== undefined) return _cachedStealthJs;
 24    return (_cachedStealthJs = `
 25      (() => {
 26        // Guard: prevent double-injection across separate CDP evaluations.
 27        // We cannot use a closure variable (each eval is a fresh scope), and
 28        // window properties / Symbols are discoverable by anti-bot scripts.
 29        // Instead, stash the flag in a non-enumerable getter on a built-in
 30        // prototype that fingerprinters are unlikely to scan.
 31        const _gProto = EventTarget.prototype;
 32        const _gKey = '__lsn';  // looks like an internal listener cache
 33        if (_gProto[_gKey]) return 'skipped';
 34        try {
 35          Object.defineProperty(_gProto, _gKey, { value: true, enumerable: false, configurable: true });
 36        } catch {}
 37  
 38        // 1. navigator.webdriver → false
 39        //    Most common check; Playwright/Puppeteer/CDP set this to true.
 40        //    Real Chrome returns false (not undefined) — returning undefined is
 41        //    itself a detection signal for advanced fingerprinters.
 42        try {
 43          Object.defineProperty(navigator, 'webdriver', {
 44            get: () => false,
 45            configurable: true,
 46          });
 47        } catch {}
 48  
 49        // 2. window.chrome stub
 50        //    Real Chrome exposes window.chrome with runtime, loadTimes, csi.
 51        //    Headless/automated Chrome may not have it.
 52        try {
 53          if (!window.chrome) {
 54            window.chrome = {
 55              runtime: {
 56                onConnect: { addListener: () => {}, removeListener: () => {} },
 57                onMessage: { addListener: () => {}, removeListener: () => {} },
 58              },
 59              loadTimes: () => ({}),
 60              csi: () => ({}),
 61            };
 62          }
 63        } catch {}
 64  
 65        // 3. navigator.plugins — fake population only if empty
 66        //    Real user browser already has plugins; only patch in automated/headless
 67        //    contexts where the list is empty (overwriting real plugins with fakes
 68        //    would be counterproductive and detectable).
 69        try {
 70          if (!navigator.plugins || navigator.plugins.length === 0) {
 71            const fakePlugins = [
 72              { name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
 73              { name: 'Chrome PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
 74              { name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
 75              { name: 'Microsoft Edge PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
 76              { name: 'WebKit built-in PDF', filename: 'internal-pdf-viewer', description: '' },
 77            ];
 78            fakePlugins.item = (i) => fakePlugins[i] || null;
 79            fakePlugins.namedItem = (n) => fakePlugins.find(p => p.name === n) || null;
 80            fakePlugins.refresh = () => {};
 81            Object.defineProperty(navigator, 'plugins', {
 82              get: () => fakePlugins,
 83              configurable: true,
 84            });
 85          }
 86        } catch {}
 87  
 88        // 4. navigator.languages — guarantee non-empty
 89        //    Some automated contexts return undefined or empty array.
 90        try {
 91          if (!navigator.languages || navigator.languages.length === 0) {
 92            Object.defineProperty(navigator, 'languages', {
 93              get: () => ['en-US', 'en'],
 94              configurable: true,
 95            });
 96          }
 97        } catch {}
 98  
 99        // 5. Permissions.query — normalize notification permission
100        //    Headless Chrome throws on Permissions.query({ name: 'notifications' }).
101        try {
102          const origQuery = window.Permissions?.prototype?.query;
103          if (origQuery) {
104            window.Permissions.prototype.query = function (parameters) {
105              if (parameters?.name === 'notifications') {
106                return Promise.resolve({ state: Notification.permission, onchange: null });
107              }
108              return origQuery.call(this, parameters);
109            };
110          }
111        } catch {}
112  
113        // 6. Clean automation artifacts
114        //    Remove properties left by Playwright, Puppeteer, or CDP injection.
115        try {
116          delete window.__playwright;
117          delete window.__puppeteer;
118          // ChromeDriver injects cdc_ prefixed globals; the suffix varies by version,
119          // so scan window for any matching property rather than hardcoding names.
120          for (const prop of Object.getOwnPropertyNames(window)) {
121            if (prop.startsWith('cdc_') || prop.startsWith('__cdc_')) {
122              try { delete window[prop]; } catch {}
123            }
124          }
125        } catch {}
126  
127        // 7. CDP stack trace cleanup
128        //    Runtime.evaluate injects scripts whose source URLs appear in Error
129        //    stack traces (e.g. __puppeteer_evaluation_script__, pptr:, debugger://).
130        //    Websites detect automation by doing: new Error().stack and inspecting it.
131        //    We override the stack property getter on Error.prototype to filter them.
132        //    Note: Error.prepareStackTrace is V8/Node-only and not available in
133        //    browser page context, so we use a property descriptor approach instead.
134        //    We use generic protocol patterns instead of product-specific names to
135        //    also catch our own injected code frames without leaking identifiers.
136        try {
137          const _origDescriptor = Object.getOwnPropertyDescriptor(Error.prototype, 'stack');
138          const _cdpPatterns = [
139            'puppeteer_evaluation_script',
140            'pptr:',
141            'debugger://',
142            '__playwright',
143            '__puppeteer',
144          ];
145          if (_origDescriptor && _origDescriptor.get) {
146            Object.defineProperty(Error.prototype, 'stack', {
147              get: function () {
148                const raw = _origDescriptor.get.call(this);
149                if (typeof raw !== 'string') return raw;
150                return raw.split('\\n').filter(line =>
151                  !_cdpPatterns.some(p => line.includes(p))
152                ).join('\\n');
153              },
154              configurable: true,
155            });
156          }
157        } catch {}
158  
159        // ── Shared toString disguise infrastructure ──
160        // Save the pristine Function.prototype.toString BEFORE any patches,
161        // so all subsequent disguises use the real native reference.
162        // Anti-bot scripts detect per-instance toString overrides via:
163        //   Function.hasOwnProperty('toString')          → true if patched
164        //   Function.prototype.toString.call(fn) !== fn.toString()
165        // Instead we patch Function.prototype.toString once with a WeakMap
166        // lookup, making disguised functions indistinguishable from native.
167        const _origToString = Function.prototype.toString;
168        const _disguised = new WeakMap();
169        try {
170          Object.defineProperty(Function.prototype, 'toString', {
171            value: function() {
172              const override = _disguised.get(this);
173              return override !== undefined ? override : _origToString.call(this);
174            },
175            writable: true, configurable: true,
176          });
177        } catch {}
178        const _disguise = (fn, name) => {
179          _disguised.set(fn, 'function ' + name + '() { [native code] }');
180          try { Object.defineProperty(fn, 'name', { value: name, configurable: true }); } catch {}
181          return fn;
182        };
183  
184        // 8. Anti-debugger statement trap
185        //    Sites inject debugger statements to detect DevTools/CDP.
186        //    When a CDP debugger is attached, the statement pauses execution
187        //    and the site measures the time gap to confirm automation.
188        //    We neutralize this by overriding the Function constructor and
189        //    eval to strip debugger statements from dynamically created code.
190        //    Note: this does NOT affect static debugger statements in parsed
191        //    scripts — those require CDP Debugger.setBreakpointsActive(false)
192        //    which we handle at the extension level.
193        //    Caveat: the regex targets standalone debugger statements (preceded
194        //    by a statement boundary) to minimise false positives inside string
195        //    literals, but cannot perfectly distinguish all cases without a
196        //    full parser. This is an acceptable trade-off for stealth code.
197        try {
198          const _OrigFunction = Function;
199          // Match standalone debugger statements preceded by a statement
200          // boundary (start of string, semicolon, brace, or newline).
201          // This avoids most false positives inside string literals like
202          // "use debugger mode" while still catching the anti-bot patterns.
203          const _debuggerRe = /(?:^|(?<=[;{}\\n\\r]))\\s*debugger\\s*;?/g;
204          const _cleanDebugger = (src) => typeof src === 'string' ? src.replace(_debuggerRe, '') : src;
205          // Patch Function constructor to strip debugger from dynamic code.
206          // Support both Function('code') and new Function('code') via
207          // new.target / Reflect.construct.
208          const _PatchedFunction = function(...args) {
209            if (args.length > 0) {
210              args[args.length - 1] = _cleanDebugger(args[args.length - 1]);
211            }
212            if (new.target) {
213              return Reflect.construct(_OrigFunction, args, new.target);
214            }
215            return _OrigFunction.apply(this, args);
216          };
217          _PatchedFunction.prototype = _OrigFunction.prototype;
218          Object.setPrototypeOf(_PatchedFunction, _OrigFunction);
219          _disguise(_PatchedFunction, 'Function');
220          try { window.Function = _PatchedFunction; } catch {}
221  
222          // Patch eval to strip debugger
223          const _origEval = window.eval;
224          const _patchedEval = function(code) {
225            return _origEval.call(this, _cleanDebugger(code));
226          };
227          _disguise(_patchedEval, 'eval');
228          try { window.eval = _patchedEval; } catch {}
229        } catch {}
230  
231        // 9. Console method fingerprinting defense
232        //    When CDP Runtime.enable is called, Chrome replaces console.log etc.
233        //    with CDP-bound versions. These bound functions have a different
234        //    toString() output: "function log() { [native code] }" becomes
235        //    something like "function () { [native code] }" (no name) or the
236        //    bound function signature leaks. Anti-bot scripts check:
237        //      console.log.toString().includes('[native code]')
238        //      console.log.name === 'log'
239        //    We re-wrap console methods and register them via the shared
240        //    _disguise infrastructure so Function.prototype.toString.call()
241        //    also returns the correct native string.
242        try {
243          const _consoleMethods = ['log', 'warn', 'error', 'info', 'debug', 'table', 'trace', 'dir', 'group', 'groupEnd', 'groupCollapsed', 'clear', 'count', 'assert', 'profile', 'profileEnd', 'time', 'timeEnd', 'timeStamp'];
244          for (const _m of _consoleMethods) {
245            if (typeof console[_m] !== 'function') continue;
246            const _origMethod = console[_m];
247            const _nativeStr = 'function ' + _m + '() { [native code] }';
248            // Only patch if toString is wrong (i.e. CDP has replaced it)
249            try {
250              const _currentStr = _origToString.call(_origMethod);
251              if (_currentStr === _nativeStr) continue; // already looks native
252            } catch {}
253            const _wrapper = function() { return _origMethod.apply(console, arguments); };
254            Object.defineProperty(_wrapper, 'length', { value: _origMethod.length || 0, configurable: true });
255            _disguise(_wrapper, _m);
256            try { console[_m] = _wrapper; } catch {}
257          }
258        } catch {}
259  
260        // 10. window.outerWidth/outerHeight defense
261        //     When DevTools or CDP debugger is attached, Chrome may alter the
262        //     window dimensions. Anti-bot scripts compare outerWidth/innerWidth
263        //     and outerHeight/innerHeight — a significant difference indicates
264        //     DevTools is open. We freeze the relationship so the delta stays
265        //     consistent with a normal browser window.
266        //     Thresholds: width delta > 100px or height delta > 200px indicates
267        //     a docked DevTools panel. When triggered, we report outerWidth
268        //     equal to innerWidth (normal for maximised windows) and
269        //     outerHeight as innerHeight + the captured "normal" delta (capped
270        //     to a reasonable range), so the result is plausible across OSes.
271        try {
272          const _normalWidthDelta = window.outerWidth - window.innerWidth;
273          const _normalHeightDelta = window.outerHeight - window.innerHeight;
274          // Only patch if the delta looks suspicious (e.g. DevTools docked)
275          if (_normalWidthDelta > 100 || _normalHeightDelta > 200) {
276            Object.defineProperty(window, 'outerWidth', {
277              get: () => window.innerWidth,
278              configurable: true,
279            });
280            // Use a clamped height offset (40-120px covers macOS ~78px,
281            // Windows ~40px, and Linux ~37-50px title bar heights).
282            const _heightOffset = Math.max(40, Math.min(120, _normalHeightDelta));
283            Object.defineProperty(window, 'outerHeight', {
284              get: () => window.innerHeight + _heightOffset,
285              configurable: true,
286            });
287          }
288        } catch {}
289  
290        // 11. Performance API cleanup
291        //     CDP injects internal resources and timing entries that don't exist
292        //     in normal browsing. Filter entries with debugger/devtools URLs.
293        try {
294          const _origGetEntries = Performance.prototype.getEntries;
295          const _origGetByType = Performance.prototype.getEntriesByType;
296          const _origGetByName = Performance.prototype.getEntriesByName;
297          const _suspiciousPatterns = ['debugger', 'devtools', '__puppeteer', '__playwright', 'pptr:'];
298          const _filterEntries = (entries) => {
299            if (!Array.isArray(entries)) return entries;
300            return entries.filter(e => {
301              const name = e.name || '';
302              return !_suspiciousPatterns.some(p => name.includes(p));
303            });
304          };
305          Performance.prototype.getEntries = function() {
306            return _filterEntries(_origGetEntries.call(this));
307          };
308          Performance.prototype.getEntriesByType = function(type) {
309            return _filterEntries(_origGetByType.call(this, type));
310          };
311          Performance.prototype.getEntriesByName = function(name, type) {
312            return _filterEntries(_origGetByName.call(this, name, type));
313          };
314        } catch {}
315  
316        // 12. WebDriver-related property defense
317        //     Some anti-bot systems check additional navigator properties
318        //     and document properties that may indicate automation.
319        try {
320          // document.$cdc_ properties (ChromeDriver specific, backup for #6)
321          for (const _prop of Object.getOwnPropertyNames(document)) {
322            if (_prop.startsWith('$cdc_') || _prop.startsWith('$chrome_')) {
323              try { delete document[_prop]; } catch {}
324            }
325          }
326        } catch {}
327  
328        // 13. Iframe contentWindow.chrome consistency
329        //     Anti-bot scripts create iframes and check if
330        //     iframe.contentWindow.chrome exists and matches the parent.
331        //     CDP-controlled pages may have inconsistent iframe contexts.
332        try {
333          const _origHTMLIFrame = HTMLIFrameElement.prototype;
334          const _origContentWindow = Object.getOwnPropertyDescriptor(_origHTMLIFrame, 'contentWindow');
335          if (_origContentWindow && _origContentWindow.get) {
336            Object.defineProperty(_origHTMLIFrame, 'contentWindow', {
337              get: function() {
338                const _w = _origContentWindow.get.call(this);
339                if (_w) {
340                  try {
341                    if (!_w.chrome) {
342                      Object.defineProperty(_w, 'chrome', {
343                        value: window.chrome,
344                        writable: true,
345                        configurable: true,
346                      });
347                    }
348                  } catch {}
349                }
350                return _w;
351              },
352              configurable: true,
353            });
354          }
355        } catch {}
356  
357        return 'applied';
358      })()
359    `);
360  }