// src/browser/base-page.ts
  1  /**
  2   * BasePage — shared IPage method implementations for DOM helpers.
  3   *
  4   * Both Page (daemon-backed) and CDPPage (direct CDP) execute JS the same way
  5   * for DOM operations. This base class deduplicates ~200 lines of identical
  6   * click/type/scroll/wait/snapshot/interceptor methods.
  7   *
  8   * Subclasses implement the transport-specific methods: goto, evaluate,
  9   * getCookies, screenshot, tabs, etc.
 10   */
 11  
 12  import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js';
 13  import { generateSnapshotJs, getFormStateJs } from './dom-snapshot.js';
 14  import {
 15    pressKeyJs,
 16    waitForTextJs,
 17    waitForCaptureJs,
 18    waitForSelectorJs,
 19    scrollJs,
 20    autoScrollJs,
 21    networkRequestsJs,
 22    waitForDomStableJs,
 23  } from './dom-helpers.js';
 24  import {
 25    resolveTargetJs,
 26    clickResolvedJs,
 27    typeResolvedJs,
 28    scrollResolvedJs,
 29    type ResolveOptions,
 30    type TargetMatchLevel,
 31  } from './target-resolver.js';
 32  import { TargetError, type TargetErrorCode } from './target-errors.js';
 33  
 34  export interface ResolveSuccess {
 35    matches_n: number;
 36    /**
 37     * Cascading stale-ref tier the resolver traversed. Callers surface this to
 38     * agents so `stable` / `reidentified` hits are visibly distinct from a
 39     * clean `exact` match — the page changed, the action still succeeded.
 40     */
 41    match_level: TargetMatchLevel;
 42  }
 43  
 44  /**
 45   * Execute `resolveTargetJs` once, throw structured `TargetError` on failure.
 46   * Single helper so click/typeText/scrollTo share one resolution pathway,
 47   * which is what the selector-first contract promises agents.
 48   */
 49  async function runResolve(
 50    page: { evaluate(js: string): Promise<unknown> },
 51    ref: string,
 52    opts: ResolveOptions = {},
 53  ): Promise<ResolveSuccess> {
 54    const resolution = (await page.evaluate(resolveTargetJs(ref, opts))) as
 55      | { ok: true; matches_n: number; match_level: TargetMatchLevel }
 56      | { ok: false; code: TargetErrorCode; message: string; hint: string; candidates?: string[]; matches_n?: number };
 57    if (!resolution.ok) {
 58      throw new TargetError({
 59        code: resolution.code,
 60        message: resolution.message,
 61        hint: resolution.hint,
 62        candidates: resolution.candidates,
 63        matches_n: resolution.matches_n,
 64      });
 65    }
 66    return { matches_n: resolution.matches_n, match_level: resolution.match_level };
 67  }
 68  import { formatSnapshot } from '../snapshotFormatter.js';
 69  export abstract class BasePage implements IPage {
 70    protected _lastUrl: string | null = null;
 71    /** Cached previous snapshot hashes for incremental diff marking */
 72    private _prevSnapshotHashes: string | null = null;
 73  
 74    // ── Transport-specific methods (must be implemented by subclasses) ──
 75  
 76    abstract goto(url: string, options?: { waitUntil?: 'load' | 'none'; settleMs?: number }): Promise<void>;
 77    abstract evaluate(js: string): Promise<unknown>;
 78  
 79    /**
 80     * Safely evaluate JS with pre-serialized arguments.
 81     * Each key in `args` becomes a `const` declaration with JSON-serialized value,
 82     * prepended to the JS code. Prevents injection by design.
 83     *
 84     * Usage:
 85     *   page.evaluateWithArgs(`(async () => { return sym; })()`, { sym: userInput })
 86     */
 87    async evaluateWithArgs(js: string, args: Record<string, unknown>): Promise<unknown> {
 88      const declarations = Object.entries(args)
 89        .map(([key, value]) => {
 90          if (!/^[a-zA-Z_$][a-zA-Z0-9_$]*$/.test(key)) {
 91            throw new Error(`evaluateWithArgs: invalid key "${key}"`);
 92          }
 93          return `const ${key} = ${JSON.stringify(value)};`;
 94        })
 95        .join('\n');
 96      return this.evaluate(`${declarations}\n${js}`);
 97    }
 98  
 99    abstract getCookies(opts?: { domain?: string; url?: string }): Promise<BrowserCookie[]>;
100    abstract screenshot(options?: ScreenshotOptions): Promise<string>;
101    abstract tabs(): Promise<unknown[]>;
102    abstract selectTab(target: number | string): Promise<void>;
103  
104    // ── Shared DOM helper implementations ──
105  
106    async click(ref: string, opts: ResolveOptions = {}): Promise<ResolveSuccess> {
107      // Phase 1: Resolve target with fingerprint verification
108      const resolved = await runResolve(this, ref, opts);
109  
110      // Phase 2: Execute click on resolved element
111      const result = await this.evaluate(clickResolvedJs()) as
112        | string
113        | { status: string; x?: number; y?: number; w?: number; h?: number; error?: string }
114        | null;
115  
116      if (typeof result === 'string' || result == null) return resolved;
117  
118      if (result.status === 'clicked') return resolved;
119  
120      // JS click failed — try CDP native click if coordinates available
121      if (result.x != null && result.y != null) {
122        const success = await this.tryNativeClick(result.x, result.y);
123        if (success) return resolved;
124      }
125  
126      throw new Error(`Click failed: ${result.error ?? 'JS click and CDP fallback both failed'}`);
127    }
128  
129    /** Override in subclasses with CDP native click support */
130    protected async tryNativeClick(_x: number, _y: number): Promise<boolean> {
131      return false;
132    }
133  
134    async typeText(ref: string, text: string, opts: ResolveOptions = {}): Promise<ResolveSuccess> {
135      const resolved = await runResolve(this, ref, opts);
136      await this.evaluate(typeResolvedJs(text));
137      return resolved;
138    }
139  
140    async pressKey(key: string): Promise<void> {
141      await this.evaluate(pressKeyJs(key));
142    }
143  
144    async scrollTo(ref: string, opts: ResolveOptions = {}): Promise<unknown> {
145      const resolved = await runResolve(this, ref, opts);
146      const result = (await this.evaluate(scrollResolvedJs())) as Record<string, unknown> | null;
147      // Fold match_level into the scroll payload so the user-facing envelope
148      // carries it the same way click / type do.
149      if (result && typeof result === 'object') {
150        return { ...result, matches_n: resolved.matches_n, match_level: resolved.match_level };
151      }
152      return { matches_n: resolved.matches_n, match_level: resolved.match_level };
153    }
154  
155    async getFormState(): Promise<Record<string, unknown>> {
156      return (await this.evaluate(getFormStateJs())) as Record<string, unknown>;
157    }
158  
159    async scroll(direction: string = 'down', amount: number = 500): Promise<void> {
160      await this.evaluate(scrollJs(direction, amount));
161    }
162  
163    async autoScroll(options?: { times?: number; delayMs?: number }): Promise<void> {
164      const times = options?.times ?? 3;
165      const delayMs = options?.delayMs ?? 2000;
166      await this.evaluate(autoScrollJs(times, delayMs));
167    }
168  
169    async networkRequests(includeStatic: boolean = false): Promise<unknown[]> {
170      const result = await this.evaluate(networkRequestsJs(includeStatic));
171      return Array.isArray(result) ? result : [];
172    }
173  
174    async consoleMessages(_level: string = 'info'): Promise<unknown[]> {
175      return [];
176    }
177  
178    async wait(options: number | WaitOptions): Promise<void> {
179      if (typeof options === 'number') {
180        if (options >= 1) {
181          try {
182            const maxMs = options * 1000;
183            await this.evaluate(waitForDomStableJs(maxMs, Math.min(500, maxMs)));
184            return;
185          } catch {
186            // Fallback: fixed sleep
187          }
188        }
189        await new Promise(resolve => setTimeout(resolve, options * 1000));
190        return;
191      }
192      if (typeof options.time === 'number') {
193        await new Promise(resolve => setTimeout(resolve, options.time! * 1000));
194        return;
195      }
196      if (options.selector) {
197        const timeout = (options.timeout ?? 10) * 1000;
198        await this.evaluate(waitForSelectorJs(options.selector, timeout));
199        return;
200      }
201      if (options.text) {
202        const timeout = (options.timeout ?? 30) * 1000;
203        await this.evaluate(waitForTextJs(options.text, timeout));
204      }
205    }
206  
207    async snapshot(opts: SnapshotOptions = {}): Promise<unknown> {
208      const snapshotJs = generateSnapshotJs({
209        viewportExpand: opts.viewportExpand ?? 2000,
210        maxDepth: Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200)),
211        interactiveOnly: opts.interactive ?? false,
212        maxTextLength: opts.maxTextLength ?? 120,
213        includeScrollInfo: true,
214        bboxDedup: true,
215        previousHashes: this._prevSnapshotHashes,
216      });
217  
218      try {
219        const result = await this.evaluate(snapshotJs);
220        // Read back the hashes stored by the snapshot for next diff
221        try {
222          const hashes = await this.evaluate('window.__opencli_prev_hashes') as string | null;
223          this._prevSnapshotHashes = typeof hashes === 'string' ? hashes : null;
224        } catch {
225          // Non-fatal: diff is best-effort
226        }
227        return result;
228      } catch (err) {
229        // Log snapshot failure for debugging, then fallback to basic accessibility tree
230        if (process.env.DEBUG_SNAPSHOT) {
231          process.stderr.write(`[snapshot] DOM snapshot failed, falling back to accessibility tree: ${(err as Error)?.message?.slice(0, 200)}\n`);
232        }
233        return this._basicSnapshot(opts);
234      }
235    }
236  
237    async getCurrentUrl(): Promise<string | null> {
238      if (this._lastUrl) return this._lastUrl;
239      try {
240        const current = await this.evaluate('window.location.href');
241        if (typeof current === 'string' && current) {
242          this._lastUrl = current;
243          return current;
244        }
245      } catch {
246        // Best-effort
247      }
248      return null;
249    }
250  
251    async installInterceptor(pattern: string): Promise<void> {
252      const { generateInterceptorJs } = await import('../interceptor.js');
253      await this.evaluate(generateInterceptorJs(JSON.stringify(pattern), {
254        arrayName: '__opencli_xhr',
255        patchGuard: '__opencli_interceptor_patched',
256      }));
257    }
258  
259    async getInterceptedRequests(): Promise<unknown[]> {
260      const { generateReadInterceptedJs } = await import('../interceptor.js');
261      const result = await this.evaluate(generateReadInterceptedJs('__opencli_xhr'));
262      return Array.isArray(result) ? result : [];
263    }
264  
265    async waitForCapture(timeout: number = 10): Promise<void> {
266      const maxMs = timeout * 1000;
267      await this.evaluate(waitForCaptureJs(maxMs));
268    }
269  
270    /** Fallback basic snapshot */
271    protected async _basicSnapshot(opts: Pick<SnapshotOptions, 'interactive' | 'compact' | 'maxDepth' | 'raw'> = {}): Promise<unknown> {
272      const maxDepth = Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200));
273      const code = `
274        (async () => {
275          function buildTree(node, depth) {
276            if (depth > ${maxDepth}) return '';
277            const role = node.getAttribute?.('role') || node.tagName?.toLowerCase() || 'generic';
278            const name = node.getAttribute?.('aria-label') || node.getAttribute?.('alt') || node.textContent?.trim().slice(0, 80) || '';
279            const isInteractive = ['a', 'button', 'input', 'select', 'textarea'].includes(node.tagName?.toLowerCase()) || node.getAttribute?.('tabindex') != null;
280  
281            ${opts.interactive ? 'if (!isInteractive && !node.children?.length) return "";' : ''}
282  
283            let indent = '  '.repeat(depth);
284            let line = indent + role;
285            if (name) line += ' "' + name.replace(/"/g, '\\\\\\"') + '"';
286            if (node.tagName?.toLowerCase() === 'a' && node.href) line += ' [' + node.href + ']';
287            if (node.tagName?.toLowerCase() === 'input') line += ' [' + (node.type || 'text') + ']';
288  
289            let result = line + '\\n';
290            if (node.children) {
291              for (const child of node.children) {
292                result += buildTree(child, depth + 1);
293              }
294            }
295            return result;
296          }
297          return buildTree(document.body, 0);
298        })()
299      `;
300      const raw = await this.evaluate(code);
301      if (opts.raw) return raw;
302      if (typeof raw === 'string') return formatSnapshot(raw, opts);
303      return raw;
304    }
305  }