base-page.ts
/**
 * BasePage — shared IPage method implementations for DOM helpers.
 *
 * Both Page (daemon-backed) and CDPPage (direct CDP) execute JS the same way
 * for DOM operations. This base class deduplicates ~200 lines of identical
 * click/type/scroll/wait/snapshot/interceptor methods.
 *
 * Subclasses implement the transport-specific methods: goto, evaluate,
 * getCookies, screenshot, tabs, etc.
 */

import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js';
import { formatSnapshot } from '../snapshotFormatter.js';
import { generateSnapshotJs, getFormStateJs } from './dom-snapshot.js';
import {
  pressKeyJs,
  waitForTextJs,
  waitForCaptureJs,
  waitForSelectorJs,
  scrollJs,
  autoScrollJs,
  networkRequestsJs,
  waitForDomStableJs,
} from './dom-helpers.js';
import {
  resolveTargetJs,
  clickResolvedJs,
  typeResolvedJs,
  scrollResolvedJs,
  type ResolveOptions,
  type TargetMatchLevel,
} from './target-resolver.js';
import { TargetError, type TargetErrorCode } from './target-errors.js';

export interface ResolveSuccess {
  matches_n: number;
  /**
   * Cascading stale-ref tier the resolver traversed. Callers surface this to
   * agents so `stable` / `reidentified` hits are visibly distinct from a
   * clean `exact` match — the page changed, the action still succeeded.
   */
  match_level: TargetMatchLevel;
}

/** Payload shape produced by the script from `resolveTargetJs`, as consumed by `runResolve`. */
type ResolveOutcome =
  | { ok: true; matches_n: number; match_level: TargetMatchLevel }
  | { ok: false; code: TargetErrorCode; message: string; hint: string; candidates?: string[]; matches_n?: number };

/** Resolve after `ms` milliseconds (thin wrapper around `setTimeout`). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/**
 * Execute `resolveTargetJs` once, throw structured `TargetError` on failure.
 * Single helper so click/typeText/scrollTo share one resolution pathway,
 * which is what the selector-first contract promises agents.
 *
 * @param page Anything exposing `evaluate(js)`; the resolver script runs through it.
 * @param ref  Target reference handed to `resolveTargetJs`.
 * @param opts Resolver options forwarded unchanged.
 * @returns The match count and match level reported by the resolver.
 * @throws TargetError when the resolver reports `ok: false`.
 * @throws Error when the evaluation yields no object at all.
 */
async function runResolve(
  page: { evaluate(js: string): Promise<unknown> },
  ref: string,
  opts: ResolveOptions = {},
): Promise<ResolveSuccess> {
  const raw = await page.evaluate(resolveTargetJs(ref, opts));
  // The transport returns `unknown`; without this guard a null payload would
  // surface as an opaque "cannot read properties of null" TypeError below.
  if (raw == null || typeof raw !== 'object') {
    throw new Error(`Target resolution returned no result for ref "${ref}"`);
  }
  const resolution = raw as ResolveOutcome;
  if (!resolution.ok) {
    throw new TargetError({
      code: resolution.code,
      message: resolution.message,
      hint: resolution.hint,
      candidates: resolution.candidates,
      matches_n: resolution.matches_n,
    });
  }
  return { matches_n: resolution.matches_n, match_level: resolution.match_level };
}

export abstract class BasePage implements IPage {
  /** Last known page URL; consulted first by `getCurrentUrl`. */
  protected _lastUrl: string | null = null;
  /** Cached previous snapshot hashes for incremental diff marking */
  private _prevSnapshotHashes: string | null = null;

  // ── Transport-specific methods (must be implemented by subclasses) ──

  abstract goto(url: string, options?: { waitUntil?: 'load' | 'none'; settleMs?: number }): Promise<void>;
  abstract evaluate(js: string): Promise<unknown>;

  /**
   * Safely evaluate JS with pre-serialized arguments.
   * Each key in `args` becomes a `const` declaration with JSON-serialized value,
   * prepended to the JS code. Prevents injection by design.
   *
   * Values that `JSON.stringify` cannot represent (e.g. `undefined`, functions)
   * are emitted as the literal `undefined`.
   *
   * Usage:
   *   page.evaluateWithArgs(`(async () => { return sym; })()`, { sym: userInput })
   *
   * @throws Error when a key is not a plain identifier.
   */
  async evaluateWithArgs(js: string, args: Record<string, unknown>): Promise<unknown> {
    const declarations = Object.entries(args)
      .map(([key, value]) => {
        // Keys are spliced into the script verbatim, so only plain identifiers are allowed.
        if (!/^[a-zA-Z_$][a-zA-Z0-9_$]*$/.test(key)) {
          throw new Error(`evaluateWithArgs: invalid key "${key}"`);
        }
        return `const ${key} = ${JSON.stringify(value)};`;
      })
      .join('\n');
    return this.evaluate(`${declarations}\n${js}`);
  }

  abstract getCookies(opts?: { domain?: string; url?: string }): Promise<BrowserCookie[]>;
  abstract screenshot(options?: ScreenshotOptions): Promise<string>;
  abstract tabs(): Promise<unknown[]>;
  abstract selectTab(target: number | string): Promise<void>;

  // ── Shared DOM helper implementations ──

  /**
   * Resolve `ref`, then click the resolved element.
   *
   * If the in-page click does not report `status: 'clicked'` but returns
   * coordinates, `tryNativeClick` is attempted as a fallback.
   *
   * @returns The resolution summary (match count and level).
   * @throws TargetError when the target cannot be resolved.
   * @throws Error when both the JS click and the native fallback fail.
   */
  async click(ref: string, opts: ResolveOptions = {}): Promise<ResolveSuccess> {
    // Phase 1: Resolve target with fingerprint verification
    const resolved = await runResolve(this, ref, opts);

    // Phase 2: Execute click on resolved element
    const result = (await this.evaluate(clickResolvedJs())) as
      | string
      | { status: string; x?: number; y?: number; w?: number; h?: number; error?: string }
      | null;

    // A string or empty payload is treated as success.
    if (typeof result === 'string' || result == null) return resolved;

    if (result.status === 'clicked') return resolved;

    // JS click failed — try CDP native click if coordinates available
    if (result.x != null && result.y != null) {
      const success = await this.tryNativeClick(result.x, result.y);
      if (success) return resolved;
    }

    throw new Error(`Click failed: ${result.error ?? 'JS click and CDP fallback both failed'}`);
  }

  /** Override in subclasses with CDP native click support */
  protected async tryNativeClick(_x: number, _y: number): Promise<boolean> {
    return false;
  }

  /**
   * Resolve `ref`, then run the typing script for `text` against the resolved element.
   *
   * @throws TargetError when the target cannot be resolved.
   */
  async typeText(ref: string, text: string, opts: ResolveOptions = {}): Promise<ResolveSuccess> {
    const resolved = await runResolve(this, ref, opts);
    await this.evaluate(typeResolvedJs(text));
    return resolved;
  }

  /** Evaluate the key-press script for `key` in the page. */
  async pressKey(key: string): Promise<void> {
    await this.evaluate(pressKeyJs(key));
  }

  /**
   * Resolve `ref`, then scroll to the resolved element.
   *
   * @returns The scroll script's object payload (if any) merged with
   *          `matches_n` and `match_level` from the resolution.
   * @throws TargetError when the target cannot be resolved.
   */
  async scrollTo(ref: string, opts: ResolveOptions = {}): Promise<unknown> {
    const resolved = await runResolve(this, ref, opts);
    const result = (await this.evaluate(scrollResolvedJs())) as Record<string, unknown> | null;
    // Fold match_level into the scroll payload so the user-facing envelope
    // carries it the same way click / type do.
    if (result && typeof result === 'object') {
      return { ...result, matches_n: resolved.matches_n, match_level: resolved.match_level };
    }
    return { matches_n: resolved.matches_n, match_level: resolved.match_level };
  }

  /** Evaluate the form-state script and return its result. */
  async getFormState(): Promise<Record<string, unknown>> {
    return (await this.evaluate(getFormStateJs())) as Record<string, unknown>;
  }

  /** Evaluate the scroll script with the given direction and amount. */
  async scroll(direction: string = 'down', amount: number = 500): Promise<void> {
    await this.evaluate(scrollJs(direction, amount));
  }

  /** Evaluate the auto-scroll script; defaults to 3 passes with a 2000 ms delay. */
  async autoScroll(options?: { times?: number; delayMs?: number }): Promise<void> {
    const times = options?.times ?? 3;
    const delayMs = options?.delayMs ?? 2000;
    await this.evaluate(autoScrollJs(times, delayMs));
  }

  /** Evaluate the network-requests script; any non-array result becomes `[]`. */
  async networkRequests(includeStatic: boolean = false): Promise<unknown[]> {
    const result = await this.evaluate(networkRequestsJs(includeStatic));
    return Array.isArray(result) ? result : [];
  }

  /** Base implementation reports no console messages. */
  async consoleMessages(_level: string = 'info'): Promise<unknown[]> {
    return [];
  }

  /**
   * Wait for a duration, a selector, or a piece of text.
   *
   * - `number`: seconds. For values >= 1 the DOM-stable script is tried first
   *   (bounded by that duration); if it throws, or for values < 1, a fixed
   *   sleep of that duration is used instead.
   * - `{ time }`: fixed sleep of `time` seconds.
   * - `{ selector, timeout? }`: selector wait, timeout defaults to 10 seconds.
   * - `{ text, timeout? }`: text wait, timeout defaults to 30 seconds.
   *
   * Options matching none of the above resolve immediately.
   */
  async wait(options: number | WaitOptions): Promise<void> {
    if (typeof options === 'number') {
      if (options >= 1) {
        try {
          const maxMs = options * 1000;
          await this.evaluate(waitForDomStableJs(maxMs, Math.min(500, maxMs)));
          return;
        } catch {
          // Fallback: fixed sleep
        }
      }
      await sleep(options * 1000);
      return;
    }

    // Narrow through a local so no non-null assertion is needed inside a closure.
    const { time } = options;
    if (typeof time === 'number') {
      await sleep(time * 1000);
      return;
    }
    if (options.selector) {
      const timeout = (options.timeout ?? 10) * 1000;
      await this.evaluate(waitForSelectorJs(options.selector, timeout));
      return;
    }
    if (options.text) {
      const timeout = (options.timeout ?? 30) * 1000;
      await this.evaluate(waitForTextJs(options.text, timeout));
    }
  }

  /**
   * Take a DOM snapshot of the page.
   *
   * After a successful snapshot, `window.__opencli_prev_hashes` is read back
   * and cached so the next call can pass it as `previousHashes`. If the
   * snapshot script throws, `_basicSnapshot` is used instead.
   */
  async snapshot(opts: SnapshotOptions = {}): Promise<unknown> {
    const snapshotJs = generateSnapshotJs({
      viewportExpand: opts.viewportExpand ?? 2000,
      // Clamp to [1, 200]; missing or non-numeric depth falls back to 50.
      maxDepth: Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200)),
      interactiveOnly: opts.interactive ?? false,
      maxTextLength: opts.maxTextLength ?? 120,
      includeScrollInfo: true,
      bboxDedup: true,
      previousHashes: this._prevSnapshotHashes,
    });

    try {
      const result = await this.evaluate(snapshotJs);
      // Read back the hashes stored by the snapshot for next diff
      try {
        const hashes = await this.evaluate('window.__opencli_prev_hashes');
        this._prevSnapshotHashes = typeof hashes === 'string' ? hashes : null;
      } catch {
        // Non-fatal: diff is best-effort
      }
      return result;
    } catch (err: unknown) {
      // Log snapshot failure for debugging, then fallback to basic accessibility tree
      if (process.env.DEBUG_SNAPSHOT) {
        // Narrow instead of casting so non-Error throws still log something useful.
        const detail = (err instanceof Error ? err.message : String(err)).slice(0, 200);
        process.stderr.write(`[snapshot] DOM snapshot failed, falling back to accessibility tree: ${detail}\n`);
      }
      return this._basicSnapshot(opts);
    }
  }

  /**
   * Return the cached `_lastUrl` when set; otherwise evaluate
   * `window.location.href`, cache a non-empty string result and return it.
   * Yields `null` when evaluation throws or produces anything else.
   */
  async getCurrentUrl(): Promise<string | null> {
    if (this._lastUrl) return this._lastUrl;
    try {
      const current = await this.evaluate('window.location.href');
      if (typeof current === 'string' && current) {
        this._lastUrl = current;
        return current;
      }
    } catch {
      // Best-effort
    }
    return null;
  }

  /**
   * Evaluate the interceptor script for `pattern`. The pattern is passed
   * JSON-stringified; captures use the `__opencli_xhr` array name and the
   * `__opencli_interceptor_patched` guard name.
   */
  async installInterceptor(pattern: string): Promise<void> {
    const { generateInterceptorJs } = await import('../interceptor.js');
    await this.evaluate(generateInterceptorJs(JSON.stringify(pattern), {
      arrayName: '__opencli_xhr',
      patchGuard: '__opencli_interceptor_patched',
    }));
  }

  /** Evaluate the read script for `__opencli_xhr`; any non-array result becomes `[]`. */
  async getInterceptedRequests(): Promise<unknown[]> {
    const { generateReadInterceptedJs } = await import('../interceptor.js');
    const result = await this.evaluate(generateReadInterceptedJs('__opencli_xhr'));
    return Array.isArray(result) ? result : [];
  }

  /** Evaluate the capture-wait script; `timeout` is in seconds (default 10). */
  async waitForCapture(timeout: number = 10): Promise<void> {
    const maxMs = timeout * 1000;
    await this.evaluate(waitForCaptureJs(maxMs));
  }

  /**
   * Fallback basic snapshot.
   *
   * Walks `document.body` in the page and emits one line per element:
   * role (or tag name), a quoted name, and `[href]` / `[type]` for links and
   * inputs. Returns the raw string when `opts.raw` is set, otherwise passes a
   * string result through `formatSnapshot`.
   */
  protected async _basicSnapshot(opts: Pick<SnapshotOptions, 'interactive' | 'compact' | 'maxDepth' | 'raw'> = {}): Promise<unknown> {
    const maxDepth = Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200));
    // NOTE(review): the per-level indent unit below is a single space as this
    // file currently reads — confirm that is the width formatSnapshot expects.
    const code = `
      (async () => {
        function buildTree(node, depth) {
          if (depth > ${maxDepth}) return '';
          const role = node.getAttribute?.('role') || node.tagName?.toLowerCase() || 'generic';
          const name = node.getAttribute?.('aria-label') || node.getAttribute?.('alt') || node.textContent?.trim().slice(0, 80) || '';
          const isInteractive = ['a', 'button', 'input', 'select', 'textarea'].includes(node.tagName?.toLowerCase()) || node.getAttribute?.('tabindex') != null;

          ${opts.interactive ? 'if (!isInteractive && !node.children?.length) return "";' : ''}

          let indent = ' '.repeat(depth);
          let line = indent + role;
          if (name) line += ' "' + name.replace(/"/g, '\\\\\\"') + '"';
          if (node.tagName?.toLowerCase() === 'a' && node.href) line += ' [' + node.href + ']';
          if (node.tagName?.toLowerCase() === 'input') line += ' [' + (node.type || 'text') + ']';

          let result = line + '\\n';
          if (node.children) {
            for (const child of node.children) {
              result += buildTree(child, depth + 1);
            }
          }
          return result;
        }
        return buildTree(document.body, 0);
      })()
    `;
    const raw = await this.evaluate(code);
    if (opts.raw) return raw;
    if (typeof raw === 'string') return formatSnapshot(raw, opts);
    return raw;
  }
}