// client.ts
import {
  chromium,
  type Browser,
  type CDPSession,
  type ElementHandle,
  type Page,
} from "playwright";
import type {
  GetPageRequest,
  GetPageResponse,
  ListPagesResponse,
  ServerInfoResponse,
  ViewportSize,
} from "./types";
import { getSnapshotScript } from "./snapshot/browser-script";

/**
 * Options for waiting for page load
 */
export interface WaitForPageLoadOptions {
  /** Maximum time to wait in ms (default: 10000) */
  timeout?: number;
  /** How often to check page state in ms (default: 50) */
  pollInterval?: number;
  /** Minimum time to wait even if page appears ready in ms (default: 100) */
  minimumWait?: number;
  /** Wait for network to be idle (no pending requests) (default: true) */
  waitForNetworkIdle?: boolean;
}

/**
 * Result of waiting for page load
 */
export interface WaitForPageLoadResult {
  /** Whether the page is considered loaded */
  success: boolean;
  /** Document ready state when finished */
  readyState: string;
  /** Number of pending network requests when finished */
  pendingRequests: number;
  /** Time spent waiting in ms */
  waitTimeMs: number;
  /** Whether timeout was reached */
  timedOut: boolean;
}

/** Snapshot of a page's loading state as sampled inside the browser. */
interface PageLoadState {
  documentReadyState: string;
  documentLoading: boolean;
  pendingRequests: PendingRequest[];
}

/** A resource entry that the in-page sampler still counts as loading. */
interface PendingRequest {
  url: string;
  loadingDurationMs: number;
  resourceType: string;
}

/**
 * JSON document returned by `GET <serverUrl>`, as consumed by this client.
 * `mode` is kept as a plain string because it is unvalidated wire data.
 */
type RawServerInfo = ServerInfoResponse & {
  mode?: string;
  extensionConnected?: boolean;
};

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wait for a page to finish loading using document.readyState and performance API.
 *
 * Uses browser-use's approach of:
 * - Checking document.readyState for 'complete'
 * - Monitoring pending network requests via Performance API
 * - Filtering out ads, tracking, and non-critical resources
 * - Graceful timeout handling (continues even if timeout reached)
 *
 * The elapsed-time budget includes `minimumWait`. If `timeout` has already
 * elapsed when polling would start (for example `timeout <= minimumWait`),
 * the page is never sampled and the result reports `readyState: "unknown"`
 * with `pendingRequests: 0`.
 *
 * @param page - Playwright page to observe.
 * @param options - Polling and timeout settings; see {@link WaitForPageLoadOptions}.
 * @returns The last observed state. `success` is `true` only when the document
 *   was `"complete"` and (if `waitForNetworkIdle`) no pending requests remained;
 *   otherwise `timedOut` is `true`. This function does not throw on timeout.
 */
export async function waitForPageLoad(
  page: Page,
  options: WaitForPageLoadOptions = {}
): Promise<WaitForPageLoadResult> {
  const {
    timeout = 10000,
    pollInterval = 50,
    minimumWait = 100,
    waitForNetworkIdle = true,
  } = options;

  const startTime = Date.now();
  let lastState: PageLoadState | null = null;

  // Wait minimum time first
  if (minimumWait > 0) {
    await delay(minimumWait);
  }

  // Poll until ready or timeout
  while (Date.now() - startTime < timeout) {
    try {
      lastState = await getPageLoadState(page);

      const documentReady = lastState.documentReadyState === "complete";
      // Network counts as idle when the caller opted out of the check or
      // nothing survived the pending-request filter.
      const networkIdle = !waitForNetworkIdle || lastState.pendingRequests.length === 0;

      if (documentReady && networkIdle) {
        return {
          success: true,
          readyState: lastState.documentReadyState,
          pendingRequests: lastState.pendingRequests.length,
          waitTimeMs: Date.now() - startTime,
          timedOut: false,
        };
      }
    } catch {
      // Deliberate best-effort: evaluation can fail while the page is
      // navigating, so keep the previous sample and poll again.
    }

    await delay(pollInterval);
  }

  // Timeout reached - return the last state we managed to sample, if any
  return {
    success: false,
    readyState: lastState?.documentReadyState ?? "unknown",
    pendingRequests: lastState?.pendingRequests.length ?? 0,
    waitTimeMs: Date.now() - startTime,
    timedOut: true,
  };
}

/**
 * Get the current page load state including document ready state and pending requests.
 * Filters out ads, tracking, and non-critical resources that shouldn't block loading.
 *
 * Everything inside the `page.evaluate` callback runs in the browser, so it
 * must be self-contained and cannot reference names from this module.
 *
 * @param page - Playwright page to sample.
 * @returns The document ready state plus the resource entries still counted as pending.
 */
async function getPageLoadState(page: Page): Promise<PageLoadState> {
  return page.evaluate(() => {
    // Access browser globals via globalThis for TypeScript compatibility
    /* eslint-disable @typescript-eslint/no-explicit-any */
    const g = globalThis as { document?: any; performance?: any };
    /* eslint-enable @typescript-eslint/no-explicit-any */
    const perf = g.performance!;
    const doc = g.document!;

    const now = perf.now();
    const resources = perf.getEntriesByType("resource");
    const pending: Array<{ url: string; loadingDurationMs: number; resourceType: string }> = [];

    // Common ad/tracking domains and patterns to filter out.
    // NOTE: these are plain substring matches against the full URL, so the
    // short entries ("ads", "analytics", "pixel", "tracking") also match
    // unrelated URLs that merely contain those letters.
    const adPatterns = [
      "doubleclick.net",
      "googlesyndication.com",
      "googletagmanager.com",
      "google-analytics.com",
      "facebook.net",
      "connect.facebook.net",
      "analytics",
      "ads",
      "tracking",
      "pixel",
      "hotjar.com",
      "clarity.ms",
      "mixpanel.com",
      "segment.com",
      "newrelic.com",
      "nr-data.net",
      "/tracker/",
      "/collector/",
      "/beacon/",
      "/telemetry/",
      "/log/",
      "/events/",
      "/track.",
      "/metrics/",
    ];

    // Non-critical resource types
    const nonCriticalTypes = ["img", "image", "icon", "font"];

    // Image file extensions, matched at the end of the path or before a query
    // string. Built once here rather than once per resource entry.
    const imageUrlPattern = /\.(jpg|jpeg|png|gif|webp|svg|ico)(\?|$)/i;

    for (const entry of resources) {
      // Resources with responseEnd === 0 are treated as still loading
      if (entry.responseEnd !== 0) continue;

      const url = entry.name;

      // Filter out ads and tracking
      if (adPatterns.some((pattern) => url.includes(pattern))) continue;

      // Filter out data: URLs and very long URLs
      if (url.startsWith("data:") || url.length > 500) continue;

      const loadingDuration = now - entry.startTime;

      // Skip requests loading > 10 seconds (likely stuck/polling)
      if (loadingDuration > 10000) continue;

      const resourceType = entry.initiatorType || "unknown";

      // Filter out non-critical resources loading > 3 seconds
      if (nonCriticalTypes.includes(resourceType) && loadingDuration > 3000) continue;

      // Filter out image URLs even if type is unknown
      if (imageUrlPattern.test(url) && loadingDuration > 3000) continue;

      pending.push({
        url,
        loadingDurationMs: Math.round(loadingDuration),
        resourceType,
      });
    }

    return {
      documentReadyState: doc.readyState,
      documentLoading: doc.readyState !== "complete",
      pendingRequests: pending,
    };
  });
}

/** Server mode information */
export interface ServerInfo {
  wsEndpoint: string;
  mode: "launch" | "extension";
  extensionConnected?: boolean;
}

/**
 * Options for creating or getting a page
 */
export interface PageOptions {
  /** Viewport size for new pages */
  viewport?: ViewportSize;
}

export interface DevBrowserClient {
  page: (name: string, options?: PageOptions) => Promise<Page>;
  list: () => Promise<string[]>;
  close: (name: string) => Promise<void>;
  disconnect: () => Promise<void>;
  /**
   * Get AI-friendly ARIA snapshot for a page.
   * Returns YAML format with refs like [ref=e1], [ref=e2].
   * Refs are stored on window.__devBrowserRefs for cross-connection persistence.
   */
  getAISnapshot: (name: string) => Promise<string>;
  /**
   * Get an element handle by its ref from the last getAISnapshot call.
   * Refs persist across Playwright connections.
   */
  selectSnapshotRef: (name: string, ref: string) => Promise<ElementHandle | null>;
  /**
   * Get server information including mode and extension connection status.
   */
  getServerInfo: () => Promise<ServerInfo>;
}

/**
 * Create a client for the dev-browser server at `serverUrl`.
 *
 * No network traffic happens here: the CDP connection is opened lazily by the
 * first method that needs a `Page`, and reused while it stays connected.
 *
 * @param serverUrl - Base URL of the server's HTTP API.
 * @returns A {@link DevBrowserClient} bound to that server.
 */
export async function connect(serverUrl = "http://localhost:9222"): Promise<DevBrowserClient> {
  let browser: Browser | null = null;
  let connectingPromise: Promise<Browser> | null = null;

  /**
   * Fetch and parse the server info document from the server root.
   * Single place for the status check so every caller fails the same way.
   *
   * @throws Error with the status code and response body on a non-2xx response.
   */
  async function fetchServerInfo(): Promise<RawServerInfo> {
    const res = await fetch(serverUrl);
    if (!res.ok) {
      throw new Error(`Server returned ${res.status}: ${await res.text()}`);
    }
    return (await res.json()) as RawServerInfo;
  }

  /** Return the live CDP connection, opening one if there is none. */
  async function ensureConnected(): Promise<Browser> {
    // Return existing connection if still active
    if (browser && browser.isConnected()) {
      return browser;
    }

    // If already connecting, wait for that connection (prevents race condition)
    if (connectingPromise) {
      return connectingPromise;
    }

    // Start new connection; concurrent callers share this promise
    connectingPromise = (async () => {
      try {
        const { wsEndpoint } = await fetchServerInfo();

        // Connect to the browser via CDP
        browser = await chromium.connectOverCDP(wsEndpoint);
        return browser;
      } finally {
        // Clear on success and failure alike so a failed attempt can be retried
        connectingPromise = null;
      }
    })();

    return connectingPromise;
  }

  /**
   * Find page by CDP targetId - more reliable than JS globals.
   * Opens a temporary CDP session per page to read its target info and
   * always detaches it afterwards.
   *
   * @returns The matching page, or `null` when no page has that target id.
   */
  async function findPageByTargetId(b: Browser, targetId: string): Promise<Page | null> {
    for (const context of b.contexts()) {
      for (const page of context.pages()) {
        let cdpSession: CDPSession | undefined;
        try {
          cdpSession = await context.newCDPSession(page);
          const { targetInfo } = await cdpSession.send("Target.getTargetInfo");
          if (targetInfo.targetId === targetId) {
            return page;
          }
        } catch (err) {
          // Only ignore "target closed" errors, log unexpected ones
          const msg = err instanceof Error ? err.message : String(err);
          if (!msg.includes("Target closed") && !msg.includes("Session closed")) {
            console.warn(`Unexpected error checking page target: ${msg}`);
          }
        } finally {
          if (cdpSession) {
            try {
              await cdpSession.detach();
            } catch {
              // Ignore detach errors - session may already be closed
            }
          }
        }
      }
    }
    return null;
  }

  /**
   * Get a page by name (used by multiple methods).
   * Asks the server for the page (creating it if it doesn't exist), then
   * locates the corresponding Playwright `Page` on the CDP connection.
   *
   * @throws Error when the server rejects the request, when the server info
   *   request fails, or when no matching page can be found in the browser.
   */
  async function getPage(name: string, options?: PageOptions): Promise<Page> {
    // Request the page from server (creates if doesn't exist)
    const res = await fetch(`${serverUrl}/pages`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name, viewport: options?.viewport } satisfies GetPageRequest),
    });

    if (!res.ok) {
      throw new Error(`Failed to get page: ${await res.text()}`);
    }

    const pageInfo = (await res.json()) as GetPageResponse & { url?: string };
    const { targetId } = pageInfo;

    // Connect to browser
    const b = await ensureConnected();

    // Check if we're in extension mode (re-read each call; not cached)
    const info = await fetchServerInfo();
    const isExtensionMode = info.mode === "extension";

    if (isExtensionMode) {
      // In extension mode, DON'T use findPageByTargetId as it corrupts page state
      // Instead, find page by URL or use the only available page
      const allPages = b.contexts().flatMap((ctx) => ctx.pages());
      const [firstPage] = allPages;

      if (!firstPage) {
        throw new Error(`No pages available in browser`);
      }

      if (allPages.length === 1) {
        return firstPage;
      }

      // Multiple pages - try to match by URL if available
      if (pageInfo.url) {
        const matchingPage = allPages.find((p) => p.url() === pageInfo.url);
        if (matchingPage) {
          return matchingPage;
        }
      }

      // Fall back to first page. NOTE: nothing here verifies that this is the
      // page that was requested by name.
      return firstPage;
    }

    // In launch mode, use the original targetId-based lookup
    const page = await findPageByTargetId(b, targetId);
    if (!page) {
      throw new Error(`Page "${name}" not found in browser contexts`);
    }

    return page;
  }

  return {
    page: getPage,

    /**
     * List the page names known to the server.
     * @throws Error with the response body on a non-2xx response.
     */
    async list(): Promise<string[]> {
      const res = await fetch(`${serverUrl}/pages`);
      // Check the status first so an error payload is not mistaken for a page list
      if (!res.ok) {
        throw new Error(`Failed to list pages: ${await res.text()}`);
      }
      const data = (await res.json()) as ListPagesResponse;
      return data.pages;
    },

    /**
     * Ask the server to close the named page.
     * @throws Error with the response body on a non-2xx response.
     */
    async close(name: string): Promise<void> {
      const res = await fetch(`${serverUrl}/pages/${encodeURIComponent(name)}`, {
        method: "DELETE",
      });

      if (!res.ok) {
        throw new Error(`Failed to close page: ${await res.text()}`);
      }
    },

    /** Drop this client's CDP connection, if one is open. */
    async disconnect(): Promise<void> {
      // Just disconnect the CDP connection - pages persist on server
      const current = browser;
      if (!current) {
        return;
      }
      // Clear the shared reference before awaiting so that a failing close()
      // cannot leave a stale handle behind and ensureConnected() never hands
      // out a browser that is in the middle of closing.
      browser = null;
      await current.close();
    },

    async getAISnapshot(name: string): Promise<string> {
      const page = await getPage(name);

      // Inject the snapshot script and call getAISnapshot
      const snapshotScript = getSnapshotScript();
      const snapshot = await page.evaluate((script: string) => {
        // Note: page.evaluate runs in browser context where window exists
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const w = globalThis as any;
        // Inject script if not already present
        if (!w.__devBrowser_getAISnapshot) {
          // The evaluated text comes from getSnapshotScript(), not from the page.
          // eslint-disable-next-line no-eval
          eval(script);
        }
        return w.__devBrowser_getAISnapshot();
      }, snapshotScript);

      return snapshot;
    },

    async selectSnapshotRef(name: string, ref: string): Promise<ElementHandle | null> {
      const page = await getPage(name);

      // Find the element using the stored refs. Errors thrown inside the
      // callback reject this call; they are not converted to `null`.
      const elementHandle = await page.evaluateHandle((refId: string) => {
        // Note: page.evaluateHandle runs in browser context where globalThis is the window
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const w = globalThis as any;
        const refs = w.__devBrowserRefs;
        if (!refs) {
          throw new Error("No snapshot refs found. Call getAISnapshot first.");
        }
        const element = refs[refId];
        if (!element) {
          throw new Error(
            `Ref "${refId}" not found. Available refs: ${Object.keys(refs).join(", ")}`
          );
        }
        return element;
      }, ref);

      // Check if we got an element; release the handle when it is not one
      const element = elementHandle.asElement();
      if (!element) {
        await elementHandle.dispose();
        return null;
      }

      return element;
    },

    async getServerInfo(): Promise<ServerInfo> {
      const info = await fetchServerInfo();
      return {
        wsEndpoint: info.wsEndpoint,
        // Decide the mode with a runtime comparison instead of a type
        // assertion: anything other than "extension" (including a missing
        // value) is reported as "launch", matching how getPage reads `mode`.
        mode: info.mode === "extension" ? "extension" : "launch",
        extensionConnected: info.extensionConnected,
      };
    },
  };
}