/ skills / dev-browser / src / client.ts
client.ts
  1  import { chromium, type Browser, type Page, type ElementHandle } from "playwright";
  2  import type {
  3    GetPageRequest,
  4    GetPageResponse,
  5    ListPagesResponse,
  6    ServerInfoResponse,
  7    ViewportSize,
  8  } from "./types";
  9  import { getSnapshotScript } from "./snapshot/browser-script";
 10  
 11  /**
 12   * Options for waiting for page load
 13   */
 14  export interface WaitForPageLoadOptions {
 15    /** Maximum time to wait in ms (default: 10000) */
 16    timeout?: number;
 17    /** How often to check page state in ms (default: 50) */
 18    pollInterval?: number;
 19    /** Minimum time to wait even if page appears ready in ms (default: 100) */
 20    minimumWait?: number;
 21    /** Wait for network to be idle (no pending requests) (default: true) */
 22    waitForNetworkIdle?: boolean;
 23  }
 24  
 25  /**
 26   * Result of waiting for page load
 27   */
 28  export interface WaitForPageLoadResult {
 29    /** Whether the page is considered loaded */
 30    success: boolean;
 31    /** Document ready state when finished */
 32    readyState: string;
 33    /** Number of pending network requests when finished */
 34    pendingRequests: number;
 35    /** Time spent waiting in ms */
 36    waitTimeMs: number;
 37    /** Whether timeout was reached */
 38    timedOut: boolean;
 39  }
 40  
 41  interface PageLoadState {
 42    documentReadyState: string;
 43    documentLoading: boolean;
 44    pendingRequests: PendingRequest[];
 45  }
 46  
 47  interface PendingRequest {
 48    url: string;
 49    loadingDurationMs: number;
 50    resourceType: string;
 51  }
 52  
 53  /**
 54   * Wait for a page to finish loading using document.readyState and performance API.
 55   *
 56   * Uses browser-use's approach of:
 57   * - Checking document.readyState for 'complete'
 58   * - Monitoring pending network requests via Performance API
 59   * - Filtering out ads, tracking, and non-critical resources
 60   * - Graceful timeout handling (continues even if timeout reached)
 61   */
 62  export async function waitForPageLoad(
 63    page: Page,
 64    options: WaitForPageLoadOptions = {}
 65  ): Promise<WaitForPageLoadResult> {
 66    const {
 67      timeout = 10000,
 68      pollInterval = 50,
 69      minimumWait = 100,
 70      waitForNetworkIdle = true,
 71    } = options;
 72  
 73    const startTime = Date.now();
 74    let lastState: PageLoadState | null = null;
 75  
 76    // Wait minimum time first
 77    if (minimumWait > 0) {
 78      await new Promise((resolve) => setTimeout(resolve, minimumWait));
 79    }
 80  
 81    // Poll until ready or timeout
 82    while (Date.now() - startTime < timeout) {
 83      try {
 84        lastState = await getPageLoadState(page);
 85  
 86        // Check if document is complete
 87        const documentReady = lastState.documentReadyState === "complete";
 88  
 89        // Check if network is idle (no pending critical requests)
 90        const networkIdle = !waitForNetworkIdle || lastState.pendingRequests.length === 0;
 91  
 92        if (documentReady && networkIdle) {
 93          return {
 94            success: true,
 95            readyState: lastState.documentReadyState,
 96            pendingRequests: lastState.pendingRequests.length,
 97            waitTimeMs: Date.now() - startTime,
 98            timedOut: false,
 99          };
100        }
101      } catch {
102        // Page may be navigating, continue polling
103      }
104  
105      await new Promise((resolve) => setTimeout(resolve, pollInterval));
106    }
107  
108    // Timeout reached - return current state
109    return {
110      success: false,
111      readyState: lastState?.documentReadyState ?? "unknown",
112      pendingRequests: lastState?.pendingRequests.length ?? 0,
113      waitTimeMs: Date.now() - startTime,
114      timedOut: true,
115    };
116  }
117  
/**
 * Get the current page load state: document.readyState plus the list of
 * resource requests that still look in flight.
 *
 * Filters out ads, tracking, and non-critical resources that shouldn't block
 * loading. A resource timing entry is treated as "still loading" when its
 * responseEnd is 0.
 *
 * The ad keyword "ads" is matched only as a standalone token (bounded by
 * non-alphanumeric characters), so paths such as "/uploads/" or "/downloads/"
 * are not mistaken for ad traffic.
 *
 * @param page - Playwright page to inspect.
 * @returns The document ready state and the filtered pending requests.
 */
async function getPageLoadState(page: Page): Promise<PageLoadState> {
  // The callback runs inside the page, so it must be fully self-contained:
  // everything it needs is declared within it.
  return page.evaluate(() => {
    // Access browser globals via globalThis for TypeScript compatibility
    /* eslint-disable @typescript-eslint/no-explicit-any */
    const g = globalThis as { document?: any; performance?: any };
    /* eslint-enable @typescript-eslint/no-explicit-any */
    const perf = g.performance!;
    const doc = g.document!;

    const now = perf.now();
    const resources = perf.getEntriesByType("resource");
    const pending: Array<{ url: string; loadingDurationMs: number; resourceType: string }> = [];

    // Common ad/tracking domains and path fragments, matched as plain substrings.
    const adPatterns = [
      "doubleclick.net",
      "googlesyndication.com",
      "googletagmanager.com",
      "google-analytics.com",
      "googleadservices.com",
      "adservice.google.",
      "facebook.net",
      "connect.facebook.net",
      "analytics",
      "tracking",
      "pixel",
      "hotjar.com",
      "clarity.ms",
      "mixpanel.com",
      "segment.com",
      "newrelic.com",
      "nr-data.net",
      "/tracker/",
      "/collector/",
      "/beacon/",
      "/telemetry/",
      "/log/",
      "/events/",
      "/track.",
      "/metrics/",
    ];

    // "ads" only counts as a whole token ("/ads/", "ads.example.com", "?ads=1");
    // a plain substring test would also hit "uploads", "downloads", "threads", ...
    const adKeywordPattern = /(^|[^A-Za-z0-9])ads([^A-Za-z0-9]|$)/;

    // Non-critical resource types
    const nonCriticalTypes = ["img", "image", "icon", "font"];

    // Image URLs are non-critical even when the initiator type is unknown.
    const imageUrlPattern = /\.(jpg|jpeg|png|gif|webp|svg|ico)(\?|$)/i;

    // Resources slower than this are presumed stuck or long-polling.
    const stuckAfterMs = 10000;
    // Non-critical resources stop blocking once they have been loading this long.
    const nonCriticalGraceMs = 3000;

    for (const entry of resources) {
      // Resources with responseEnd === 0 are still loading
      if (entry.responseEnd !== 0) continue;

      const url: string = entry.name;

      // Filter out ads and tracking
      const isAd =
        adKeywordPattern.test(url) || adPatterns.some((pattern) => url.includes(pattern));
      if (isAd) continue;

      // Filter out data: URLs and very long URLs
      if (url.startsWith("data:") || url.length > 500) continue;

      const loadingDuration = now - entry.startTime;

      // Skip requests loading > 10 seconds (likely stuck/polling)
      if (loadingDuration > stuckAfterMs) continue;

      const resourceType: string = entry.initiatorType || "unknown";

      // Filter out non-critical resources (by type or by image extension) loading > 3 seconds
      const isNonCritical = nonCriticalTypes.includes(resourceType) || imageUrlPattern.test(url);
      if (isNonCritical && loadingDuration > nonCriticalGraceMs) continue;

      pending.push({
        url,
        loadingDurationMs: Math.round(loadingDuration),
        resourceType,
      });
    }

    return {
      documentReadyState: doc.readyState,
      documentLoading: doc.readyState !== "complete",
      pendingRequests: pending,
    };
  });
}
209  
/** Information about the server and the mode it is running in. */
export interface ServerInfo {
  /** WebSocket endpoint reported by the server. */
  wsEndpoint: string;
  /** Mode the server is running in. */
  mode: "launch" | "extension";
  /** Extension connection status as reported by the server, when provided. */
  extensionConnected?: boolean;
}
216  
/**
 * Optional settings supplied when a page is requested by name.
 */
export interface PageOptions {
  /** Viewport size to use for newly created pages. */
  viewport?: ViewportSize;
}
224  
/** Client handle returned by connect for working with named pages on the server. */
export interface DevBrowserClient {
  /** Resolve the page registered under `name`; the server creates it if it does not exist. */
  page: (name: string, options?: PageOptions) => Promise<Page>;
  /** List the names of the pages known to the server. */
  list: () => Promise<string[]>;
  /** Ask the server to close the page registered under `name`. */
  close: (name: string) => Promise<void>;
  /** Drop this client's browser connection; pages persist on the server. */
  disconnect: () => Promise<void>;
  /**
   * Produce an AI-friendly ARIA snapshot of the named page.
   * The snapshot is YAML text containing refs such as [ref=e1], [ref=e2].
   * Refs are kept on window.__devBrowserRefs so they persist across connections.
   */
  getAISnapshot: (name: string) => Promise<string>;
  /**
   * Look up the element recorded under `ref` by the most recent getAISnapshot call.
   * Refs persist across Playwright connections.
   */
  selectSnapshotRef: (name: string, ref: string) => Promise<ElementHandle | null>;
  /**
   * Fetch server information, including the mode and extension connection status.
   */
  getServerInfo: () => Promise<ServerInfo>;
}
246  
/**
 * Create a client for a dev-browser server.
 *
 * No network traffic happens here: the browser connection is opened on demand
 * the first time a page is requested and is reused while it stays connected.
 *
 * @param serverUrl - Base URL of the dev-browser HTTP server.
 * @returns A client exposing page lookup, listing, closing, snapshots and server info.
 */
export async function connect(serverUrl = "http://localhost:9222"): Promise<DevBrowserClient> {
  let browser: Browser | null = null;
  let connectingPromise: Promise<Browser> | null = null;

  /** Shape of the server's root response as consumed by this client. */
  type RawServerInfo = ServerInfoResponse & { mode?: string; extensionConnected?: boolean };

  /**
   * Fetch the server's root info document.
   * @throws Error when the server answers with a non-2xx status.
   */
  async function fetchServerInfo(): Promise<RawServerInfo> {
    const res = await fetch(serverUrl);
    if (!res.ok) {
      throw new Error(`Server returned ${res.status}: ${await res.text()}`);
    }
    return (await res.json()) as RawServerInfo;
  }

  /**
   * Return a live browser connection, opening one if needed.
   * Concurrent callers share a single in-flight connection attempt.
   */
  async function ensureConnected(): Promise<Browser> {
    // Return existing connection if still active
    if (browser?.isConnected()) {
      return browser;
    }

    // If already connecting, reuse that attempt (prevents duplicate connections)
    if (!connectingPromise) {
      connectingPromise = (async () => {
        try {
          // Ask the server where the browser's CDP endpoint lives, then attach to it
          const { wsEndpoint } = await fetchServerInfo();
          const connected = await chromium.connectOverCDP(wsEndpoint);
          browser = connected;
          return connected;
        } finally {
          // Clear the marker on success and failure so a later call can retry
          connectingPromise = null;
        }
      })();
    }

    return connectingPromise;
  }

  /**
   * Find the page whose CDP targetId equals `targetId` - more reliable than JS globals.
   * @returns The matching page, or null when no page in any context matches.
   */
  async function findPageByTargetId(b: Browser, targetId: string): Promise<Page | null> {
    for (const context of b.contexts()) {
      for (const page of context.pages()) {
        let cdpSession: Awaited<ReturnType<typeof context.newCDPSession>> | undefined;
        try {
          cdpSession = await context.newCDPSession(page);
          const { targetInfo } = await cdpSession.send("Target.getTargetInfo");
          if (targetInfo.targetId === targetId) {
            return page;
          }
        } catch (err) {
          // Only ignore "target closed" errors, log unexpected ones
          const msg = err instanceof Error ? err.message : String(err);
          if (!msg.includes("Target closed") && !msg.includes("Session closed")) {
            console.warn(`Unexpected error checking page target: ${msg}`);
          }
        } finally {
          // Always release the temporary session, including on the early return above
          if (cdpSession) {
            try {
              await cdpSession.detach();
            } catch {
              // Ignore detach errors - session may already be closed
            }
          }
        }
      }
    }
    return null;
  }

  /**
   * Resolve the Playwright page registered under `name` (used by multiple methods).
   * @throws Error when the server rejects the request or no matching page can be found.
   */
  async function getPage(name: string, options?: PageOptions): Promise<Page> {
    // Request the page from server (creates if doesn't exist)
    const res = await fetch(`${serverUrl}/pages`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name, viewport: options?.viewport } satisfies GetPageRequest),
    });

    if (!res.ok) {
      throw new Error(`Failed to get page: ${await res.text()}`);
    }

    const pageInfo = (await res.json()) as GetPageResponse & { url?: string };
    const { targetId } = pageInfo;

    // Connect to browser
    const b = await ensureConnected();

    // Re-read the mode on every call (now with the response status checked)
    const { mode } = await fetchServerInfo();

    if (mode === "extension") {
      // In extension mode, DON'T use findPageByTargetId as it corrupts page state
      // Instead, find page by URL or use the only available page
      const allPages = b.contexts().flatMap((ctx) => ctx.pages());
      const [firstPage] = allPages;

      if (!firstPage) {
        throw new Error(`No pages available in browser`);
      }

      // Prefer the page whose URL matches the server's record; otherwise fall
      // back to the first page (which is also the answer when there is only one).
      const matchingPage = pageInfo.url
        ? allPages.find((p) => p.url() === pageInfo.url)
        : undefined;
      return matchingPage ?? firstPage;
    }

    // In launch mode, use the original targetId-based lookup
    const page = await findPageByTargetId(b, targetId);
    if (!page) {
      throw new Error(`Page "${name}" not found in browser contexts`);
    }

    return page;
  }

  return {
    page: getPage,

    /** @throws Error when the server answers with a non-2xx status. */
    async list(): Promise<string[]> {
      const res = await fetch(`${serverUrl}/pages`);
      if (!res.ok) {
        throw new Error(`Failed to list pages: ${await res.text()}`);
      }
      const data = (await res.json()) as ListPagesResponse;
      return data.pages;
    },

    /** @throws Error when the server answers with a non-2xx status. */
    async close(name: string): Promise<void> {
      const res = await fetch(`${serverUrl}/pages/${encodeURIComponent(name)}`, {
        method: "DELETE",
      });

      if (!res.ok) {
        throw new Error(`Failed to close page: ${await res.text()}`);
      }
    },

    async disconnect(): Promise<void> {
      // Just disconnect the CDP connection - pages persist on server.
      // Forget the handle first so a failing close() cannot leave a stale reference.
      const active = browser;
      browser = null;
      if (active) {
        await active.close();
      }
    },

    async getAISnapshot(name: string): Promise<string> {
      const page = await getPage(name);

      // Inject the snapshot script and call getAISnapshot.
      // The evaluated script comes from getSnapshotScript(), not from page or user input.
      const snapshotScript = getSnapshotScript();
      const snapshot = await page.evaluate((script: string) => {
        // Note: page.evaluate runs in browser context where window exists
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const w = globalThis as any;
        // Inject script if not already present
        if (!w.__devBrowser_getAISnapshot) {
          // eslint-disable-next-line no-eval
          eval(script);
        }
        return w.__devBrowser_getAISnapshot();
      }, snapshotScript);

      return snapshot;
    },

    async selectSnapshotRef(name: string, ref: string): Promise<ElementHandle | null> {
      const page = await getPage(name);

      // Find the element using the stored refs
      const elementHandle = await page.evaluateHandle((refId: string) => {
        // Note: page.evaluateHandle runs in browser context where globalThis is the window
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const w = globalThis as any;
        const refs = w.__devBrowserRefs;
        if (!refs) {
          throw new Error("No snapshot refs found. Call getAISnapshot first.");
        }
        const element = refs[refId];
        if (!element) {
          throw new Error(
            `Ref "${refId}" not found. Available refs: ${Object.keys(refs).join(", ")}`
          );
        }
        return element;
      }, ref);

      // A non-element handle is released and reported as "no element"
      const element = elementHandle.asElement();
      if (!element) {
        await elementHandle.dispose();
        return null;
      }

      return element;
    },

    async getServerInfo(): Promise<ServerInfo> {
      const info = await fetchServerInfo();
      return {
        wsEndpoint: info.wsEndpoint,
        // Anything other than "extension" (including a missing mode) is reported as "launch"
        mode: info.mode === "extension" ? "extension" : "launch",
        extensionConnected: info.extensionConnected,
      };
    },
  };
}