browser_cdp_tool.py
#!/usr/bin/env python3
"""
Raw Chrome DevTools Protocol (CDP) passthrough tool.

Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
the browser's DevTools WebSocket endpoint. Works when a CDP URL is
configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or
``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider
session is active.

This is the escape hatch for browser operations not covered by the main
browser tool surface (``browser_navigate``, ``browser_click``,
``browser_console``, etc.) — handling native dialogs, iframe-scoped
evaluation, cookie/network control, low-level tab management, etc.

Method reference: https://chromedevtools.github.io/devtools-protocol/
"""
from __future__ import annotations

import asyncio
import concurrent.futures
import json
import logging
from typing import Any, Dict, Optional

from tools.registry import registry, tool_error

logger = logging.getLogger(__name__)

CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"

# ``websockets`` is a transitive dependency of hermes-agent (via fal_client
# and firecrawl-py) and is already imported by gateway/platforms/feishu.py.
# Wrap the import so a clean error surfaces if the package is ever absent.
try:
    import websockets
    from websockets.exceptions import WebSocketException

    _WS_AVAILABLE = True
except ImportError:
    # Placeholders keep this module importable (and keep
    # ``except WebSocketException`` clauses valid) when the package is
    # missing; callers must check ``_WS_AVAILABLE`` before connecting.
    websockets = None  # type: ignore[assignment]
    WebSocketException = Exception  # type: ignore[assignment,misc]
    _WS_AVAILABLE = False


# ---------------------------------------------------------------------------
# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
# ---------------------------------------------------------------------------


def _run_async(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    When no event loop is running in the calling thread, the coroutine is
    executed directly with ``asyncio.run``.  When a loop *is* already running
    (``asyncio.run`` would raise there), the coroutine is executed with
    ``asyncio.run`` on a one-off worker thread and this call blocks until it
    finishes.  Exceptions raised by the coroutine propagate to the caller in
    both cases.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: safe to drive the coroutine here.
        return asyncio.run(coro)

    # A loop is running in this thread, so give the coroutine its own loop
    # on a separate thread and wait for the result.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# ---------------------------------------------------------------------------
# Endpoint resolution
# ---------------------------------------------------------------------------


def _resolve_cdp_endpoint() -> str:
    """Return the normalized CDP WebSocket URL, or empty string if unavailable.

    Delegates to ``tools.browser_tool._get_cdp_override`` so precedence stays
    consistent with the rest of the browser tool surface:

    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
    2. ``browser.cdp_url`` in ``config.yaml``

    Any failure while importing or calling the resolver is logged at debug
    level and reported as "no endpoint" (empty string) rather than raised.
    """
    try:
        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]

        return (_get_cdp_override() or "").strip()
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
        return ""


# ---------------------------------------------------------------------------
# Core CDP call
# ---------------------------------------------------------------------------


async def _await_response(
    ws: Any,
    msg_id: int,
    timeout: float,
    timeout_message: str,
) -> Dict[str, Any]:
    """Read frames from ``ws`` until the reply whose ``id`` is ``msg_id`` arrives.

    Events (messages without a matching ``id``) and any non-object frames are
    skipped.  The raw reply is returned as-is; the caller inspects
    ``"error"`` / ``"result"``.

    Raises:
        TimeoutError: with ``timeout_message`` when the deadline has already
            passed before the next read starts.
        asyncio.TimeoutError: from ``asyncio.wait_for`` when a read itself
            exceeds the remaining time.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(timeout_message)
        raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        msg = json.loads(raw)
        if isinstance(msg, dict) and msg.get("id") == msg_id:
            return msg
        # Anything else is an event or an unrelated response — keep reading.


async def _cdp_call(
    ws_url: str,
    method: str,
    params: Dict[str, Any],
    target_id: Optional[str],
    timeout: float,
) -> Dict[str, Any]:
    """Make a single CDP call, optionally attaching to a target first.

    When ``target_id`` is provided, we call ``Target.attachToTarget`` with
    ``flatten=True`` to multiplex a page-level session over the same
    browser-level WebSocket, then send ``method`` with that ``sessionId``.
    When ``target_id`` is None, ``method`` is sent at browser level — which
    works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other
    globally-scoped domains.

    ``timeout`` is applied separately to opening the connection, to the
    attach step, and to the method call itself.

    Returns:
        The ``result`` object of the CDP reply (``{}`` when absent).

    Raises:
        RuntimeError: ``websockets`` is unavailable, the attach step failed
            or returned no ``sessionId``, or the CDP reply carried an error.
        TimeoutError / asyncio.TimeoutError: no reply within ``timeout``.
    """
    # Explicit check instead of ``assert`` so the guard survives ``python -O``.
    if websockets is None:
        raise RuntimeError(
            "The 'websockets' Python package is required but not installed."
        )

    async with websockets.connect(
        ws_url,
        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
        open_timeout=timeout,
        close_timeout=5,
        ping_interval=None,  # CDP server doesn't expect pings
    ) as ws:
        next_id = 1
        session_id: Optional[str] = None

        # --- Step 1: attach to target if requested ---
        if target_id:
            attach_id = next_id
            next_id += 1
            await ws.send(
                json.dumps(
                    {
                        "id": attach_id,
                        "method": "Target.attachToTarget",
                        "params": {"targetId": target_id, "flatten": True},
                    }
                )
            )
            attach_msg = await _await_response(
                ws,
                attach_id,
                timeout,
                f"Timed out attaching to target {target_id}",
            )
            if "error" in attach_msg:
                raise RuntimeError(
                    f"Target.attachToTarget failed: {attach_msg['error']}"
                )
            session_id = attach_msg.get("result", {}).get("sessionId")
            if not session_id:
                raise RuntimeError(
                    "Target.attachToTarget did not return a sessionId"
                )

        # --- Step 2: dispatch the real method ---
        call_id = next_id
        req: Dict[str, Any] = {
            "id": call_id,
            "method": method,
            "params": params or {},
        }
        if session_id:
            req["sessionId"] = session_id
        await ws.send(json.dumps(req))

        reply = await _await_response(
            ws,
            call_id,
            timeout,
            f"Timed out waiting for response to {method}",
        )
        if "error" in reply:
            raise RuntimeError(f"CDP error: {reply['error']}")
        return reply.get("result", {})


# ---------------------------------------------------------------------------
# Public tool function
# ---------------------------------------------------------------------------


def _browser_cdp_via_supervisor(
    task_id: str,
    frame_id: str,
    method: str,
    params: Optional[Dict[str, Any]],
    timeout: float,
) -> str:
    """Route a CDP call through the live supervisor session for an OOPIF frame.

    Looks up the frame in the supervisor's snapshot, extracts its child
    session id (the ``session_id`` entry of the frame info), and dispatches
    ``method`` with that sessionId via the supervisor's ``_cdp`` coroutine,
    scheduled onto the supervisor loop with
    ``asyncio.run_coroutine_threadsafe``.

    Returns:
        JSON string ``{"success": True, "method", "frame_id", "session_id",
        "result"}`` on success, or the ``tool_error(...)`` payload when the
        supervisor / frame cannot be found, the frame has no dedicated
        session, the supervisor loop is not running, or the call fails.
    """
    try:
        from tools.browser_supervisor import SUPERVISOR_REGISTRY  # type: ignore[import-not-found]
    except Exception as exc:  # pragma: no cover — defensive
        return tool_error(
            f"CDP supervisor is not available: {exc}. frame_id routing requires "
            f"a running supervisor attached via /browser connect or an active "
            f"Browserbase session."
        )

    supervisor = SUPERVISOR_REGISTRY.get(task_id)
    if supervisor is None:
        return tool_error(
            f"No CDP supervisor is attached for task={task_id!r}. Call "
            f"browser_navigate or /browser connect first so the supervisor "
            f"can attach. Once attached, browser_snapshot will populate "
            f"frame_tree with frame_ids you can pass here."
        )

    snap = supervisor.snapshot()
    # Search the top frame first, then the children, for the requested id.
    top = snap.frame_tree.get("top")
    candidates = [top] if top else []
    candidates.extend(snap.frame_tree.get("children", []) or [])
    frame_info: Optional[Dict[str, Any]] = next(
        (frame for frame in candidates if frame.get("frame_id") == frame_id),
        None,
    )
    if frame_info is None:
        # Check the raw frames dict too (frame_tree is capped at 30 entries)
        with supervisor._state_lock:  # type: ignore[attr-defined]
            raw = supervisor._frames.get(frame_id)  # type: ignore[attr-defined]
        if raw is not None:
            frame_info = raw.to_dict()

    if frame_info is None:
        return tool_error(
            f"frame_id {frame_id!r} not found in supervisor state. "
            f"Call browser_snapshot to see current frame_tree."
        )

    child_sid = frame_info.get("session_id")
    if not child_sid:
        # Not an OOPIF: there is no dedicated session to route to, so report
        # an error that points the agent at the parent-page approach.
        # Same-origin iframes don't get their own sessionId; the agent can
        # still use contentWindow/contentDocument from the parent.
        return tool_error(
            f"frame_id {frame_id!r} is not an out-of-process iframe (no "
            f"dedicated CDP session). For same-origin iframes, use "
            f"`browser_cdp(method='Runtime.evaluate', params={{'expression': "
            f"\"document.querySelector('iframe').contentDocument.title\"}})` "
            f"at the top-level page instead."
        )

    # Dispatch onto the supervisor's loop.
    loop = supervisor._loop  # type: ignore[attr-defined]
    if loop is None or not loop.is_running():
        return tool_error(
            "CDP supervisor loop is not running. Try reconnecting with "
            "/browser connect."
        )

    async def _do_cdp():
        return await supervisor._cdp(  # type: ignore[attr-defined]
            method,
            params or {},
            session_id=child_sid,
            timeout=timeout,
        )

    fut = None
    try:
        fut = asyncio.run_coroutine_threadsafe(_do_cdp(), loop)
        # Small grace period on top of the CDP-level timeout so the
        # supervisor's own timeout normally fires first.
        result_msg = fut.result(timeout=timeout + 2)
    except Exception as exc:
        # Don't leave the coroutine running on the supervisor loop after we
        # have given up on it (no-op if it already finished).
        if fut is not None:
            fut.cancel()
        return tool_error(
            f"CDP call via supervisor failed: {type(exc).__name__}: {exc}",
            cdp_docs=CDP_DOCS_URL,
        )

    payload: Dict[str, Any] = {
        "success": True,
        "method": method,
        "frame_id": frame_id,
        "session_id": child_sid,
        # NOTE(review): assumes supervisor._cdp returns the raw reply dict
        # with a "result" key — confirm against browser_supervisor.
        "result": result_msg.get("result", {}),
    }
    return json.dumps(payload, ensure_ascii=False)


def _coerce_timeout(timeout: Any) -> float:
    """Convert ``timeout`` to seconds, defaulting to 30 and clamping to [1, 300]."""
    try:
        seconds = float(timeout) if timeout else 30.0
    except (TypeError, ValueError):
        seconds = 30.0
    return max(1.0, min(seconds, 300.0))


def browser_cdp(
    method: str,
    params: Optional[Dict[str, Any]] = None,
    target_id: Optional[str] = None,
    frame_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Send a raw CDP command. See ``CDP_DOCS_URL`` for method documentation.

    ``method``, ``params`` and ``timeout`` are validated / normalized first,
    for both routing paths.  If ``frame_id`` is set the call goes through the
    supervisor (and ``target_id`` is ignored); otherwise a fresh stateless
    WebSocket connection is opened to the resolved CDP endpoint.

    Args:
        method: CDP method name, e.g. ``"Target.getTargets"``.
        params: Method-specific parameters; defaults to ``{}``.
        target_id: Optional target/tab ID for page-level methods. When set,
            we first attach to the target (``flatten=True``) and send
            ``method`` with the resulting ``sessionId``. Uses a fresh
            stateless CDP connection.
        frame_id: Optional cross-origin (OOPIF) iframe ``frame_id`` from
            ``browser_snapshot.frame_tree.children[]``. When set (and the
            frame is an OOPIF with a live session tracked by the CDP
            supervisor), routes the call through the supervisor's existing
            WebSocket — which is how you Runtime.evaluate *inside* an
            iframe on backends where per-call fresh CDP connections would
            hit signed-URL expiry (Browserbase) or expensive reattach.
        timeout: Seconds to wait for the call to complete. Falsy or
            unparseable values fall back to 30; the value is clamped to
            the range 1–300.
        task_id: Task identifier for supervisor lookup. When ``frame_id``
            is set, this identifies which task's supervisor to use
            (``"default"`` when not given); it is unused on the stateless
            path.

    Returns:
        JSON string ``{"success": True, "method": ..., "result": {...}}`` on
        success, or the ``tool_error(...)`` payload on failure.
    """
    # --- Validate inputs shared by both routing paths --------------------
    if not method or not isinstance(method, str):
        return tool_error(
            "'method' is required (e.g. 'Target.getTargets')",
            cdp_docs=CDP_DOCS_URL,
        )

    call_params: Dict[str, Any] = params or {}
    if not isinstance(call_params, dict):
        return tool_error(
            f"'params' must be an object/dict, got {type(call_params).__name__}"
        )

    safe_timeout = _coerce_timeout(timeout)

    # --- Route iframe-scoped calls through the supervisor ---------------
    if frame_id:
        return _browser_cdp_via_supervisor(
            task_id=task_id or "default",
            frame_id=frame_id,
            method=method,
            params=call_params,
            timeout=safe_timeout,
        )

    # --- Stateless path: fresh connection to the CDP endpoint -----------
    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    endpoint = _resolve_cdp_endpoint()
    if not endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
            "The Camofox backend is REST-only and does not expose CDP.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
            "Expected ws://... or wss://... — the /browser connect "
            "resolver should have rewritten this. Check that Chrome is "
            "actually listening on the debug port."
        )

    try:
        result = _run_async(
            _cdp_call(endpoint, method, call_params, target_id, safe_timeout)
        )
    except asyncio.TimeoutError as exc:
        return tool_error(
            f"CDP call timed out after {safe_timeout}s: {exc}",
            method=method,
        )
    except TimeoutError as exc:
        # Distinct from asyncio.TimeoutError on Python < 3.11.
        return tool_error(str(exc), method=method)
    except RuntimeError as exc:
        return tool_error(str(exc), method=method)
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            method=method,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_cdp unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            method=method,
        )

    payload: Dict[str, Any] = {
        "success": True,
        "method": method,
        "result": result,
    }
    if target_id:
        payload["target_id"] = target_id
    return json.dumps(payload, ensure_ascii=False)


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------


# Tool schema handed to the registry: name, model-facing description, and the
# JSON-schema for the arguments accepted by the ``browser_cdp`` handler.
BROWSER_CDP_SCHEMA: Dict[str, Any] = {
    "name": "browser_cdp",
    "description": (
        "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for "
        "browser operations not covered by browser_navigate, browser_click, "
        "browser_console, etc.\n\n"
        "**Requires a reachable CDP endpoint.** Available when the user has "
        "run '/browser connect' to attach to a running Chrome, or when "
        "'browser.cdp_url' is set in config.yaml. Not currently wired up for "
        "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
        "CDP per session but live-session routing is a follow-up. Camofox is "
        "REST-only and will never support CDP. If the tool is in your toolset "
        "at all, a CDP endpoint is already reachable.\n\n"
        f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
        "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
        "to look up parameters and return shape.\n\n"
        "**Common patterns:**\n"
        "- List tabs: method='Target.getTargets', params={}\n"
        "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', "
        "params={'accept': true, 'promptText': ''}, target_id=<tabId>\n"
        "- Get all cookies: method='Network.getAllCookies', params={}\n"
        "- Eval in a specific tab: method='Runtime.evaluate', "
        "params={'expression': '...', 'returnByValue': true}, "
        "target_id=<tabId>\n"
        "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', "
        "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, "
        "'mobile': false}, target_id=<tabId>\n\n"
        "**Usage rules:**\n"
        "- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
        "target_id and frame_id.\n"
        "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
        "Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
        "- **Cross-origin iframe scope** (Runtime.evaluate inside an OOPIF, "
        "Page.* targeting a frame target, etc.): pass frame_id from the "
        "browser_snapshot frame_tree output. This routes through the CDP "
        "supervisor's live connection — the only reliable way on "
        "Browserbase where stateless CDP calls hit signed-URL expiry.\n"
        "- Each stateless call (without frame_id) is independent — sessions "
        "and event subscriptions do not persist between calls. For stateful "
        "workflows, prefer the dedicated browser tools or use frame_id "
        "routing."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "method": {
                "type": "string",
                "description": (
                    "CDP method name, e.g. 'Target.getTargets', "
                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
                ),
            },
            "params": {
                "type": "object",
                "description": (
                    "Method-specific parameters as a JSON object. Omit or "
                    "pass {} for methods that take no parameters."
                ),
                "properties": {},
                "additionalProperties": True,
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Optional. Target/tab ID from Target.getTargets result "
                    "(each entry's 'targetId'). Use for page-level methods "
                    "at the top-level tab scope. Mutually exclusive with "
                    "frame_id."
                ),
            },
            "frame_id": {
                "type": "string",
                "description": (
                    "Optional. Out-of-process iframe (OOPIF) frame_id from "
                    "browser_snapshot.frame_tree.children[] where "
                    "is_oopif=true. When set, routes the call through the "
                    "CDP supervisor's live session for that iframe. "
                    "Essential for Runtime.evaluate inside cross-origin "
                    "iframes, especially on Browserbase where fresh "
                    "per-call CDP connections can't keep up with signed "
                    "URL rotation. For same-origin iframes, use parent "
                    "contentWindow/contentDocument from Runtime.evaluate "
                    "at the top-level page instead."
                ),
            },
            "timeout": {
                "type": "number",
                "description": (
                    "Timeout in seconds (default 30, max 300)."
                ),
                "default": 30,
            },
        },
        "required": ["method"],
    },
}


def _browser_cdp_check() -> bool:
    """Availability check for browser_cdp.

    The tool is only offered when the Python side can actually reach a CDP
    endpoint right now — meaning a static URL is set via ``/browser connect``
    (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``.

    Backends that do *not* currently expose CDP to us — Camofox (REST-only),
    the default local agent-browser mode (Playwright hides its internal CDP
    port), and cloud providers whose per-session ``cdp_url`` is not yet
    surfaced — are gated out so the model doesn't see a tool that would
    reliably fail. Cloud-provider CDP routing is a follow-up.

    Kept in a thin wrapper so the registration statement stays at module top
    level (the tool-discovery AST scan only picks up top-level
    ``registry.register(...)`` calls).

    Returns:
        ``True`` only when ``tools.browser_tool`` imports,
        ``check_browser_requirements()`` passes, and ``_get_cdp_override()``
        yields a truthy value.
    """
    try:
        from tools.browser_tool import (  # type: ignore[import-not-found]
            _get_cdp_override,
            check_browser_requirements,
        )
    except ImportError as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
        return False
    if not check_browser_requirements():
        return False
    return bool(_get_cdp_override())


registry.register(
    name="browser_cdp",
    toolset="browser-cdp",
    schema=BROWSER_CDP_SCHEMA,
    handler=lambda args, **kw: browser_cdp(
        method=args.get("method", ""),
        params=args.get("params"),
        target_id=args.get("target_id"),
        frame_id=args.get("frame_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    ),
    check_fn=_browser_cdp_check,
    emoji="🧪",
)