# browser_dialog_tool.py
"""Agent-facing tool: respond to a native JS dialog captured by the CDP supervisor.

This tool is response-only — the agent first reads ``pending_dialogs`` from
``browser_snapshot`` output, then calls ``browser_dialog(action=...)`` to
accept or dismiss.

Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only
appears when a CDP endpoint is reachable (Browserbase with a
``connectUrl``, local Chrome via ``/browser connect``, or
``browser.cdp_url`` set in config).

See ``website/docs/developer-guide/browser-supervisor.md`` for the full
design.
"""

from __future__ import annotations

import json
import logging
from typing import Any, Dict, Optional

from tools.browser_supervisor import SUPERVISOR_REGISTRY
from tools.registry import registry

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Schema text. Kept as separate constants so the schema literal below stays
# short enough to read at a glance; the assembled strings are what the model
# sees as the tool / parameter descriptions.
# ---------------------------------------------------------------------------

_TOOL_DESCRIPTION = (
    "Respond to a native JavaScript dialog (alert / confirm / prompt / "
    "beforeunload) that is currently blocking the page.\n\n"
    "**Workflow:** call ``browser_snapshot`` first — if a dialog is open, "
    "it appears in the ``pending_dialogs`` field with ``id``, ``type``, "
    "and ``message``. Then call this tool with ``action='accept'`` or "
    "``action='dismiss'``.\n\n"
    "**Prompt dialogs:** pass ``prompt_text`` to supply the response "
    "string. Ignored for alert/confirm/beforeunload.\n\n"
    "**Multiple dialogs:** if more than one dialog is queued (rare — "
    "happens when a second dialog fires while the first is still open), "
    "pass ``dialog_id`` from the snapshot to disambiguate.\n\n"
    "**Availability:** only present when a CDP-capable backend is "
    "attached — Browserbase sessions, local Chrome via "
    "``/browser connect``, or ``browser.cdp_url`` in config.yaml. "
    "Not available on Camofox (REST-only) or the default Playwright "
    "local browser (CDP port is hidden)."
)

_ACTION_DESCRIPTION = (
    "'accept' clicks OK / returns the prompt text. "
    "'dismiss' clicks Cancel / returns null from prompt(). "
    "For ``beforeunload`` dialogs: 'accept' allows the "
    "navigation, 'dismiss' keeps the page."
)

_PROMPT_TEXT_DESCRIPTION = (
    "Response string for a ``prompt()`` dialog. Ignored for "
    "other dialog types. Defaults to empty string."
)

_DIALOG_ID_DESCRIPTION = (
    "Specific dialog to respond to, from "
    "``browser_snapshot.pending_dialogs[].id``. Required "
    "only when multiple dialogs are queued."
)

# Error text returned when SUPERVISOR_REGISTRY has no entry for the task.
_NO_SUPERVISOR_ERROR = (
    "No CDP supervisor is attached to this task. Either the "
    "browser backend doesn't expose CDP (Camofox, default "
    "Playwright) or no browser session has been started yet. "
    "Call browser_navigate or /browser connect first."
)


# Tool schema handed to the registry. Only ``action`` is required.
BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
    "name": "browser_dialog",
    "description": _TOOL_DESCRIPTION,
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["accept", "dismiss"],
                "description": _ACTION_DESCRIPTION,
            },
            "prompt_text": {
                "type": "string",
                "description": _PROMPT_TEXT_DESCRIPTION,
            },
            "dialog_id": {
                "type": "string",
                "description": _DIALOG_ID_DESCRIPTION,
            },
        },
        "required": ["action"],
    },
}


def _failure(message: Any) -> str:
    """Serialize a failed tool result carrying *message* as its ``error``."""
    return json.dumps({"success": False, "error": message})


def browser_dialog(
    action: str,
    prompt_text: Optional[str] = None,
    dialog_id: Optional[str] = None,
    task_id: Optional[str] = None,
) -> str:
    """Answer a pending dialog through the task's CDP supervisor.

    Args:
        action: Forwarded unchanged to the supervisor; the schema advertises
            ``"accept"`` and ``"dismiss"``.
        prompt_text: Optional text forwarded to the supervisor.
        dialog_id: Optional dialog identifier forwarded to the supervisor.
        task_id: Key used to look up the supervisor; a falsy value selects
            the ``"default"`` entry.

    Returns:
        A JSON string. On success it holds ``success=True``, the ``action``
        and the supervisor's ``dialog`` value (``{}`` when absent). Otherwise
        it holds ``success=False`` and an ``error`` message.
    """
    supervisor = SUPERVISOR_REGISTRY.get(task_id or "default")
    if supervisor is None:
        return _failure(_NO_SUPERVISOR_ERROR)

    outcome = supervisor.respond_to_dialog(
        action=action,
        prompt_text=prompt_text,
        dialog_id=dialog_id,
    )
    if not outcome.get("ok"):
        return _failure(outcome.get("error", "unknown error"))

    payload = {
        "success": True,
        "action": action,
        "dialog": outcome.get("dialog", {}),
    }
    return json.dumps(payload)


def _browser_dialog_check() -> bool:
    """Availability gate: defer entirely to ``browser_cdp``'s own check.

    Sharing the check keeps this tool and ``browser_cdp`` visible under the
    same conditions. If ``tools.browser_cdp_tool`` cannot be imported the
    failure is logged at debug level and the tool is reported unavailable.
    """
    try:
        from tools.browser_cdp_tool import _browser_cdp_check  # type: ignore[import-not-found]
    except Exception as import_error:  # pragma: no cover — defensive
        logger.debug(
            "browser_dialog check: browser_cdp_tool import failed: %s",
            import_error,
        )
        return False
    else:
        # Deliberately outside the ``try``: errors raised by the check itself
        # are not swallowed here.
        return _browser_cdp_check()


def _dispatch_browser_dialog(args: Dict[str, Any], **kw: Any) -> str:
    """Adapt the registry's ``handler(args, **kw)`` call to ``browser_dialog``.

    A missing ``action`` is passed through as an empty string; the optional
    arguments and ``task_id`` default to ``None``.
    """
    return browser_dialog(
        action=args.get("action", ""),
        prompt_text=args.get("prompt_text"),
        dialog_id=args.get("dialog_id"),
        task_id=kw.get("task_id"),
    )


registry.register(
    name="browser_dialog",
    toolset="browser-cdp",
    schema=BROWSER_DIALOG_SCHEMA,
    handler=_dispatch_browser_dialog,
    check_fn=_browser_dialog_check,
    emoji="💬",
)