/ tools / browser_dialog_tool.py
browser_dialog_tool.py
  1  """Agent-facing tool: respond to a native JS dialog captured by the CDP supervisor.
  2  
  3  This tool is response-only — the agent first reads ``pending_dialogs`` from
  4  ``browser_snapshot`` output, then calls ``browser_dialog(action=...)`` to
  5  accept or dismiss.
  6  
  7  Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only
  8  appears when a CDP endpoint is reachable (Browserbase with a
  9  ``connectUrl``, local Chrome via ``/browser connect``, or
 10  ``browser.cdp_url`` set in config).
 11  
 12  See ``website/docs/developer-guide/browser-supervisor.md`` for the full
 13  design.
 14  """
 15  
 16  from __future__ import annotations
 17  
 18  import json
 19  import logging
 20  from typing import Any, Dict, Optional
 21  
 22  from tools.browser_supervisor import SUPERVISOR_REGISTRY
 23  from tools.registry import registry
 24  
 25  logger = logging.getLogger(__name__)
 26  
 27  
 28  BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
 29      "name": "browser_dialog",
 30      "description": (
 31          "Respond to a native JavaScript dialog (alert / confirm / prompt / "
 32          "beforeunload) that is currently blocking the page.\n\n"
 33          "**Workflow:** call ``browser_snapshot`` first — if a dialog is open, "
 34          "it appears in the ``pending_dialogs`` field with ``id``, ``type``, "
 35          "and ``message``. Then call this tool with ``action='accept'`` or "
 36          "``action='dismiss'``.\n\n"
 37          "**Prompt dialogs:** pass ``prompt_text`` to supply the response "
 38          "string. Ignored for alert/confirm/beforeunload.\n\n"
 39          "**Multiple dialogs:** if more than one dialog is queued (rare — "
 40          "happens when a second dialog fires while the first is still open), "
 41          "pass ``dialog_id`` from the snapshot to disambiguate.\n\n"
 42          "**Availability:** only present when a CDP-capable backend is "
 43          "attached — Browserbase sessions, local Chrome via "
 44          "``/browser connect``, or ``browser.cdp_url`` in config.yaml. "
 45          "Not available on Camofox (REST-only) or the default Playwright "
 46          "local browser (CDP port is hidden)."
 47      ),
 48      "parameters": {
 49          "type": "object",
 50          "properties": {
 51              "action": {
 52                  "type": "string",
 53                  "enum": ["accept", "dismiss"],
 54                  "description": (
 55                      "'accept' clicks OK / returns the prompt text. "
 56                      "'dismiss' clicks Cancel / returns null from prompt(). "
 57                      "For ``beforeunload`` dialogs: 'accept' allows the "
 58                      "navigation, 'dismiss' keeps the page."
 59                  ),
 60              },
 61              "prompt_text": {
 62                  "type": "string",
 63                  "description": (
 64                      "Response string for a ``prompt()`` dialog. Ignored for "
 65                      "other dialog types. Defaults to empty string."
 66                  ),
 67              },
 68              "dialog_id": {
 69                  "type": "string",
 70                  "description": (
 71                      "Specific dialog to respond to, from "
 72                      "``browser_snapshot.pending_dialogs[].id``. Required "
 73                      "only when multiple dialogs are queued."
 74                  ),
 75              },
 76          },
 77          "required": ["action"],
 78      },
 79  }
 80  
 81  
 82  def browser_dialog(
 83      action: str,
 84      prompt_text: Optional[str] = None,
 85      dialog_id: Optional[str] = None,
 86      task_id: Optional[str] = None,
 87  ) -> str:
 88      """Respond to a pending dialog on the active task's CDP supervisor."""
 89      effective_task_id = task_id or "default"
 90      supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
 91      if supervisor is None:
 92          return json.dumps(
 93              {
 94                  "success": False,
 95                  "error": (
 96                      "No CDP supervisor is attached to this task. Either the "
 97                      "browser backend doesn't expose CDP (Camofox, default "
 98                      "Playwright) or no browser session has been started yet. "
 99                      "Call browser_navigate or /browser connect first."
100                  ),
101              }
102          )
103  
104      result = supervisor.respond_to_dialog(
105          action=action,
106          prompt_text=prompt_text,
107          dialog_id=dialog_id,
108      )
109      if result.get("ok"):
110          return json.dumps(
111              {
112                  "success": True,
113                  "action": action,
114                  "dialog": result.get("dialog", {}),
115              }
116          )
117      return json.dumps({"success": False, "error": result.get("error", "unknown error")})
118  
119  
def _browser_dialog_check() -> bool:
    """Availability gate for ``browser_dialog``.

    Delegates to ``_browser_cdp_check`` from ``tools.browser_cdp_tool`` so
    this tool is offered under the same condition as ``browser_cdp`` and the
    two appear and disappear together. The supervisor is started lazily
    (``browser_navigate`` / ``/browser connect`` / Browserbase session
    creation), so a reachable CDP URL is sufficient to show the tool.

    Returns ``False`` if ``tools.browser_cdp_tool`` cannot be imported;
    otherwise returns whatever ``_browser_cdp_check()`` returns.
    """
    try:
        from tools.browser_cdp_tool import (  # type: ignore[import-not-found]
            _browser_cdp_check as cdp_reachable,
        )
    except Exception as import_error:  # pragma: no cover — defensive
        logger.debug(
            "browser_dialog check: browser_cdp_tool import failed: %s",
            import_error,
        )
        available = False
    else:
        # Called outside the ``try`` body so errors raised by the check
        # itself are not swallowed by the import guard.
        available = cdp_reachable()
    return available
134  
135  
def _handle_browser_dialog(args: Dict[str, Any], **kw: Any) -> str:
    """Registry adapter: unpack the tool-call arguments for ``browser_dialog``.

    ``action`` falls back to an empty string when absent; ``prompt_text`` and
    ``dialog_id`` fall back to ``None``. ``task_id`` is taken from the
    keyword arguments supplied by the caller.
    """
    return browser_dialog(
        action=args.get("action", ""),
        prompt_text=args.get("prompt_text"),
        dialog_id=args.get("dialog_id"),
        task_id=kw.get("task_id"),
    )


# Register the tool in the "browser-cdp" toolset, gated by
# ``_browser_dialog_check``.
registry.register(
    name="browser_dialog",
    toolset="browser-cdp",
    schema=BROWSER_DIALOG_SCHEMA,
    handler=_handle_browser_dialog,
    check_fn=_browser_dialog_check,
    emoji="💬",
)