/ plugins / google_meet / tools.py
tools.py
  1  """Agent-facing tools for the google_meet plugin.
  2  
  3  Tools:
  4    meet_join        — join a Google Meet URL (spawns Playwright bot locally
  5                       OR on a remote node host via node=<name>)
  6    meet_status      — report bot liveness + transcript progress
  7    meet_transcript  — read the current transcript (optional last-N)
  8    meet_leave       — signal the bot to leave cleanly
  9    meet_say         — (v2) speak text through the realtime audio bridge.
 10                       Requires the active meeting to have been joined with
 11                       mode='realtime'.
 12  """
 13  
 14  from __future__ import annotations
 15  
 16  import json
 17  from typing import Any, Dict, Optional
 18  
 19  from plugins.google_meet import process_manager as pm
 20  
 21  
 22  # ---------------------------------------------------------------------------
 23  # Runtime gate
 24  # ---------------------------------------------------------------------------
 25  
 26  def check_meet_requirements() -> bool:
 27      """Return True when the plugin can actually run LOCALLY.
 28  
 29      Gates on:
 30        * Python ``playwright`` package importable
 31        * the plugin being on a supported platform (Linux or macOS)
 32  
 33      Note: remote-node operation (``node=<name>``) only needs the
 34      ``websockets`` dep on the gateway side — Chromium lives on the node.
 35      But the plugin-level gate keeps the v1 semantics; individual tool
 36      handlers relax the requirement when a node is addressed.
 37      """
 38      import platform as _p
 39      if _p.system().lower() not in ("linux", "darwin"):
 40          return False
 41      try:
 42          import playwright  # noqa: F401
 43      except ImportError:
 44          return False
 45      return True
 46  
 47  
 48  # ---------------------------------------------------------------------------
 49  # Node client helper
 50  # ---------------------------------------------------------------------------
 51  
 52  def _resolve_node_client(node: Optional[str]):
 53      """Return (NodeClient, node_name) for *node*, or (None, None) to run local.
 54  
 55      Raises RuntimeError with a readable message if the node is named but
 56      unresolvable, so the handler can surface a clear error to the agent.
 57      """
 58      if node is None or node == "":
 59          return None, None
 60      from plugins.google_meet.node.registry import NodeRegistry
 61      from plugins.google_meet.node.client import NodeClient
 62  
 63      reg = NodeRegistry()
 64      entry = reg.resolve(node if node != "auto" else None)
 65      if entry is None:
 66          raise RuntimeError(
 67              f"no registered meet node matches {node!r} — "
 68              "run `hermes meet node approve <name> <url> <token>` first"
 69          )
 70      client = NodeClient(url=entry["url"], token=entry["token"])
 71      return client, entry.get("name")
 72  
 73  
 74  # ---------------------------------------------------------------------------
 75  # Schemas
 76  # ---------------------------------------------------------------------------
 77  
 78  MEET_JOIN_SCHEMA: Dict[str, Any] = {
 79      "name": "meet_join",
 80      "description": (
 81          "Join a Google Meet call and start scraping live captions into a "
 82          "transcript file. Only meet.google.com URLs are accepted; no calendar "
 83          "scanning, no auto-dial. Spawns a headless Chromium subprocess that "
 84          "runs in parallel with the agent loop — returns immediately. Poll "
 85          "with meet_status and read captions with meet_transcript. Reminder "
 86          "to the agent: you should announce yourself in the meeting (there is "
 87          "no automatic consent announcement)."
 88      ),
 89      "parameters": {
 90          "type": "object",
 91          "properties": {
 92              "url": {
 93                  "type": "string",
 94                  "description": (
 95                      "Full https://meet.google.com/... URL. Required."
 96                  ),
 97              },
 98              "mode": {
 99                  "type": "string",
100                  "enum": ["transcribe", "realtime"],
101                  "description": (
102                      "transcribe (default): listen-only, scrape captions. "
103                      "realtime: also enable agent speech via meet_say "
104                      "(requires OpenAI Realtime key + platform audio bridge)."
105                  ),
106              },
107              "guest_name": {
108                  "type": "string",
109                  "description": (
110                      "Display name to use when joining as guest. Defaults to "
111                      "'Hermes Agent'."
112                  ),
113              },
114              "duration": {
115                  "type": "string",
116                  "description": (
117                      "Optional max duration before auto-leave (e.g. '30m', "
118                      "'2h', '90s'). Omit to stay until meet_leave is called."
119                  ),
120              },
121              "headed": {
122                  "type": "boolean",
123                  "description": (
124                      "Run Chromium headed instead of headless (debug only). "
125                      "Default false."
126                  ),
127              },
128              "node": {
129                  "type": "string",
130                  "description": (
131                      "Name of a registered remote node to run the bot on "
132                      "(useful when the gateway runs on a headless Linux box "
133                      "but the user's Chrome with a signed-in Google profile "
134                      "lives on their Mac). Pass 'auto' to use the single "
135                      "registered node. Default: run locally. Nodes are "
136                      "approved via `hermes meet node approve`."
137                  ),
138              },
139          },
140          "required": ["url"],
141          "additionalProperties": False,
142      },
143  }
144  
# Tool schema for ``meet_status``. Takes no required arguments.
# The ``node`` parameter carries a description so the agent can learn the
# remote/'auto'/local semantics from this tool's schema alone, matching
# how ``meet_join`` documents the same parameter.
MEET_STATUS_SCHEMA: Dict[str, Any] = {
    "name": "meet_status",
    "description": (
        "Report the current Meet session state — whether the bot is alive, "
        "has joined, is sitting in the lobby, number of transcript lines "
        "captured, and last-caption timestamp."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "node": {
                "type": "string",
                "description": (
                    "Name of a registered remote node whose bot should be "
                    "queried. Pass 'auto' to use the single registered node. "
                    "Omit to query the local bot."
                ),
            },
        },
        "additionalProperties": False,
    },
}
160  
# Tool schema for ``meet_transcript``. Both parameters are optional.
# The ``node`` parameter carries a description so the agent can learn the
# remote/'auto'/local semantics from this tool's schema alone, matching
# how ``meet_join`` documents the same parameter.
MEET_TRANSCRIPT_SCHEMA: Dict[str, Any] = {
    "name": "meet_transcript",
    "description": (
        "Read the scraped transcript for the active Meet session. Returns "
        "full transcript unless 'last' is set, in which case returns the last "
        "N lines only."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "last": {
                "type": "integer",
                "description": (
                    "Optional: return only the last N caption lines. Useful "
                    "for polling during a meeting without re-reading the "
                    "whole transcript."
                ),
                "minimum": 1,
            },
            "node": {
                "type": "string",
                "description": (
                    "Name of a registered remote node whose transcript should "
                    "be read. Pass 'auto' to use the single registered node. "
                    "Omit to read the local bot's transcript."
                ),
            },
        },
        "additionalProperties": False,
    },
}
185  
# Tool schema for ``meet_leave``. Takes no required arguments.
# The ``node`` parameter carries a description so the agent can learn the
# remote/'auto'/local semantics from this tool's schema alone, matching
# how ``meet_join`` documents the same parameter.
MEET_LEAVE_SCHEMA: Dict[str, Any] = {
    "name": "meet_leave",
    "description": (
        "Leave the active Meet call cleanly, stop caption scraping, and "
        "finalize the transcript file. Safe to call when no meeting is "
        "active — returns ok=false with a reason."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "node": {
                "type": "string",
                "description": (
                    "Name of a registered remote node whose bot should leave. "
                    "Pass 'auto' to use the single registered node. "
                    "Omit to stop the local bot."
                ),
            },
        },
        "additionalProperties": False,
    },
}
201  
# Tool schema for ``meet_say``. ``text`` is the only required argument.
# The ``node`` parameter carries a description so the agent can learn the
# remote/'auto'/local semantics from this tool's schema alone, matching
# how ``meet_join`` documents the same parameter.
MEET_SAY_SCHEMA: Dict[str, Any] = {
    "name": "meet_say",
    "description": (
        "Speak text into the active Meet call. Requires the active meeting "
        "to have been joined with mode='realtime'. The text is queued to "
        "the bot's OpenAI Realtime session; the generated audio is streamed "
        "into Chrome's fake microphone via a virtual audio device "
        "(PulseAudio null-sink on Linux, BlackHole on macOS). Returns "
        "immediately — the actual speech lags by a couple of seconds."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "text": {"type": "string", "description": "Text to speak."},
            "node": {
                "type": "string",
                "description": (
                    "Name of a registered remote node whose bot should speak. "
                    "Pass 'auto' to use the single registered node. "
                    "Omit to speak through the local bot."
                ),
            },
        },
        "required": ["text"],
        "additionalProperties": False,
    },
}
222  
223  
224  # ---------------------------------------------------------------------------
225  # Handlers
226  # ---------------------------------------------------------------------------
227  
228  def _json(obj: Any) -> str:
229      return json.dumps(obj, ensure_ascii=False)
230  
231  
def _err(msg: str, **extra) -> str:
    """Build the JSON failure payload returned by the tool handlers.

    The payload always starts with ``success: false`` and ``error: msg``;
    any keyword arguments are merged in afterwards (and win on key clashes).
    """
    payload = {"success": False, "error": msg}
    payload.update(extra)
    return _json(payload)
234  
235  
def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
    """Handle ``meet_join``: start a bot locally or on a remote node.

    Args:
        args: tool arguments; reads ``url``, ``mode``, ``node``,
            ``guest_name``, ``duration`` and ``headed``.
        **_kw: ignored.

    Returns:
        A JSON string. On failure it has ``success: false`` and an
        ``error`` message. Otherwise it is the result dict of the start
        call with ``success`` set from the truthiness of its ``ok``
        field, plus ``node`` when a remote node handled the request.
    """
    url = (args.get("url") or "").strip()
    if not url:
        return _err("url is required")

    mode = (args.get("mode") or "transcribe").strip().lower()
    if mode not in ("transcribe", "realtime"):
        return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")

    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as exc:
        return _err(str(exc))

    # Both start paths take the same keyword arguments, so build them once.
    raw_duration = args.get("duration")
    bot_kwargs = {
        "url": url,
        "guest_name": str(args.get("guest_name") or "Hermes Agent"),
        "duration": str(raw_duration) if raw_duration else None,
        "headed": bool(args.get("headed", False)),
        "mode": mode,
    }

    if client is None:
        # Local path — same as v1, with v2 params. Only this path needs
        # Playwright on the current machine.
        if not check_meet_requirements():
            return _err(
                "google_meet plugin prerequisites missing — install with "
                "`pip install playwright && python -m playwright install chromium`. "
                "Plugin is supported on Linux and macOS only."
            )
        outcome = pm.start(**bot_kwargs)
        return _json({"success": bool(outcome.get("ok")), **outcome})

    # Remote path — delegate to the node host. Any failure, including a
    # malformed reply, is reported as a tool error rather than raised.
    try:
        outcome = client.start_bot(**bot_kwargs)
        return _json(
            {"success": bool(outcome.get("ok")), "node": node_name, **outcome}
        )
    except Exception as exc:
        return _err(f"remote node start_bot failed: {exc}", node=node_name)
279  
280  
def handle_meet_status(args: Dict[str, Any], **_kw) -> str:
    """Handle ``meet_status``: report bot state from a node or locally.

    Reads only ``node`` from *args*. Returns a JSON string: the status
    dict with ``success`` set from the truthiness of its ``ok`` field
    (plus ``node`` for remote calls), or a ``success: false`` error
    payload when the node cannot be resolved or the remote call fails.
    """
    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as exc:
        return _err(str(exc))

    if client is None:
        report = pm.status()
        return _json({"success": bool(report.get("ok")), **report})

    try:
        report = client.status()
        return _json(
            {"success": bool(report.get("ok")), "node": node_name, **report}
        )
    except Exception as exc:
        return _err(f"remote node status failed: {exc}", node=node_name)
294  
295  
def handle_meet_transcript(args: Dict[str, Any], **_kw) -> str:
    """Handle ``meet_transcript``: read the transcript, optionally a tail.

    ``last`` is coerced with ``int()``; a missing, unparseable, or
    below-1 value falls back to ``None``, which is what gets passed on
    as ``last=``. Reads ``node`` to choose between a remote node and the
    local process manager.

    Returns a JSON string: the transcript result dict with ``success``
    set from the truthiness of its ``ok`` field (plus ``node`` for remote
    calls), or a ``success: false`` error payload.
    """
    raw_last = args.get("last")
    tail: Optional[int] = None
    if raw_last is not None:
        try:
            tail = int(raw_last)
        except (TypeError, ValueError):
            tail = None
        else:
            if tail < 1:
                tail = None

    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as exc:
        return _err(str(exc))

    if client is None:
        result = pm.transcript(last=tail)
        return _json({"success": bool(result.get("ok")), **result})

    try:
        result = client.transcript(last=tail)
        return _json(
            {"success": bool(result.get("ok")), "node": node_name, **result}
        )
    except Exception as exc:
        return _err(f"remote node transcript failed: {exc}", node=node_name)
316  
317  
def handle_meet_leave(args: Dict[str, Any], **_kw) -> str:
    """Handle ``meet_leave``: stop the bot on a node or locally.

    Reads only ``node`` from *args*. The local stop is issued with the
    reason ``"agent called meet_leave"``. Returns a JSON string: the stop
    result dict with ``success`` set from the truthiness of its ``ok``
    field (plus ``node`` for remote calls), or a ``success: false`` error
    payload.
    """
    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as exc:
        return _err(str(exc))

    if client is None:
        result = pm.stop(reason="agent called meet_leave")
        return _json({"success": bool(result.get("ok")), **result})

    try:
        result = client.stop()
        return _json(
            {"success": bool(result.get("ok")), "node": node_name, **result}
        )
    except Exception as exc:
        return _err(f"remote node stop failed: {exc}", node=node_name)
331  
332  
def handle_meet_say(args: Dict[str, Any], **_kw) -> str:
    """Handle ``meet_say``: queue text for the bot to speak.

    ``text`` is stripped and must be non-empty. Reads ``node`` to choose
    between a remote node's ``say`` call and the local process manager's
    say queue. Returns a JSON string: the result dict with ``success``
    set from the truthiness of its ``ok`` field (plus ``node`` for remote
    calls), or a ``success: false`` error payload.
    """
    text = (args.get("text") or "").strip()
    if not text:
        return _err("text is required")

    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as exc:
        return _err(str(exc))

    if client is None:
        result = pm.enqueue_say(text)
        return _json({"success": bool(result.get("ok")), **result})

    try:
        result = client.say(text)
        return _json(
            {"success": bool(result.get("ok")), "node": node_name, **result}
        )
    except Exception as exc:
        return _err(f"remote node say failed: {exc}", node=node_name)