# tools.py
"""Agent-facing tools for the google_meet plugin.

Tools:
  meet_join       — join a Google Meet URL (spawns Playwright bot locally
                    OR on a remote node host via node=<name>)
  meet_status     — report bot liveness + transcript progress
  meet_transcript — read the current transcript (optional last-N)
  meet_leave      — signal the bot to leave cleanly
  meet_say        — (v2) speak text through the realtime audio bridge.
                    Requires the active meeting to have been joined with
                    mode='realtime'.

Every handler takes the tool-call argument dict and returns a JSON string
with a boolean ``success`` field; failures carry an ``error`` message.
"""

from __future__ import annotations

import json
from typing import Any, Callable, Dict, Optional, Tuple

from plugins.google_meet import process_manager as pm


# ---------------------------------------------------------------------------
# Runtime gate
# ---------------------------------------------------------------------------

def check_meet_requirements() -> bool:
    """Return True when the plugin can actually run LOCALLY.

    Gates on:
      * Python ``playwright`` package importable
      * the plugin being on a supported platform (Linux or macOS)

    Note: remote-node operation (``node=<name>``) only needs the
    ``websockets`` dep on the gateway side — Chromium lives on the node.
    But the plugin-level gate keeps the v1 semantics; individual tool
    handlers relax the requirement when a node is addressed.
    """
    import platform as _p
    if _p.system().lower() not in ("linux", "darwin"):
        return False
    try:
        import playwright  # noqa: F401
    except ImportError:
        return False
    return True


# ---------------------------------------------------------------------------
# Node client helper
# ---------------------------------------------------------------------------

def _resolve_node_client(
    node: Optional[str],
) -> Tuple[Optional[Any], Optional[str]]:
    """Return (NodeClient, node_name) for *node*, or (None, None) to run local.

    ``None`` and ``""`` select the local path. ``"auto"`` is passed to the
    registry as ``None``; any other value is looked up by name.

    Raises RuntimeError with a readable message if the node is named but
    unusable — no matching registry entry, the node support modules cannot
    be imported, or the entry lacks ``url``/``token`` — so the handler can
    surface a clear error to the agent.
    """
    if node is None or node == "":
        return None, None

    # Imported lazily so a local-only install never needs the node deps.
    # Handlers only catch RuntimeError, so an import failure is translated
    # instead of escaping the tool call as a raw ImportError.
    try:
        from plugins.google_meet.node.registry import NodeRegistry
        from plugins.google_meet.node.client import NodeClient
    except ImportError as exc:
        raise RuntimeError(
            f"remote meet node support is unavailable: {exc}"
        ) from exc

    reg = NodeRegistry()
    entry = reg.resolve(node if node != "auto" else None)
    if entry is None:
        raise RuntimeError(
            f"no registered meet node matches {node!r} — "
            "run `hermes meet node approve <name> <url> <token>` first"
        )

    # Read the fields before constructing the client so that only a
    # malformed registry entry (not a KeyError from inside NodeClient) is
    # reported as one.
    try:
        url, token = entry["url"], entry["token"]
    except KeyError as exc:
        raise RuntimeError(
            f"registry entry for meet node {node!r} is missing {exc} — "
            "re-run `hermes meet node approve <name> <url> <token>`"
        ) from exc
    client = NodeClient(url=url, token=token)
    return client, entry.get("name")


# ---------------------------------------------------------------------------
# Schemas
# ---------------------------------------------------------------------------

MEET_JOIN_SCHEMA: Dict[str, Any] = {
    "name": "meet_join",
    "description": (
        "Join a Google Meet call and start scraping live captions into a "
        "transcript file. Only meet.google.com URLs are accepted; no calendar "
        "scanning, no auto-dial. Spawns a headless Chromium subprocess that "
        "runs in parallel with the agent loop — returns immediately. Poll "
        "with meet_status and read captions with meet_transcript. Reminder "
        "to the agent: you should announce yourself in the meeting (there is "
        "no automatic consent announcement)."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": (
                    "Full https://meet.google.com/... URL. Required."
                ),
            },
            "mode": {
                "type": "string",
                "enum": ["transcribe", "realtime"],
                "description": (
                    "transcribe (default): listen-only, scrape captions. "
                    "realtime: also enable agent speech via meet_say "
                    "(requires OpenAI Realtime key + platform audio bridge)."
                ),
            },
            "guest_name": {
                "type": "string",
                "description": (
                    "Display name to use when joining as guest. Defaults to "
                    "'Hermes Agent'."
                ),
            },
            "duration": {
                "type": "string",
                "description": (
                    "Optional max duration before auto-leave (e.g. '30m', "
                    "'2h', '90s'). Omit to stay until meet_leave is called."
                ),
            },
            "headed": {
                "type": "boolean",
                "description": (
                    "Run Chromium headed instead of headless (debug only). "
                    "Default false."
                ),
            },
            "node": {
                "type": "string",
                "description": (
                    "Name of a registered remote node to run the bot on "
                    "(useful when the gateway runs on a headless Linux box "
                    "but the user's Chrome with a signed-in Google profile "
                    "lives on their Mac). Pass 'auto' to use the single "
                    "registered node. Default: run locally. Nodes are "
                    "approved via `hermes meet node approve`."
                ),
            },
        },
        "required": ["url"],
        "additionalProperties": False,
    },
}

MEET_STATUS_SCHEMA: Dict[str, Any] = {
    "name": "meet_status",
    "description": (
        "Report the current Meet session state — whether the bot is alive, "
        "has joined, is sitting in the lobby, number of transcript lines "
        "captured, and last-caption timestamp."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "node": {"type": "string"},
        },
        "additionalProperties": False,
    },
}

MEET_TRANSCRIPT_SCHEMA: Dict[str, Any] = {
    "name": "meet_transcript",
    "description": (
        "Read the scraped transcript for the active Meet session. Returns "
        "full transcript unless 'last' is set, in which case returns the last "
        "N lines only."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "last": {
                "type": "integer",
                "description": (
                    "Optional: return only the last N caption lines. Useful "
                    "for polling during a meeting without re-reading the "
                    "whole transcript."
                ),
                "minimum": 1,
            },
            "node": {"type": "string"},
        },
        "additionalProperties": False,
    },
}

MEET_LEAVE_SCHEMA: Dict[str, Any] = {
    "name": "meet_leave",
    "description": (
        "Leave the active Meet call cleanly, stop caption scraping, and "
        "finalize the transcript file. Safe to call when no meeting is "
        "active — returns ok=false with a reason."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "node": {"type": "string"},
        },
        "additionalProperties": False,
    },
}

MEET_SAY_SCHEMA: Dict[str, Any] = {
    "name": "meet_say",
    "description": (
        "Speak text into the active Meet call. Requires the active meeting "
        "to have been joined with mode='realtime'. The text is queued to "
        "the bot's OpenAI Realtime session; the generated audio is streamed "
        "into Chrome's fake microphone via a virtual audio device "
        "(PulseAudio null-sink on Linux, BlackHole on macOS). Returns "
        "immediately — the actual speech lags by a couple of seconds."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "text": {"type": "string", "description": "Text to speak."},
            "node": {"type": "string"},
        },
        "required": ["text"],
        "additionalProperties": False,
    },
}


# ---------------------------------------------------------------------------
# Handlers
# ---------------------------------------------------------------------------

def _json(obj: Any) -> str:
    """Serialize *obj* to JSON, keeping non-ASCII characters unescaped."""
    return json.dumps(obj, ensure_ascii=False)


def _err(msg: str, **extra) -> str:
    """Return a JSON failure payload carrying *msg* plus any *extra* fields."""
    return _json({"success": False, "error": msg, **extra})


def _text_arg(args: Dict[str, Any], key: str, default: str = "") -> str:
    """Return ``args[key]`` stripped, or *default* when missing or falsy.

    Raises ValueError when the value is truthy but not a string, so a
    malformed tool call yields a readable error instead of an
    AttributeError from ``.strip()``.
    """
    value = args.get(key) or default
    if not isinstance(value, str):
        raise ValueError(
            f"{key} must be a string (got {type(value).__name__})"
        )
    return value.strip()


def _last_n(value: Any) -> Optional[int]:
    """Coerce the ``last`` argument to a positive int, else None.

    Unparsable or non-positive values fall back to None (the handler then
    requests the full transcript) rather than failing the call.
    """
    if value is None:
        return None
    try:
        count = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")).
        return None
    return count if count >= 1 else None


def _local_reply(res: Dict[str, Any]) -> str:
    """Format a local process-manager result as the tool's JSON reply."""
    return _json({"success": bool(res.get("ok")), **res})


def _remote_reply(
    node_name: Optional[str],
    op: str,
    call: Callable[[], Dict[str, Any]],
) -> str:
    """Invoke *call* against a remote node and format its JSON reply.

    Any exception raised by the call (or while formatting its result) is
    reported as ``remote node <op> failed`` so the agent gets an error
    payload rather than a traceback.
    """
    try:
        res = call()
        return _json({"success": bool(res.get("ok")), "node": node_name, **res})
    except Exception as e:
        return _err(f"remote node {op} failed: {e}", node=node_name)


def _dispatch(
    args: Dict[str, Any],
    op: str,
    remote: Callable[[Any], Dict[str, Any]],
    local: Callable[[], Dict[str, Any]],
) -> str:
    """Route one operation to the addressed remote node or the local bot.

    *remote* receives the resolved node client; *local* is called with no
    arguments when ``args`` names no node. *op* is the remote operation
    name used in error messages.
    """
    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as e:
        return _err(str(e))
    if client is not None:
        return _remote_reply(node_name, op, lambda: remote(client))
    return _local_reply(local())


def _join_params(args: Dict[str, Any]) -> Dict[str, Any]:
    """Validate meet_join arguments and return the bot launch kwargs.

    The same keyword set is passed to both ``client.start_bot`` and
    ``pm.start``. Raises ValueError with an agent-readable message when
    ``url`` is missing or ``url``/``mode`` is invalid.
    """
    url = _text_arg(args, "url")
    if not url:
        raise ValueError("url is required")
    mode = _text_arg(args, "mode", "transcribe").lower()
    if mode not in ("transcribe", "realtime"):
        raise ValueError(
            f"mode must be 'transcribe' or 'realtime' (got {mode!r})"
        )
    duration = args.get("duration")
    return {
        "url": url,
        "guest_name": str(args.get("guest_name") or "Hermes Agent"),
        "duration": str(duration) if duration else None,
        "headed": bool(args.get("headed", False)),
        "mode": mode,
    }


def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
    """Start the Meet bot on the addressed node, or locally if none is given.

    The local path is additionally gated on ``check_meet_requirements()``;
    the remote path is not, since Chromium runs on the node.
    """
    try:
        launch = _join_params(args)
    except ValueError as e:
        return _err(str(e))

    try:
        client, node_name = _resolve_node_client(args.get("node"))
    except RuntimeError as e:
        return _err(str(e))

    if client is not None:
        # Remote path — delegate to the node host.
        return _remote_reply(
            node_name, "start_bot", lambda: client.start_bot(**launch)
        )

    # Local path — same as v1, with v2 params.
    if not check_meet_requirements():
        return _err(
            "google_meet plugin prerequisites missing — install with "
            "`pip install playwright && python -m playwright install "
            "chromium`. Plugin is supported on Linux and macOS only."
        )
    return _local_reply(pm.start(**launch))


def handle_meet_status(args: Dict[str, Any], **_kw) -> str:
    """Report the bot status from the addressed node or the local bot."""
    return _dispatch(args, "status", lambda client: client.status(), pm.status)


def handle_meet_transcript(args: Dict[str, Any], **_kw) -> str:
    """Return the transcript, limited to the last N lines when ``last`` is set."""
    last_i = _last_n(args.get("last"))
    return _dispatch(
        args,
        "transcript",
        lambda client: client.transcript(last=last_i),
        lambda: pm.transcript(last=last_i),
    )


def handle_meet_leave(args: Dict[str, Any], **_kw) -> str:
    """Ask the bot (remote or local) to leave the meeting."""
    return _dispatch(
        args,
        "stop",
        lambda client: client.stop(),
        lambda: pm.stop(reason="agent called meet_leave"),
    )


def handle_meet_say(args: Dict[str, Any], **_kw) -> str:
    """Queue *text* to be spoken into the meeting by the bot."""
    try:
        text = _text_arg(args, "text")
    except ValueError as e:
        return _err(str(e))
    if not text:
        return _err("text is required")
    return _dispatch(
        args,
        "say",
        lambda client: client.say(text),
        lambda: pm.enqueue_say(text),
    )