# micro_server.py
"""Browser micro-server — runs INSIDE the Playwright container.

Wraps `playwright.sync_api` over stdlib `http.server`. Zero third-party
deps beyond Playwright itself (which is pre-installed in
`mcr.microsoft.com/playwright/python`). One module-level BrowserContext
per container keeps cookies + navigation state alive across tool calls.

Endpoints:
  - POST /health       — liveness probe, returns `{ok: true}` (GET works too).
  - POST /goto         — {url} → {final_url, title}
  - POST /content      — {selector?, format?} → {content, length}
  - POST /click        — {selector} → {url_after, nearby_text}
  - POST /fill         — {selector, value} → {ok}
  - POST /select       — {selector, option} → {ok}
  - POST /screenshot   — {selector?} → {png_b64, size}
  - POST /wait         — {selector, timeout?} → {found}   (timeout in seconds)
  - POST /download     — {selector, timeout?} → {path, size, mime, filename}
  - POST /eval         — {js} → {result}
  - POST /storage/load — {state: {...}} → {ok}
  - POST /storage/dump — {} → {state: {...}}
  - POST /close        — closes the page, context and browser and stops
                         Playwright; used by host on cleanup. The next
                         browser call lazily starts a fresh context.

JSON-in, JSON-out. Errors surface as {"error": "..."}: HTTP 400 for an
unparseable body, 404 for an unknown path, 405 for a GET other than
/health, and 500 when a handler raises.

This file is copied into the container at `/opt/restai_browser/micro_server.py`
by the host-side `BrowserManager` at container startup (via `put_archive`).
It is **not** imported by the host-side RESTai process — it lives in the
container's Python runtime only, so it can import `playwright.sync_api`
without demanding Playwright as a host dep.
"""
from __future__ import annotations

import base64
import json
import logging
import os
import re
import sys
import threading
import time
# Single-threaded HTTPServer on purpose: Playwright's sync_api requires
# that every call comes from the thread that started `sync_playwright()`.
# ThreadingHTTPServer would spawn a new thread per request and break that
# invariant with a confusing "cannot switch to a different thread" error.
# Tool calls per chat are sequential anyway (the LLM calls one at a time),
# so single-threaded is fine.
from http.server import BaseHTTPRequestHandler, HTTPServer

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
_log = logging.getLogger("restai.browser.micro")

_DOWNLOAD_DIR = "/home/user/downloads"
os.makedirs(_DOWNLOAD_DIR, exist_ok=True)


# ─── Playwright lifecycle ────────────────────────────────────────────

_lock = threading.Lock()
_pw = None
_browser = None
_context = None
_page = None  # current active page


def _ensure_context():
    """Lazy-start Playwright + a persistent BrowserContext on first use.

    Does nothing when `_context` is already set. Otherwise starts the
    Playwright driver, launches headless Chromium, opens one context and
    one page, and publishes all four in the module globals.

    Start-up is all-or-nothing: the objects are built in locals and only
    assigned to the globals once every step succeeded. If a step raises,
    whatever was already started is shut down again before the exception
    propagates, so a failed launch does not leave a driver or browser
    running behind globals that still say "not initialized".
    """
    global _pw, _browser, _context, _page
    with _lock:
        if _context is not None:
            return
        # Imported here so the module itself loads without Playwright.
        from playwright.sync_api import sync_playwright

        pw = browser = context = None
        try:
            pw = sync_playwright().start()
            browser = pw.chromium.launch(
                headless=True,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--disable-dev-shm-usage",
                    "--no-sandbox",
                ],
            )
            context = browser.new_context(
                accept_downloads=True,
                viewport={"width": 1280, "height": 800},
                user_agent=(
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                ),
            )
            page = context.new_page()
        except Exception:
            # Roll back in reverse order of creation; each step is
            # best-effort so one failing close cannot mask the original
            # error or skip the remaining resources.
            for resource, method in ((context, "close"), (browser, "close"), (pw, "stop")):
                if resource is None:
                    continue
                try:
                    getattr(resource, method)()
                except Exception:
                    _log.warning("Cleanup after failed Playwright start-up failed.", exc_info=True)
            raise
        _pw, _browser, _context, _page = pw, browser, context, page
        _log.info("Playwright context initialized.")


def _page_or_new():
    """Return the live page, making a new one if the previous closed.

    Ensures the context exists first, so this is the single entry point
    handlers use to obtain a usable page.
    """
    global _page
    _ensure_context()
    if _page is None or _page.is_closed():
        _page = _context.new_page()
    return _page


# ─── HTML sanitation before returning content to the agent ───────────

_SCRIPT_RE = re.compile(r"<script\b[^<]*(?:(?!</script>)<[^<]*)*</script>", re.IGNORECASE | re.DOTALL)
_STYLE_RE = re.compile(r"<style\b[^<]*(?:(?!</style>)<[^<]*)*</style>", re.IGNORECASE | re.DOTALL)
_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
# NOTE: despite the name this is compared against len(str), i.e. it caps
# characters, not encoded bytes.
_MAX_CONTENT_BYTES = 500_000

# User agent for contexts created by /storage/load. It must stay identical
# to the one `_ensure_context` gives the initial context, otherwise the UA
# silently changes mid-session after a storage load.
_CONTEXT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
_DEFAULT_DOWNLOAD_NAME = "download.bin"


def _sanitize_html(html: str) -> str:
    """Strip <script>, <style> and HTML comments, then cap the length.

    Returns "" for empty/None input. Output longer than
    `_MAX_CONTENT_BYTES` characters is cut and marked with a trailing
    `<!-- truncated -->` comment.
    """
    if not html:
        return ""
    html = _SCRIPT_RE.sub("", html)
    html = _STYLE_RE.sub("", html)
    html = _COMMENT_RE.sub("", html)
    if len(html) > _MAX_CONTENT_BYTES:
        html = html[:_MAX_CONTENT_BYTES] + "\n<!-- truncated -->"
    return html


def _to_markdown(html: str) -> str:
    """Quick HTML → markdown-ish rendering. Not perfect but cheap.

    Very lightweight: block-level closing tags and <br> become newlines,
    every other tag is dropped, and whitespace is collapsed. HTML
    entities are left as-is. The LLM is smart enough to work with this
    for most purposes.
    """
    text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
    text = re.sub(r"</(p|div|section|article|li|tr|h[1-6])>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"<(p|div|section|article|li|tr|h[1-6])[^>]*>", "", text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", "", text)  # strip remaining tags
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = re.sub(r"[ \t]+", " ", text)
    return text.strip()


def _timeout_ms(payload: dict, default_seconds: float) -> float:
    """Return the request's `timeout` (seconds) as Playwright milliseconds.

    A missing, empty, zero, negative or non-finite timeout falls back to
    `default_seconds`. The result is therefore always > 0, which matters
    because Playwright treats a timeout of 0 as "wait forever" and this
    server handles one request at a time. Fractional seconds are kept
    (0.5 → 500 ms) instead of being truncated to 0.

    Raises:
        ValueError: if `timeout` is present but not numeric.
    """
    raw = payload.get("timeout")
    if raw is None or raw == "":
        return default_seconds * 1000
    try:
        seconds = float(raw)
    except (TypeError, ValueError):
        raise ValueError(f"timeout must be a number of seconds, got {raw!r}") from None
    if not 0 < seconds < float("inf"):
        seconds = default_seconds
    return seconds * 1000


def _png_dimensions(png: bytes):
    """Return (width, height) from a PNG's IHDR chunk, or (None, None).

    A PNG starts with an 8-byte signature followed by the IHDR chunk:
    4-byte length, 4-byte type, then big-endian 32-bit width and height.
    """
    if len(png) < 24 or png[:8] != _PNG_SIGNATURE or png[12:16] != b"IHDR":
        return None, None
    return int.from_bytes(png[16:20], "big"), int.from_bytes(png[20:24], "big")


def _safe_download_name(suggested) -> str:
    """Reduce a browser-suggested filename to a safe single path component.

    Every run of characters outside `[A-Za-z0-9._-]` becomes `_`. A
    missing name, or one that consists only of dots (`.` / `..` would
    resolve to a directory), falls back to `download.bin`.
    """
    name = re.sub(r"[^A-Za-z0-9._-]+", "_", suggested or _DEFAULT_DOWNLOAD_NAME)
    if not name.strip("."):
        return _DEFAULT_DOWNLOAD_NAME
    return name


# ─── Handlers ────────────────────────────────────────────────────────


def _handle_goto(payload: dict) -> dict:
    """Navigate the active page to `url`; return the final URL and title.

    Raises:
        ValueError: if `url` is missing or empty.
    """
    url = payload.get("url", "")
    if not url:
        raise ValueError("url is required")
    page = _page_or_new()
    page.goto(url, wait_until="domcontentloaded", timeout=30_000)
    return {"final_url": page.url, "title": page.title()}


def _handle_content(payload: dict) -> dict:
    """Return the page's (or one element's) sanitized content.

    `format` is "html" for sanitized HTML; "text", "markdown" (default)
    and anything else all yield the tag-stripped text rendering.

    Raises:
        ValueError: if `selector` is given but matches nothing.
    """
    selector = payload.get("selector")
    fmt = str(payload.get("format") or "markdown").lower()
    page = _page_or_new()
    if selector:
        el = page.query_selector(selector)
        if el is None:
            raise ValueError(f"selector not found: {selector}")
        html = el.inner_html()
    else:
        html = page.content()
    html = _sanitize_html(html)
    # "text" and "markdown" are the same cleaned-text rendering for now.
    out = html if fmt == "html" else _to_markdown(html)
    if len(out) > _MAX_CONTENT_BYTES:
        out = out[:_MAX_CONTENT_BYTES] + "\n… (truncated)"
    return {"content": out, "length": len(out)}


def _handle_click(payload: dict) -> dict:
    """Click `selector`; return the resulting URL and a short text preview.

    Waiting for the load state and building the preview are both
    best-effort: a click that triggers no navigation must still succeed.

    Raises:
        ValueError: if `selector` is missing or empty.
    """
    selector = payload.get("selector", "")
    if not selector:
        raise ValueError("selector is required")
    page = _page_or_new()
    page.click(selector, timeout=15_000)
    try:
        page.wait_for_load_state("domcontentloaded", timeout=5_000)
    except Exception:
        _log.debug("load state not reached after click on %s", selector, exc_info=True)
    nearby = ""
    try:
        # Sanitize first: without it the preview is mostly the text of
        # <script>/<style> blocks in <head> rather than visible content.
        nearby = _to_markdown(_sanitize_html(page.content()))[:500]
    except Exception:
        _log.debug("could not read page content after click", exc_info=True)
    return {"url_after": page.url, "nearby_text": nearby}


def _handle_fill(payload: dict) -> dict:
    """Fill the input matched by `selector` with `value` (default "").

    Raises:
        ValueError: if `selector` is missing or empty.
    """
    selector = payload.get("selector", "")
    value = payload.get("value", "")
    if not selector:
        raise ValueError("selector is required")
    page = _page_or_new()
    page.fill(selector, value, timeout=15_000)
    return {"ok": True}


def _handle_select(payload: dict) -> dict:
    """Choose `option` in the <select> matched by `selector`.

    Raises:
        ValueError: if `selector` is empty or `option` is absent.
    """
    selector = payload.get("selector", "")
    option = payload.get("option")
    if not selector or option is None:
        raise ValueError("selector + option required")
    page = _page_or_new()
    page.select_option(selector, option, timeout=15_000)
    return {"ok": True}


def _handle_screenshot(payload: dict) -> dict:
    """Capture a PNG of the viewport, or of one element if `selector` is set.

    Returns the base64 PNG, its byte `size`, and the pixel `width` /
    `height` read from the PNG header (None if the header is unreadable).

    Raises:
        ValueError: if the selector matches nothing or the PNG exceeds 2 MB.
    """
    selector = payload.get("selector")
    page = _page_or_new()
    if selector:
        el = page.query_selector(selector)
        if el is None:
            raise ValueError(f"selector not found: {selector}")
        png = el.screenshot(type="png")
    else:
        png = page.screenshot(type="png", full_page=False)
    if len(png) > 2_000_000:
        raise ValueError(f"screenshot too large ({len(png)} bytes) — narrow it with a selector")
    width, height = _png_dimensions(png)
    return {
        "png_b64": base64.b64encode(png).decode("ascii"),
        "size": len(png),
        "width": width,
        "height": height,
    }


def _handle_wait(payload: dict) -> dict:
    """Wait up to `timeout` seconds (default 10) for `selector` to be visible.

    Returns `{found: False}` instead of raising when the wait fails.

    Raises:
        ValueError: if `selector` is missing or `timeout` is not numeric.
    """
    selector = payload.get("selector", "")
    if not selector:
        raise ValueError("selector is required")
    timeout = _timeout_ms(payload, 10)
    page = _page_or_new()
    try:
        page.wait_for_selector(selector, timeout=timeout, state="visible")
        return {"found": True}
    except Exception:
        # Deliberately broad: any failure to see the element reports as
        # "not found" rather than an HTTP 500.
        return {"found": False}


def _handle_download(payload: dict) -> dict:
    """Click `selector` and save the download it triggers.

    Waits up to `timeout` seconds (default 30) for the download to start.
    The file is stored in `_DOWNLOAD_DIR` under a sanitized version of the
    browser's suggested filename; an existing file of that name is
    overwritten.

    Raises:
        ValueError: if `selector` is missing or `timeout` is not numeric.
    """
    selector = payload.get("selector", "")
    if not selector:
        raise ValueError("selector is required")
    timeout = _timeout_ms(payload, 30)
    page = _page_or_new()
    with page.expect_download(timeout=timeout) as dl_info:
        page.click(selector)
    dl = dl_info.value
    safe_name = _safe_download_name(dl.suggested_filename)
    path = os.path.join(_DOWNLOAD_DIR, safe_name)
    dl.save_as(path)
    size = os.path.getsize(path)
    import mimetypes  # local: only this handler needs it

    mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
    return {"path": path, "size": size, "mime": mime, "filename": safe_name}


def _handle_eval(payload: dict) -> dict:
    """Evaluate `js` in the active page and return its JSON-safe result.

    NOTE(security): this runs caller-supplied JavaScript verbatim in the
    page. That is the purpose of the endpoint; the only visible safeguard
    is that the server runs inside the browser container, so whatever can
    reach this port can script the session.

    Raises:
        ValueError: if `js` is missing or empty.
    """
    js = payload.get("js", "")
    if not js:
        raise ValueError("js is required")
    page = _page_or_new()
    result = page.evaluate(js)
    # JSON-safe coercion — Playwright returns dicts/lists/primitives.
    try:
        json.dumps(result)
    except (TypeError, ValueError, RecursionError):
        result = str(result)
    return {"result": result}


def _handle_storage_load(payload: dict) -> dict:
    """Re-apply a saved storage_state (cookies + localStorage).

    The replacement context is created *before* the current one is
    closed, so a rejected `state` leaves the existing session usable
    instead of stranding the module on a closed context.

    Raises:
        ValueError: if `state` is not a dict.
    """
    global _context, _page
    state = payload.get("state")
    if not isinstance(state, dict):
        raise ValueError("state dict required")
    _ensure_context()
    with _lock:
        new_context = _browser.new_context(
            accept_downloads=True,
            viewport={"width": 1280, "height": 800},
            user_agent=_CONTEXT_USER_AGENT,
            storage_state=state,
        )
        try:
            new_page = new_context.new_page()
        except Exception:
            try:
                new_context.close()
            except Exception:
                _log.debug("closing half-built context failed", exc_info=True)
            raise
        old_page, old_context = _page, _context
        _context, _page = new_context, new_page
        # Tear down the previous session; failures here are harmless
        # because nothing references the old objects any more.
        try:
            if old_page and not old_page.is_closed():
                old_page.close()
        except Exception:
            _log.debug("closing previous page failed", exc_info=True)
        try:
            old_context.close()
        except Exception:
            _log.debug("closing previous context failed", exc_info=True)
    return {"ok": True}


def _handle_storage_dump(_payload: dict) -> dict:
    """Return the current context's storage_state under the `state` key."""
    _ensure_context()
    return {"state": _context.storage_state()}


def _handle_close(_payload: dict) -> dict:
    """Shut down page, context, browser and Playwright; reset the globals.

    Each shutdown step is best-effort. Because the globals are cleared,
    the next browser call starts a fresh context.
    """
    global _context, _browser, _pw, _page
    with _lock:
        for resource in (_page, _context, _browser, _pw):
            if resource is None:
                continue
            # Page/context/browser expose close(); an object without it
            # is stopped via stop() instead.
            closer = getattr(resource, "close", None) or getattr(resource, "stop", None)
            if closer is None:
                continue
            try:
                closer()
            except Exception:
                _log.debug("shutdown step failed", exc_info=True)
        _page = _context = _browser = _pw = None
    return {"ok": True}


_ROUTES = {
    "/health": lambda p: {"ok": True},
    "/goto": _handle_goto,
    "/content": _handle_content,
    "/click": _handle_click,
    "/fill": _handle_fill,
    "/select": _handle_select,
    "/screenshot": _handle_screenshot,
    "/wait": _handle_wait,
    "/download": _handle_download,
    "/eval": _handle_eval,
    "/storage/load": _handle_storage_load,
    "/storage/dump": _handle_storage_dump,
    "/close": _handle_close,
}


# ─── HTTP plumbing ───────────────────────────────────────────────────


class _Handler(BaseHTTPRequestHandler):
    """Dispatches JSON POST requests to the functions in `_ROUTES`."""

    def log_message(self, fmt, *args):
        # `path` is only set once the request line parsed; the base class
        # also logs errors before that point, so don't assume it exists.
        _log.info("%s %s", getattr(self, "path", "-"), fmt % args)

    def _read_payload(self) -> dict:
        """Read and decode the request body as a JSON object.

        An empty body counts as `{}`.

        Raises:
            ValueError: on a negative or non-numeric Content-Length,
                invalid UTF-8 / JSON, or a JSON value that is not an object.
        """
        length = int(self.headers.get("Content-Length") or 0)
        if length < 0:
            # rfile.read(-1) would block until the client closes the
            # socket, stalling this single-threaded server.
            raise ValueError("negative Content-Length")
        raw = self.rfile.read(length) if length else b"{}"
        payload = json.loads(raw.decode("utf-8") or "{}")
        if not isinstance(payload, dict):
            raise ValueError("request body must be a JSON object")
        return payload

    def do_POST(self):
        """Route the request; map failures to 404 / 400 / 500 JSON errors."""
        path = self.path.split("?", 1)[0]
        handler = _ROUTES.get(path)
        if handler is None:
            self._respond(404, {"error": f"unknown path {path}"})
            return
        try:
            payload = self._read_payload()
        except Exception as e:
            self._respond(400, {"error": f"bad json: {e}"})
            return
        try:
            result = handler(payload)
            self._respond(200, result)
        except Exception as e:
            _log.exception("%s failed: %s", path, e)
            self._respond(500, {"error": f"{type(e).__name__}: {e}"})

    def do_GET(self):
        """Answer GET /health; every other GET gets 405."""
        # Convenience: /health as GET too for docker HEALTHCHECK.
        if self.path.split("?", 1)[0] == "/health":
            self._respond(200, {"ok": True})
            return
        self._respond(405, {"error": "POST required"}, headers={"Allow": "POST"})

    def _respond(self, status: int, body: dict, headers=None):
        """Send `body` as a JSON response with the given status code.

        `headers` is an optional mapping of extra response headers.
        """
        data = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(data)))
        for name, value in (headers or {}).items():
            self.send_header(name, value)
        self.end_headers()
        self.wfile.write(data)


def main():
    """Serve until interrupted, then release the socket and the browser.

    The port comes from `BROWSER_SERVER_PORT` (default 7000).
    """
    port = int(os.environ.get("BROWSER_SERVER_PORT", "7000"))
    server = HTTPServer(("0.0.0.0", port), _Handler)
    _log.info("RESTai browser micro-server listening on :%d", port)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        server.server_close()
        # Same thread that served the requests, as sync Playwright requires.
        _handle_close({})


if __name__ == "__main__":
    main()