/ restai / browser / micro_server.py
micro_server.py
  1  """Browser micro-server — runs INSIDE the Playwright container.
  2  
  3  Wraps `playwright.sync_api` over stdlib `http.server`. Zero third-party
  4  deps beyond Playwright itself (which is pre-installed in
  5  `mcr.microsoft.com/playwright/python`). One module-level BrowserContext
  6  per container keeps cookies + navigation state alive across tool calls.
  7  
  8  Endpoints:
  9  - POST /health      — liveness probe, returns `{ok: true}`.
 10  - POST /goto        — {url} → {final_url, title}
 11  - POST /content     — {selector?, format?} → {content, length}
 12  - POST /click       — {selector} → {url_after, nearby_text}
 13  - POST /fill        — {selector, value} → {ok}
 14  - POST /select      — {selector, option} → {ok}
 15  - POST /screenshot  — {selector?} → {png_b64, width, height}
 16  - POST /wait        — {selector, timeout?} → {found}
 17  - POST /download    — {selector, timeout?} → {path, size, mime}
 18  - POST /eval        — {js} → {result}
 19  - POST /storage/load — {state: {...}}
 20  - POST /storage/dump — {} → {state: {...}}
- POST /close       — closes the page, context and browser and stops Playwright (the next call lazily re-initializes); used by host on cleanup.
 22  
JSON-in, JSON-out. Errors surface as {"error": "..."}: HTTP 400 for a body that is not valid JSON, 404 for an unknown path, 500 for handler failures.
 24  
 25  This file is copied into the container at `/opt/restai_browser/micro_server.py`
 26  by the host-side `BrowserManager` at container startup (via `put_archive`).
 27  It is **not** imported by the host-side RESTai process — it lives in the
 28  container's Python runtime only, so it can import `playwright.sync_api`
 29  without demanding Playwright as a host dep.
 30  """
 31  from __future__ import annotations
 32  
 33  import base64
 34  import json
 35  import logging
 36  import os
 37  import re
 38  import sys
 39  import threading
 40  import time
 41  # Single-threaded HTTPServer on purpose: Playwright's sync_api requires
 42  # that every call comes from the thread that started `sync_playwright()`.
 43  # ThreadingHTTPServer would spawn a new thread per request and break that
 44  # invariant with a confusing "cannot switch to a different thread" error.
 45  # Tool calls per chat are sequential anyway (the LLM calls one at a time),
 46  # so single-threaded is fine.
 47  from http.server import BaseHTTPRequestHandler, HTTPServer
 48  
# Configure logging at import time; every message from this module goes through `_log`.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
_log = logging.getLogger("restai.browser.micro")

# Directory where /download saves files; created eagerly at import.
_DOWNLOAD_DIR = "/home/user/downloads"
os.makedirs(_DOWNLOAD_DIR, exist_ok=True)
 54  
 55  
 56  # ─── Playwright lifecycle ────────────────────────────────────────────
 57  
# Held while the Playwright objects below are created, replaced or torn down.
_lock = threading.Lock()
_pw = None  # handle returned by sync_playwright().start()
_browser = None  # Chromium instance launched from _pw
_context = None  # BrowserContext reused across requests
_page = None  # current active page
 63  
 64  
 65  def _ensure_context():
 66      """Lazy-start Playwright + a persistent BrowserContext on first use."""
 67      global _pw, _browser, _context, _page
 68      with _lock:
 69          if _context is not None:
 70              return
 71          from playwright.sync_api import sync_playwright
 72  
 73          _pw = sync_playwright().start()
 74          _browser = _pw.chromium.launch(
 75              headless=True,
 76              args=[
 77                  "--disable-blink-features=AutomationControlled",
 78                  "--disable-dev-shm-usage",
 79                  "--no-sandbox",
 80              ],
 81          )
 82          _context = _browser.new_context(
 83              accept_downloads=True,
 84              viewport={"width": 1280, "height": 800},
 85              user_agent=(
 86                  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
 87                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 88              ),
 89          )
 90          _page = _context.new_page()
 91          _log.info("Playwright context initialized.")
 92  
 93  
 94  def _page_or_new():
 95      """Return the live page, making a new one if the previous closed."""
 96      global _page
 97      _ensure_context()
 98      if _page is None or _page.is_closed():
 99          _page = _context.new_page()
100      return _page
101  
102  
103  # ─── HTML sanitation before returning content to the agent ───────────
104  
# Patterns for markup that carries nothing readable for the agent.
_SCRIPT_RE = re.compile(
    r"<script\b[^<]*(?:(?!</script>)<[^<]*)*</script>",
    re.IGNORECASE | re.DOTALL,
)
_STYLE_RE = re.compile(
    r"<style\b[^<]*(?:(?!</style>)<[^<]*)*</style>",
    re.IGNORECASE | re.DOTALL,
)
_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
# Upper bound on returned content; compared against len() of the string.
_MAX_CONTENT_BYTES = 500_000


def _sanitize_html(html: str) -> str:
    """Drop <script>/<style> elements and comments, then cap the length.

    Falsy input yields an empty string. Output longer than
    ``_MAX_CONTENT_BYTES`` is cut there and a truncation marker is appended.
    """
    if not html:
        return ""
    cleaned = html
    for pattern in (_SCRIPT_RE, _STYLE_RE, _COMMENT_RE):
        cleaned = pattern.sub("", cleaned)
    if len(cleaned) <= _MAX_CONTENT_BYTES:
        return cleaned
    return cleaned[:_MAX_CONTENT_BYTES] + "\n<!-- truncated -->"
120  
121  
def _to_markdown(html: str) -> str:
    """Quick HTML → markdown-ish rendering. Not perfect but cheap.

    Turns ``<br>`` and the closing tags of common block elements into
    newlines, strips every remaining tag, decodes HTML entities, and
    normalizes whitespace so at most one blank line separates paragraphs.

    Args:
        html: Markup to flatten; falsy input yields an empty string.

    Returns:
        Plain text with surrounding whitespace stripped.
    """
    # Imported under a different name because the parameter shadows `html`.
    from html import unescape

    if not html:
        return ""
    text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
    text = re.sub(r"</(p|div|section|article|li|tr|h[1-6])>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", "", text)  # strip every remaining tag
    # Decode entities only after tag stripping so an escaped "&lt;b&gt;"
    # survives as literal text instead of being removed as a tag.
    text = unescape(text).replace("\xa0", " ")
    text = re.sub(r"[ \t]+", " ", text)
    # Trim spaces around newlines first; otherwise whitespace-only lines
    # hide runs of blank lines from the collapse below.
    text = re.sub(r" ?\n ?", "\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
133  
134  
135  # ─── Handlers ────────────────────────────────────────────────────────
136  
137  
def _handle_goto(payload: dict) -> dict:
    """Navigate the active page to ``payload["url"]``.

    Returns the page's URL and title after navigation. Raises ValueError
    when no URL is supplied.
    """
    target = payload.get("url", "")
    if not target:
        raise ValueError("url is required")
    tab = _page_or_new()
    tab.goto(target, wait_until="domcontentloaded", timeout=30_000)
    return dict(final_url=tab.url, title=tab.title())
145  
146  
def _handle_content(payload: dict) -> dict:
    """Return sanitized page (or element) content in the requested format.

    ``selector`` narrows the result to one element's inner HTML; ``format``
    is "html", "text" or "markdown" (the default). Raises ValueError when the
    selector matches nothing.
    """
    selector = payload.get("selector")
    fmt = (payload.get("format") or "markdown").lower()
    tab = _page_or_new()
    if not selector:
        markup = tab.content()
    else:
        node = tab.query_selector(selector)
        if node is None:
            raise ValueError(f"selector not found: {selector}")
        markup = node.inner_html()
    markup = _sanitize_html(markup)
    # "text" and "markdown" share one renderer for now; only "html" is raw.
    rendered = markup if fmt == "html" else _to_markdown(markup)
    if len(rendered) > _MAX_CONTENT_BYTES:
        rendered = rendered[:_MAX_CONTENT_BYTES] + "\n… (truncated)"
    return {"content": rendered, "length": len(rendered)}
168  
169  
def _handle_click(payload: dict) -> dict:
    """Click ``payload["selector"]`` and report where the page ended up.

    Waiting for the load state and collecting the text preview are both
    best-effort: failures there are ignored. Raises ValueError when no
    selector is supplied.
    """
    target = payload.get("selector", "")
    if not target:
        raise ValueError("selector is required")
    tab = _page_or_new()
    tab.click(target, timeout=15_000)
    try:
        tab.wait_for_load_state("domcontentloaded", timeout=5_000)
    except Exception:
        pass
    try:
        preview = _to_markdown(tab.content())[:500]
    except Exception:
        preview = ""
    return {"url_after": tab.url, "nearby_text": preview}
186  
187  
def _handle_fill(payload: dict) -> dict:
    """Fill the element at ``payload["selector"]`` with ``payload["value"]``.

    A missing value defaults to the empty string. Raises ValueError when no
    selector is supplied.
    """
    target = payload.get("selector", "")
    if not target:
        raise ValueError("selector is required")
    text = payload.get("value", "")
    _page_or_new().fill(target, text, timeout=15_000)
    return {"ok": True}
196  
197  
def _handle_select(payload: dict) -> dict:
    """Choose ``payload["option"]`` in the <select> at ``payload["selector"]``.

    Raises ValueError when the selector is empty or the option is absent.
    """
    target = payload.get("selector", "")
    choice = payload.get("option")
    if choice is None or not target:
        raise ValueError("selector + option required")
    _page_or_new().select_option(target, choice, timeout=15_000)
    return {"ok": True}
206  
207  
def _handle_screenshot(payload: dict) -> dict:
    """Capture a PNG of the viewport, or of one element when ``selector`` is set.

    Returns:
        ``png_b64`` (base64 of the PNG), ``size`` (PNG byte count) and, when
        the image header can be read, ``width`` and ``height`` in pixels.

    Raises:
        ValueError: the selector matched nothing, or the PNG exceeds
            2,000,000 bytes.
    """
    selector = payload.get("selector")
    page = _page_or_new()
    if selector:
        el = page.query_selector(selector)
        if el is None:
            raise ValueError(f"selector not found: {selector}")
        png = el.screenshot(type="png")
    else:
        png = page.screenshot(type="png", full_page=False)
    if len(png) > 2_000_000:
        raise ValueError(f"screenshot too large ({len(png)} bytes) — narrow it with a selector")
    result = {"png_b64": base64.b64encode(png).decode("ascii"), "size": len(png)}
    # PNG layout: 8-byte signature, 4-byte chunk length, b"IHDR", then width
    # and height as big-endian 32-bit integers. Read them so the response
    # carries the pixel dimensions without needing an image library.
    if len(png) >= 24 and png[12:16] == b"IHDR":
        result["width"] = int.from_bytes(png[16:20], "big")
        result["height"] = int.from_bytes(png[20:24], "big")
    return result
221  
222  
def _handle_wait(payload: dict) -> dict:
    """Wait until ``payload["selector"]`` is visible.

    Args:
        payload: ``selector`` (required) and ``timeout`` in seconds
            (optional, default 10; fractional values are honoured).

    Returns:
        ``{"found": True}`` if the element became visible in time, otherwise
        ``{"found": False}``.

    Raises:
        ValueError: no selector was supplied or ``timeout`` is not numeric.
    """
    selector = payload.get("selector", "")
    if not selector:
        raise ValueError("selector is required")
    raw_timeout = payload.get("timeout")
    seconds = float(raw_timeout) if raw_timeout else 10.0
    # Clamp to at least 1 ms: Playwright treats timeout=0 as "wait forever",
    # which a sub-second value truncated to 0 would trigger, blocking this
    # single-threaded server indefinitely.
    timeout = max(1, int(seconds * 1000))
    page = _page_or_new()
    try:
        page.wait_for_selector(selector, timeout=timeout, state="visible")
        return {"found": True}
    except Exception:
        # Best-effort probe: any failure is reported as "not found".
        return {"found": False}
234  
235  
def _handle_download(payload: dict) -> dict:
    """Click ``payload["selector"]`` and save the download it triggers.

    Args:
        payload: ``selector`` (required) and ``timeout`` in seconds
            (optional, default 30; fractional values are honoured).

    Returns:
        ``path`` (saved file under ``_DOWNLOAD_DIR``), ``size`` in bytes,
        ``mime`` (guessed from the file name) and ``filename``.

    Raises:
        ValueError: no selector was supplied or ``timeout`` is not numeric.
    """
    import mimetypes

    selector = payload.get("selector", "")
    if not selector:
        raise ValueError("selector is required")
    raw_timeout = payload.get("timeout")
    seconds = float(raw_timeout) if raw_timeout else 30.0
    # Clamp to at least 1 ms: Playwright treats timeout=0 as "no timeout",
    # which a sub-second value truncated to 0 would trigger.
    timeout = max(1, int(seconds * 1000))
    page = _page_or_new()
    with page.expect_download(timeout=timeout) as dl_info:
        page.click(selector)
    dl = dl_info.value
    # The suggested name comes from the remote site: keep only safe characters.
    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", dl.suggested_filename or "download.bin")
    if not safe_name.strip("."):
        # "." / ".." would resolve to a directory rather than a file.
        safe_name = "download.bin"
    path = os.path.join(_DOWNLOAD_DIR, safe_name)
    dl.save_as(path)
    size = os.path.getsize(path)
    mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
    return {"path": path, "size": size, "mime": mime, "filename": safe_name}
256  
257  
def _handle_eval(payload: dict) -> dict:
    """Evaluate ``payload["js"]`` in the active page and return its result.

    A result that cannot be JSON-encoded is returned as its ``str()`` form.
    Raises ValueError when no script is supplied.
    """
    script = payload.get("js", "")
    if not script:
        raise ValueError("js is required")
    value = _page_or_new().evaluate(script)
    # Probe serializability up front so the response can always be encoded.
    try:
        json.dumps(value)
    except Exception:
        return {"result": str(value)}
    return {"result": value}
270  
271  
def _handle_storage_load(payload: dict) -> dict:
    """Re-apply a saved storage_state (cookies + localStorage).

    The replacement context is created *before* the current one is closed, so
    a state that Playwright rejects leaves the existing session usable
    instead of stranding the module on a closed context.

    Args:
        payload: must contain ``state``, a storage-state dict.

    Returns:
        ``{"ok": True}`` once the new context and page are active.

    Raises:
        ValueError: ``state`` is missing or not a dict.
    """
    global _context, _page
    state = payload.get("state")
    if not isinstance(state, dict):
        raise ValueError("state dict required")
    _ensure_context()
    with _lock:
        # Same settings as the initial context (including the user agent) so
        # the restored session presents the same browser identity.
        new_context = _browser.new_context(
            accept_downloads=True,
            viewport={"width": 1280, "height": 800},
            user_agent=(
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ),
            storage_state=state,
        )
        try:
            new_page = new_context.new_page()
        except Exception:
            new_context.close()
            raise
        old_page, old_context = _page, _context
        _context, _page = new_context, new_page
        # Tear down the previous session; failures here are not fatal.
        try:
            if old_page and not old_page.is_closed():
                old_page.close()
        except Exception:
            pass
        try:
            old_context.close()
        except Exception:
            pass
    return {"ok": True}
297  
298  
def _handle_storage_dump(_payload: dict) -> dict:
    """Return the current context's storage state under the ``state`` key."""
    _ensure_context()
    snapshot = _context.storage_state()
    return {"state": snapshot}
302  
303  
def _handle_close(_payload: dict) -> dict:
    """Tear down page, context, browser and driver, then reset the globals.

    Each object is shut down via ``close()`` when it has one, else ``stop()``;
    any error raised along the way is ignored.
    """
    global _context, _browser, _pw, _page
    with _lock:
        # Innermost object first, driver last.
        for resource in (_page, _context, _browser, _pw):
            if resource is None:
                continue
            try:
                shutdown = getattr(resource, "close", None)
                if shutdown is None:
                    shutdown = getattr(resource, "stop", None)
                if shutdown is not None:
                    shutdown()
            except Exception:
                pass
        _page = None
        _context = None
        _browser = None
        _pw = None
    return {"ok": True}
319  
320  
# Dispatch table: request path → callable taking the decoded JSON payload
# and returning the JSON-serializable response body.
_ROUTES = {
    "/health": lambda _payload: {"ok": True},
    "/goto": _handle_goto,
    "/content": _handle_content,
    "/click": _handle_click,
    "/fill": _handle_fill,
    "/select": _handle_select,
    "/screenshot": _handle_screenshot,
    "/wait": _handle_wait,
    "/download": _handle_download,
    "/eval": _handle_eval,
    "/storage/load": _handle_storage_load,
    "/storage/dump": _handle_storage_dump,
    "/close": _handle_close,
}
336  
337  
338  # ─── HTTP plumbing ───────────────────────────────────────────────────
339  
340  
class _Handler(BaseHTTPRequestHandler):
    """HTTP request handler that dispatches POSTed JSON to ``_ROUTES``.

    Responses are always JSON. Unknown paths give 404, unusable request
    bodies 400, and exceptions raised by a route handler 500.
    """

    def log_message(self, fmt, *args):
        """Route http.server's access/error logging through ``_log``."""
        # `path` is not set yet when the request line itself failed to parse,
        # and http.server still logs that failure through this method.
        _log.info("%s %s", getattr(self, "path", "-"), args)

    def do_POST(self):
        """Decode the JSON body, run the matching route, send its result."""
        path = self.path.split("?", 1)[0]
        handler = _ROUTES.get(path)
        if handler is None:
            self._respond(404, {"error": f"unknown path {path}"})
            return
        try:
            length = int(self.headers.get("Content-Length") or 0)
            # Only read a positive length: read(-n) would block until EOF.
            raw = self.rfile.read(length) if length > 0 else b""
            payload = json.loads(raw.decode("utf-8") or "{}")
        except Exception as e:
            self._respond(400, {"error": f"bad json: {e}"})
            return
        if not isinstance(payload, dict):
            # Handlers call payload.get(); reject arrays/scalars/null up
            # front instead of failing later with an AttributeError (500).
            self._respond(400, {"error": "bad json: request body must be a JSON object"})
            return
        try:
            result = handler(payload)
            self._respond(200, result)
        except Exception as e:
            _log.exception("%s failed: %s", path, e)
            self._respond(500, {"error": f"{type(e).__name__}: {e}"})

    def do_GET(self):
        """Serve GET /health; every other GET is answered with 405."""
        # Convenience: /health as GET too for docker HEALTHCHECK.
        if self.path.split("?", 1)[0] == "/health":
            self._respond(200, {"ok": True})
            return
        self._respond(405, {"error": "POST required"})

    def _respond(self, status: int, body: dict):
        """Send ``body`` as a JSON response with the given HTTP status."""
        # Serialize before sending anything so an encoding failure surfaces
        # to the caller while a different response can still be sent.
        data = json.dumps(body).encode("utf-8")
        try:
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)
        except (BrokenPipeError, ConnectionResetError) as e:
            # The client went away; there is nobody left to answer.
            _log.warning("client disconnected before response was sent: %s", e)
379  
380  
def main():
    """Bind the HTTP server and serve requests until interrupted.

    The port comes from ``BROWSER_SERVER_PORT`` (default 7000). The listening
    socket is always released on exit, including on Ctrl-C.
    """
    port = int(os.environ.get("BROWSER_SERVER_PORT", "7000"))
    server = HTTPServer(("0.0.0.0", port), _Handler)
    # Log only after the bind succeeded so the message is never misleading.
    _log.info("RESTai browser micro-server listening on :%d", port)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        server.server_close()
389  
390  
# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    main()