/ aurin.py
aurin.py
   1  # /// script
   2  # requires-python = ">=3.10"
   3  # dependencies = [
   4  #     "aiohttp>=3.9",
   5  #     "mlx-whisper>=0.4",
   6  # ]
   7  # ///
   8  """
   9  AURYN self-serving server + fast context provider.
  10  
  11  Usage:
  12      uv run aurin.py serve [--port 8080] [--host 0.0.0.0] [--model qwen3:32b]
  13      uv run aurin.py context <file_or_text> [--top N] [--json] [--rebuild]
  14      uv run aurin.py index [--force]
  15  """
  16  
  17  import argparse
  18  import asyncio
  19  import json
  20  import math
  21  import os
  22  import pickle
  23  import re
  24  import shutil
  25  import socket
  26  import ssl
  27  import struct
  28  import subprocess
  29  import tempfile
  30  import time
  31  import uuid
  32  from collections import Counter, defaultdict
  33  from datetime import datetime
  34  from pathlib import Path
  35  
  36  import aiohttp
  37  from aiohttp import web
  38  
  39  AURYN_DIR = Path(__file__).parent.resolve()
  40  RECORDINGS_DIR = AURYN_DIR / "recordings"
  41  TRANSCRIPTS_DIR = AURYN_DIR / "transcripts"
  42  VAULT_DIR = AURYN_DIR.parent  # RealDealVault
  43  INDEX_DIR = Path.home() / ".auryn"
  44  INDEX_FILE = INDEX_DIR / "context-index.pkl"
  45  
  46  
  47  # ============================================================
  48  # Context Provider — Tier 1 (Vocabulary) + Tier 2 (BM25)
  49  # ============================================================
  50  
  51  # BM25 parameters
  52  BM25_K1 = 1.5
  53  BM25_B = 0.75
  54  
  55  
  56  def _tokenize(text: str) -> list[str]:
  57      """Lowercase, strip punctuation, split into tokens."""
  58      text = text.lower()
  59      # Keep alphanumeric, hyphens within words, and unicode letters
  60      text = re.sub(r"[^\w\s-]", " ", text)
  61      text = re.sub(r"\s+", " ", text)
  62      return [w for w in text.split() if len(w) >= 2]
  63  
  64  
  65  def _split_camel(name: str) -> list[str]:
  66      """Split camelCase/PascalCase into words. 'InterBrain' -> ['inter', 'brain']"""
  67      parts = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
  68      parts = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", parts)
  69      return parts.lower().split()
  70  
  71  
  72  def discover_nodes() -> list[dict]:
  73      """Find all DreamNodes in the vault by scanning for .udd files."""
  74      nodes = []
  75      for udd_path in VAULT_DIR.rglob("*.udd"):
  76          # Only top-level vault entries (maxdepth 2)
  77          rel = udd_path.relative_to(VAULT_DIR)
  78          if len(rel.parts) > 2:
  79              continue
  80          try:
  81              data = json.loads(udd_path.read_text())
  82              node_dir = udd_path.parent
  83              readme_path = node_dir / "README.md"
  84              readme = ""
  85              if readme_path.exists():
  86                  readme = readme_path.read_text(errors="replace")
  87  
  88              nodes.append({
  89                  "uuid": data.get("uuid", ""),
  90                  "title": data.get("title", node_dir.name),
  91                  "type": data.get("type", "dream"),
  92                  "folder": node_dir.name,
  93                  "path": str(node_dir),
  94                  "radicle_id": data.get("radicleId", ""),
  95                  "readme": readme,
  96              })
  97          except (json.JSONDecodeError, OSError):
  98              continue
  99      return nodes
 100  
 101  
 102  def build_index(nodes: list[dict] | None = None) -> dict:
 103      """Build the Tier 1 + Tier 2 index from scratch."""
 104      if nodes is None:
 105          nodes = discover_nodes()
 106  
 107      t0 = time.time()
 108  
 109      # --- Tier 1: Vocabulary lookup ---
 110      # Map normalized forms -> node info
 111      vocab: dict[str, list[dict]] = defaultdict(list)  # lowercase phrase -> [{uuid, title, ...}]
 112  
 113      for node in nodes:
 114          title = node["title"]
 115          folder = node["folder"]
 116          uid = node["uuid"]
 117          entry = {"uuid": uid, "title": title, "type": node["type"],
 118                   "path": node["path"], "radicle_id": node["radicle_id"]}
 119  
 120          # Exact title (lowered)
 121          vocab[title.lower()].append(entry)
 122  
 123          # Folder name (lowered)
 124          folder_lower = folder.lower()
 125          if folder_lower != title.lower():
 126              vocab[folder_lower].append(entry)
 127  
 128          # CamelCase-split folder words joined with spaces
 129          camel_words = _split_camel(folder)
 130          if len(camel_words) > 1:
 131              joined = " ".join(camel_words)
 132              if joined != title.lower() and joined != folder_lower:
 133                  vocab[joined].append(entry)
 134  
 135          # Title without special spacing (e.g., "A U R Y N" -> "auryn")
 136          collapsed = re.sub(r"\s+", "", title).lower()
 137          if collapsed != title.lower() and collapsed != folder_lower:
 138              vocab[collapsed].append(entry)
 139  
 140      # Sort vocab keys longest-first for greedy matching
 141      vocab_phrases = sorted(vocab.keys(), key=len, reverse=True)
 142  
 143      # --- Tier 2: BM25 index ---
 144      # Tokenize all READMEs
 145      doc_tokens: list[list[str]] = []  # parallel to nodes
 146      doc_freqs: Counter = Counter()  # how many docs contain each term
 147      total_dl = 0
 148  
 149      for node in nodes:
 150          # Combine title + readme for the document
 151          text = node["title"] + " " + node["readme"]
 152          tokens = _tokenize(text)
 153          doc_tokens.append(tokens)
 154          total_dl += len(tokens)
 155          # Count unique terms per doc
 156          for term in set(tokens):
 157              doc_freqs[term] += 1
 158  
 159      n_docs = len(nodes)
 160      avgdl = total_dl / n_docs if n_docs > 0 else 1
 161  
 162      # Precompute IDF for each term
 163      idf: dict[str, float] = {}
 164      for term, df in doc_freqs.items():
 165          # BM25 IDF formula
 166          idf[term] = math.log((n_docs - df + 0.5) / (df + 0.5) + 1.0)
 167  
 168      # Precompute per-doc term frequencies and doc lengths
 169      doc_tf: list[dict[str, int]] = []
 170      doc_lens: list[int] = []
 171      for tokens in doc_tokens:
 172          tf = Counter(tokens)
 173          doc_tf.append(tf)
 174          doc_lens.append(len(tokens))
 175  
 176      elapsed = time.time() - t0
 177  
 178      index = {
 179          "version": 1,
 180          "built_at": time.time(),
 181          "n_docs": n_docs,
 182          "build_time_ms": round(elapsed * 1000),
 183          # Tier 1
 184          "vocab": dict(vocab),
 185          "vocab_phrases": vocab_phrases,
 186          # Tier 2
 187          "nodes": nodes,
 188          "idf": idf,
 189          "doc_tf": doc_tf,
 190          "doc_lens": doc_lens,
 191          "avgdl": avgdl,
 192      }
 193      return index
 194  
 195  
 196  def save_index(index: dict) -> None:
 197      """Persist index to disk."""
 198      INDEX_DIR.mkdir(parents=True, exist_ok=True)
 199      with open(INDEX_FILE, "wb") as f:
 200          pickle.dump(index, f, protocol=pickle.HIGHEST_PROTOCOL)
 201  
 202  
 203  def load_index() -> dict | None:
 204      """Load cached index from disk."""
 205      if not INDEX_FILE.exists():
 206          return None
 207      try:
 208          with open(INDEX_FILE, "rb") as f:
 209              index = pickle.load(f)
 210          if index.get("version") != 1:
 211              return None
 212          return index
 213      except (pickle.UnpicklingError, EOFError, KeyError):
 214          return None
 215  
 216  
 217  def ensure_index(force: bool = False) -> dict:
 218      """Load or build+save the index."""
 219      if not force:
 220          index = load_index()
 221          if index is not None:
 222              return index
 223      index = build_index()
 224      save_index(index)
 225      return index
 226  
 227  
 228  def tier1_vocab_match(text: str, index: dict) -> dict[str, dict]:
 229      """Tier 1: Vocabulary matching. Scan text for DreamNode titles.
 230      Returns {uuid: {title, type, path, score, source, mentions}}
 231      """
 232      text_lower = text.lower()
 233      results: dict[str, dict] = {}
 234  
 235      for phrase in index["vocab_phrases"]:
 236          # Check if phrase appears in text (word-boundary aware for short phrases)
 237          if len(phrase) <= 3:
 238              # Short phrases need word boundaries to avoid false positives
 239              pattern = r'\b' + re.escape(phrase) + r'\b'
 240              matches = re.findall(pattern, text_lower)
 241          else:
 242              # Longer phrases: simple substring count
 243              matches = []
 244              start = 0
 245              while True:
 246                  idx = text_lower.find(phrase, start)
 247                  if idx == -1:
 248                      break
 249                  matches.append(phrase)
 250                  start = idx + 1
 251  
 252          if not matches:
 253              continue
 254  
 255          count = len(matches)
 256          for entry in index["vocab"][phrase]:
 257              uid = entry["uuid"]
 258              if uid in results:
 259                  results[uid]["mentions"] += count
 260              else:
 261                  results[uid] = {
 262                      "uuid": uid,
 263                      "title": entry["title"],
 264                      "type": entry["type"],
 265                      "path": entry["path"],
 266                      "radicle_id": entry["radicle_id"],
 267                      "score": 1.0,
 268                      "source": "vocabulary",
 269                      "mentions": count,
 270                  }
 271      return results
 272  
 273  
 274  def tier2_bm25(text: str, index: dict, top_k: int = 20) -> dict[str, dict]:
 275      """Tier 2: BM25 scoring of query text against all README documents.
 276      Returns {uuid: {title, type, path, score, source}}
 277      """
 278      query_tokens = _tokenize(text)
 279      if not query_tokens:
 280          return {}
 281  
 282      # Count query term frequencies (for potential weighting, but BM25 uses binary query)
 283      query_terms = set(query_tokens)
 284  
 285      idf = index["idf"]
 286      doc_tf = index["doc_tf"]
 287      doc_lens = index["doc_lens"]
 288      avgdl = index["avgdl"]
 289      nodes = index["nodes"]
 290  
 291      scores: list[float] = []
 292      for i in range(len(nodes)):
 293          score = 0.0
 294          tf_dict = doc_tf[i]
 295          dl = doc_lens[i]
 296  
 297          for term in query_terms:
 298              if term not in idf:
 299                  continue
 300              tf = tf_dict.get(term, 0)
 301              if tf == 0:
 302                  continue
 303              term_idf = idf[term]
 304              # BM25 formula
 305              numerator = tf * (BM25_K1 + 1)
 306              denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * dl / avgdl)
 307              score += term_idf * numerator / denominator
 308  
 309          scores.append(score)
 310  
 311      # Get top-k by score
 312      ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
 313  
 314      results: dict[str, dict] = {}
 315      for rank, i in enumerate(ranked[:top_k]):
 316          if scores[i] <= 0:
 317              break
 318          node = nodes[i]
 319          results[node["uuid"]] = {
 320              "uuid": node["uuid"],
 321              "title": node["title"],
 322              "type": node["type"],
 323              "path": node["path"],
 324              "radicle_id": node["radicle_id"],
 325              "score": round(scores[i], 3),
 326              "source": "bm25",
 327          }
 328      return results
 329  
 330  
 331  def context_search(text: str, index: dict, top_k: int = 15) -> list[dict]:
 332      """Run Tier 1 + Tier 2, merge results, return ranked list."""
 333      t0 = time.time()
 334  
 335      # Tier 1: vocabulary match
 336      t1_results = tier1_vocab_match(text, index)
 337  
 338      # Tier 2: BM25
 339      t2_results = tier2_bm25(text, index, top_k=top_k * 3)
 340  
 341      # Merge: vocab hits get priority, BM25 fills in
 342      merged: dict[str, dict] = {}
 343  
 344      # Normalize BM25 scores to [0, 1] for merging
 345      max_bm25 = max((r["score"] for r in t2_results.values()), default=1.0)
 346      if max_bm25 <= 0:
 347          max_bm25 = 1.0
 348  
 349      for uid, entry in t2_results.items():
 350          merged[uid] = {
 351              **entry,
 352              "bm25_score": entry["score"],
 353              "bm25_norm": round(entry["score"] / max_bm25, 3),
 354              "vocab_hit": False,
 355              "mentions": 0,
 356          }
 357  
 358      for uid, entry in t1_results.items():
 359          if uid in merged:
 360              merged[uid]["vocab_hit"] = True
 361              merged[uid]["mentions"] = entry["mentions"]
 362              merged[uid]["source"] = "both"
 363          else:
 364              merged[uid] = {
 365                  **entry,
 366                  "bm25_score": 0,
 367                  "bm25_norm": 0,
 368                  "vocab_hit": True,
 369              }
 370  
 371      # Combined score: vocab hit is a strong signal, BM25 provides ranking
 372      for uid, entry in merged.items():
 373          vocab_boost = 0.5 if entry.get("vocab_hit") else 0.0
 374          bm25_component = entry.get("bm25_norm", 0) * 0.5
 375          entry["combined_score"] = round(vocab_boost + bm25_component, 3)
 376  
 377      # Sort by combined score, then by mentions
 378      ranked = sorted(
 379          merged.values(),
 380          key=lambda x: (x["combined_score"], x.get("mentions", 0)),
 381          reverse=True,
 382      )[:top_k]
 383  
 384      elapsed_ms = round((time.time() - t0) * 1000, 1)
 385  
 386      # Clean up output
 387      output = []
 388      for entry in ranked:
 389          output.append({
 390              "uuid": entry["uuid"],
 391              "title": entry["title"],
 392              "type": entry["type"],
 393              "path": entry["path"],
 394              "radicle_id": entry.get("radicle_id", ""),
 395              "score": entry["combined_score"],
 396              "bm25": entry.get("bm25_score", 0),
 397              "vocab_hit": entry.get("vocab_hit", False),
 398              "mentions": entry.get("mentions", 0),
 399              "source": entry.get("source", ""),
 400          })
 401  
 402      return output
 403  
 404  
 405  def run_context(args: argparse.Namespace) -> None:
 406      """CLI entry point for context search."""
 407      # Load or build index
 408      index = ensure_index(force=getattr(args, "rebuild", False))
 409  
 410      # Get input text
 411      input_text = args.input
 412      if os.path.isfile(input_text):
 413          input_text = Path(input_text).read_text(errors="replace")
 414  
 415      top_k = getattr(args, "top", 15)
 416  
 417      t0 = time.time()
 418      results = context_search(input_text, index, top_k=top_k)
 419      elapsed = time.time() - t0
 420  
 421      if getattr(args, "json_output", False):
 422          print(json.dumps({
 423              "results": results,
 424              "stats": {
 425                  "elapsed_ms": round(elapsed * 1000, 1),
 426                  "input_words": len(input_text.split()),
 427                  "index_nodes": index["n_docs"],
 428                  "results_count": len(results),
 429              }
 430          }, indent=2))
 431      else:
 432          # Human-readable output
 433          print(f"\n  {len(results)} results in {elapsed*1000:.0f}ms "
 434                f"({len(input_text.split())} words, {index['n_docs']} nodes indexed)\n")
 435          for i, r in enumerate(results):
 436              vocab_marker = " *" if r["vocab_hit"] else "  "
 437              mentions = f" ({r['mentions']}x)" if r["mentions"] > 0 else ""
 438              print(f"  {i+1:2d}.{vocab_marker} {r['score']:.3f}  "
 439                    f"{r['title']:40s}  bm25={r['bm25']:.1f}{mentions}")
 440          print()
 441          print("  * = vocabulary match (title found in text)")
 442          print()
 443  
 444  
 445  def run_index(args: argparse.Namespace) -> None:
 446      """CLI entry point for index building."""
 447      force = getattr(args, "force", False)
 448      t0 = time.time()
 449      nodes = discover_nodes()
 450      index = build_index(nodes)
 451      save_index(index)
 452      elapsed = time.time() - t0
 453      print(f"Indexed {index['n_docs']} DreamNodes in {elapsed*1000:.0f}ms")
 454      print(f"Vocabulary: {len(index['vocab'])} phrases")
 455      print(f"BM25 terms: {len(index['idf'])} unique terms")
 456      print(f"Saved to: {INDEX_FILE}")
 457  
 458  
 459  # ============================================================
 460  # Server code (unchanged from original)
 461  # ============================================================
 462  
 463  def build_injected_index(html: str, models: list[str] | None = None) -> str:
 464      """Patch the AI bridge URL and inject model selector."""
 465      # Replace getAIBridgeWsUrl to return ws://<current host>/ws
 466      html = html.replace(
 467          "function getAIBridgeWsUrl() {\n"
 468          "  // When served by server.py, the InterBrain is on the same machine\n"
 469          "  // Use the page's hostname (works for both localhost and Tailscale IP)\n"
 470          "  const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';\n"
 471          "  const host = window.location.hostname || 'localhost';\n"
 472          "  return `${wsProtocol}//${host}:${AI_BRIDGE_PORT}`;\n"
 473          "}",
 474          "function getAIBridgeWsUrl() {\n"
 475          "  const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';\n"
 476          "  return `${wsProtocol}//${window.location.host}/ws`;\n"
 477          "}",
 478          1,
 479      )
 480  
 481      # Patch sendWsBridgeRequest to include selected model
 482      html = html.replace(
 483          "  aiBridgeWs.send(JSON.stringify({\n"
 484          "    type: 'ai-inference-stream-request',\n"
 485          "    requestId: currentRequestId,\n"
 486          "    messages,\n"
 487          "    complexity: 'standard'\n"
 488          "  }));",
 489          "  aiBridgeWs.send(JSON.stringify({\n"
 490          "    type: 'ai-inference-stream-request',\n"
 491          "    requestId: currentRequestId,\n"
 492          "    messages,\n"
 493          "    complexity: 'standard',\n"
 494          "    options: { model: document.getElementById('auryn-model-select')?.value || '' }\n"
 495          "  }));",
 496          1,
 497      )
 498  
 499      # Inject model selector dropdown
 500      model_options = models or ["qwen3:32b"]
 501      opts_html = "".join(f'<option value="{m}">{m}</option>' for m in model_options)
 502      selector = (
 503          '<div id="auryn-model-picker" style="position:fixed;top:8px;right:8px;z-index:9999;'
 504          'font:12px monospace;opacity:0.7">'
 505          f'<select id="auryn-model-select" style="background:#1a1a1a;color:#c4a54a;'
 506          f'border:1px solid #333;border-radius:4px;padding:2px 4px;font:inherit">'
 507          f'{opts_html}</select></div>'
 508      )
 509      html = html.replace("</body>", selector + "\n</body>", 1)
 510  
 511      # Polyfill crypto.randomUUID for non-secure contexts (HTTP on mobile Safari)
 512      # + add global error handler to surface errors in the UI (no console on mobile)
 513      polyfill = (
 514          '<script>'
 515          'if(!crypto.randomUUID)crypto.randomUUID=function(){'
 516          'return([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g,function(c){'
 517          'return(c^crypto.getRandomValues(new Uint8Array(1))[0]&15>>c/4).toString(16)'
 518          '})};'
 519          'window.addEventListener("error",function(e){'
 520          'var d=document.createElement("div");'
 521          'd.style.cssText="position:fixed;top:0;left:0;right:0;background:#800;color:#fff;'
 522          'padding:8px;font:12px monospace;z-index:99999;white-space:pre-wrap";'
 523          'd.textContent="JS Error: "+e.message+" @ line "+e.lineno;'
 524          'document.body?document.body.appendChild(d):document.addEventListener("DOMContentLoaded",'
 525          'function(){document.body.appendChild(d)});'
 526          '});'
 527          '</script>\n'
 528      )
 529      html = html.replace("<head>", "<head>\n" + polyfill, 1)
 530  
 531      return html
 532  
 533  
 534  # ============================================================
 535  # Inference WebSocket (/ws)
 536  # ============================================================
 537  
 538  async def ws_inference(request: web.Request) -> web.WebSocketResponse:
 539      ws = web.WebSocketResponse()
 540      await ws.prepare(request)
 541  
 542      ollama_url = request.app["ollama_url"]
 543      default_model = request.app["default_model"]
 544      active_tasks: dict[str, asyncio.Task] = {}
 545  
 546      await ws.send_json({"type": "ai-bridge-ready", "version": "2"})
 547  
 548      async for msg in ws:
 549          if msg.type == aiohttp.WSMsgType.TEXT:
 550              try:
 551                  data = json.loads(msg.data)
 552              except json.JSONDecodeError:
 553                  continue
 554  
 555              if data.get("type") == "ai-bridge-probe":
 556                  await ws.send_json({"type": "ai-bridge-ready", "version": "2"})
 557  
 558              elif data.get("type") == "ai-inference-stream-request":
 559                  request_id = data.get("requestId", str(uuid.uuid4()))
 560                  messages = data.get("messages", [])
 561                  options = data.get("options", {})
 562                  model = options.get("model") or default_model
 563  
 564                  if model.startswith("claude-"):
 565                      coro = _stream_claude(
 566                          ws, request_id, messages, model,
 567                          request.app.get("claude_api_key", ""),
 568                      )
 569                  else:
 570                      coro = _stream_ollama(ws, request_id, messages, model, ollama_url)
 571  
 572                  task = asyncio.create_task(coro)
 573                  active_tasks[request_id] = task
 574                  task.add_done_callback(lambda t, rid=request_id: active_tasks.pop(rid, None))
 575  
 576              elif data.get("type") == "ai-inference-stream-cancel":
 577                  rid = data.get("requestId")
 578                  task = active_tasks.pop(rid, None)
 579                  if task:
 580                      task.cancel()
 581  
 582          elif msg.type in (aiohttp.WSMsgType.ERROR, aiohttp.WSMsgType.CLOSE):
 583              break
 584  
 585      for task in active_tasks.values():
 586          task.cancel()
 587      active_tasks.clear()
 588      return ws
 589  
 590  
 591  async def _stream_ollama(
 592      ws: web.WebSocketResponse,
 593      request_id: str,
 594      messages: list[dict],
 595      model: str,
 596      ollama_url: str,
 597  ) -> None:
 598      """Stream inference from Ollama and relay chunks over WebSocket."""
 599      url = f"{ollama_url}/api/chat"
 600      payload = {"model": model, "messages": messages, "stream": True}
 601      partial_content = ""
 602  
 603      try:
 604          async with aiohttp.ClientSession() as session:
 605              async with session.post(url, json=payload) as resp:
 606                  if resp.status != 200:
 607                      error_text = await resp.text()
 608                      await ws.send_json({
 609                          "type": "ai-inference-stream-error",
 610                          "requestId": request_id,
 611                          "error": f"Ollama error ({resp.status}): {error_text}",
 612                      })
 613                      return
 614  
 615                  prompt_tokens = 0
 616                  completion_tokens = 0
 617  
 618                  async for line in resp.content:
 619                      line = line.strip()
 620                      if not line:
 621                          continue
 622                      try:
 623                          chunk_data = json.loads(line)
 624                      except json.JSONDecodeError:
 625                          continue
 626  
 627                      if chunk_data.get("done"):
 628                          prompt_tokens = chunk_data.get("prompt_eval_count", 0)
 629                          completion_tokens = chunk_data.get("eval_count", 0)
 630                          break
 631  
 632                      token = chunk_data.get("message", {}).get("content", "")
 633                      if token:
 634                          partial_content += token
 635                          await ws.send_json({
 636                              "type": "ai-inference-stream-chunk",
 637                              "requestId": request_id,
 638                              "chunk": token,
 639                          })
 640  
 641                  await ws.send_json({
 642                      "type": "ai-inference-stream-done",
 643                      "requestId": request_id,
 644                      "provider": "ollama",
 645                      "model": model,
 646                      "usage": {
 647                          "promptTokens": prompt_tokens,
 648                          "completionTokens": completion_tokens,
 649                      },
 650                  })
 651  
 652      except asyncio.CancelledError:
 653          pass
 654      except Exception as e:
 655          try:
 656              await ws.send_json({
 657                  "type": "ai-inference-stream-error",
 658                  "requestId": request_id,
 659                  "error": str(e),
 660                  "partialContent": partial_content or None,
 661              })
 662          except ConnectionResetError:
 663              pass
 664  
 665  
 666  async def _stream_claude(
 667      ws: web.WebSocketResponse,
 668      request_id: str,
 669      messages: list[dict],
 670      model: str,
 671      api_key: str,
 672  ) -> None:
 673      """Stream inference from Claude API and relay chunks over WebSocket."""
 674      if not api_key:
 675          await ws.send_json({
 676              "type": "ai-inference-stream-error",
 677              "requestId": request_id,
 678              "error": "No Claude API key configured. Set claudeApiKey in InterBrain settings.",
 679          })
 680          return
 681  
 682      system_msg = None
 683      api_messages = []
 684      for m in messages:
 685          if m.get("role") == "system":
 686              system_msg = m.get("content", "")
 687          else:
 688              api_messages.append({"role": m["role"], "content": m["content"]})
 689  
 690      payload: dict = {
 691          "model": model,
 692          "max_tokens": 8192,
 693          "messages": api_messages,
 694          "stream": True,
 695      }
 696      if system_msg:
 697          payload["system"] = system_msg
 698  
 699      headers = {
 700          "x-api-key": api_key,
 701          "anthropic-version": "2023-06-01",
 702          "content-type": "application/json",
 703      }
 704  
 705      partial_content = ""
 706      prompt_tokens = 0
 707      completion_tokens = 0
 708  
 709      try:
 710          async with aiohttp.ClientSession() as session:
 711              async with session.post(
 712                  "https://api.anthropic.com/v1/messages",
 713                  json=payload,
 714                  headers=headers,
 715              ) as resp:
 716                  if resp.status != 200:
 717                      error_text = await resp.text()
 718                      await ws.send_json({
 719                          "type": "ai-inference-stream-error",
 720                          "requestId": request_id,
 721                          "error": f"Claude API error ({resp.status}): {error_text}",
 722                      })
 723                      return
 724  
 725                  # SSE stream
 726                  async for line in resp.content:
 727                      line = line.decode("utf-8", errors="replace").strip()
 728                      if not line.startswith("data: "):
 729                          continue
 730                      data_str = line[6:]
 731                      if data_str == "[DONE]":
 732                          break
 733                      try:
 734                          event = json.loads(data_str)
 735                      except json.JSONDecodeError:
 736                          continue
 737  
 738                      etype = event.get("type")
 739  
 740                      if etype == "content_block_delta":
 741                          delta = event.get("delta", {})
 742                          if delta.get("type") == "text_delta":
 743                              token = delta.get("text", "")
 744                              if token:
 745                                  partial_content += token
 746                                  await ws.send_json({
 747                                      "type": "ai-inference-stream-chunk",
 748                                      "requestId": request_id,
 749                                      "chunk": token,
 750                                  })
 751  
 752                      elif etype == "message_delta":
 753                          usage = event.get("usage", {})
 754                          completion_tokens = usage.get("output_tokens", 0)
 755  
 756                      elif etype == "message_start":
 757                          msg_usage = event.get("message", {}).get("usage", {})
 758                          prompt_tokens = msg_usage.get("input_tokens", 0)
 759  
 760                  await ws.send_json({
 761                      "type": "ai-inference-stream-done",
 762                      "requestId": request_id,
 763                      "provider": "anthropic",
 764                      "model": model,
 765                      "usage": {
 766                          "promptTokens": prompt_tokens,
 767                          "completionTokens": completion_tokens,
 768                      },
 769                  })
 770  
 771      except asyncio.CancelledError:
 772          pass
 773      except Exception as e:
 774          try:
 775              await ws.send_json({
 776                  "type": "ai-inference-stream-error",
 777                  "requestId": request_id,
 778                  "error": str(e),
 779                  "partialContent": partial_content or None,
 780              })
 781          except ConnectionResetError:
 782              pass
 783  
 784  
 785  # ============================================================
 786  # Transcription WebSocket (/ws/transcribe) — Whisper chunked
 787  # with vocabulary feedback loop via context provider
 788  # ============================================================
 789  
 790  # mlx-whisper model mapping (size -> HuggingFace repo)
 791  _MLX_WHISPER_MODELS = {
 792      "tiny": "mlx-community/whisper-tiny",
 793      "base": "mlx-community/whisper-base-mlx",
 794      "small": "mlx-community/whisper-small-mlx",
 795      "medium": "mlx-community/whisper-medium-mlx",
 796      "large": "mlx-community/whisper-large-v3-mlx",
 797      "turbo": "mlx-community/whisper-large-v3-turbo",
 798  }
 799  _whisper_model_size = "base"
 800  _whisper_repo: str = _MLX_WHISPER_MODELS["base"]
 801  _whisper_warmed_up = False
 802  _whisper_lock = asyncio.Lock()
 803  
 804  
 805  async def _warm_up_whisper():
 806      """Pre-download the model on first use so subsequent calls are fast."""
 807      global _whisper_warmed_up
 808      if _whisper_warmed_up:
 809          return
 810      async with _whisper_lock:
 811          if _whisper_warmed_up:
 812              return
 813          import mlx_whisper
 814          print(f"[Whisper] Warming up {_whisper_model_size} model ({_whisper_repo})...")
 815          loop = asyncio.get_event_loop()
 816          # Transcribe a tiny silent WAV to trigger model download/cache
 817          silent = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
 818          silent.close()
 819          await asyncio.create_subprocess_exec(
 820              "ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=16000:cl=mono",
 821              "-t", "0.5", "-ar", "16000", "-ac", "1", silent.name,
 822              stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
 823          )
 824          try:
 825              await loop.run_in_executor(
 826                  None,
 827                  lambda: mlx_whisper.transcribe(
 828                      silent.name, path_or_hf_repo=_whisper_repo, language="en",
 829                  ),
 830              )
 831          except Exception:
 832              pass
 833          finally:
 834              try:
 835                  os.unlink(silent.name)
 836              except OSError:
 837                  pass
 838          _whisper_warmed_up = True
 839          print("[Whisper] Model ready.")
 840  
 841  
 842  async def _probe_duration_file(path: str) -> float | None:
 843      """Get duration in seconds of an audio file on disk."""
 844      proc = await asyncio.create_subprocess_exec(
 845          "ffprobe", "-v", "quiet", "-show_entries", "format=duration",
 846          "-of", "csv=p=0", path,
 847          stdout=asyncio.subprocess.PIPE,
 848          stderr=asyncio.subprocess.PIPE,
 849      )
 850      stdout, _ = await proc.communicate()
 851      try:
 852          return float(stdout.decode().strip())
 853      except (ValueError, AttributeError):
 854          return None
 855  
 856  
 857  async def _extract_time_range_file(path: str, start_sec: float) -> str | None:
 858      """Extract audio from start_sec to end into a WAV file."""
 859      tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
 860      tmp.close()
 861      proc = await asyncio.create_subprocess_exec(
 862          "ffmpeg", "-y", "-ss", f"{start_sec:.2f}", "-i", path,
 863          "-ar", "16000", "-ac", "1", tmp.name,
 864          stdout=asyncio.subprocess.PIPE,
 865          stderr=asyncio.subprocess.PIPE,
 866      )
 867      await proc.communicate()
 868      if proc.returncode != 0:
 869          try:
 870              os.unlink(tmp.name)
 871          except OSError:
 872              pass
 873          return None
 874      return tmp.name
 875  
 876  
 877  async def _decode_webm_to_wav(webm_data: bytes) -> str | None:
 878      """Decode webm/opus audio to a temporary WAV file for Whisper."""
 879      tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
 880      tmp.close()
 881      proc = await asyncio.create_subprocess_exec(
 882          "ffmpeg", "-y", "-i", "pipe:0",
 883          "-ar", "16000", "-ac", "1", tmp.name,
 884          stdin=asyncio.subprocess.PIPE,
 885          stdout=asyncio.subprocess.PIPE,
 886          stderr=asyncio.subprocess.PIPE,
 887      )
 888      _, stderr = await proc.communicate(input=webm_data)
 889      if proc.returncode != 0:
 890          os.unlink(tmp.name)
 891          return None
 892      return tmp.name
 893  
 894  
 895  def _filter_hallucination(text: str) -> str:
 896      """Drop text if Whisper is hallucinating (repeated words/phrases on silence)."""
 897      words = text.split()
 898      if len(words) < 4:
 899          return text
 900      # If any single word makes up >60% of the text, it's a hallucination loop
 901      counts = Counter(w.lower().strip(".,!?") for w in words)
 902      most_common_word, most_common_count = counts.most_common(1)[0]
 903      if most_common_count / len(words) > 0.6:
 904          return ""
 905      # If a 2-3 word phrase repeats >4 times, also hallucination
 906      for n in (2, 3):
 907          if len(words) < n * 4:
 908              continue
 909          ngrams = [" ".join(words[i:i+n]).lower() for i in range(len(words) - n + 1)]
 910          ngram_counts = Counter(ngrams)
 911          top_ngram, top_count = ngram_counts.most_common(1)[0]
 912          if top_count > 4:
 913              return ""
 914      return text
 915  
 916  
 917  _CORE_VOCAB = [
 918      "AURYN", "InterBrain", "DreamNode", "DreamOS", "DreamTalk",
 919      "DreamSong", "PRISM", "ABRACADABRA", "Radicle",
 920  ]
 921  
 922  # Max terms in Whisper's initial_prompt.
 923  # R&D (2026-03-05): medium model tested up to 100 terms with zero quality
 924  # degradation and no speed penalty. The old limit of 20 was from tiny/base.
 925  _MAX_VOCAB_TERMS = 50
 926  
 927  
 928  def _build_vocab_prompt(
 929      pinned: list[str] | None = None,
 930      ephemeral: list[str] | None = None,
 931  ) -> str:
 932      """Build Whisper initial_prompt from pinned + ephemeral DreamNode titles.
 933  
 934      Medium model tested up to 100 terms with no quality degradation.
 935      Cap at 50 for now — conservative margin with room to grow.
 936  
 937      Two tiers:
 938      - Pinned: Core vocab + any DreamNode whose name was recognized in the
 939        transcript. Once mentioned, stays for the entire session.
 940      - Ephemeral: BM25 context provider suggestions from recent transcript.
 941        These rotate as conversation shifts. Fill remaining slots up to cap.
 942      """
 943      terms: list[str] = []
 944      seen = set()
 945  
 946      def _add(name: str):
 947          if name.lower() not in seen and len(name) >= 3:
 948              seen.add(name.lower())
 949              terms.append(name)
 950  
 951      # 1. Core DreamOS vocabulary — always present
 952      for term in _CORE_VOCAB:
 953          _add(term)
 954  
 955      # 2. Session-pinned titles (DreamNodes whose name appeared in transcript)
 956      if pinned:
 957          for t in pinned:
 958              _add(t)
 959  
 960      # 3. Ephemeral BM25 suggestions — fill remaining slots
 961      remaining = _MAX_VOCAB_TERMS - len(terms)
 962      if ephemeral and remaining > 0:
 963          for t in ephemeral[:remaining]:
 964              _add(t)
 965  
 966      if not terms:
 967          return ""
 968  
 969      return ", ".join(terms)
 970  
 971  
 972  async def ws_transcribe(request: web.Request) -> web.WebSocketResponse:
 973      ws = web.WebSocketResponse()
 974      await ws.prepare(request)
 975  
 976      import mlx_whisper
 977      await _warm_up_whisper()
 978      loop = asyncio.get_event_loop()
 979  
 980      session_id: str | None = None
 981      audio_file = None
 982      transcript_parts: list[str] = []
 983      start_time: float = 0.0
 984      cumulative_webm = bytearray()
 985      last_processed_bytes = 0
 986      chunk_interval = 8.0  # seconds between transcription passes
 987      process_task: asyncio.Task | None = None
 988  
 989      # Vocabulary feedback loop state
 990      context_index = load_index()
 991      pinned_vocab: list[str] = []   # DreamNodes whose name appeared in transcript (permanent)
 992      ephemeral_vocab: list[str] = []  # BM25 suggestions from recent text (rotating)
 993      sliding_window: list[str] = []  # last N transcript chunks for BM25 context
 994  
 995      SLIDING_WINDOW_SIZE = 5  # ~40-60 seconds of conversation context
 996  
 997      # Build vocab lookup: multiple key forms -> {folder, title, uuid, path}
 998      # Keys: folder (lowered), title (lowered), collapsed spaced title, camelCase-split
 999      # This is the single lookup table: title -> (uuid, path) for the whole system
1000      _vocab_lookup: dict[str, dict] = {}
1001      if context_index and "nodes" in context_index:
1002          for node in context_index["nodes"]:
1003              folder = node.get("folder", "")
1004              title = node.get("title", "")
1005              info = {
1006                  "folder": folder, "title": title,
1007                  "uuid": node.get("uuid", ""), "path": node.get("path", ""),
1008              }
1009              # Index by folder name
1010              if folder:
1011                  _vocab_lookup[folder.lower()] = info
1012              # Index by title
1013              if title:
1014                  _vocab_lookup[title.lower()] = info
1015              # Collapsed spaced titles (A U R Y N -> auryn)
1016              collapsed = re.sub(r"\s+", "", title).lower()
1017              if collapsed and collapsed not in _vocab_lookup:
1018                  _vocab_lookup[collapsed] = info
1019              # CamelCase-split (InterBrain -> inter brain)
1020              camel_words = _split_camel(folder)
1021              if len(camel_words) > 1:
1022                  joined = " ".join(camel_words)
1023                  if joined not in _vocab_lookup:
1024                      _vocab_lookup[joined] = info
1025  
1026      vocab_prompt = _build_vocab_prompt()
1027      print(f"[Whisper] Initial vocab: {vocab_prompt}")
1028  
1029      _transcript_file: Path | None = None
1030  
1031      def _start_transcript_session(recording_name: str, session_start: float):
1032          """Write a session header to the daily transcript file."""
1033          nonlocal _transcript_file
1034          _transcript_file = TRANSCRIPTS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.md"
1035          wall_time = datetime.now().strftime("%H:%M")
1036          with open(_transcript_file, "a", encoding="utf-8") as f:
1037              f.write(f"\n---\n\n### {wall_time} · `{recording_name}`\n\n")
1038              f.flush()
1039  
1040      def _write_transcript_chunk(text: str, session_start: float):
1041          """Append a confirmed transcript chunk to the daily file."""
1042          elapsed = time.time() - session_start
1043          minutes = int(elapsed // 60)
1044          seconds = int(elapsed % 60)
1045          timestamp = f"{minutes}:{seconds:02d}"
1046          if _transcript_file is None:
1047              return
1048          with open(_transcript_file, "a", encoding="utf-8") as f:
1049              f.write(f"[{timestamp}] {text}\n\n")
1050              f.flush()
1051  
1052      def _check_vocab_hits(text: str) -> list[dict]:
1053          """Check if any DreamNode names appear in the transcribed text.
1054          Returns list of matched DreamNode info dicts (deduped by uuid). These get pinned."""
1055          text_lower = text.lower()
1056          hits = []
1057          seen_uuids = set()
1058          for key, info in _vocab_lookup.items():
1059              if info["uuid"] in seen_uuids:
1060                  continue
1061              if len(key) <= 3:
1062                  # Short names need word boundaries
1063                  if re.search(r'\b' + re.escape(key) + r'\b', text_lower):
1064                      hits.append(info)
1065                      seen_uuids.add(info["uuid"])
1066              else:
1067                  if key in text_lower:
1068                      hits.append(info)
1069                      seen_uuids.add(info["uuid"])
1070          return hits
1071  
1072      def _update_ephemeral_vocab(text: str):
1073          """Run BM25 on recent transcript (sliding window), update ephemeral suggestions."""
1074          nonlocal ephemeral_vocab
1075  
1076          # Update sliding window
1077          sliding_window.append(text)
1078          if len(sliding_window) > SLIDING_WINDOW_SIZE:
1079              sliding_window.pop(0)
1080  
1081          if not context_index:
1082              return
1083  
1084          # BM25 on the sliding window
1085          window_text = " ".join(sliding_window)
1086          results = context_search(window_text, context_index, top_k=10)
1087  
1088          # Ephemeral = top BM25 hits that aren't already pinned or core.
1089          # No score threshold — if BM25 ranked it in the top 10, it's relevant
1090          # enough to bias Whisper. The slot cap (20 total) is the only limit.
1091          pinned_set = {p.lower() for p in pinned_vocab}
1092          core_set = {t.lower() for t in _CORE_VOCAB}
1093          remaining_slots = _MAX_VOCAB_TERMS - len(_CORE_VOCAB) - len(pinned_vocab)
1094          new_ephemeral = []
1095          for r in results:
1096              if remaining_slots <= 0:
1097                  break
1098              title = r.get("title", "")
1099              if not title:
1100                  continue
1101              if title.lower() not in pinned_set and title.lower() not in core_set:
1102                  new_ephemeral.append(title)
1103                  remaining_slots -= 1
1104  
1105          if new_ephemeral != ephemeral_vocab:
1106              ephemeral_vocab = new_ephemeral
1107              print(f"[Whisper] Ephemeral vocab updated: {ephemeral_vocab[:5]}")
1108  
1109      def _rebuild_prompt():
1110          """Rebuild the vocab prompt from current pinned + ephemeral state."""
1111          nonlocal vocab_prompt
1112          vocab_prompt = _build_vocab_prompt(pinned_vocab, ephemeral_vocab)
1113  
1114      async def _send_vocab_state():
1115          """Send current vocabulary state to UI for debugging."""
1116          try:
1117              await ws.send_json({
1118                  "type": "vocab_update",
1119                  "core": list(_CORE_VOCAB),
1120                  "pinned": list(pinned_vocab),
1121                  "ephemeral": list(ephemeral_vocab),
1122                  "prompt": vocab_prompt,
1123              })
1124          except Exception:
1125              pass
1126  
1127      async def _notify_dreamnode_hit(info: dict):
1128          """Send dreamnode_detected message to UI when a name is recognized."""
1129          try:
1130              await ws.send_json({
1131                  "type": "dreamnode_detected",
1132                  "folder": info["folder"],
1133                  "title": info["title"],
1134                  "uuid": info["uuid"],
1135                  "path": info.get("path", ""),
1136              })
1137          except Exception:
1138              pass
1139  
1140      def _fix_capitalization(text: str, titles: list[str]) -> str:
1141          """Fix capitalization of DreamNode titles in text that was transcribed
1142          before the term was in the vocab prompt. Case-insensitive replace
1143          with the canonical title form."""
1144          for title in titles:
1145              pattern = re.compile(re.escape(title), re.IGNORECASE)
1146              text = pattern.sub(title, text)
1147          return text
1148  
1149      async def _process_new_text(new_text: str) -> str:
1150          """Process newly transcribed text: check vocab hits, update feedback loop.
1151          Returns the (possibly corrected) text."""
1152          # 1. Check for DreamNode name matches (Tier 1 vocab) → pin them
1153          hits = _check_vocab_hits(new_text)
1154          new_pins = False
1155          newly_pinned_titles = []
1156          for info in hits:
1157              title = info["title"]
1158              if title not in pinned_vocab:
1159                  pinned_vocab.append(title)
1160                  newly_pinned_titles.append(title)
1161                  new_pins = True
1162                  print(f"[Whisper] PINNED: {title} (uuid={info['uuid']}, path={info['path']})")
1163                  await _notify_dreamnode_hit(info)
1164  
1165          # Fix capitalization in *this* chunk for newly pinned titles
1166          # (the chunk was transcribed before these terms were in the prompt)
1167          if newly_pinned_titles:
1168              new_text = _fix_capitalization(new_text, newly_pinned_titles)
1169  
1170          # 2. Update ephemeral vocab via BM25 sliding window
1171          _update_ephemeral_vocab(new_text)
1172  
1173          # 3. Rebuild prompt if anything changed
1174          if new_pins or True:  # always rebuild to reflect ephemeral changes
1175              _rebuild_prompt()
1176  
1177          return new_text
1178  
1179      last_transcribed_sec = 0.0  # seconds of audio already transcribed
1180  
1181      async def periodic_transcribe():
1182          """Every chunk_interval seconds, transcribe only the new audio."""
1183          nonlocal last_processed_bytes, last_transcribed_sec
1184          while True:
1185              await asyncio.sleep(chunk_interval)
1186              if not audio_file or not audio_file.exists():
1187                  continue
1188              if len(cumulative_webm) <= last_processed_bytes:
1189                  continue
1190  
1191              # Elapsed time since session start = total audio duration
1192              # (MediaRecorder streams in real time, so wall clock ≈ audio clock)
1193              total_sec = time.time() - start_time
1194  
1195              # Extract new audio with 1.5s overlap so Whisper gets full
1196              # sentence context instead of cutting mid-word
1197              overlap = 1.5 if last_transcribed_sec > 0 else 0.0
1198              extract_from = max(0, last_transcribed_sec - overlap)
1199              wav_path = await _extract_time_range_file(
1200                  str(audio_file), extract_from
1201              )
1202              if not wav_path:
1203                  last_processed_bytes = len(cumulative_webm)
1204                  continue
1205  
1206              try:
1207                  # Prompt: vocab terms + last transcript chunk for continuity
1208                  prompt = vocab_prompt
1209                  if transcript_parts:
1210                      recent = transcript_parts[-1]
1211                      prompt = vocab_prompt + ". " + recent if vocab_prompt else recent
1212  
1213                  result = await loop.run_in_executor(
1214                      None,
1215                      lambda p=prompt: mlx_whisper.transcribe(
1216                          wav_path,
1217                          path_or_hf_repo=_whisper_repo,
1218                          language="en",
1219                          initial_prompt=p,
1220                      ),
1221                  )
1222  
1223                  # Skip segments in the overlap region — they belong to
1224                  # the previous chunk. Keep segments starting after overlap.
1225                  # Use a soft threshold: any segment whose midpoint is past
1226                  # the overlap belongs to this chunk.
1227                  segs = result["segments"]
1228                  if overlap > 0 and segs:
1229                      segs = [s for s in segs
1230                              if (s["start"] + s["end"]) / 2 >= overlap]
1231  
1232                  new_text = " ".join(
1233                      s["text"].strip() for s in segs
1234                  ).strip()
1235  
1236                  # Filter hallucinated repetition (Whisper loops on silence)
1237                  new_text = _filter_hallucination(new_text)
1238  
1239                  if new_text:
1240                      # Process vocab hits first so capitalization is fixed
1241                      # before sending to UI and writing to transcript
1242                      new_text = await _process_new_text(new_text)
1243                      chunk_text = " " + new_text if transcript_parts else new_text
1244                      await ws.send_json({"type": "transcript_chunk", "text": chunk_text})
1245                      transcript_parts.append(new_text)
1246                      _write_transcript_chunk(new_text, start_time)
1247                      await _send_vocab_state()
1248  
1249                  last_transcribed_sec = total_sec
1250                  last_processed_bytes = len(cumulative_webm)
1251  
1252              except Exception as e:
1253                  print(f"[Whisper] Transcription error: {e}")
1254              finally:
1255                  try:
1256                      os.unlink(wav_path)
1257                  except OSError:
1258                      pass
1259  
1260      async for msg in ws:
1261          if msg.type == aiohttp.WSMsgType.TEXT:
1262              try:
1263                  data = json.loads(msg.data)
1264              except json.JSONDecodeError:
1265                  continue
1266  
1267              if data.get("type") == "start_stream":
1268                  session_id = str(uuid.uuid4())
1269                  start_time = time.time()
1270                  transcript_parts.clear()
1271                  cumulative_webm.clear()
1272                  last_processed_bytes = 0
1273                  last_transcribed_sec = 0.0
1274                  pinned_vocab.clear()
1275                  ephemeral_vocab.clear()
1276                  sliding_window.clear()
1277                  vocab_prompt = _build_vocab_prompt()
1278  
1279                  RECORDINGS_DIR.mkdir(exist_ok=True)
1280                  TRANSCRIPTS_DIR.mkdir(exist_ok=True)
1281  
1282                  ts = datetime.now().strftime("%Y%m%d-%H%M%S")
1283                  audio_file = RECORDINGS_DIR / f"{ts}.webm"
1284                  _start_transcript_session(f"{ts}.webm", start_time)
1285  
1286                  process_task = asyncio.create_task(periodic_transcribe())
1287                  await ws.send_json({"type": "session_started", "session_id": session_id})
1288  
1289              elif data.get("type") == "end_stream":
1290                  if process_task:
1291                      process_task.cancel()
1292                      try:
1293                          await process_task
1294                      except asyncio.CancelledError:
1295                          pass
1296                      process_task = None
1297  
1298                  # Final transcription pass — only the tail not yet transcribed
1299                  if audio_file and audio_file.exists() and last_transcribed_sec < (time.time() - start_time - 1):
1300                      wav_path = await _extract_time_range_file(
1301                          str(audio_file), last_transcribed_sec
1302                      )
1303                      if wav_path:
1304                          try:
1305                              prompt = vocab_prompt
1306                              if transcript_parts:
1307                                  recent = transcript_parts[-1]
1308                                  prompt = vocab_prompt + ". " + recent if vocab_prompt else recent
1309  
1310                              result = await loop.run_in_executor(
1311                                  None,
1312                                  lambda: mlx_whisper.transcribe(
1313                                      wav_path,
1314                                      path_or_hf_repo=_whisper_repo,
1315                                      language="en",
1316                                      initial_prompt=prompt,
1317                                  ),
1318                              )
1319  
1320                              new_text = " ".join(
1321                                  s["text"].strip() for s in result["segments"]
1322                              ).strip()
1323                              if new_text:
1324                                  new_text = await _process_new_text(new_text)
1325                                  chunk_text = " " + new_text if transcript_parts else new_text
1326                                  await ws.send_json({"type": "transcript_chunk", "text": chunk_text})
1327                                  transcript_parts.append(new_text)
1328                                  _write_transcript_chunk(new_text, start_time)
1329                                  await _send_vocab_state()
1330                          except Exception as e:
1331                              print(f"[Whisper] Final transcription error: {e}")
1332                          finally:
1333                              try:
1334                                  os.unlink(wav_path)
1335                              except OSError:
1336                                  pass
1337  
1338                  # Send final state: all pinned DreamNodes from the session
1339                  full_text = " ".join(transcript_parts)
1340                  await ws.send_json({
1341                      "type": "stream_ended",
1342                      "full_text": full_text,
1343                      "pinned_dreamnodes": pinned_vocab,
1344                  })
1345                  session_id = None
1346  
1347          elif msg.type == aiohttp.WSMsgType.BINARY:
1348              cumulative_webm.extend(msg.data)
1349              if audio_file:
1350                  with open(audio_file, "ab") as f:
1351                      f.write(msg.data)
1352  
1353          elif msg.type in (aiohttp.WSMsgType.ERROR, aiohttp.WSMsgType.CLOSE):
1354              break
1355  
1356      if process_task:
1357          process_task.cancel()
1358  
1359      return ws
1360  
1361  
1362  # ============================================================
1363  # Audio upload (POST /upload/audio) — Whisper with vocabulary
1364  # ============================================================
1365  
1366  async def handle_audio_upload(request: web.Request) -> web.Response:
1367      reader = await request.multipart()
1368      field = await reader.next()
1369      if field is None or field.name != "file":
1370          return web.json_response({"error": "Expected 'file' field"}, status=400)
1371  
1372      filename = field.filename or "audio.webm"
1373      ext = Path(filename).suffix or ".webm"
1374  
1375      with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
1376          while True:
1377              chunk = await field.read_chunk()
1378              if chunk is None:
1379                  break
1380              f.write(chunk)
1381          tmp_path = f.name
1382  
1383      try:
1384          import mlx_whisper
1385          await _warm_up_whisper()
1386          vocab_prompt = _build_vocab_prompt()
1387  
1388          loop = asyncio.get_event_loop()
1389          result = await loop.run_in_executor(
1390              None,
1391              lambda: mlx_whisper.transcribe(
1392                  tmp_path,
1393                  path_or_hf_repo=_whisper_repo,
1394                  language="en",
1395                  initial_prompt=vocab_prompt,
1396              ),
1397          )
1398          text = " ".join(s["text"].strip() for s in result["segments"])
1399          if not text:
1400              return web.json_response({"error": "Whisper produced no output"}, status=500)
1401          return web.json_response({"text": text})
1402      finally:
1403          os.unlink(tmp_path)
1404  
1405  
1406  # ============================================================
1407  # Static file serving
1408  # ============================================================
1409  
1410  async def handle_index(request: web.Request) -> web.Response:
1411      return web.Response(
1412          text=request.app["injected_html"],
1413          content_type="text/html",
1414      )
1415  
1416  
1417  async def handle_static(request: web.Request) -> web.StreamResponse:
1418      rel_path = request.match_info.get("path", "")
1419      file_path = (AURYN_DIR / rel_path).resolve()
1420  
1421      if not str(file_path).startswith(str(AURYN_DIR)):
1422          raise web.HTTPForbidden()
1423      if not file_path.is_file():
1424          raise web.HTTPNotFound()
1425  
1426      return web.FileResponse(file_path)
1427  
1428  
1429  # ============================================================
1430  # Network: Tailscale, SSL, IP detection
1431  # ============================================================
1432  
1433  def get_local_ip() -> str:
1434      """Get the local LAN IP address."""
1435      try:
1436          s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
1437          s.connect(("8.8.8.8", 80))
1438          ip = s.getsockname()[0]
1439          s.close()
1440          return ip
1441      except Exception:
1442          return "127.0.0.1"
1443  
1444  
1445  def ensure_tailscale() -> str | None:
1446      """Ensure Tailscale is running. Returns Tailscale IP or None."""
1447      if not shutil.which("tailscale"):
1448          return None
1449  
1450      result = subprocess.run(
1451          ["tailscale", "ip", "-4"],
1452          capture_output=True, text=True, timeout=5,
1453      )
1454      if result.returncode == 0:
1455          ip = result.stdout.strip().split("\n")[0]
1456          print(f"[Tailscale] Connected: {ip}")
1457          return ip
1458  
1459      print("[Tailscale] Not connected, attempting 'tailscale up'...")
1460      result = subprocess.run(
1461          ["tailscale", "up"],
1462          capture_output=True, text=True, timeout=30,
1463      )
1464      if result.returncode != 0:
1465          print(f"[Tailscale] Failed: {result.stderr.strip()}")
1466          print("[Tailscale] Run 'sudo brew services start tailscale' manually if needed")
1467          return None
1468  
1469      result = subprocess.run(
1470          ["tailscale", "ip", "-4"],
1471          capture_output=True, text=True, timeout=5,
1472      )
1473      if result.returncode == 0:
1474          ip = result.stdout.strip().split("\n")[0]
1475          print(f"[Tailscale] Connected: {ip}")
1476          return ip
1477      return None
1478  
1479  
1480  def get_or_create_ssl_context(local_ip: str, tailscale_ip: str | None) -> ssl.SSLContext | None:
1481      """Generate self-signed cert with SANs for all access IPs."""
1482      cert_file = AURYN_DIR / "cert.pem"
1483      key_file = AURYN_DIR / "key.pem"
1484  
1485      sans = [f"IP:127.0.0.1", f"IP:{local_ip}", "DNS:localhost"]
1486      if tailscale_ip:
1487          sans.append(f"IP:{tailscale_ip}")
1488  
1489      regen = False
1490      if not cert_file.exists() or not key_file.exists():
1491          regen = True
1492      else:
1493          try:
1494              result = subprocess.run(
1495                  ["openssl", "x509", "-in", str(cert_file), "-noout", "-text"],
1496                  capture_output=True, text=True,
1497              )
1498              cert_text = result.stdout
1499              if tailscale_ip and tailscale_ip not in cert_text:
1500                  regen = True
1501              if local_ip not in cert_text:
1502                  regen = True
1503          except Exception:
1504              regen = True
1505  
1506      if regen:
1507          san_str = ",".join(sans)
1508          try:
1509              subprocess.run([
1510                  "openssl", "req", "-x509", "-newkey", "rsa:2048",
1511                  "-keyout", str(key_file), "-out", str(cert_file),
1512                  "-days", "365", "-nodes",
1513                  "-subj", "/CN=AURYN",
1514                  "-addext", f"subjectAltName={san_str}",
1515              ], check=True, capture_output=True)
1516              print(f"[SSL] Generated certificate (SANs: {san_str})")
1517          except (subprocess.CalledProcessError, FileNotFoundError) as e:
1518              print(f"[SSL] Could not generate certificate: {e}")
1519              return None
1520  
1521      ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
1522      ctx.load_cert_chain(str(cert_file), str(key_file))
1523      return ctx
1524  
1525  
1526  async def handle_install_cert(request: web.Request) -> web.Response:
1527      """Serve the CA cert for iOS installation."""
1528      cert_file = AURYN_DIR / "cert.pem"
1529      if not cert_file.exists():
1530          raise web.HTTPNotFound()
1531      return web.Response(
1532          body=cert_file.read_bytes(),
1533          content_type="application/x-x509-ca-cert",
1534          headers={"Content-Disposition": "attachment; filename=auryn-ca.pem"},
1535      )
1536  
1537  
1538  # ============================================================
1539  # CLI & App setup
1540  # ============================================================
1541  
1542  async def check_ollama(url: str) -> bool:
1543      try:
1544          async with aiohttp.ClientSession() as session:
1545              async with session.get(url, timeout=aiohttp.ClientTimeout(total=3)) as resp:
1546                  return resp.status == 200
1547      except Exception:
1548          return False
1549  
1550  
1551  def load_interbrain_api_key() -> str:
1552      """Load Claude API key from InterBrain's Obsidian plugin settings."""
1553      paths = [
1554          Path.home() / "RealDealVault" / ".obsidian" / "plugins" / "interbrain" / "data.json",
1555      ]
1556      for p in paths:
1557          if p.exists():
1558              try:
1559                  data = json.loads(p.read_text())
1560                  key = data.get("claudeApiKey", "")
1561                  if key:
1562                      print(f"[Claude] API key loaded from InterBrain settings")
1563                      return key
1564              except Exception:
1565                  pass
1566      key = os.environ.get("ANTHROPIC_API_KEY", "")
1567      if key:
1568          print("[Claude] API key loaded from ANTHROPIC_API_KEY env var")
1569      return key
1570  
1571  
1572  def get_ollama_models(ollama_url: str) -> list[str]:
1573      """Fetch available Ollama model names."""
1574      import urllib.request
1575      try:
1576          with urllib.request.urlopen(f"{ollama_url}/api/tags", timeout=3) as resp:
1577              data = json.loads(resp.read())
1578              return [m["name"] for m in data.get("models", [])
1579                      if not m["name"].startswith("nomic")]
1580      except Exception:
1581          return []
1582  
1583  
1584  # ============================================================
1585  # Knowledge Gardening — route insights to DreamNode READMEs
1586  # ============================================================
1587  
1588  GARDEN_SYSTEM_PROMPT = """You are AURYN's knowledge gardener. Your job: read a conversation and its loaded DreamNode context, then surgically update the relevant READMEs with new insights.
1589  
1590  RULES:
1591  - Only add genuinely new insights not already present in the README
1592  - Be surgical — edit specific sections, don't rewrite entire files
1593  - Preserve the existing voice and style of each README
1594  - If a README has no relevant section for the insight, add a minimal new section
1595  - Signal over noise: concrete insights, structural decisions, distilled wisdom. NOT stream of consciousness or redundant reformulations
1596  - Output ONLY valid JSON — no markdown fences, no commentary before or after
1597  
1598  OUTPUT FORMAT — a JSON array of edits:
1599  [
1600    {
1601      "dreamnode": "Title of DreamNode",
1602      "path": "/absolute/path/to/README.md",
1603      "reason": "Brief explanation of what insight is being added",
1604      "edits": [
1605        {
1606          "old": "exact existing text to find and replace",
1607          "new": "the replacement text with the new insight woven in"
1608        }
1609      ]
1610    }
1611  ]
1612  
1613  If no edits are needed, return an empty array: []
1614  
1615  Each "old" string must be an EXACT substring of the current README content. Keep edits minimal — include just enough surrounding context in "old" to uniquely identify the location."""
1616  
1617  
1618  async def _ai_bridge_inference(messages: list[dict], port: int = 27182) -> str:
1619      """Send an inference request to the AI bridge WebSocket and collect the full response.
1620  
1621      Uses raw sockets because the InterBrain's WebSocket server computes
1622      Sec-WebSocket-Accept incorrectly — browsers don't care, but Python
1623      WebSocket libraries reject the handshake. Raw TCP bypasses this.
1624      """
1625      import base64 as b64
1626  
1627      request_id = str(uuid.uuid4())
1628      chunks: list[str] = []
1629  
1630      reader, writer = await asyncio.open_connection("localhost", port)
1631  
1632      # WebSocket handshake
1633      ws_key = b64.b64encode(os.urandom(16)).decode()
1634      handshake = (
1635          "GET / HTTP/1.1\r\n"
1636          "Host: localhost:{}\r\n"
1637          "Upgrade: websocket\r\n"
1638          "Connection: Upgrade\r\n"
1639          "Sec-WebSocket-Key: {}\r\n"
1640          "Sec-WebSocket-Version: 13\r\n"
1641          "\r\n"
1642      ).format(port, ws_key)
1643      writer.write(handshake.encode())
1644      await writer.drain()
1645  
1646      # Read handshake response (skip validation of Sec-WebSocket-Accept)
1647      resp = b""
1648      while b"\r\n\r\n" not in resp:
1649          resp += await reader.read(4096)
1650      if b"101" not in resp:
1651          writer.close()
1652          raise RuntimeError(f"AI bridge handshake failed: {resp[:200].decode()}")
1653  
1654      # Any data after the headers is the start of the first frame
1655      after_headers = resp.split(b"\r\n\r\n", 1)[1]
1656      buffer = bytearray(after_headers)
1657  
1658      async def _read_frame() -> str:
1659          """Read one WebSocket text frame."""
1660          nonlocal buffer
1661          # Ensure we have at least 2 bytes for the header
1662          while len(buffer) < 2:
1663              buffer.extend(await reader.read(4096))
1664  
1665          b0, b1 = buffer[0], buffer[1]
1666          masked = bool(b1 & 0x80)
1667          length = b1 & 0x7F
1668          offset = 2
1669  
1670          if length == 126:
1671              while len(buffer) < 4:
1672                  buffer.extend(await reader.read(4096))
1673              length = struct.unpack(">H", buffer[2:4])[0]
1674              offset = 4
1675          elif length == 127:
1676              while len(buffer) < 10:
1677                  buffer.extend(await reader.read(4096))
1678              length = struct.unpack(">Q", buffer[2:10])[0]
1679              offset = 10
1680  
1681          if masked:
1682              offset += 4  # skip mask key (server shouldn't mask, but just in case)
1683  
1684          total = offset + length
1685          while len(buffer) < total:
1686              buffer.extend(await reader.read(4096))
1687  
1688          payload = buffer[offset:total]
1689          if masked:
1690              mask = buffer[offset - 4:offset]
1691              payload = bytes(b ^ mask[i % 4] for i, b in enumerate(payload))
1692  
1693          buffer = buffer[total:]
1694          return payload.decode("utf-8", errors="replace")
1695  
1696      def _make_frame(text: str) -> bytes:
1697          """Create a masked WebSocket text frame (client must mask)."""
1698          payload = text.encode("utf-8")
1699          frame = bytearray()
1700          frame.append(0x81)  # FIN + text opcode
1701  
1702          mask_key = os.urandom(4)
1703          length = len(payload)
1704          if length < 126:
1705              frame.append(0x80 | length)  # masked
1706          elif length < 65536:
1707              frame.append(0x80 | 126)
1708              frame.extend(struct.pack(">H", length))
1709          else:
1710              frame.append(0x80 | 127)
1711              frame.extend(struct.pack(">Q", length))
1712  
1713          frame.extend(mask_key)
1714          frame.extend(bytes(b ^ mask_key[i % 4] for i, b in enumerate(payload)))
1715          return bytes(frame)
1716  
1717      try:
1718          # Wait for ai-bridge-ready
1719          ready_msg = await asyncio.wait_for(_read_frame(), timeout=10)
1720          data = json.loads(ready_msg)
1721          if data.get("type") != "ai-bridge-ready":
1722              raise RuntimeError(f"Expected ai-bridge-ready, got: {data}")
1723  
1724          # Send inference request
1725          req = json.dumps({
1726              "type": "ai-inference-stream-request",
1727              "requestId": request_id,
1728              "messages": messages,
1729              "complexity": "standard",
1730          })
1731          writer.write(_make_frame(req))
1732          await writer.drain()
1733  
1734          # Collect streamed response
1735          while True:
1736              raw = await asyncio.wait_for(_read_frame(), timeout=120)
1737              data = json.loads(raw)
1738              if data.get("requestId") != request_id:
1739                  continue
1740              if data["type"] == "ai-inference-stream-chunk":
1741                  chunks.append(data["chunk"])
1742              elif data["type"] == "ai-inference-stream-done":
1743                  break
1744              elif data["type"] == "ai-inference-stream-error":
1745                  raise RuntimeError(f"AI bridge error: {data.get('error')}")
1746  
1747      finally:
1748          writer.close()
1749  
1750      return "".join(chunks)
1751  
1752  
1753  def _apply_garden_edits(edits: list[dict]) -> list[dict]:
1754      """Apply edits to README files. Returns list of results."""
1755      results = []
1756      for edit_group in edits:
1757          path = edit_group.get("path", "")
1758          title = edit_group.get("dreamnode", "")
1759          reason = edit_group.get("reason", "")
1760  
1761          if not path or not Path(path).exists():
1762              results.append({"dreamnode": title, "status": "error", "reason": f"File not found: {path}"})
1763              continue
1764  
1765          readme_path = Path(path)
1766          if readme_path.is_dir():
1767              readme_path = readme_path / "README.md"
1768  
1769          if not readme_path.exists():
1770              results.append({"dreamnode": title, "status": "error", "reason": f"README not found: {readme_path}"})
1771              continue
1772  
1773          content = readme_path.read_text(encoding="utf-8")
1774          modified = False
1775  
1776          for edit in edit_group.get("edits", []):
1777              old = edit.get("old", "")
1778              new = edit.get("new", "")
1779              if not old or old == new:
1780                  continue
1781              if old in content:
1782                  content = content.replace(old, new, 1)
1783                  modified = True
1784              else:
1785                  results.append({
1786                      "dreamnode": title, "status": "warning",
1787                      "reason": f"Could not find text to replace (skipped): {old[:60]}...",
1788                  })
1789  
1790          if modified:
1791              readme_path.write_text(content, encoding="utf-8")
1792              # Git commit
1793              try:
1794                  subprocess.run(
1795                      ["git", "add", str(readme_path)],
1796                      cwd=readme_path.parent, capture_output=True, timeout=10,
1797                  )
1798                  subprocess.run(
1799                      ["git", "commit", "-m", f"Garden: {reason}\n\nCo-Authored-By: AURYN <auryn@dreamos.local>"],
1800                      cwd=readme_path.parent, capture_output=True, timeout=10,
1801                  )
1802              except Exception as e:
1803                  print(f"[Garden] Git commit failed for {title}: {e}")
1804  
1805              results.append({"dreamnode": title, "status": "ok", "reason": reason})
1806  
1807      return results
1808  
1809  
1810  async def ws_garden(request: web.Request) -> web.WebSocketResponse:
1811      """WebSocket endpoint for knowledge gardening.
1812  
1813      Expects a message: {
1814          type: "garden",
1815          conversation: [{role, content}, ...],
1816          context: [{title, id, path}, ...]
1817      }
1818  
1819      Reads READMEs for each context node, sends everything to AI bridge,
1820      applies returned edits, reports results.
1821      """
1822      ws = web.WebSocketResponse()
1823      await ws.prepare(request)
1824  
1825      async for msg in ws:
1826          if msg.type != aiohttp.WSMsgType.TEXT:
1827              continue
1828          try:
1829              data = json.loads(msg.data)
1830          except json.JSONDecodeError:
1831              continue
1832  
1833          if data.get("type") != "garden":
1834              continue
1835  
1836          conversation = data.get("conversation", [])
1837          context_nodes = data.get("context", [])
1838  
1839          if not conversation:
1840              await ws.send_json({"type": "garden_result", "status": "empty", "message": "No conversation to garden."})
1841              continue
1842  
1843          await ws.send_json({"type": "garden_status", "message": "Reading DreamNode context..."})
1844  
1845          # Build context block with READMEs
1846          context_block = ""
1847          for node in context_nodes:
1848              path = node.get("path", "")
1849              title = node.get("title", "")
1850              node_id = node.get("id", "")
1851              readme_path = Path(path) / "README.md" if path else None
1852  
1853              if readme_path and readme_path.exists():
1854                  readme_content = readme_path.read_text(encoding="utf-8")
1855                  context_block += f"\n\n---\n### DreamNode: {title}\n- ID: {node_id}\n- Path: {path}\n- README path: {readme_path}\n\n{readme_content}"
1856              else:
1857                  context_block += f"\n\n---\n### DreamNode: {title}\n- ID: {node_id}\n- Path: {path}\n- README: (not found)\n"
1858  
1859          # Build the messages for the AI bridge
1860          garden_messages = [
1861              {"role": "system", "content": GARDEN_SYSTEM_PROMPT},
1862              {"role": "user", "content": f"""Here is the loaded DreamNode context (these are the READMEs you may edit):
1863  
1864  {context_block}
1865  
1866  ---
1867  
1868  Here is the conversation to extract insights from:
1869  
1870  {json.dumps(conversation, indent=2)}
1871  
1872  Analyze the conversation. For each DreamNode whose README should be updated with new insights from this conversation, produce the surgical edits. Return ONLY the JSON array."""},
1873          ]
1874  
1875          await ws.send_json({"type": "garden_status", "message": "Routing insights..."})
1876  
1877          try:
1878              response = await _ai_bridge_inference(garden_messages)
1879  
1880              # Parse the JSON response — strip markdown fences if present
1881              cleaned = response.strip()
1882              if cleaned.startswith("```"):
1883                  cleaned = re.sub(r"^```\w*\n?", "", cleaned)
1884                  cleaned = re.sub(r"\n?```$", "", cleaned)
1885                  cleaned = cleaned.strip()
1886  
1887              edits = json.loads(cleaned)
1888  
1889              if not edits:
1890                  await ws.send_json({
1891                      "type": "garden_result", "status": "no_changes",
1892                      "message": "No new insights to route — READMEs are up to date.",
1893                  })
1894                  continue
1895  
1896              await ws.send_json({"type": "garden_status", "message": f"Applying {len(edits)} edit(s)..."})
1897  
1898              results = _apply_garden_edits(edits)
1899              await ws.send_json({
1900                  "type": "garden_result", "status": "done",
1901                  "edits": results,
1902                  "message": f"Gardened {sum(1 for r in results if r['status'] == 'ok')} DreamNode(s).",
1903              })
1904  
1905          except json.JSONDecodeError as e:
1906              await ws.send_json({
1907                  "type": "garden_result", "status": "error",
1908                  "message": f"Failed to parse AI response as JSON: {e}\n\nRaw response:\n{response[:500]}",
1909              })
1910          except Exception as e:
1911              await ws.send_json({
1912                  "type": "garden_result", "status": "error",
1913                  "message": f"Garden error: {e}",
1914              })
1915  
1916      return ws
1917  
1918  
1919  # ============================================================
1920  # Claude Code Sub-Agent
1921  # ============================================================
1922  
1923  async def run_claude_code(
1924      prompt: str,
1925      session_id: str | None = None,
1926      resume: bool = False,
1927      model: str = "sonnet",
1928      max_budget: float = 0.50,
1929      cwd: str | None = None,
1930      allowed_tools: str | None = None,
1931  ) -> dict:
1932      """Run Claude Code as a sub-agent in headless mode.
1933  
1934      Returns parsed JSON output with result, cost, session_id, etc.
1935      """
1936      cmd = [
1937          "claude", "-p",
1938          "--output-format", "json",
1939          "--dangerously-skip-permissions",
1940          "--model", model,
1941          "--max-budget-usd", str(max_budget),
1942      ]
1943  
1944      if resume and session_id:
1945          cmd.extend(["--resume", session_id])
1946      elif session_id:
1947          cmd.extend(["--session-id", session_id])
1948  
1949      if allowed_tools:
1950          for tool in allowed_tools.split():
1951              cmd.extend(["--allowedTools", tool])
1952  
1953      # Pass prompt via stdin to avoid shell argument length limits
1954  
1955      env = os.environ.copy()
1956      env.pop("CLAUDECODE", None)  # Allow nested sessions
1957  
1958      proc = await asyncio.create_subprocess_exec(
1959          *cmd,
1960          stdin=asyncio.subprocess.PIPE,
1961          stdout=asyncio.subprocess.PIPE,
1962          stderr=asyncio.subprocess.PIPE,
1963          cwd=cwd or str(AURYN_DIR),
1964          env=env,
1965      )
1966      stdout, stderr = await proc.communicate(input=prompt.encode("utf-8"))
1967  
1968      output = stdout.decode("utf-8", errors="replace")
1969      try:
1970          parsed = json.loads(output)
1971          # Extract the result from the JSON array
1972          if isinstance(parsed, list):
1973              for item in parsed:
1974                  if item.get("type") == "result":
1975                      return item
1976              return {"type": "result", "raw": parsed}
1977          return parsed
1978      except json.JSONDecodeError:
1979          return {
1980              "type": "result", "is_error": True,
1981              "result": output or stderr.decode("utf-8", errors="replace"),
1982          }
1983  
1984  
1985  # ============================================================
1986  # Server Reload
1987  # ============================================================
1988  
1989  async def handle_reload(request: web.Request) -> web.Response:
1990      """Restart the AURYN server process to pick up code changes.
1991  
1992      Spawns a detached shell that waits for this process to die,
1993      then starts a fresh server with the same args.
1994      """
1995      import sys
1996  
1997      async def _do_restart():
1998          await asyncio.sleep(0.3)  # Let the HTTP response send
1999          pid = os.getpid()
2000          cmd_args = " ".join(sys.argv[1:])
2001          script = f'while kill -0 {pid} 2>/dev/null; do sleep 0.2; done; cd "{AURYN_DIR}" && uv run aurin.py {cmd_args} &'
2002          subprocess.Popen(["bash", "-c", script], start_new_session=True)
2003          os._exit(0)
2004  
2005      asyncio.create_task(_do_restart())
2006      return web.Response(text="Restarting...", content_type="text/plain")
2007  
2008  
2009  # ============================================================
2010  # Claude Code WebSocket — /do command handler
2011  # ============================================================
2012  
2013  async def ws_claude(request: web.Request) -> web.WebSocketResponse:
2014      """WebSocket endpoint for Claude Code sub-agent.
2015  
2016      Expects: {
2017          type: "claude_code",
2018          prompt: "what to do",
2019          context: [{title, id, path}, ...],
2020          conversation: [{role, content}, ...]
2021      }
2022  
2023      Spawns Claude Code in headless mode, streams status back.
2024      Supports session continuity via resume.
2025      """
2026      ws = web.WebSocketResponse()
2027      await ws.prepare(request)
2028  
2029      active_session_id: str | None = None
2030  
2031      async for msg in ws:
2032          if msg.type != aiohttp.WSMsgType.TEXT:
2033              continue
2034          try:
2035              data = json.loads(msg.data)
2036          except json.JSONDecodeError:
2037              continue
2038  
2039          if data.get("type") != "claude_code":
2040              continue
2041  
2042          prompt = data.get("prompt", "")
2043          context_nodes = data.get("context", [])
2044          conversation = data.get("conversation", [])
2045  
2046          if not prompt:
2047              await ws.send_json({"type": "claude_status", "message": "No prompt provided."})
2048              continue
2049  
2050          # Build context for Claude Code
2051          context_parts = []
2052          for node in context_nodes:
2053              path = node.get("path", "")
2054              title = node.get("title", "")
2055              readme_path = Path(path) / "README.md" if path and Path(path).is_dir() else None
2056              if readme_path and readme_path.exists():
2057                  content = readme_path.read_text(encoding="utf-8")
2058                  context_parts.append(f"## DreamNode: {title}\nPath: {path}\n\n{content}")
2059  
2060          # Include recent conversation for context
2061          conv_text = ""
2062          if conversation:
2063              recent = conversation[-10:]  # last 10 messages
2064              conv_text = "\n".join(f"[{m['role']}]: {m['content']}" for m in recent)
2065  
2066          full_prompt = prompt
2067          if context_parts:
2068              full_prompt = (
2069                  "Context — these DreamNodes are relevant:\n\n"
2070                  + "\n\n---\n\n".join(context_parts)
2071                  + "\n\n---\n\nRecent conversation:\n" + conv_text
2072                  + "\n\n---\n\nTask: " + prompt
2073              )
2074  
2075          await ws.send_json({"type": "claude_status", "message": f"Spawning Claude Code..."})
2076  
2077          try:
2078              result = await run_claude_code(
2079                  prompt=full_prompt,
2080                  session_id=active_session_id,
2081                  resume=bool(active_session_id),
2082                  model="sonnet",
2083                  max_budget=1.00,
2084                  cwd=str(AURYN_DIR),
2085                  allowed_tools="Bash Read Edit Write Grep Glob",
2086              )
2087  
2088              active_session_id = result.get("session_id", active_session_id)
2089              response_text = result.get("result", "")
2090              cost = result.get("total_cost_usd", 0)
2091              is_error = result.get("is_error", False)
2092  
2093              await ws.send_json({
2094                  "type": "claude_result",
2095                  "status": "error" if is_error else "done",
2096                  "message": response_text,
2097                  "session_id": active_session_id,
2098                  "cost": cost,
2099              })
2100  
2101          except Exception as e:
2102              await ws.send_json({
2103                  "type": "claude_result",
2104                  "status": "error",
2105                  "message": str(e),
2106              })
2107  
2108      return ws
2109  
2110  
2111  def create_app(
2112      host: str, port: int, model: str, ollama_url: str,
2113      claude_api_key: str = "", models: list[str] | None = None,
2114  ) -> web.Application:
2115      index_path = AURYN_DIR / "index.html"
2116      raw_html = index_path.read_text()
2117      injected_html = build_injected_index(raw_html, models=models)
2118  
2119      app = web.Application()
2120      app["injected_html"] = injected_html
2121      app["default_model"] = model
2122      app["ollama_url"] = ollama_url
2123      app["claude_api_key"] = claude_api_key
2124  
2125      app.router.add_get("/ws", ws_inference)
2126      app.router.add_get("/ws/transcribe", ws_transcribe)
2127      app.router.add_get("/ws/garden", ws_garden)
2128      app.router.add_get("/ws/claude", ws_claude)
2129      app.router.add_post("/upload/audio", handle_audio_upload)
2130      app.router.add_post("/reload", handle_reload)
2131      app.router.add_get("/install-cert", handle_install_cert)
2132      app.router.add_get("/", handle_index)
2133      app.router.add_get("/{path:.*}", handle_static)
2134  
2135      return app
2136  
2137  
2138  async def serve(args: argparse.Namespace) -> None:
2139      ollama_ok = await check_ollama(args.ollama_url)
2140      ollama_status = "\u2713" if ollama_ok else "\u2717 (not reachable)"
2141  
2142      local_ip = get_local_ip()
2143      tailscale_ip = ensure_tailscale()
2144      ssl_ctx = get_or_create_ssl_context(local_ip, tailscale_ip)
2145      protocol = "https" if ssl_ctx else "http"
2146      ws_protocol = "wss" if ssl_ctx else "ws"
2147  
2148      print(f"\nAURYN serving at {protocol}://{args.host}:{args.port}")
2149      print(f"  Local:     {protocol}://localhost:{args.port}")
2150      print(f"  LAN:       {protocol}://{local_ip}:{args.port}")
2151      if tailscale_ip:
2152          print(f"  Tailscale: {protocol}://{tailscale_ip}:{args.port}")
2153      print(f"  WebSocket: {ws_protocol}://{args.host}:{args.port}/ws")
2154      print(f"  Ollama:    {args.ollama_url} {ollama_status}")
2155      if ssl_ctx:
2156          print(f"  SSL:       Self-signed certificate")
2157          cert_url = f"{protocol}://{tailscale_ip or local_ip}:{args.port}/install-cert"
2158          print(f"  Install:   Open {cert_url} on iPhone")
2159          print(f"             Then: Settings > General > VPN & Device Management > Install")
2160          print(f"             Then: Settings > General > About > Certificate Trust Settings > Enable")
2161      claude_api_key = load_interbrain_api_key()
2162  
2163      ollama_models = get_ollama_models(args.ollama_url)
2164      claude_models = []
2165      if claude_api_key:
2166          claude_models = ["claude-sonnet-4-6", "claude-haiku-4-5-20251001"]
2167          print(f"  Claude:    {len(claude_models)} models available")
2168      else:
2169          print("  Claude:    No API key found")
2170      all_models = []
2171      if args.model not in ollama_models and args.model not in claude_models:
2172          all_models.append(args.model)
2173      all_models.extend(ollama_models)
2174      all_models.extend(claude_models)
2175      seen = set()
2176      models = []
2177      for m in all_models:
2178          if m not in seen:
2179              seen.add(m)
2180              models.append(m)
2181  
2182      print(f"  Recordings:  {RECORDINGS_DIR}/")
2183      print(f"  Transcripts: {TRANSCRIPTS_DIR}/")
2184      print()
2185  
2186      app = create_app(
2187          args.host, args.port, args.model, args.ollama_url,
2188          claude_api_key=claude_api_key, models=models,
2189      )
2190      runner = web.AppRunner(app)
2191      await runner.setup()
2192      site = web.TCPSite(runner, args.host, args.port, ssl_context=ssl_ctx)
2193      await site.start()
2194  
2195      try:
2196          await asyncio.Event().wait()
2197      except (KeyboardInterrupt, asyncio.CancelledError):
2198          pass
2199      finally:
2200          await runner.cleanup()
2201  
2202  
2203  async def _run_garden_cli(args: argparse.Namespace) -> None:
2204      """CLI entry point for knowledge gardening."""
2205      import sys
2206  
2207      # Read conversation text
2208      if args.input:
2209          text = Path(args.input).read_text() if Path(args.input).exists() else args.input
2210      else:
2211          text = sys.stdin.read()
2212  
2213      if not text.strip():
2214          print("No input provided. Pipe conversation text or use --input.")
2215          return
2216  
2217      # Parse context nodes
2218      context_nodes = []
2219      for spec in (args.context or []):
2220          if ":" in spec:
2221              title, path = spec.split(":", 1)
2222              context_nodes.append({"title": title, "id": "", "path": path})
2223          else:
2224              context_nodes.append({"title": spec, "id": "", "path": str(VAULT_DIR / spec)})
2225  
2226      # Build context block
2227      context_block = ""
2228      for node in context_nodes:
2229          path = node["path"]
2230          title = node["title"]
2231          readme_path = Path(path) / "README.md" if Path(path).is_dir() else Path(path)
2232          if readme_path.exists():
2233              content = readme_path.read_text(encoding="utf-8")
2234              context_block += f"\n\n---\n### DreamNode: {title}\n- Path: {path}\n- README path: {readme_path}\n\n{content}"
2235  
2236      messages = [
2237          {"role": "system", "content": GARDEN_SYSTEM_PROMPT},
2238          {"role": "user", "content": f"""DreamNode context:\n{context_block}\n\n---\n\nConversation:\n{text}\n\nProduce the JSON array of edits."""},
2239      ]
2240  
2241      print("[Garden] Sending to AI bridge...")
2242      try:
2243          response = await _ai_bridge_inference(messages)
2244          cleaned = response.strip()
2245          if cleaned.startswith("```"):
2246              cleaned = re.sub(r"^```\w*\n?", "", cleaned)
2247              cleaned = re.sub(r"\n?```$", "", cleaned)
2248              cleaned = cleaned.strip()
2249  
2250          edits = json.loads(cleaned)
2251          if not edits:
2252              print("[Garden] No edits needed — READMEs are up to date.")
2253              return
2254  
2255          print(f"[Garden] Applying {len(edits)} edit group(s)...")
2256          results = _apply_garden_edits(edits)
2257          for r in results:
2258              status = r["status"].upper()
2259              print(f"  [{status}] {r['dreamnode']}: {r['reason']}")
2260  
2261      except Exception as e:
2262          print(f"[Garden] Error: {e}")
2263  
2264  
2265  async def _run_claude_cli(args: argparse.Namespace) -> None:
2266      """CLI entry point for Claude Code sub-agent."""
2267      result = await run_claude_code(
2268          prompt=args.prompt,
2269          session_id=args.session_id,
2270          resume=args.resume,
2271          model=args.model,
2272          max_budget=args.budget,
2273          cwd=args.cwd,
2274      )
2275  
2276      if result.get("is_error"):
2277          print(f"Error: {result.get('result', 'Unknown error')}")
2278      else:
2279          print(result.get("result", json.dumps(result, indent=2)))
2280  
2281      if result.get("session_id"):
2282          print(f"\nSession ID: {result['session_id']}")
2283      if result.get("total_cost_usd"):
2284          print(f"Cost: ${result['total_cost_usd']:.4f}")
2285  
2286  
2287  def main() -> None:
2288      parser = argparse.ArgumentParser(description="AURYN self-serving server + context provider")
2289      sub = parser.add_subparsers(dest="command")
2290  
2291      # --- serve ---
2292      serve_parser = sub.add_parser("serve", help="Start the AURYN server")
2293      serve_parser.add_argument("--port", type=int, default=8080)
2294      serve_parser.add_argument("--host", default="0.0.0.0")
2295      serve_parser.add_argument("--model", default="qwen3:32b")
2296      serve_parser.add_argument("--ollama-url", default="http://localhost:11434")
2297      serve_parser.add_argument("--no-ssl", action="store_true", help="Disable HTTPS")
2298      serve_parser.add_argument("--transcription-model", default="base",
2299                                choices=["tiny", "base", "small", "medium", "large", "turbo"],
2300                                help="Whisper model size (tiny=fastest, turbo=best speed/quality)")
2301  
2302      # --- context ---
2303      ctx_parser = sub.add_parser("context", help="Fast context provider (Tier 1 + Tier 2)")
2304      ctx_parser.add_argument("input", help="Text string or path to file")
2305      ctx_parser.add_argument("--top", type=int, default=15, help="Max results (default: 15)")
2306      ctx_parser.add_argument("--json", dest="json_output", action="store_true",
2307                              help="Output as JSON")
2308      ctx_parser.add_argument("--rebuild", action="store_true",
2309                              help="Force rebuild index before searching")
2310  
2311      # --- index ---
2312      idx_parser = sub.add_parser("index", help="Build/rebuild the context index")
2313      idx_parser.add_argument("--force", action="store_true", help="Force rebuild")
2314  
2315      # --- garden ---
2316      garden_parser = sub.add_parser("garden", help="Knowledge garden: route insights to DreamNode READMEs")
2317      garden_parser.add_argument("--context", nargs="+", metavar="TITLE:PATH",
2318                                 help="DreamNode context as title:path pairs")
2319      garden_parser.add_argument("--input", help="Conversation text or file path (default: stdin)")
2320  
2321      # --- claude ---
2322      claude_parser = sub.add_parser("claude", help="Run Claude Code as sub-agent")
2323      claude_parser.add_argument("prompt", help="Prompt for Claude Code")
2324      claude_parser.add_argument("--session-id", help="Session ID for continuity")
2325      claude_parser.add_argument("--resume", action="store_true", help="Resume previous session")
2326      claude_parser.add_argument("--model", default="sonnet", help="Model (default: sonnet)")
2327      claude_parser.add_argument("--budget", type=float, default=0.50, help="Max budget USD")
2328      claude_parser.add_argument("--cwd", help="Working directory")
2329  
2330      args = parser.parse_args()
2331  
2332      if args.command == "serve":
2333          if getattr(args, "no_ssl", False):
2334              global get_or_create_ssl_context
2335              get_or_create_ssl_context = lambda *a, **kw: None
2336          global _whisper_model_size, _whisper_repo
2337          _whisper_model_size = getattr(args, "transcription_model", "base")
2338          _whisper_repo = _MLX_WHISPER_MODELS.get(_whisper_model_size, _MLX_WHISPER_MODELS["base"])
2339          asyncio.run(serve(args))
2340      elif args.command == "context":
2341          run_context(args)
2342      elif args.command == "index":
2343          run_index(args)
2344      elif args.command == "garden":
2345          asyncio.run(_run_garden_cli(args))
2346      elif args.command == "claude":
2347          asyncio.run(_run_claude_cli(args))
2348      else:
2349          parser.print_help()
2350  
2351  
2352  if __name__ == "__main__":
2353      main()