# aurin.py
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "aiohttp>=3.9",
#     "mlx-whisper>=0.4",
# ]
# ///
"""
AURYN self-serving server + fast context provider.

Usage:
  uv run aurin.py serve [--port 8080] [--host 0.0.0.0] [--model qwen3:32b]
  uv run aurin.py context <file_or_text> [--top N] [--json] [--rebuild]
  uv run aurin.py index [--force]
"""

import argparse
import asyncio
import json
import math
import os
import pickle
import re
import shutil
import socket
import ssl
import struct
import subprocess
import tempfile
import time
import uuid
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path

import aiohttp
from aiohttp import web

# Directory layout: this script lives one level inside the vault.
AURYN_DIR = Path(__file__).parent.resolve()
RECORDINGS_DIR = AURYN_DIR / "recordings"
TRANSCRIPTS_DIR = AURYN_DIR / "transcripts"
VAULT_DIR = AURYN_DIR.parent  # RealDealVault
# The pickled search index is cached in the user's home, not the vault.
INDEX_DIR = Path.home() / ".auryn"
INDEX_FILE = INDEX_DIR / "context-index.pkl"


# ============================================================
# Context Provider — Tier 1 (Vocabulary) + Tier 2 (BM25)
# ============================================================

# BM25 parameters: k1 = term-frequency saturation, b = length normalization.
BM25_K1 = 1.5
BM25_B = 0.75


def _tokenize(text: str) -> list[str]:
    """Lowercase, strip punctuation, split into tokens."""
    text = text.lower()
    # Keep alphanumeric, hyphens within words, and unicode letters
    text = re.sub(r"[^\w\s-]", " ", text)
    text = re.sub(r"\s+", " ", text)
    # Single-character tokens are dropped as noise.
    return [w for w in text.split() if len(w) >= 2]


def _split_camel(name: str) -> list[str]:
    """Split camelCase/PascalCase into words. 'InterBrain' -> ['inter', 'brain']"""
    # Two passes: lower->Upper boundaries, then ACRONYMWord boundaries.
    parts = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    parts = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", parts)
    return parts.lower().split()


def discover_nodes() -> list[dict]:
    """Find all DreamNodes in the vault by scanning for .udd files.

    Returns a list of dicts with keys: uuid, title, type, folder, path,
    radicle_id, readme. Unreadable or malformed .udd files are skipped.
    """
    nodes = []
    for udd_path in VAULT_DIR.rglob("*.udd"):
        # Only top-level vault entries (maxdepth 2)
        rel = udd_path.relative_to(VAULT_DIR)
        if len(rel.parts) > 2:
            continue
        try:
            data = json.loads(udd_path.read_text())
            node_dir = udd_path.parent
            readme_path = node_dir / "README.md"
            readme = ""
            if readme_path.exists():
                readme = readme_path.read_text(errors="replace")

            nodes.append({
                "uuid": data.get("uuid", ""),
                "title": data.get("title", node_dir.name),
                "type": data.get("type", "dream"),
                "folder": node_dir.name,
                "path": str(node_dir),
                "radicle_id": data.get("radicleId", ""),
                "readme": readme,
            })
        except (json.JSONDecodeError, OSError):
            continue
    return nodes


def build_index(nodes: list[dict] | None = None) -> dict:
    """Build the Tier 1 + Tier 2 index from scratch.

    Tier 1 maps normalized title/folder phrases to node entries for exact
    vocabulary matching; Tier 2 precomputes BM25 statistics (idf, per-doc
    term frequencies, doc lengths) over title+README text.
    """
    if nodes is None:
        nodes = discover_nodes()

    t0 = time.time()

    # --- Tier 1: Vocabulary lookup ---
    # Map normalized forms -> node info
    vocab: dict[str, list[dict]] = defaultdict(list)  # lowercase phrase -> [{uuid, title, ...}]

    for node in nodes:
        title = node["title"]
        folder = node["folder"]
        uid = node["uuid"]
        entry = {"uuid": uid, "title": title, "type": node["type"],
                 "path": node["path"], "radicle_id": node["radicle_id"]}

        # Exact title (lowered)
        vocab[title.lower()].append(entry)

        # Folder name (lowered)
        folder_lower = folder.lower()
        if folder_lower != title.lower():
            vocab[folder_lower].append(entry)

        # CamelCase-split folder words joined with spaces
        camel_words = _split_camel(folder)
        if len(camel_words) > 1:
            joined = " ".join(camel_words)
            if joined != title.lower() and joined != folder_lower:
                vocab[joined].append(entry)

        # Title without special spacing (e.g., "A U R Y N" -> "auryn")
        collapsed = re.sub(r"\s+", "", title).lower()
        if collapsed != title.lower() and collapsed != folder_lower:
            vocab[collapsed].append(entry)

    # Sort vocab keys longest-first for greedy matching
    vocab_phrases = sorted(vocab.keys(), key=len, reverse=True)

    # --- Tier 2: BM25 index ---
    # Tokenize all READMEs
    doc_tokens: list[list[str]] = []  # parallel to nodes
    doc_freqs: Counter = Counter()  # how many docs contain each term
    total_dl = 0

    for node in nodes:
        # Combine title + readme for the document
        text = node["title"] + " " + node["readme"]
        tokens = _tokenize(text)
        doc_tokens.append(tokens)
        total_dl += len(tokens)
        # Count unique terms per doc
        for term in set(tokens):
            doc_freqs[term] += 1

    n_docs = len(nodes)
    avgdl = total_dl / n_docs if n_docs > 0 else 1

    # Precompute IDF for each term
    idf: dict[str, float] = {}
    for term, df in doc_freqs.items():
        # BM25 IDF formula (the +1.0 keeps IDF non-negative)
        idf[term] = math.log((n_docs - df + 0.5) / (df + 0.5) + 1.0)

    # Precompute per-doc term frequencies and doc lengths
    doc_tf: list[dict[str, int]] = []
    doc_lens: list[int] = []
    for tokens in doc_tokens:
        tf = Counter(tokens)
        doc_tf.append(tf)
        doc_lens.append(len(tokens))

    elapsed = time.time() - t0

    index = {
        "version": 1,
        "built_at": time.time(),
        "n_docs": n_docs,
        "build_time_ms": round(elapsed * 1000),
        # Tier 1
        "vocab": dict(vocab),
        "vocab_phrases": vocab_phrases,
        # Tier 2
        "nodes": nodes,
        "idf": idf,
        "doc_tf": doc_tf,
        "doc_lens": doc_lens,
        "avgdl": avgdl,
    }
    return index


def save_index(index: dict) -> None:
    """Persist index to disk."""
    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    with open(INDEX_FILE, "wb") as f:
        pickle.dump(index, f, protocol=pickle.HIGHEST_PROTOCOL)


def load_index() -> dict | None:
    """Load cached index from disk. Returns None if missing/stale/corrupt."""
    if not INDEX_FILE.exists():
        return None
    try:
        with open(INDEX_FILE, "rb") as f:
            index = pickle.load(f)
        # Reject indexes written by other schema versions.
        if index.get("version") != 1:
            return None
        return index
    # NOTE(review): KeyError can't be raised by .get above; AttributeError
    # (e.g. pickled classes that moved) would escape this handler — confirm.
    except (pickle.UnpicklingError, EOFError, KeyError):
        return None


def ensure_index(force: bool = False) -> dict:
    """Load or build+save the index.

    force=True skips the cache and rebuilds from the vault.
    """
    if not force:
        index = load_index()
        if index is not None:
            return index
    index = build_index()
    save_index(index)
    return index


def tier1_vocab_match(text: str, index: dict) -> dict[str, dict]:
    """Tier 1: Vocabulary matching. Scan text for DreamNode titles.
    Returns {uuid: {title, type, path, score, source, mentions}}
    """
    text_lower = text.lower()
    results: dict[str, dict] = {}

    for phrase in index["vocab_phrases"]:
        # Check if phrase appears in text (word-boundary aware for short phrases)
        if len(phrase) <= 3:
            # Short phrases need word boundaries to avoid false positives
            pattern = r'\b' + re.escape(phrase) + r'\b'
            matches = re.findall(pattern, text_lower)
        else:
            # Longer phrases: simple substring count
            matches = []
            start = 0
            while True:
                idx = text_lower.find(phrase, start)
                if idx == -1:
                    break
                matches.append(phrase)
                # NOTE(review): advancing by 1 (not len(phrase)) counts
                # overlapping occurrences — confirm that is intended.
                start = idx + 1

        if not matches:
            continue

        count = len(matches)
        for entry in index["vocab"][phrase]:
            uid = entry["uuid"]
            if uid in results:
                # Same node matched via another phrase: accumulate mentions.
                results[uid]["mentions"] += count
            else:
                results[uid] = {
                    "uuid": uid,
                    "title": entry["title"],
                    "type": entry["type"],
                    "path": entry["path"],
                    "radicle_id": entry["radicle_id"],
                    "score": 1.0,
                    "source": "vocabulary",
                    "mentions": count,
                }
    return results


def tier2_bm25(text: str, index: dict, top_k: int = 20) -> dict[str, dict]:
    """Tier 2: BM25 scoring of query text against all README documents.
    Returns {uuid: {title, type, path, score, source}}
    """
    query_tokens = _tokenize(text)
    if not query_tokens:
        return {}

    # Count query term frequencies (for potential weighting, but BM25 uses binary query)
    query_terms = set(query_tokens)

    idf = index["idf"]
    doc_tf = index["doc_tf"]
    doc_lens = index["doc_lens"]
    avgdl = index["avgdl"]
    nodes = index["nodes"]

    scores: list[float] = []
    for i in range(len(nodes)):
        score = 0.0
        tf_dict = doc_tf[i]
        dl = doc_lens[i]

        for term in query_terms:
            # Terms absent from the corpus contribute nothing.
            if term not in idf:
                continue
            tf = tf_dict.get(term, 0)
            if tf == 0:
                continue
            term_idf = idf[term]
            # BM25 formula
            numerator = tf * (BM25_K1 + 1)
            denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * dl / avgdl)
            score += term_idf * numerator / denominator

        scores.append(score)

    # Get top-k by score
    ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)

    results: dict[str, dict] = {}
    for rank, i in enumerate(ranked[:top_k]):
        # Stop at the first zero score: everything after it is irrelevant.
        if scores[i] <= 0:
            break
        node = nodes[i]
        results[node["uuid"]] = {
            "uuid": node["uuid"],
            "title": node["title"],
            "type": node["type"],
            "path": node["path"],
            "radicle_id": node["radicle_id"],
            "score": round(scores[i], 3),
            "source": "bm25",
        }
    return results


def context_search(text: str, index: dict, top_k: int = 15) -> list[dict]:
    """Run Tier 1 + Tier 2, merge results, return ranked list.

    Scoring: a vocabulary hit contributes a flat 0.5 boost; normalized BM25
    contributes up to 0.5 — so combined_score is in [0, 1].
    """
    t0 = time.time()

    # Tier 1: vocabulary match
    t1_results = tier1_vocab_match(text, index)

    # Tier 2: BM25 (over-fetch so vocab-merged results still fill top_k)
    t2_results = tier2_bm25(text, index, top_k=top_k * 3)

    # Merge: vocab hits get priority, BM25 fills in
    merged: dict[str, dict] = {}

    # Normalize BM25 scores to [0, 1] for merging
    max_bm25 = max((r["score"] for r in t2_results.values()), default=1.0)
    if max_bm25 <= 0:
        max_bm25 = 1.0

    for uid, entry in t2_results.items():
        merged[uid] = {
            **entry,
            "bm25_score": entry["score"],
            "bm25_norm": round(entry["score"] / max_bm25, 3),
            "vocab_hit": False,
            "mentions": 0,
        }

    for uid, entry in t1_results.items():
        if uid in merged:
            merged[uid]["vocab_hit"] = True
            merged[uid]["mentions"] = entry["mentions"]
            merged[uid]["source"] = "both"
        else:
            merged[uid] = {
                **entry,
                "bm25_score": 0,
                "bm25_norm": 0,
                "vocab_hit": True,
            }

    # Combined score: vocab hit is a strong signal, BM25 provides ranking
    for uid, entry in merged.items():
        vocab_boost = 0.5 if entry.get("vocab_hit") else 0.0
        bm25_component = entry.get("bm25_norm", 0) * 0.5
        entry["combined_score"] = round(vocab_boost + bm25_component, 3)

    # Sort by combined score, then by mentions
    ranked = sorted(
        merged.values(),
        key=lambda x: (x["combined_score"], x.get("mentions", 0)),
        reverse=True,
    )[:top_k]

    elapsed_ms = round((time.time() - t0) * 1000, 1)

    # Clean up output
    output = []
    for entry in ranked:
        output.append({
            "uuid": entry["uuid"],
            "title": entry["title"],
            "type": entry["type"],
            "path": entry["path"],
            "radicle_id": entry.get("radicle_id", ""),
            "score": entry["combined_score"],
            "bm25": entry.get("bm25_score", 0),
            "vocab_hit": entry.get("vocab_hit", False),
            "mentions": entry.get("mentions", 0),
            "source": entry.get("source", ""),
        })

    return output


def run_context(args: argparse.Namespace) -> None:
    """CLI entry point for context search."""
    # Load or build index
    index = ensure_index(force=getattr(args, "rebuild", False))

    # Get input text: a path argument is read as a file, otherwise used verbatim.
    input_text = args.input
    if os.path.isfile(input_text):
        input_text = Path(input_text).read_text(errors="replace")

    top_k = getattr(args, "top", 15)

    t0 = time.time()
    results = context_search(input_text, index, top_k=top_k)
    elapsed = time.time() - t0

    if getattr(args, "json_output", False):
        print(json.dumps({
            "results": results,
            "stats": {
                "elapsed_ms": round(elapsed * 1000, 1),
                "input_words": len(input_text.split()),
                "index_nodes": index["n_docs"],
                "results_count": len(results),
            }
        }, indent=2))
    else:
        # Human-readable output
        print(f"\n {len(results)} results in {elapsed*1000:.0f}ms "
              f"({len(input_text.split())} words, {index['n_docs']} nodes indexed)\n")
        for i, r in enumerate(results):
            vocab_marker = " *" if r["vocab_hit"] else " "
            mentions = f" ({r['mentions']}x)" if r["mentions"] > 0 else ""
            print(f" {i+1:2d}.{vocab_marker} {r['score']:.3f} "
                  f"{r['title']:40s} bm25={r['bm25']:.1f}{mentions}")
        print()
        print(" * = vocabulary match (title found in text)")
        print()


def run_index(args: argparse.Namespace) -> None:
    """CLI entry point for index building."""
    # NOTE(review): 'force' is read but never used — build_index always runs
    # from scratch here anyway. Consider removing the flag or honoring it.
    force = getattr(args, "force", False)
    t0 = time.time()
    nodes = discover_nodes()
    index = build_index(nodes)
    save_index(index)
    elapsed = time.time() - t0
    print(f"Indexed {index['n_docs']} DreamNodes in {elapsed*1000:.0f}ms")
    print(f"Vocabulary: {len(index['vocab'])} phrases")
    print(f"BM25 terms: {len(index['idf'])} unique terms")
    print(f"Saved to: {INDEX_FILE}")


# ============================================================
# Server code (unchanged from original)
# ============================================================

def build_injected_index(html: str, models: list[str] | None = None) -> str:
    """Patch the AI bridge URL and inject model selector."""
    # Replace getAIBridgeWsUrl to return ws://<current host>/ws
    html = html.replace(
        "function getAIBridgeWsUrl() {\n"
        "  // When served by server.py, the InterBrain is on the same machine\n"
        "  // Use the page's hostname (works for both localhost and Tailscale IP)\n"
        "  const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';\n"
        "  const host = window.location.hostname || 'localhost';\n"
        "  return `${wsProtocol}//${host}:${AI_BRIDGE_PORT}`;\n"
        "}",
        "function getAIBridgeWsUrl() {\n"
        "  const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';\n"
        "  return `${wsProtocol}//${window.location.host}/ws`;\n"
        "}",
        1,
    )

    # Patch sendWsBridgeRequest to include selected model
    html = html.replace(
        "    aiBridgeWs.send(JSON.stringify({\n"
        "      type: 'ai-inference-stream-request',\n"
        "      requestId: currentRequestId,\n"
        "      messages,\n"
        "      complexity: 'standard'\n"
        "    }));",
        "    aiBridgeWs.send(JSON.stringify({\n"
        "      type: 'ai-inference-stream-request',\n"
        "      requestId: currentRequestId,\n"
        "      messages,\n"
        "      complexity: 'standard',\n"
        "      options: { model: document.getElementById('auryn-model-select')?.value || '' }\n"
        "    }));",
        1,
    )

    # Inject model selector dropdown
    model_options = models or ["qwen3:32b"]
    opts_html = "".join(f'<option value="{m}">{m}</option>' for m in model_options)
    selector = (
        '<div id="auryn-model-picker" style="position:fixed;top:8px;right:8px;z-index:9999;'
        'font:12px monospace;opacity:0.7">'
        f'<select id="auryn-model-select" style="background:#1a1a1a;color:#c4a54a;'
        f'border:1px solid #333;border-radius:4px;padding:2px 4px;font:inherit">'
        f'{opts_html}</select></div>'
    )
    html = html.replace("</body>", selector + "\n</body>", 1)

    # Polyfill crypto.randomUUID for non-secure contexts (HTTP on mobile Safari)
    # + add global error handler to surface errors in the UI (no console on mobile)
    polyfill = (
        '<script>'
        'if(!crypto.randomUUID)crypto.randomUUID=function(){'
        'return([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g,function(c){'
'return(c^crypto.getRandomValues(new Uint8Array(1))[0]&15>>c/4).toString(16)' 518 '})};' 519 'window.addEventListener("error",function(e){' 520 'var d=document.createElement("div");' 521 'd.style.cssText="position:fixed;top:0;left:0;right:0;background:#800;color:#fff;' 522 'padding:8px;font:12px monospace;z-index:99999;white-space:pre-wrap";' 523 'd.textContent="JS Error: "+e.message+" @ line "+e.lineno;' 524 'document.body?document.body.appendChild(d):document.addEventListener("DOMContentLoaded",' 525 'function(){document.body.appendChild(d)});' 526 '});' 527 '</script>\n' 528 ) 529 html = html.replace("<head>", "<head>\n" + polyfill, 1) 530 531 return html 532 533 534 # ============================================================ 535 # Inference WebSocket (/ws) 536 # ============================================================ 537 538 async def ws_inference(request: web.Request) -> web.WebSocketResponse: 539 ws = web.WebSocketResponse() 540 await ws.prepare(request) 541 542 ollama_url = request.app["ollama_url"] 543 default_model = request.app["default_model"] 544 active_tasks: dict[str, asyncio.Task] = {} 545 546 await ws.send_json({"type": "ai-bridge-ready", "version": "2"}) 547 548 async for msg in ws: 549 if msg.type == aiohttp.WSMsgType.TEXT: 550 try: 551 data = json.loads(msg.data) 552 except json.JSONDecodeError: 553 continue 554 555 if data.get("type") == "ai-bridge-probe": 556 await ws.send_json({"type": "ai-bridge-ready", "version": "2"}) 557 558 elif data.get("type") == "ai-inference-stream-request": 559 request_id = data.get("requestId", str(uuid.uuid4())) 560 messages = data.get("messages", []) 561 options = data.get("options", {}) 562 model = options.get("model") or default_model 563 564 if model.startswith("claude-"): 565 coro = _stream_claude( 566 ws, request_id, messages, model, 567 request.app.get("claude_api_key", ""), 568 ) 569 else: 570 coro = _stream_ollama(ws, request_id, messages, model, ollama_url) 571 572 task = asyncio.create_task(coro) 
573 active_tasks[request_id] = task 574 task.add_done_callback(lambda t, rid=request_id: active_tasks.pop(rid, None)) 575 576 elif data.get("type") == "ai-inference-stream-cancel": 577 rid = data.get("requestId") 578 task = active_tasks.pop(rid, None) 579 if task: 580 task.cancel() 581 582 elif msg.type in (aiohttp.WSMsgType.ERROR, aiohttp.WSMsgType.CLOSE): 583 break 584 585 for task in active_tasks.values(): 586 task.cancel() 587 active_tasks.clear() 588 return ws 589 590 591 async def _stream_ollama( 592 ws: web.WebSocketResponse, 593 request_id: str, 594 messages: list[dict], 595 model: str, 596 ollama_url: str, 597 ) -> None: 598 """Stream inference from Ollama and relay chunks over WebSocket.""" 599 url = f"{ollama_url}/api/chat" 600 payload = {"model": model, "messages": messages, "stream": True} 601 partial_content = "" 602 603 try: 604 async with aiohttp.ClientSession() as session: 605 async with session.post(url, json=payload) as resp: 606 if resp.status != 200: 607 error_text = await resp.text() 608 await ws.send_json({ 609 "type": "ai-inference-stream-error", 610 "requestId": request_id, 611 "error": f"Ollama error ({resp.status}): {error_text}", 612 }) 613 return 614 615 prompt_tokens = 0 616 completion_tokens = 0 617 618 async for line in resp.content: 619 line = line.strip() 620 if not line: 621 continue 622 try: 623 chunk_data = json.loads(line) 624 except json.JSONDecodeError: 625 continue 626 627 if chunk_data.get("done"): 628 prompt_tokens = chunk_data.get("prompt_eval_count", 0) 629 completion_tokens = chunk_data.get("eval_count", 0) 630 break 631 632 token = chunk_data.get("message", {}).get("content", "") 633 if token: 634 partial_content += token 635 await ws.send_json({ 636 "type": "ai-inference-stream-chunk", 637 "requestId": request_id, 638 "chunk": token, 639 }) 640 641 await ws.send_json({ 642 "type": "ai-inference-stream-done", 643 "requestId": request_id, 644 "provider": "ollama", 645 "model": model, 646 "usage": { 647 "promptTokens": 
prompt_tokens, 648 "completionTokens": completion_tokens, 649 }, 650 }) 651 652 except asyncio.CancelledError: 653 pass 654 except Exception as e: 655 try: 656 await ws.send_json({ 657 "type": "ai-inference-stream-error", 658 "requestId": request_id, 659 "error": str(e), 660 "partialContent": partial_content or None, 661 }) 662 except ConnectionResetError: 663 pass 664 665 666 async def _stream_claude( 667 ws: web.WebSocketResponse, 668 request_id: str, 669 messages: list[dict], 670 model: str, 671 api_key: str, 672 ) -> None: 673 """Stream inference from Claude API and relay chunks over WebSocket.""" 674 if not api_key: 675 await ws.send_json({ 676 "type": "ai-inference-stream-error", 677 "requestId": request_id, 678 "error": "No Claude API key configured. Set claudeApiKey in InterBrain settings.", 679 }) 680 return 681 682 system_msg = None 683 api_messages = [] 684 for m in messages: 685 if m.get("role") == "system": 686 system_msg = m.get("content", "") 687 else: 688 api_messages.append({"role": m["role"], "content": m["content"]}) 689 690 payload: dict = { 691 "model": model, 692 "max_tokens": 8192, 693 "messages": api_messages, 694 "stream": True, 695 } 696 if system_msg: 697 payload["system"] = system_msg 698 699 headers = { 700 "x-api-key": api_key, 701 "anthropic-version": "2023-06-01", 702 "content-type": "application/json", 703 } 704 705 partial_content = "" 706 prompt_tokens = 0 707 completion_tokens = 0 708 709 try: 710 async with aiohttp.ClientSession() as session: 711 async with session.post( 712 "https://api.anthropic.com/v1/messages", 713 json=payload, 714 headers=headers, 715 ) as resp: 716 if resp.status != 200: 717 error_text = await resp.text() 718 await ws.send_json({ 719 "type": "ai-inference-stream-error", 720 "requestId": request_id, 721 "error": f"Claude API error ({resp.status}): {error_text}", 722 }) 723 return 724 725 # SSE stream 726 async for line in resp.content: 727 line = line.decode("utf-8", errors="replace").strip() 728 if not 
line.startswith("data: "): 729 continue 730 data_str = line[6:] 731 if data_str == "[DONE]": 732 break 733 try: 734 event = json.loads(data_str) 735 except json.JSONDecodeError: 736 continue 737 738 etype = event.get("type") 739 740 if etype == "content_block_delta": 741 delta = event.get("delta", {}) 742 if delta.get("type") == "text_delta": 743 token = delta.get("text", "") 744 if token: 745 partial_content += token 746 await ws.send_json({ 747 "type": "ai-inference-stream-chunk", 748 "requestId": request_id, 749 "chunk": token, 750 }) 751 752 elif etype == "message_delta": 753 usage = event.get("usage", {}) 754 completion_tokens = usage.get("output_tokens", 0) 755 756 elif etype == "message_start": 757 msg_usage = event.get("message", {}).get("usage", {}) 758 prompt_tokens = msg_usage.get("input_tokens", 0) 759 760 await ws.send_json({ 761 "type": "ai-inference-stream-done", 762 "requestId": request_id, 763 "provider": "anthropic", 764 "model": model, 765 "usage": { 766 "promptTokens": prompt_tokens, 767 "completionTokens": completion_tokens, 768 }, 769 }) 770 771 except asyncio.CancelledError: 772 pass 773 except Exception as e: 774 try: 775 await ws.send_json({ 776 "type": "ai-inference-stream-error", 777 "requestId": request_id, 778 "error": str(e), 779 "partialContent": partial_content or None, 780 }) 781 except ConnectionResetError: 782 pass 783 784 785 # ============================================================ 786 # Transcription WebSocket (/ws/transcribe) — Whisper chunked 787 # with vocabulary feedback loop via context provider 788 # ============================================================ 789 790 # mlx-whisper model mapping (size -> HuggingFace repo) 791 _MLX_WHISPER_MODELS = { 792 "tiny": "mlx-community/whisper-tiny", 793 "base": "mlx-community/whisper-base-mlx", 794 "small": "mlx-community/whisper-small-mlx", 795 "medium": "mlx-community/whisper-medium-mlx", 796 "large": "mlx-community/whisper-large-v3-mlx", 797 "turbo": 
"mlx-community/whisper-large-v3-turbo", 798 } 799 _whisper_model_size = "base" 800 _whisper_repo: str = _MLX_WHISPER_MODELS["base"] 801 _whisper_warmed_up = False 802 _whisper_lock = asyncio.Lock() 803 804 805 async def _warm_up_whisper(): 806 """Pre-download the model on first use so subsequent calls are fast.""" 807 global _whisper_warmed_up 808 if _whisper_warmed_up: 809 return 810 async with _whisper_lock: 811 if _whisper_warmed_up: 812 return 813 import mlx_whisper 814 print(f"[Whisper] Warming up {_whisper_model_size} model ({_whisper_repo})...") 815 loop = asyncio.get_event_loop() 816 # Transcribe a tiny silent WAV to trigger model download/cache 817 silent = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) 818 silent.close() 819 await asyncio.create_subprocess_exec( 820 "ffmpeg", "-y", "-f", "lavfi", "-i", "anullsrc=r=16000:cl=mono", 821 "-t", "0.5", "-ar", "16000", "-ac", "1", silent.name, 822 stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, 823 ) 824 try: 825 await loop.run_in_executor( 826 None, 827 lambda: mlx_whisper.transcribe( 828 silent.name, path_or_hf_repo=_whisper_repo, language="en", 829 ), 830 ) 831 except Exception: 832 pass 833 finally: 834 try: 835 os.unlink(silent.name) 836 except OSError: 837 pass 838 _whisper_warmed_up = True 839 print("[Whisper] Model ready.") 840 841 842 async def _probe_duration_file(path: str) -> float | None: 843 """Get duration in seconds of an audio file on disk.""" 844 proc = await asyncio.create_subprocess_exec( 845 "ffprobe", "-v", "quiet", "-show_entries", "format=duration", 846 "-of", "csv=p=0", path, 847 stdout=asyncio.subprocess.PIPE, 848 stderr=asyncio.subprocess.PIPE, 849 ) 850 stdout, _ = await proc.communicate() 851 try: 852 return float(stdout.decode().strip()) 853 except (ValueError, AttributeError): 854 return None 855 856 857 async def _extract_time_range_file(path: str, start_sec: float) -> str | None: 858 """Extract audio from start_sec to end into a WAV file.""" 859 tmp = 
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) 860 tmp.close() 861 proc = await asyncio.create_subprocess_exec( 862 "ffmpeg", "-y", "-ss", f"{start_sec:.2f}", "-i", path, 863 "-ar", "16000", "-ac", "1", tmp.name, 864 stdout=asyncio.subprocess.PIPE, 865 stderr=asyncio.subprocess.PIPE, 866 ) 867 await proc.communicate() 868 if proc.returncode != 0: 869 try: 870 os.unlink(tmp.name) 871 except OSError: 872 pass 873 return None 874 return tmp.name 875 876 877 async def _decode_webm_to_wav(webm_data: bytes) -> str | None: 878 """Decode webm/opus audio to a temporary WAV file for Whisper.""" 879 tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) 880 tmp.close() 881 proc = await asyncio.create_subprocess_exec( 882 "ffmpeg", "-y", "-i", "pipe:0", 883 "-ar", "16000", "-ac", "1", tmp.name, 884 stdin=asyncio.subprocess.PIPE, 885 stdout=asyncio.subprocess.PIPE, 886 stderr=asyncio.subprocess.PIPE, 887 ) 888 _, stderr = await proc.communicate(input=webm_data) 889 if proc.returncode != 0: 890 os.unlink(tmp.name) 891 return None 892 return tmp.name 893 894 895 def _filter_hallucination(text: str) -> str: 896 """Drop text if Whisper is hallucinating (repeated words/phrases on silence).""" 897 words = text.split() 898 if len(words) < 4: 899 return text 900 # If any single word makes up >60% of the text, it's a hallucination loop 901 counts = Counter(w.lower().strip(".,!?") for w in words) 902 most_common_word, most_common_count = counts.most_common(1)[0] 903 if most_common_count / len(words) > 0.6: 904 return "" 905 # If a 2-3 word phrase repeats >4 times, also hallucination 906 for n in (2, 3): 907 if len(words) < n * 4: 908 continue 909 ngrams = [" ".join(words[i:i+n]).lower() for i in range(len(words) - n + 1)] 910 ngram_counts = Counter(ngrams) 911 top_ngram, top_count = ngram_counts.most_common(1)[0] 912 if top_count > 4: 913 return "" 914 return text 915 916 917 _CORE_VOCAB = [ 918 "AURYN", "InterBrain", "DreamNode", "DreamOS", "DreamTalk", 919 
"DreamSong", "PRISM", "ABRACADABRA", "Radicle", 920 ] 921 922 # Max terms in Whisper's initial_prompt. 923 # R&D (2026-03-05): medium model tested up to 100 terms with zero quality 924 # degradation and no speed penalty. The old limit of 20 was from tiny/base. 925 _MAX_VOCAB_TERMS = 50 926 927 928 def _build_vocab_prompt( 929 pinned: list[str] | None = None, 930 ephemeral: list[str] | None = None, 931 ) -> str: 932 """Build Whisper initial_prompt from pinned + ephemeral DreamNode titles. 933 934 Medium model tested up to 100 terms with no quality degradation. 935 Cap at 50 for now — conservative margin with room to grow. 936 937 Two tiers: 938 - Pinned: Core vocab + any DreamNode whose name was recognized in the 939 transcript. Once mentioned, stays for the entire session. 940 - Ephemeral: BM25 context provider suggestions from recent transcript. 941 These rotate as conversation shifts. Fill remaining slots up to cap. 942 """ 943 terms: list[str] = [] 944 seen = set() 945 946 def _add(name: str): 947 if name.lower() not in seen and len(name) >= 3: 948 seen.add(name.lower()) 949 terms.append(name) 950 951 # 1. Core DreamOS vocabulary — always present 952 for term in _CORE_VOCAB: 953 _add(term) 954 955 # 2. Session-pinned titles (DreamNodes whose name appeared in transcript) 956 if pinned: 957 for t in pinned: 958 _add(t) 959 960 # 3. 
# --- tail of the vocabulary-prompt builder (its definition begins above this chunk) ---
    # Ephemeral BM25 suggestions — fill remaining slots
    remaining = _MAX_VOCAB_TERMS - len(terms)
    if ephemeral and remaining > 0:
        for t in ephemeral[:remaining]:
            _add(t)

    if not terms:
        return ""

    # Comma-separated term list — the shape Whisper's initial_prompt consumes.
    return ", ".join(terms)


async def ws_transcribe(request: web.Request) -> web.WebSocketResponse:
    """WebSocket endpoint for live streaming transcription.

    Protocol (client -> server):
      - text {"type": "start_stream"}: begin a session (resets all state)
      - binary frames: webm audio chunks, appended to a cumulative buffer
        and to a recording file on disk
      - text {"type": "end_stream"}: final transcription pass + summary

    Server -> client messages: session_started, transcript_chunk,
    vocab_update, dreamnode_detected, stream_ended.

    A background task re-transcribes the new audio every `chunk_interval`
    seconds, feeding a vocabulary feedback loop: DreamNode titles that
    appear in the transcript get "pinned" into Whisper's initial_prompt,
    and BM25 over a sliding window of recent text supplies rotating
    "ephemeral" suggestions.
    """
    ws = web.WebSocketResponse()
    await ws.prepare(request)

    import mlx_whisper
    await _warm_up_whisper()
    loop = asyncio.get_event_loop()

    # --- per-connection session state ---
    session_id: str | None = None
    audio_file = None                      # Path of the on-disk .webm recording
    transcript_parts: list[str] = []       # confirmed chunks, in order
    start_time: float = 0.0                # wall-clock session start
    cumulative_webm = bytearray()          # all audio bytes received so far
    last_processed_bytes = 0               # high-water mark into cumulative_webm
    chunk_interval = 8.0  # seconds between transcription passes
    process_task: asyncio.Task | None = None

    # Vocabulary feedback loop state
    context_index = load_index()
    pinned_vocab: list[str] = []     # DreamNodes whose name appeared in transcript (permanent)
    ephemeral_vocab: list[str] = []  # BM25 suggestions from recent text (rotating)
    sliding_window: list[str] = []   # last N transcript chunks for BM25 context

    SLIDING_WINDOW_SIZE = 5  # ~40-60 seconds of conversation context

    # Build vocab lookup: multiple key forms -> {folder, title, uuid, path}
    # Keys: folder (lowered), title (lowered), collapsed spaced title, camelCase-split
    # This is the single lookup table: title -> (uuid, path) for the whole system
    _vocab_lookup: dict[str, dict] = {}
    if context_index and "nodes" in context_index:
        for node in context_index["nodes"]:
            folder = node.get("folder", "")
            title = node.get("title", "")
            info = {
                "folder": folder, "title": title,
                "uuid": node.get("uuid", ""), "path": node.get("path", ""),
            }
            # Index by folder name
            if folder:
                _vocab_lookup[folder.lower()] = info
            # Index by title
            if title:
                _vocab_lookup[title.lower()] = info
                # Collapsed spaced titles (A U R Y N -> auryn)
                collapsed = re.sub(r"\s+", "", title).lower()
                if collapsed and collapsed not in _vocab_lookup:
                    _vocab_lookup[collapsed] = info
            # CamelCase-split (InterBrain -> inter brain)
            camel_words = _split_camel(folder)
            if len(camel_words) > 1:
                joined = " ".join(camel_words)
                if joined not in _vocab_lookup:
                    _vocab_lookup[joined] = info

    vocab_prompt = _build_vocab_prompt()
    print(f"[Whisper] Initial vocab: {vocab_prompt}")

    # Daily markdown transcript file; set by _start_transcript_session.
    _transcript_file: Path | None = None

    def _start_transcript_session(recording_name: str, session_start: float):
        """Write a session header to the daily transcript file."""
        # NOTE(review): session_start is currently unused here — wall time
        # comes from datetime.now() instead.
        nonlocal _transcript_file
        _transcript_file = TRANSCRIPTS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.md"
        wall_time = datetime.now().strftime("%H:%M")
        with open(_transcript_file, "a", encoding="utf-8") as f:
            f.write(f"\n---\n\n### {wall_time} · `{recording_name}`\n\n")
            f.flush()

    def _write_transcript_chunk(text: str, session_start: float):
        """Append a confirmed transcript chunk to the daily file, stamped
        with minutes:seconds elapsed since session start."""
        elapsed = time.time() - session_start
        minutes = int(elapsed // 60)
        seconds = int(elapsed % 60)
        timestamp = f"{minutes}:{seconds:02d}"
        if _transcript_file is None:
            return
        with open(_transcript_file, "a", encoding="utf-8") as f:
            f.write(f"[{timestamp}] {text}\n\n")
            f.flush()

    def _check_vocab_hits(text: str) -> list[dict]:
        """Check if any DreamNode names appear in the transcribed text.
        Returns list of matched DreamNode info dicts (deduped by uuid).
        These get pinned."""
        text_lower = text.lower()
        hits = []
        seen_uuids = set()
        for key, info in _vocab_lookup.items():
            if info["uuid"] in seen_uuids:
                continue
            if len(key) <= 3:
                # Short names need word boundaries (avoid matching inside words)
                if re.search(r'\b' + re.escape(key) + r'\b', text_lower):
                    hits.append(info)
                    seen_uuids.add(info["uuid"])
            else:
                # Longer names: plain substring match is specific enough
                if key in text_lower:
                    hits.append(info)
                    seen_uuids.add(info["uuid"])
        return hits

    def _update_ephemeral_vocab(text: str):
        """Run BM25 on recent transcript (sliding window), update ephemeral suggestions."""
        nonlocal ephemeral_vocab

        # Update sliding window (bounded FIFO of recent chunks)
        sliding_window.append(text)
        if len(sliding_window) > SLIDING_WINDOW_SIZE:
            sliding_window.pop(0)

        if not context_index:
            return

        # BM25 on the sliding window
        window_text = " ".join(sliding_window)
        results = context_search(window_text, context_index, top_k=10)

        # Ephemeral = top BM25 hits that aren't already pinned or core.
        # No score threshold — if BM25 ranked it in the top 10, it's relevant
        # enough to bias Whisper. The slot cap is the only limit.
        pinned_set = {p.lower() for p in pinned_vocab}
        core_set = {t.lower() for t in _CORE_VOCAB}
        remaining_slots = _MAX_VOCAB_TERMS - len(_CORE_VOCAB) - len(pinned_vocab)
        new_ephemeral = []
        for r in results:
            if remaining_slots <= 0:
                break
            title = r.get("title", "")
            if not title:
                continue
            if title.lower() not in pinned_set and title.lower() not in core_set:
                new_ephemeral.append(title)
                remaining_slots -= 1

        if new_ephemeral != ephemeral_vocab:
            ephemeral_vocab = new_ephemeral
            print(f"[Whisper] Ephemeral vocab updated: {ephemeral_vocab[:5]}")

    def _rebuild_prompt():
        """Rebuild the vocab prompt from current pinned + ephemeral state."""
        nonlocal vocab_prompt
        vocab_prompt = _build_vocab_prompt(pinned_vocab, ephemeral_vocab)

    async def _send_vocab_state():
        """Send current vocabulary state to UI for debugging (best-effort)."""
        try:
            await ws.send_json({
                "type": "vocab_update",
                "core": list(_CORE_VOCAB),
                "pinned": list(pinned_vocab),
                "ephemeral": list(ephemeral_vocab),
                "prompt": vocab_prompt,
            })
        except Exception:
            pass

    async def _notify_dreamnode_hit(info: dict):
        """Send dreamnode_detected message to UI when a name is recognized (best-effort)."""
        try:
            await ws.send_json({
                "type": "dreamnode_detected",
                "folder": info["folder"],
                "title": info["title"],
                "uuid": info["uuid"],
                "path": info.get("path", ""),
            })
        except Exception:
            pass

    def _fix_capitalization(text: str, titles: list[str]) -> str:
        """Fix capitalization of DreamNode titles in text that was transcribed
        before the term was in the vocab prompt. Case-insensitive replace
        with the canonical title form."""
        for title in titles:
            pattern = re.compile(re.escape(title), re.IGNORECASE)
            text = pattern.sub(title, text)
        return text

    async def _process_new_text(new_text: str) -> str:
        """Process newly transcribed text: check vocab hits, update feedback loop.
        Returns the (possibly corrected) text."""
        # 1. Check for DreamNode name matches (Tier 1 vocab) -> pin them
        hits = _check_vocab_hits(new_text)
        new_pins = False
        newly_pinned_titles = []
        for info in hits:
            title = info["title"]
            if title not in pinned_vocab:
                pinned_vocab.append(title)
                newly_pinned_titles.append(title)
                new_pins = True
                print(f"[Whisper] PINNED: {title} (uuid={info['uuid']}, path={info['path']})")
                await _notify_dreamnode_hit(info)

        # Fix capitalization in *this* chunk for newly pinned titles
        # (the chunk was transcribed before these terms were in the prompt)
        if newly_pinned_titles:
            new_text = _fix_capitalization(new_text, newly_pinned_titles)

        # 2. Update ephemeral vocab via BM25 sliding window
        _update_ephemeral_vocab(new_text)

        # 3. Rebuild prompt if anything changed
        # NOTE(review): `or True` makes new_pins dead — the prompt is rebuilt
        # unconditionally so ephemeral changes always take effect.
        if new_pins or True:  # always rebuild to reflect ephemeral changes
            _rebuild_prompt()

        return new_text

    last_transcribed_sec = 0.0  # seconds of audio already transcribed

    async def periodic_transcribe():
        """Every chunk_interval seconds, transcribe only the new audio."""
        nonlocal last_processed_bytes, last_transcribed_sec
        while True:
            await asyncio.sleep(chunk_interval)
            if not audio_file or not audio_file.exists():
                continue
            if len(cumulative_webm) <= last_processed_bytes:
                continue  # no new bytes since last pass

            # Elapsed time since session start = total audio duration
            # (MediaRecorder streams in real time, so wall clock ~= audio clock)
            total_sec = time.time() - start_time

            # Extract new audio with 1.5s overlap so Whisper gets full
            # sentence context instead of cutting mid-word
            overlap = 1.5 if last_transcribed_sec > 0 else 0.0
            extract_from = max(0, last_transcribed_sec - overlap)
            wav_path = await _extract_time_range_file(
                str(audio_file), extract_from
            )
            if not wav_path:
                # Extraction failed — skip this window, don't retry the same bytes
                last_processed_bytes = len(cumulative_webm)
                continue

            try:
                # Prompt: vocab terms + last transcript chunk for continuity
                prompt = vocab_prompt
                if transcript_parts:
                    recent = transcript_parts[-1]
                    prompt = vocab_prompt + ". " + recent if vocab_prompt else recent

                # Whisper is blocking — run it off the event loop.
                result = await loop.run_in_executor(
                    None,
                    lambda p=prompt: mlx_whisper.transcribe(
                        wav_path,
                        path_or_hf_repo=_whisper_repo,
                        language="en",
                        initial_prompt=p,
                    ),
                )

                # Skip segments in the overlap region — they belong to
                # the previous chunk. Keep segments starting after overlap.
                # Use a soft threshold: any segment whose midpoint is past
                # the overlap belongs to this chunk.
                segs = result["segments"]
                if overlap > 0 and segs:
                    segs = [s for s in segs
                            if (s["start"] + s["end"]) / 2 >= overlap]

                new_text = " ".join(
                    s["text"].strip() for s in segs
                ).strip()

                # Filter hallucinated repetition (Whisper loops on silence)
                new_text = _filter_hallucination(new_text)

                if new_text:
                    # Process vocab hits first so capitalization is fixed
                    # before sending to UI and writing to transcript
                    new_text = await _process_new_text(new_text)
                    chunk_text = " " + new_text if transcript_parts else new_text
                    await ws.send_json({"type": "transcript_chunk", "text": chunk_text})
                    transcript_parts.append(new_text)
                    _write_transcript_chunk(new_text, start_time)
                    await _send_vocab_state()

                last_transcribed_sec = total_sec
                last_processed_bytes = len(cumulative_webm)

            except Exception as e:
                print(f"[Whisper] Transcription error: {e}")
            finally:
                # Always remove the temporary wav extract
                try:
                    os.unlink(wav_path)
                except OSError:
                    pass

    # --- main receive loop: control messages (TEXT) and audio (BINARY) ---
    async for msg in ws:
        if msg.type == aiohttp.WSMsgType.TEXT:
            try:
                data = json.loads(msg.data)
            except json.JSONDecodeError:
                continue

            if data.get("type") == "start_stream":
                # Reset all per-session state before starting
                session_id = str(uuid.uuid4())
                start_time = time.time()
                transcript_parts.clear()
                cumulative_webm.clear()
                last_processed_bytes = 0
                last_transcribed_sec = 0.0
                pinned_vocab.clear()
                ephemeral_vocab.clear()
                sliding_window.clear()
                vocab_prompt = _build_vocab_prompt()

                RECORDINGS_DIR.mkdir(exist_ok=True)
                TRANSCRIPTS_DIR.mkdir(exist_ok=True)

                ts = datetime.now().strftime("%Y%m%d-%H%M%S")
                audio_file = RECORDINGS_DIR / f"{ts}.webm"
                _start_transcript_session(f"{ts}.webm", start_time)

                process_task = asyncio.create_task(periodic_transcribe())
                await ws.send_json({"type": "session_started", "session_id": session_id})

            elif data.get("type") == "end_stream":
                # Stop the periodic worker before the final pass
                if process_task:
                    process_task.cancel()
                    try:
                        await process_task
                    except asyncio.CancelledError:
                        pass
                    process_task = None

                # Final transcription pass — only the tail not yet transcribed
                # (>1s of audio remaining, to avoid transcribing nothing)
                if audio_file and audio_file.exists() and last_transcribed_sec < (time.time() - start_time - 1):
                    wav_path = await _extract_time_range_file(
                        str(audio_file), last_transcribed_sec
                    )
                    if wav_path:
                        try:
                            prompt = vocab_prompt
                            if transcript_parts:
                                recent = transcript_parts[-1]
                                prompt = vocab_prompt + ". " + recent if vocab_prompt else recent

                            result = await loop.run_in_executor(
                                None,
                                lambda: mlx_whisper.transcribe(
                                    wav_path,
                                    path_or_hf_repo=_whisper_repo,
                                    language="en",
                                    initial_prompt=prompt,
                                ),
                            )

                            new_text = " ".join(
                                s["text"].strip() for s in result["segments"]
                            ).strip()
                            if new_text:
                                new_text = await _process_new_text(new_text)
                                chunk_text = " " + new_text if transcript_parts else new_text
                                await ws.send_json({"type": "transcript_chunk", "text": chunk_text})
                                transcript_parts.append(new_text)
                                _write_transcript_chunk(new_text, start_time)
                                await _send_vocab_state()
                        except Exception as e:
                            print(f"[Whisper] Final transcription error: {e}")
                        finally:
                            try:
                                os.unlink(wav_path)
                            except OSError:
                                pass

                # Send final state: all pinned DreamNodes from the session
                full_text = " ".join(transcript_parts)
                await ws.send_json({
                    "type": "stream_ended",
                    "full_text": full_text,
                    "pinned_dreamnodes": pinned_vocab,
                })
                session_id = None

        elif msg.type == aiohttp.WSMsgType.BINARY:
            # Audio chunk: buffer in memory and persist to the recording file
            cumulative_webm.extend(msg.data)
            if audio_file:
                with open(audio_file, "ab") as f:
                    f.write(msg.data)

        elif msg.type in (aiohttp.WSMsgType.ERROR, aiohttp.WSMsgType.CLOSE):
            break

    # Connection closed without end_stream — make sure the worker dies
    if process_task:
        process_task.cancel()

    return ws
process_task.cancel() 1358 1359 return ws 1360 1361 1362 # ============================================================ 1363 # Audio upload (POST /upload/audio) — Whisper with vocabulary 1364 # ============================================================ 1365 1366 async def handle_audio_upload(request: web.Request) -> web.Response: 1367 reader = await request.multipart() 1368 field = await reader.next() 1369 if field is None or field.name != "file": 1370 return web.json_response({"error": "Expected 'file' field"}, status=400) 1371 1372 filename = field.filename or "audio.webm" 1373 ext = Path(filename).suffix or ".webm" 1374 1375 with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f: 1376 while True: 1377 chunk = await field.read_chunk() 1378 if chunk is None: 1379 break 1380 f.write(chunk) 1381 tmp_path = f.name 1382 1383 try: 1384 import mlx_whisper 1385 await _warm_up_whisper() 1386 vocab_prompt = _build_vocab_prompt() 1387 1388 loop = asyncio.get_event_loop() 1389 result = await loop.run_in_executor( 1390 None, 1391 lambda: mlx_whisper.transcribe( 1392 tmp_path, 1393 path_or_hf_repo=_whisper_repo, 1394 language="en", 1395 initial_prompt=vocab_prompt, 1396 ), 1397 ) 1398 text = " ".join(s["text"].strip() for s in result["segments"]) 1399 if not text: 1400 return web.json_response({"error": "Whisper produced no output"}, status=500) 1401 return web.json_response({"text": text}) 1402 finally: 1403 os.unlink(tmp_path) 1404 1405 1406 # ============================================================ 1407 # Static file serving 1408 # ============================================================ 1409 1410 async def handle_index(request: web.Request) -> web.Response: 1411 return web.Response( 1412 text=request.app["injected_html"], 1413 content_type="text/html", 1414 ) 1415 1416 1417 async def handle_static(request: web.Request) -> web.StreamResponse: 1418 rel_path = request.match_info.get("path", "") 1419 file_path = (AURYN_DIR / rel_path).resolve() 1420 1421 if 
not str(file_path).startswith(str(AURYN_DIR)): 1422 raise web.HTTPForbidden() 1423 if not file_path.is_file(): 1424 raise web.HTTPNotFound() 1425 1426 return web.FileResponse(file_path) 1427 1428 1429 # ============================================================ 1430 # Network: Tailscale, SSL, IP detection 1431 # ============================================================ 1432 1433 def get_local_ip() -> str: 1434 """Get the local LAN IP address.""" 1435 try: 1436 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 1437 s.connect(("8.8.8.8", 80)) 1438 ip = s.getsockname()[0] 1439 s.close() 1440 return ip 1441 except Exception: 1442 return "127.0.0.1" 1443 1444 1445 def ensure_tailscale() -> str | None: 1446 """Ensure Tailscale is running. Returns Tailscale IP or None.""" 1447 if not shutil.which("tailscale"): 1448 return None 1449 1450 result = subprocess.run( 1451 ["tailscale", "ip", "-4"], 1452 capture_output=True, text=True, timeout=5, 1453 ) 1454 if result.returncode == 0: 1455 ip = result.stdout.strip().split("\n")[0] 1456 print(f"[Tailscale] Connected: {ip}") 1457 return ip 1458 1459 print("[Tailscale] Not connected, attempting 'tailscale up'...") 1460 result = subprocess.run( 1461 ["tailscale", "up"], 1462 capture_output=True, text=True, timeout=30, 1463 ) 1464 if result.returncode != 0: 1465 print(f"[Tailscale] Failed: {result.stderr.strip()}") 1466 print("[Tailscale] Run 'sudo brew services start tailscale' manually if needed") 1467 return None 1468 1469 result = subprocess.run( 1470 ["tailscale", "ip", "-4"], 1471 capture_output=True, text=True, timeout=5, 1472 ) 1473 if result.returncode == 0: 1474 ip = result.stdout.strip().split("\n")[0] 1475 print(f"[Tailscale] Connected: {ip}") 1476 return ip 1477 return None 1478 1479 1480 def get_or_create_ssl_context(local_ip: str, tailscale_ip: str | None) -> ssl.SSLContext | None: 1481 """Generate self-signed cert with SANs for all access IPs.""" 1482 cert_file = AURYN_DIR / "cert.pem" 1483 key_file = 
AURYN_DIR / "key.pem" 1484 1485 sans = [f"IP:127.0.0.1", f"IP:{local_ip}", "DNS:localhost"] 1486 if tailscale_ip: 1487 sans.append(f"IP:{tailscale_ip}") 1488 1489 regen = False 1490 if not cert_file.exists() or not key_file.exists(): 1491 regen = True 1492 else: 1493 try: 1494 result = subprocess.run( 1495 ["openssl", "x509", "-in", str(cert_file), "-noout", "-text"], 1496 capture_output=True, text=True, 1497 ) 1498 cert_text = result.stdout 1499 if tailscale_ip and tailscale_ip not in cert_text: 1500 regen = True 1501 if local_ip not in cert_text: 1502 regen = True 1503 except Exception: 1504 regen = True 1505 1506 if regen: 1507 san_str = ",".join(sans) 1508 try: 1509 subprocess.run([ 1510 "openssl", "req", "-x509", "-newkey", "rsa:2048", 1511 "-keyout", str(key_file), "-out", str(cert_file), 1512 "-days", "365", "-nodes", 1513 "-subj", "/CN=AURYN", 1514 "-addext", f"subjectAltName={san_str}", 1515 ], check=True, capture_output=True) 1516 print(f"[SSL] Generated certificate (SANs: {san_str})") 1517 except (subprocess.CalledProcessError, FileNotFoundError) as e: 1518 print(f"[SSL] Could not generate certificate: {e}") 1519 return None 1520 1521 ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) 1522 ctx.load_cert_chain(str(cert_file), str(key_file)) 1523 return ctx 1524 1525 1526 async def handle_install_cert(request: web.Request) -> web.Response: 1527 """Serve the CA cert for iOS installation.""" 1528 cert_file = AURYN_DIR / "cert.pem" 1529 if not cert_file.exists(): 1530 raise web.HTTPNotFound() 1531 return web.Response( 1532 body=cert_file.read_bytes(), 1533 content_type="application/x-x509-ca-cert", 1534 headers={"Content-Disposition": "attachment; filename=auryn-ca.pem"}, 1535 ) 1536 1537 1538 # ============================================================ 1539 # CLI & App setup 1540 # ============================================================ 1541 1542 async def check_ollama(url: str) -> bool: 1543 try: 1544 async with aiohttp.ClientSession() as session: 1545 
# ============================================================
# CLI & App setup
# ============================================================

async def check_ollama(url: str) -> bool:
    """Return True iff GET <url> answers 200 within 3 seconds."""
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=3)) as resp:
                return resp.status == 200
    except Exception:
        return False


def load_interbrain_api_key() -> str:
    """Load the Claude API key from InterBrain's Obsidian plugin settings.

    Checks the plugin's data.json first, then the ANTHROPIC_API_KEY env
    var. Always returns a string — "" when no key is found (the previous
    version fell off the end and returned None, contradicting `-> str`).
    """
    paths = [
        Path.home() / "RealDealVault" / ".obsidian" / "plugins" / "interbrain" / "data.json",
    ]
    for p in paths:
        if p.exists():
            try:
                data = json.loads(p.read_text())
                key = data.get("claudeApiKey", "")
                if key:
                    print("[Claude] API key loaded from InterBrain settings")
                    return key
            except Exception:
                # Unreadable/invalid settings file — fall through to env var
                pass
    key = os.environ.get("ANTHROPIC_API_KEY", "")
    if key:
        print("[Claude] API key loaded from ANTHROPIC_API_KEY env var")
    return key


def get_ollama_models(ollama_url: str) -> list[str]:
    """Fetch available Ollama model names, excluding nomic embedders.

    Returns [] on any network or parse failure.
    """
    import urllib.request
    try:
        with urllib.request.urlopen(f"{ollama_url}/api/tags", timeout=3) as resp:
            data = json.loads(resp.read())
        return [m["name"] for m in data.get("models", [])
                if not m["name"].startswith("nomic")]
    except Exception:
        return []
# ============================================================
# Knowledge Gardening — route insights to DreamNode READMEs
# ============================================================

# System prompt for the gardening pass. It instructs the model to emit a
# strict JSON array of exact-substring edits that _apply_garden_edits()
# can apply mechanically. Runtime string — do not reflow or translate.
GARDEN_SYSTEM_PROMPT = """You are AURYN's knowledge gardener. Your job: read a conversation and its loaded DreamNode context, then surgically update the relevant READMEs with new insights.

RULES:
- Only add genuinely new insights not already present in the README
- Be surgical — edit specific sections, don't rewrite entire files
- Preserve the existing voice and style of each README
- If a README has no relevant section for the insight, add a minimal new section
- Signal over noise: concrete insights, structural decisions, distilled wisdom. NOT stream of consciousness or redundant reformulations
- Output ONLY valid JSON — no markdown fences, no commentary before or after

OUTPUT FORMAT — a JSON array of edits:
[
  {
    "dreamnode": "Title of DreamNode",
    "path": "/absolute/path/to/README.md",
    "reason": "Brief explanation of what insight is being added",
    "edits": [
      {
        "old": "exact existing text to find and replace",
        "new": "the replacement text with the new insight woven in"
      }
    ]
  }
]

If no edits are needed, return an empty array: []

Each "old" string must be an EXACT substring of the current README content. Keep edits minimal — include just enough surrounding context in "old" to uniquely identify the location."""
1624 """ 1625 import base64 as b64 1626 1627 request_id = str(uuid.uuid4()) 1628 chunks: list[str] = [] 1629 1630 reader, writer = await asyncio.open_connection("localhost", port) 1631 1632 # WebSocket handshake 1633 ws_key = b64.b64encode(os.urandom(16)).decode() 1634 handshake = ( 1635 "GET / HTTP/1.1\r\n" 1636 "Host: localhost:{}\r\n" 1637 "Upgrade: websocket\r\n" 1638 "Connection: Upgrade\r\n" 1639 "Sec-WebSocket-Key: {}\r\n" 1640 "Sec-WebSocket-Version: 13\r\n" 1641 "\r\n" 1642 ).format(port, ws_key) 1643 writer.write(handshake.encode()) 1644 await writer.drain() 1645 1646 # Read handshake response (skip validation of Sec-WebSocket-Accept) 1647 resp = b"" 1648 while b"\r\n\r\n" not in resp: 1649 resp += await reader.read(4096) 1650 if b"101" not in resp: 1651 writer.close() 1652 raise RuntimeError(f"AI bridge handshake failed: {resp[:200].decode()}") 1653 1654 # Any data after the headers is the start of the first frame 1655 after_headers = resp.split(b"\r\n\r\n", 1)[1] 1656 buffer = bytearray(after_headers) 1657 1658 async def _read_frame() -> str: 1659 """Read one WebSocket text frame.""" 1660 nonlocal buffer 1661 # Ensure we have at least 2 bytes for the header 1662 while len(buffer) < 2: 1663 buffer.extend(await reader.read(4096)) 1664 1665 b0, b1 = buffer[0], buffer[1] 1666 masked = bool(b1 & 0x80) 1667 length = b1 & 0x7F 1668 offset = 2 1669 1670 if length == 126: 1671 while len(buffer) < 4: 1672 buffer.extend(await reader.read(4096)) 1673 length = struct.unpack(">H", buffer[2:4])[0] 1674 offset = 4 1675 elif length == 127: 1676 while len(buffer) < 10: 1677 buffer.extend(await reader.read(4096)) 1678 length = struct.unpack(">Q", buffer[2:10])[0] 1679 offset = 10 1680 1681 if masked: 1682 offset += 4 # skip mask key (server shouldn't mask, but just in case) 1683 1684 total = offset + length 1685 while len(buffer) < total: 1686 buffer.extend(await reader.read(4096)) 1687 1688 payload = buffer[offset:total] 1689 if masked: 1690 mask = buffer[offset - 
4:offset] 1691 payload = bytes(b ^ mask[i % 4] for i, b in enumerate(payload)) 1692 1693 buffer = buffer[total:] 1694 return payload.decode("utf-8", errors="replace") 1695 1696 def _make_frame(text: str) -> bytes: 1697 """Create a masked WebSocket text frame (client must mask).""" 1698 payload = text.encode("utf-8") 1699 frame = bytearray() 1700 frame.append(0x81) # FIN + text opcode 1701 1702 mask_key = os.urandom(4) 1703 length = len(payload) 1704 if length < 126: 1705 frame.append(0x80 | length) # masked 1706 elif length < 65536: 1707 frame.append(0x80 | 126) 1708 frame.extend(struct.pack(">H", length)) 1709 else: 1710 frame.append(0x80 | 127) 1711 frame.extend(struct.pack(">Q", length)) 1712 1713 frame.extend(mask_key) 1714 frame.extend(bytes(b ^ mask_key[i % 4] for i, b in enumerate(payload))) 1715 return bytes(frame) 1716 1717 try: 1718 # Wait for ai-bridge-ready 1719 ready_msg = await asyncio.wait_for(_read_frame(), timeout=10) 1720 data = json.loads(ready_msg) 1721 if data.get("type") != "ai-bridge-ready": 1722 raise RuntimeError(f"Expected ai-bridge-ready, got: {data}") 1723 1724 # Send inference request 1725 req = json.dumps({ 1726 "type": "ai-inference-stream-request", 1727 "requestId": request_id, 1728 "messages": messages, 1729 "complexity": "standard", 1730 }) 1731 writer.write(_make_frame(req)) 1732 await writer.drain() 1733 1734 # Collect streamed response 1735 while True: 1736 raw = await asyncio.wait_for(_read_frame(), timeout=120) 1737 data = json.loads(raw) 1738 if data.get("requestId") != request_id: 1739 continue 1740 if data["type"] == "ai-inference-stream-chunk": 1741 chunks.append(data["chunk"]) 1742 elif data["type"] == "ai-inference-stream-done": 1743 break 1744 elif data["type"] == "ai-inference-stream-error": 1745 raise RuntimeError(f"AI bridge error: {data.get('error')}") 1746 1747 finally: 1748 writer.close() 1749 1750 return "".join(chunks) 1751 1752 1753 def _apply_garden_edits(edits: list[dict]) -> list[dict]: 1754 """Apply edits 
def _apply_garden_edits(edits: list[dict]) -> list[dict]:
    """Apply garden edit groups to README files and git-commit each change.

    Produces one result dict per outcome: "error" when the target path or
    README is missing, "warning" for each edit whose anchor text was not
    found (that edit is skipped), and "ok" when the file was modified and
    a best-effort git commit was attempted.
    """
    outcomes: list[dict] = []
    for group in edits:
        target = group.get("path", "")
        node_title = group.get("dreamnode", "")
        why = group.get("reason", "")

        # Reject missing/empty paths up front.
        if not target or not Path(target).exists():
            outcomes.append({"dreamnode": node_title, "status": "error", "reason": f"File not found: {target}"})
            continue

        readme = Path(target)
        if readme.is_dir():
            # A DreamNode directory was given — edit its README.
            readme = readme / "README.md"

        if not readme.exists():
            outcomes.append({"dreamnode": node_title, "status": "error", "reason": f"README not found: {readme}"})
            continue

        text = readme.read_text(encoding="utf-8")
        changed = False

        # Apply each old->new replacement exactly once, in order.
        for pair in group.get("edits", []):
            before = pair.get("old", "")
            after = pair.get("new", "")
            if not before or before == after:
                continue
            if before not in text:
                outcomes.append({
                    "dreamnode": node_title, "status": "warning",
                    "reason": f"Could not find text to replace (skipped): {before[:60]}...",
                })
                continue
            text = text.replace(before, after, 1)
            changed = True

        if not changed:
            continue

        readme.write_text(text, encoding="utf-8")
        # Git commit (best-effort — failures are logged, never fatal)
        try:
            subprocess.run(
                ["git", "add", str(readme)],
                cwd=readme.parent, capture_output=True, timeout=10,
            )
            subprocess.run(
                ["git", "commit", "-m", f"Garden: {why}\n\nCo-Authored-By: AURYN <auryn@dreamos.local>"],
                cwd=readme.parent, capture_output=True, timeout=10,
            )
        except Exception as e:
            print(f"[Garden] Git commit failed for {node_title}: {e}")

        outcomes.append({"dreamnode": node_title, "status": "ok", "reason": why})

    return outcomes
async def ws_garden(request: web.Request) -> web.WebSocketResponse:
    """WebSocket endpoint for knowledge gardening.

    Expects a message: {
        type: "garden",
        conversation: [{role, content}, ...],
        context: [{title, id, path}, ...]
    }

    Reads READMEs for each context node, sends everything to AI bridge,
    applies returned edits, reports results via garden_status /
    garden_result messages.
    """
    ws = web.WebSocketResponse()
    await ws.prepare(request)

    async for msg in ws:
        if msg.type != aiohttp.WSMsgType.TEXT:
            continue
        try:
            data = json.loads(msg.data)
        except json.JSONDecodeError:
            continue

        if data.get("type") != "garden":
            continue

        conversation = data.get("conversation", [])
        context_nodes = data.get("context", [])

        if not conversation:
            await ws.send_json({"type": "garden_result", "status": "empty", "message": "No conversation to garden."})
            continue

        await ws.send_json({"type": "garden_status", "message": "Reading DreamNode context..."})

        # Build context block with READMEs (one section per DreamNode)
        context_block = ""
        for node in context_nodes:
            path = node.get("path", "")
            title = node.get("title", "")
            node_id = node.get("id", "")
            readme_path = Path(path) / "README.md" if path else None

            if readme_path and readme_path.exists():
                readme_content = readme_path.read_text(encoding="utf-8")
                context_block += f"\n\n---\n### DreamNode: {title}\n- ID: {node_id}\n- Path: {path}\n- README path: {readme_path}\n\n{readme_content}"
            else:
                context_block += f"\n\n---\n### DreamNode: {title}\n- ID: {node_id}\n- Path: {path}\n- README: (not found)\n"

        # Build the messages for the AI bridge
        garden_messages = [
            {"role": "system", "content": GARDEN_SYSTEM_PROMPT},
            {"role": "user", "content": f"""Here is the loaded DreamNode context (these are the READMEs you may edit):

{context_block}

---

Here is the conversation to extract insights from:

{json.dumps(conversation, indent=2)}

Analyze the conversation. For each DreamNode whose README should be updated with new insights from this conversation, produce the surgical edits. Return ONLY the JSON array."""},
        ]

        await ws.send_json({"type": "garden_status", "message": "Routing insights..."})

        try:
            response = await _ai_bridge_inference(garden_messages)

            # Parse the JSON response — strip markdown fences if present
            cleaned = response.strip()
            if cleaned.startswith("```"):
                cleaned = re.sub(r"^```\w*\n?", "", cleaned)
                cleaned = re.sub(r"\n?```$", "", cleaned)
                cleaned = cleaned.strip()

            edits = json.loads(cleaned)

            if not edits:
                await ws.send_json({
                    "type": "garden_result", "status": "no_changes",
                    "message": "No new insights to route — READMEs are up to date.",
                })
                continue

            await ws.send_json({"type": "garden_status", "message": f"Applying {len(edits)} edit(s)..."})

            results = _apply_garden_edits(edits)
            await ws.send_json({
                "type": "garden_result", "status": "done",
                "edits": results,
                "message": f"Gardened {sum(1 for r in results if r['status'] == 'ok')} DreamNode(s).",
            })

        except json.JSONDecodeError as e:
            # `response` is always bound here: JSONDecodeError can only be
            # raised by json.loads(cleaned), which runs after the assignment.
            await ws.send_json({
                "type": "garden_result", "status": "error",
                "message": f"Failed to parse AI response as JSON: {e}\n\nRaw response:\n{response[:500]}",
            })
        except Exception as e:
            await ws.send_json({
                "type": "garden_result", "status": "error",
                "message": f"Garden error: {e}",
            })

    return ws
# ============================================================
# Claude Code Sub-Agent
# ============================================================

async def run_claude_code(
    prompt: str,
    session_id: str | None = None,
    resume: bool = False,
    model: str = "sonnet",
    max_budget: float = 0.50,
    cwd: str | None = None,
    allowed_tools: str | None = None,
) -> dict:
    """Run Claude Code as a sub-agent in headless mode.

    Args:
        prompt: task text, passed on stdin.
        session_id: existing session to resume (with resume=True) or
            explicit id for a fresh session.
        resume: resume `session_id` instead of starting a new session.
        model: model alias passed to `--model`.
        max_budget: USD cap passed to `--max-budget-usd`.
        cwd: working directory for the subprocess (defaults to AURYN_DIR).
        allowed_tools: space-separated tool names, each passed as a
            separate `--allowedTools` flag.

    Returns parsed JSON output with result, cost, session_id, etc.; on
    unparseable output, a synthetic error result wrapping stdout/stderr.
    """
    cmd = [
        "claude", "-p",
        "--output-format", "json",
        "--dangerously-skip-permissions",
        "--model", model,
        "--max-budget-usd", str(max_budget),
    ]

    if resume and session_id:
        cmd.extend(["--resume", session_id])
    elif session_id:
        cmd.extend(["--session-id", session_id])

    if allowed_tools:
        for tool in allowed_tools.split():
            cmd.extend(["--allowedTools", tool])

    # Pass prompt via stdin to avoid shell argument length limits

    env = os.environ.copy()
    env.pop("CLAUDECODE", None)  # Allow nested sessions

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=cwd or str(AURYN_DIR),
        env=env,
    )
    stdout, stderr = await proc.communicate(input=prompt.encode("utf-8"))

    output = stdout.decode("utf-8", errors="replace")
    try:
        parsed = json.loads(output)
        # Extract the result from the JSON array
        if isinstance(parsed, list):
            for item in parsed:
                if item.get("type") == "result":
                    return item
            # No result element — hand back the raw parse for the caller
            return {"type": "result", "raw": parsed}
        return parsed
    except json.JSONDecodeError:
        # Non-JSON output — wrap it so callers still get a result dict
        return {
            "type": "result", "is_error": True,
            "result": output or stderr.decode("utf-8", errors="replace"),
        }
1994 """ 1995 import sys 1996 1997 async def _do_restart(): 1998 await asyncio.sleep(0.3) # Let the HTTP response send 1999 pid = os.getpid() 2000 cmd_args = " ".join(sys.argv[1:]) 2001 script = f'while kill -0 {pid} 2>/dev/null; do sleep 0.2; done; cd "{AURYN_DIR}" && uv run aurin.py {cmd_args} &' 2002 subprocess.Popen(["bash", "-c", script], start_new_session=True) 2003 os._exit(0) 2004 2005 asyncio.create_task(_do_restart()) 2006 return web.Response(text="Restarting...", content_type="text/plain") 2007 2008 2009 # ============================================================ 2010 # Claude Code WebSocket — /do command handler 2011 # ============================================================ 2012 2013 async def ws_claude(request: web.Request) -> web.WebSocketResponse: 2014 """WebSocket endpoint for Claude Code sub-agent. 2015 2016 Expects: { 2017 type: "claude_code", 2018 prompt: "what to do", 2019 context: [{title, id, path}, ...], 2020 conversation: [{role, content}, ...] 2021 } 2022 2023 Spawns Claude Code in headless mode, streams status back. 2024 Supports session continuity via resume. 
2025 """ 2026 ws = web.WebSocketResponse() 2027 await ws.prepare(request) 2028 2029 active_session_id: str | None = None 2030 2031 async for msg in ws: 2032 if msg.type != aiohttp.WSMsgType.TEXT: 2033 continue 2034 try: 2035 data = json.loads(msg.data) 2036 except json.JSONDecodeError: 2037 continue 2038 2039 if data.get("type") != "claude_code": 2040 continue 2041 2042 prompt = data.get("prompt", "") 2043 context_nodes = data.get("context", []) 2044 conversation = data.get("conversation", []) 2045 2046 if not prompt: 2047 await ws.send_json({"type": "claude_status", "message": "No prompt provided."}) 2048 continue 2049 2050 # Build context for Claude Code 2051 context_parts = [] 2052 for node in context_nodes: 2053 path = node.get("path", "") 2054 title = node.get("title", "") 2055 readme_path = Path(path) / "README.md" if path and Path(path).is_dir() else None 2056 if readme_path and readme_path.exists(): 2057 content = readme_path.read_text(encoding="utf-8") 2058 context_parts.append(f"## DreamNode: {title}\nPath: {path}\n\n{content}") 2059 2060 # Include recent conversation for context 2061 conv_text = "" 2062 if conversation: 2063 recent = conversation[-10:] # last 10 messages 2064 conv_text = "\n".join(f"[{m['role']}]: {m['content']}" for m in recent) 2065 2066 full_prompt = prompt 2067 if context_parts: 2068 full_prompt = ( 2069 "Context — these DreamNodes are relevant:\n\n" 2070 + "\n\n---\n\n".join(context_parts) 2071 + "\n\n---\n\nRecent conversation:\n" + conv_text 2072 + "\n\n---\n\nTask: " + prompt 2073 ) 2074 2075 await ws.send_json({"type": "claude_status", "message": f"Spawning Claude Code..."}) 2076 2077 try: 2078 result = await run_claude_code( 2079 prompt=full_prompt, 2080 session_id=active_session_id, 2081 resume=bool(active_session_id), 2082 model="sonnet", 2083 max_budget=1.00, 2084 cwd=str(AURYN_DIR), 2085 allowed_tools="Bash Read Edit Write Grep Glob", 2086 ) 2087 2088 active_session_id = result.get("session_id", active_session_id) 2089 
response_text = result.get("result", "") 2090 cost = result.get("total_cost_usd", 0) 2091 is_error = result.get("is_error", False) 2092 2093 await ws.send_json({ 2094 "type": "claude_result", 2095 "status": "error" if is_error else "done", 2096 "message": response_text, 2097 "session_id": active_session_id, 2098 "cost": cost, 2099 }) 2100 2101 except Exception as e: 2102 await ws.send_json({ 2103 "type": "claude_result", 2104 "status": "error", 2105 "message": str(e), 2106 }) 2107 2108 return ws 2109 2110 2111 def create_app( 2112 host: str, port: int, model: str, ollama_url: str, 2113 claude_api_key: str = "", models: list[str] | None = None, 2114 ) -> web.Application: 2115 index_path = AURYN_DIR / "index.html" 2116 raw_html = index_path.read_text() 2117 injected_html = build_injected_index(raw_html, models=models) 2118 2119 app = web.Application() 2120 app["injected_html"] = injected_html 2121 app["default_model"] = model 2122 app["ollama_url"] = ollama_url 2123 app["claude_api_key"] = claude_api_key 2124 2125 app.router.add_get("/ws", ws_inference) 2126 app.router.add_get("/ws/transcribe", ws_transcribe) 2127 app.router.add_get("/ws/garden", ws_garden) 2128 app.router.add_get("/ws/claude", ws_claude) 2129 app.router.add_post("/upload/audio", handle_audio_upload) 2130 app.router.add_post("/reload", handle_reload) 2131 app.router.add_get("/install-cert", handle_install_cert) 2132 app.router.add_get("/", handle_index) 2133 app.router.add_get("/{path:.*}", handle_static) 2134 2135 return app 2136 2137 2138 async def serve(args: argparse.Namespace) -> None: 2139 ollama_ok = await check_ollama(args.ollama_url) 2140 ollama_status = "\u2713" if ollama_ok else "\u2717 (not reachable)" 2141 2142 local_ip = get_local_ip() 2143 tailscale_ip = ensure_tailscale() 2144 ssl_ctx = get_or_create_ssl_context(local_ip, tailscale_ip) 2145 protocol = "https" if ssl_ctx else "http" 2146 ws_protocol = "wss" if ssl_ctx else "ws" 2147 2148 print(f"\nAURYN serving at 
{protocol}://{args.host}:{args.port}") 2149 print(f" Local: {protocol}://localhost:{args.port}") 2150 print(f" LAN: {protocol}://{local_ip}:{args.port}") 2151 if tailscale_ip: 2152 print(f" Tailscale: {protocol}://{tailscale_ip}:{args.port}") 2153 print(f" WebSocket: {ws_protocol}://{args.host}:{args.port}/ws") 2154 print(f" Ollama: {args.ollama_url} {ollama_status}") 2155 if ssl_ctx: 2156 print(f" SSL: Self-signed certificate") 2157 cert_url = f"{protocol}://{tailscale_ip or local_ip}:{args.port}/install-cert" 2158 print(f" Install: Open {cert_url} on iPhone") 2159 print(f" Then: Settings > General > VPN & Device Management > Install") 2160 print(f" Then: Settings > General > About > Certificate Trust Settings > Enable") 2161 claude_api_key = load_interbrain_api_key() 2162 2163 ollama_models = get_ollama_models(args.ollama_url) 2164 claude_models = [] 2165 if claude_api_key: 2166 claude_models = ["claude-sonnet-4-6", "claude-haiku-4-5-20251001"] 2167 print(f" Claude: {len(claude_models)} models available") 2168 else: 2169 print(" Claude: No API key found") 2170 all_models = [] 2171 if args.model not in ollama_models and args.model not in claude_models: 2172 all_models.append(args.model) 2173 all_models.extend(ollama_models) 2174 all_models.extend(claude_models) 2175 seen = set() 2176 models = [] 2177 for m in all_models: 2178 if m not in seen: 2179 seen.add(m) 2180 models.append(m) 2181 2182 print(f" Recordings: {RECORDINGS_DIR}/") 2183 print(f" Transcripts: {TRANSCRIPTS_DIR}/") 2184 print() 2185 2186 app = create_app( 2187 args.host, args.port, args.model, args.ollama_url, 2188 claude_api_key=claude_api_key, models=models, 2189 ) 2190 runner = web.AppRunner(app) 2191 await runner.setup() 2192 site = web.TCPSite(runner, args.host, args.port, ssl_context=ssl_ctx) 2193 await site.start() 2194 2195 try: 2196 await asyncio.Event().wait() 2197 except (KeyboardInterrupt, asyncio.CancelledError): 2198 pass 2199 finally: 2200 await runner.cleanup() 2201 2202 2203 async 
def _run_garden_cli(args: argparse.Namespace) -> None: 2204 """CLI entry point for knowledge gardening.""" 2205 import sys 2206 2207 # Read conversation text 2208 if args.input: 2209 text = Path(args.input).read_text() if Path(args.input).exists() else args.input 2210 else: 2211 text = sys.stdin.read() 2212 2213 if not text.strip(): 2214 print("No input provided. Pipe conversation text or use --input.") 2215 return 2216 2217 # Parse context nodes 2218 context_nodes = [] 2219 for spec in (args.context or []): 2220 if ":" in spec: 2221 title, path = spec.split(":", 1) 2222 context_nodes.append({"title": title, "id": "", "path": path}) 2223 else: 2224 context_nodes.append({"title": spec, "id": "", "path": str(VAULT_DIR / spec)}) 2225 2226 # Build context block 2227 context_block = "" 2228 for node in context_nodes: 2229 path = node["path"] 2230 title = node["title"] 2231 readme_path = Path(path) / "README.md" if Path(path).is_dir() else Path(path) 2232 if readme_path.exists(): 2233 content = readme_path.read_text(encoding="utf-8") 2234 context_block += f"\n\n---\n### DreamNode: {title}\n- Path: {path}\n- README path: {readme_path}\n\n{content}" 2235 2236 messages = [ 2237 {"role": "system", "content": GARDEN_SYSTEM_PROMPT}, 2238 {"role": "user", "content": f"""DreamNode context:\n{context_block}\n\n---\n\nConversation:\n{text}\n\nProduce the JSON array of edits."""}, 2239 ] 2240 2241 print("[Garden] Sending to AI bridge...") 2242 try: 2243 response = await _ai_bridge_inference(messages) 2244 cleaned = response.strip() 2245 if cleaned.startswith("```"): 2246 cleaned = re.sub(r"^```\w*\n?", "", cleaned) 2247 cleaned = re.sub(r"\n?```$", "", cleaned) 2248 cleaned = cleaned.strip() 2249 2250 edits = json.loads(cleaned) 2251 if not edits: 2252 print("[Garden] No edits needed — READMEs are up to date.") 2253 return 2254 2255 print(f"[Garden] Applying {len(edits)} edit group(s)...") 2256 results = _apply_garden_edits(edits) 2257 for r in results: 2258 status = 
r["status"].upper() 2259 print(f" [{status}] {r['dreamnode']}: {r['reason']}") 2260 2261 except Exception as e: 2262 print(f"[Garden] Error: {e}") 2263 2264 2265 async def _run_claude_cli(args: argparse.Namespace) -> None: 2266 """CLI entry point for Claude Code sub-agent.""" 2267 result = await run_claude_code( 2268 prompt=args.prompt, 2269 session_id=args.session_id, 2270 resume=args.resume, 2271 model=args.model, 2272 max_budget=args.budget, 2273 cwd=args.cwd, 2274 ) 2275 2276 if result.get("is_error"): 2277 print(f"Error: {result.get('result', 'Unknown error')}") 2278 else: 2279 print(result.get("result", json.dumps(result, indent=2))) 2280 2281 if result.get("session_id"): 2282 print(f"\nSession ID: {result['session_id']}") 2283 if result.get("total_cost_usd"): 2284 print(f"Cost: ${result['total_cost_usd']:.4f}") 2285 2286 2287 def main() -> None: 2288 parser = argparse.ArgumentParser(description="AURYN self-serving server + context provider") 2289 sub = parser.add_subparsers(dest="command") 2290 2291 # --- serve --- 2292 serve_parser = sub.add_parser("serve", help="Start the AURYN server") 2293 serve_parser.add_argument("--port", type=int, default=8080) 2294 serve_parser.add_argument("--host", default="0.0.0.0") 2295 serve_parser.add_argument("--model", default="qwen3:32b") 2296 serve_parser.add_argument("--ollama-url", default="http://localhost:11434") 2297 serve_parser.add_argument("--no-ssl", action="store_true", help="Disable HTTPS") 2298 serve_parser.add_argument("--transcription-model", default="base", 2299 choices=["tiny", "base", "small", "medium", "large", "turbo"], 2300 help="Whisper model size (tiny=fastest, turbo=best speed/quality)") 2301 2302 # --- context --- 2303 ctx_parser = sub.add_parser("context", help="Fast context provider (Tier 1 + Tier 2)") 2304 ctx_parser.add_argument("input", help="Text string or path to file") 2305 ctx_parser.add_argument("--top", type=int, default=15, help="Max results (default: 15)") 2306 
ctx_parser.add_argument("--json", dest="json_output", action="store_true", 2307 help="Output as JSON") 2308 ctx_parser.add_argument("--rebuild", action="store_true", 2309 help="Force rebuild index before searching") 2310 2311 # --- index --- 2312 idx_parser = sub.add_parser("index", help="Build/rebuild the context index") 2313 idx_parser.add_argument("--force", action="store_true", help="Force rebuild") 2314 2315 # --- garden --- 2316 garden_parser = sub.add_parser("garden", help="Knowledge garden: route insights to DreamNode READMEs") 2317 garden_parser.add_argument("--context", nargs="+", metavar="TITLE:PATH", 2318 help="DreamNode context as title:path pairs") 2319 garden_parser.add_argument("--input", help="Conversation text or file path (default: stdin)") 2320 2321 # --- claude --- 2322 claude_parser = sub.add_parser("claude", help="Run Claude Code as sub-agent") 2323 claude_parser.add_argument("prompt", help="Prompt for Claude Code") 2324 claude_parser.add_argument("--session-id", help="Session ID for continuity") 2325 claude_parser.add_argument("--resume", action="store_true", help="Resume previous session") 2326 claude_parser.add_argument("--model", default="sonnet", help="Model (default: sonnet)") 2327 claude_parser.add_argument("--budget", type=float, default=0.50, help="Max budget USD") 2328 claude_parser.add_argument("--cwd", help="Working directory") 2329 2330 args = parser.parse_args() 2331 2332 if args.command == "serve": 2333 if getattr(args, "no_ssl", False): 2334 global get_or_create_ssl_context 2335 get_or_create_ssl_context = lambda *a, **kw: None 2336 global _whisper_model_size, _whisper_repo 2337 _whisper_model_size = getattr(args, "transcription_model", "base") 2338 _whisper_repo = _MLX_WHISPER_MODELS.get(_whisper_model_size, _MLX_WHISPER_MODELS["base"]) 2339 asyncio.run(serve(args)) 2340 elif args.command == "context": 2341 run_context(args) 2342 elif args.command == "index": 2343 run_index(args) 2344 elif args.command == "garden": 2345 
asyncio.run(_run_garden_cli(args)) 2346 elif args.command == "claude": 2347 asyncio.run(_run_claude_cli(args)) 2348 else: 2349 parser.print_help() 2350 2351 2352 if __name__ == "__main__": 2353 main()