voice_sessions_backhaul.py
#!/usr/bin/env python3
"""
Voice Sessions Backhaul - Extract Torah Core from Claude.ai Voice Sessions

Processes the claude-sessions-*.md export files that contain voice transcriptions
from Claude.ai web conversations. These often contain foundational philosophical
and theological content that should sit at the CORE of the knowledge graph.

Usage:
    python3 scripts/voice_sessions_backhaul.py                           # Process all
    python3 scripts/voice_sessions_backhaul.py --file path/to/export.md  # Specific file
    python3 scripts/voice_sessions_backhaul.py --date 2026-01-09         # Specific date
    python3 scripts/voice_sessions_backhaul.py --foundational            # Show foundational only
"""

import sys
import re
import json
import hashlib
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass, field, asdict
from collections import defaultdict

# Add parent to path for imports
REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(REPO_ROOT))

# Try to import graph sink (optional dependency; the script degrades to
# parse/report-only mode when it is absent).
try:
    from core.graph.sink import get_sink
    HAS_GRAPH_SINK = True
except ImportError:
    HAS_GRAPH_SINK = False
    print("[voice-backhaul] Warning: Graph sink not available")

# Paths
SOVEREIGN_HOME = Path.home() / ".sovereign"
OBSIDIAN_PATH = Path.home() / "Library/Mobile Documents/iCloud~md~obsidian/Documents"


@dataclass
class VoiceInsight:
    """An insight extracted from voice sessions."""
    id: str
    content: str
    timestamp: str          # HH:MM:SS within the session day
    date: str               # YYYY-MM-DD
    insight_type: str       # foundational, philosophical, operational
    score: float            # 0.0..1.0 foundational-importance score
    themes: List[str] = field(default_factory=list)
    axioms: List[str] = field(default_factory=list)
    is_core: bool = False   # True if this is Torah core


class VoiceSessionsParser:
    """Parse Claude voice sessions export markdown.

    Accumulates insights across multiple ``parse_file`` calls in
    ``self.insights`` and groups dict snapshots per date in
    ``self.daily_sessions``.
    """

    # Foundational markers - theological/philosophical.  Each substring hit
    # adds 0.1 to the score and records the marker as a theme.
    TORAH_MARKERS = [
        # Theological
        'satan', 'god', 'divine', 'sacred', 'jesus', 'christ',
        'soul', 'spirit', 'angel', 'heaven', 'hell', 'sin',
        'good', 'evil', 'truth', 'beauty', 'meaning', 'purpose',
        'creation', 'genesis', 'covenant', 'redemption',
        # Philosophical
        'consciousness', 'free will', 'being', 'existence', 'essence',
        'phenomenology', 'metaphysics', 'ontology', 'epistemology',
        'ouroboros', 'recursive', 'fractal', 'infinite',
        # Sovereign OS core
        'markov blanket', 'boundary', 'integration', 'isolation',
        'wave', 'collapse', 'superposition', 'resonance', 'frequency',
        'young yin', 'yang', 'dyad', 'pole', 'tension',
        'shadow', 'shadow work', 'primitive', 'calcified', 'sclerosis',
        'cain', 'abel', 'golden cup', 'carpenter', 'cruft', 'ornament',
        'phoenix', 'resurrection', 'death', 'life', 'alive',
        'exoskeleton', 'iron man', 'suit', 'safety net',
    ]

    # High-value phrases that indicate Torah insights (0.2 each).
    CORE_PHRASES = [
        'the insight is', 'the key is', 'this is why',
        'fundamental truth', 'core principle', 'bedrock',
        'we might have answered', 'i think we solved',
        'this is the', 'that which is', 'consciousness',
        'free will', 'made in his image', 'turtles all the way',
        'the shape that is you', 'circumambulate',
    ]

    # Axiom detection patterns: any one substring hit tags the axiom (0.1 each).
    AXIOM_PATTERNS = {
        'A0': ['boundary', 'markov', 'blanket', 'membrane', 'what crosses', 'in and out'],
        'A1': ['integration', 'connection', 'isolation', 'binding', 'relation', 'toward'],
        'A2': ['life', 'death', 'alive', 'primitive', 'calcified', 'ornament', 'cruft', 'recognize'],
        'A3': ['pole', 'dyad', 'tension', 'navigate', 'shadow', 'movement', 'dynamic'],
        'A4': ['ruin', 'survival', 'catastrophic', 'ergodic', 'asymmetry', 'prevent'],
    }

    def __init__(self):
        self.insights: List[VoiceInsight] = []
        self.daily_sessions: Dict[str, List[Dict]] = defaultdict(list)

    def parse_file(self, filepath: Path) -> List[VoiceInsight]:
        """Parse a voice sessions export file.

        Splits the export into per-date sections (headers like
        ``- 2026-01-09 (3 sessions, 1234 words)``), scores each timestamped
        utterance, and appends qualifying insights to ``self.insights``.
        Returns the accumulated insight list (across all parsed files).
        """
        # Exports are UTF-8 markdown; be explicit rather than relying on
        # the platform default encoding.
        content = filepath.read_text(encoding="utf-8")

        date_pattern = re.compile(r'- (\d{4}-\d{2}-\d{2}) \((\d+) sessions, (\d+) words\)')

        current_date = None
        current_section: List[str] = []

        for line in content.split('\n'):
            date_match = date_pattern.search(line)
            if date_match:
                # A new date header closes out the previous section.
                if current_date and current_section:
                    self._process_date_section(current_date, '\n'.join(current_section))
                current_date = date_match.group(1)
                current_section = []
            elif current_date:
                current_section.append(line)

        # Flush the trailing section (no following header to close it).
        if current_date and current_section:
            self._process_date_section(current_date, '\n'.join(current_section))

        return self.insights

    def _process_date_section(self, date: str, content: str):
        """Extract, score, and record each timestamped utterance in one date section."""
        # Utterances look like "**HH:MM:SS**: text..." and run until the
        # next blank-line-separated timestamp or end of section.
        session_pattern = re.compile(
            r'\*\*(\d{2}:\d{2}:\d{2})\*\*: (.+?)(?=\n\n\*\*\d{2}:\d{2}:\d{2}\*\*:|$)',
            re.DOTALL,
        )

        for match in session_pattern.finditer(content):
            timestamp = match.group(1)
            text = match.group(2).strip()

            if len(text) < 20:  # Skip very short utterances
                continue

            # Score for foundational content
            score, themes, axioms = self._score_content(text)

            if score > 0:  # Only keep scored content
                insight_type = self._classify_type(score, themes)
                is_core = score >= 0.5 and insight_type == 'foundational'

                insight = VoiceInsight(
                    # Stable content-derived id (md5 is used as a fingerprint
                    # here, not for security).
                    id=f"VOICE-{hashlib.md5(f'{date}-{timestamp}-{text[:50]}'.encode()).hexdigest()[:12]}",
                    content=text[:500],  # Truncate long content
                    timestamp=timestamp,
                    date=date,
                    insight_type=insight_type,
                    score=score,
                    themes=themes,
                    axioms=axioms,
                    is_core=is_core,
                )
                self.insights.append(insight)
                self.daily_sessions[date].append(asdict(insight))

    def _score_content(self, text: str) -> Tuple[float, List[str], List[str]]:
        """Score content for foundational importance.

        Returns ``(score, themes, axioms)`` where score is capped at 1.0,
        themes are the first 5 matched markers, and axioms are the matched
        axiom ids (A0..A4).
        """
        text_lower = text.lower()
        score = 0.0
        themes: List[str] = []
        axioms: List[str] = []

        # Check Torah markers
        for marker in self.TORAH_MARKERS:
            if marker in text_lower:
                score += 0.1
                if marker not in themes:
                    themes.append(marker)

        # Check core phrases (higher weight)
        for phrase in self.CORE_PHRASES:
            if phrase in text_lower:
                score += 0.2

        # Check axiom patterns
        for axiom, patterns in self.AXIOM_PATTERNS.items():
            if any(p in text_lower for p in patterns):
                if axiom not in axioms:
                    axioms.append(axiom)
                    score += 0.1

        # Bonus for multiple themes (compound insight)
        if len(themes) >= 3:
            score += 0.15
        if len(axioms) >= 2:
            score += 0.15

        # Cap at 1.0
        score = min(score, 1.0)

        return (score, themes[:5], axioms)

    def _classify_type(self, score: float, themes: List[str]) -> str:
        """Classify insight type from matched themes, falling back to score."""
        theological = ['satan', 'god', 'jesus', 'soul', 'angel', 'heaven', 'evil']
        philosophical = ['consciousness', 'free will', 'being', 'phenomenology', 'ouroboros']

        if any(t in themes for t in theological):
            return 'foundational'
        elif any(t in themes for t in philosophical):
            return 'foundational'
        elif score >= 0.4:
            return 'philosophical'
        else:
            return 'operational'

    def get_core_insights(self) -> List[VoiceInsight]:
        """Get Torah core insights only."""
        return [i for i in self.insights if i.is_core]

    def get_by_date(self, date: str) -> List[VoiceInsight]:
        """Get insights for a specific date."""
        return [i for i in self.insights if i.date == date]

    def ingest_to_graph(self):
        """Ingest insights (score >= 0.3) to the graph sink.

        Returns the sink's flush result dict, or None when the sink is
        unavailable.
        """
        if not HAS_GRAPH_SINK:
            print("[voice-backhaul] Graph sink not available - skipping ingest")
            return

        sink = get_sink()
        core_count = 0

        for insight in self.insights:
            # Only ingest significant insights
            if insight.score < 0.3:
                continue

            node_type = "torah_core" if insight.is_core else "voice_insight"

            sink.ingest(
                content=insight.content,
                node_type=node_type,
                source=f"voice:{insight.date}",
                metadata={
                    "voice_id": insight.id,
                    "timestamp": insight.timestamp,
                    "date": insight.date,
                    "score": insight.score,
                    "themes": insight.themes,
                    "axioms": insight.axioms,
                    "is_core": insight.is_core,
                },
            )

            if insight.is_core:
                core_count += 1

        result = sink.flush()
        print(f"[voice-backhaul] Ingested {result.get('nodes_added', 0)} nodes ({core_count} core)")
        return result


def find_voice_exports() -> List[Path]:
    """Find voice session export files, deduplicated by filename.

    Search order matters: earlier paths win on name collisions.
    """
    exports: List[Path] = []
    seen_names = set()

    # Check known locations (prefer newer vault)
    search_paths = [
        OBSIDIAN_PATH / "Sovereign_Estate 2" / "research" / "cognitive-capture" / "exports",
        OBSIDIAN_PATH / "Sovereign_Estate" / "research" / "cognitive-capture" / "exports",
        REPO_ROOT / "sessions" / "exports",
    ]

    for search_path in search_paths:
        if search_path.exists():
            for f in search_path.glob("claude-sessions*.md"):
                # Deduplicate by filename
                if f.name not in seen_names:
                    exports.append(f)
                    seen_names.add(f.name)

    return exports


def main():
    specific_file = None
    specific_date = None
    show_foundational = "--foundational" in sys.argv

    # Parse arguments (minimal hand-rolled parsing; flags documented in
    # the module docstring)
    for i, arg in enumerate(sys.argv):
        if arg == "--file" and i + 1 < len(sys.argv):
            specific_file = Path(sys.argv[i + 1])
        elif arg == "--date" and i + 1 < len(sys.argv):
            specific_date = sys.argv[i + 1]

    print("=" * 60)
    print("VOICE SESSIONS BACKHAUL")
    print("Extracting Torah Core from Claude.ai Voice Sessions")
    print("=" * 60)
    print()

    # Find export files
    if specific_file:
        # Fail with a clear message instead of a traceback on a bad path.
        if not specific_file.exists():
            print(f"File not found: {specific_file}")
            return
        exports = [specific_file]
    else:
        exports = find_voice_exports()

    if not exports:
        print("No voice session exports found.")
        print("Expected location: Sovereign_Estate/research/cognitive-capture/exports/")
        return

    print(f"Found {len(exports)} export file(s)")

    parser = VoiceSessionsParser()

    for export_path in exports:
        print(f"\nProcessing: {export_path.name}")
        parser.parse_file(export_path)

    print(f"\nExtracted {len(parser.insights)} insights")

    # Filter by date if specified
    if specific_date:
        insights = parser.get_by_date(specific_date)
        print(f"  {len(insights)} from {specific_date}")
    else:
        insights = parser.insights

    # Show foundational insights
    core = parser.get_core_insights()
    print(f"\nTorah Core insights: {len(core)}")

    if show_foundational or specific_date:
        print("\n" + "-" * 60)
        print("TORAH CORE (Foundational Insights)")
        print("-" * 60)

        display = core if show_foundational else [i for i in insights if i.is_core]
        for i, insight in enumerate(display[:20]):
            print(f"\n{i+1}. [{insight.date} {insight.timestamp}] Score: {insight.score:.2f}")
            print(f"   Themes: {', '.join(insight.themes[:5])}")
            print(f"   Axioms: {', '.join(insight.axioms)}")
            print(f"   {insight.content[:150]}...")

    # Summary by date
    print("\n" + "-" * 60)
    print("INSIGHTS BY DATE")
    print("-" * 60)
    for date in sorted(parser.daily_sessions.keys()):
        sessions = parser.daily_sessions[date]
        core_for_date = len([s for s in sessions if s['is_core']])
        print(f"  {date}: {len(sessions)} insights ({core_for_date} core)")

    # Ingest to graph
    if HAS_GRAPH_SINK and not show_foundational:
        print("\n" + "-" * 60)
        print("INGESTING TO GRAPH")
        print("-" * 60)
        parser.ingest_to_graph()

    print("\n" + "=" * 60)
    print("BACKHAUL COMPLETE")
    print("=" * 60)


if __name__ == "__main__":
    main()