# scripts/voice_sessions_backhaul.py
  1  #!/usr/bin/env python3
  2  """
  3  Voice Sessions Backhaul - Extract Torah Core from Claude.ai Voice Sessions
  4  
  5  Processes the claude-sessions-*.md export files that contain voice transcriptions
  6  from Claude.ai web conversations. These often contain foundational philosophical
  7  and theological content that should sit at the CORE of the knowledge graph.
  8  
  9  Usage:
 10      python3 scripts/voice_sessions_backhaul.py                              # Process all
 11      python3 scripts/voice_sessions_backhaul.py --file path/to/export.md     # Specific file
 12      python3 scripts/voice_sessions_backhaul.py --date 2026-01-09            # Specific date
 13      python3 scripts/voice_sessions_backhaul.py --foundational               # Show foundational only
 14  """
 15  
 16  import sys
 17  import re
 18  import json
 19  import hashlib
 20  from pathlib import Path
 21  from datetime import datetime
 22  from typing import List, Dict, Tuple, Optional
 23  from dataclasses import dataclass, field, asdict
 24  from collections import defaultdict
 25  
 26  # Add parent to path for imports
 27  REPO_ROOT = Path(__file__).parent.parent
 28  sys.path.insert(0, str(REPO_ROOT))
 29  
 30  # Try to import graph sink
 31  try:
 32      from core.graph.sink import get_sink
 33      HAS_GRAPH_SINK = True
 34  except ImportError:
 35      HAS_GRAPH_SINK = False
 36      print("[voice-backhaul] Warning: Graph sink not available")
 37  
 38  # Paths
 39  SOVEREIGN_HOME = Path.home() / ".sovereign"
 40  OBSIDIAN_PATH = Path.home() / "Library/Mobile Documents/iCloud~md~obsidian/Documents"
 41  
 42  
@dataclass
class VoiceInsight:
    """An insight extracted from voice sessions."""
    id: str  # Deterministic "VOICE-<md5 prefix>" identifier built from date/time/content
    content: str  # Utterance text (truncated to 500 chars by the parser)
    timestamp: str  # HH:MM:SS of the utterance within its session
    date: str  # YYYY-MM-DD of the session section the utterance came from
    insight_type: str  # foundational, philosophical, operational
    score: float  # Foundational-importance score, capped at 1.0
    themes: List[str] = field(default_factory=list)  # Matched Torah markers (at most 5)
    axioms: List[str] = field(default_factory=list)  # Matched axiom ids (keys of AXIOM_PATTERNS)
    is_core: bool = False  # True if this is Torah core
 55  
 56  
 57  class VoiceSessionsParser:
 58      """Parse Claude voice sessions export markdown."""
 59  
 60      # Foundational markers - theological/philosophical
 61      TORAH_MARKERS = [
 62          # Theological
 63          'satan', 'god', 'divine', 'sacred', 'jesus', 'christ',
 64          'soul', 'spirit', 'angel', 'heaven', 'hell', 'sin',
 65          'good', 'evil', 'truth', 'beauty', 'meaning', 'purpose',
 66          'creation', 'genesis', 'covenant', 'redemption',
 67          # Philosophical
 68          'consciousness', 'free will', 'being', 'existence', 'essence',
 69          'phenomenology', 'metaphysics', 'ontology', 'epistemology',
 70          'ouroboros', 'recursive', 'fractal', 'infinite',
 71          # Sovereign OS core
 72          'markov blanket', 'boundary', 'integration', 'isolation',
 73          'wave', 'collapse', 'superposition', 'resonance', 'frequency',
 74          'young yin', 'yang', 'dyad', 'pole', 'tension',
 75          'shadow', 'shadow work', 'primitive', 'calcified', 'sclerosis',
 76          'cain', 'abel', 'golden cup', 'carpenter', 'cruft', 'ornament',
 77          'phoenix', 'resurrection', 'death', 'life', 'alive',
 78          'exoskeleton', 'iron man', 'suit', 'safety net',
 79      ]
 80  
 81      # High-value phrases that indicate Torah insights
 82      CORE_PHRASES = [
 83          'the insight is', 'the key is', 'this is why',
 84          'fundamental truth', 'core principle', 'bedrock',
 85          'we might have answered', 'i think we solved',
 86          'this is the', 'that which is', 'consciousness',
 87          'free will', 'made in his image', 'turtles all the way',
 88          'the shape that is you', 'circumambulate',
 89      ]
 90  
 91      # Axiom detection patterns
 92      AXIOM_PATTERNS = {
 93          'A0': ['boundary', 'markov', 'blanket', 'membrane', 'what crosses', 'in and out'],
 94          'A1': ['integration', 'connection', 'isolation', 'binding', 'relation', 'toward'],
 95          'A2': ['life', 'death', 'alive', 'primitive', 'calcified', 'ornament', 'cruft', 'recognize'],
 96          'A3': ['pole', 'dyad', 'tension', 'navigate', 'shadow', 'movement', 'dynamic'],
 97          'A4': ['ruin', 'survival', 'catastrophic', 'ergodic', 'asymmetry', 'prevent'],
 98      }
 99  
100      def __init__(self):
101          self.insights: List[VoiceInsight] = []
102          self.daily_sessions: Dict[str, List[Dict]] = defaultdict(list)
103  
104      def parse_file(self, filepath: Path) -> List[VoiceInsight]:
105          """Parse a voice sessions export file."""
106          content = filepath.read_text()
107  
108          # Parse by date sections
109          date_pattern = re.compile(r'- (\d{4}-\d{2}-\d{2}) \((\d+) sessions, (\d+) words\)')
110          session_pattern = re.compile(r'\*\*(\d{2}:\d{2}:\d{2})\*\*: (.+?)(?=\*\*\d{2}:\d{2}:\d{2}\*\*:|$)', re.DOTALL)
111  
112          lines = content.split('\n')
113          current_date = None
114          current_section = []
115  
116          for i, line in enumerate(lines):
117              date_match = date_pattern.search(line)
118              if date_match:
119                  # Process previous section
120                  if current_date and current_section:
121                      self._process_date_section(current_date, '\n'.join(current_section))
122  
123                  current_date = date_match.group(1)
124                  current_section = []
125              elif current_date:
126                  current_section.append(line)
127  
128          # Process last section
129          if current_date and current_section:
130              self._process_date_section(current_date, '\n'.join(current_section))
131  
132          return self.insights
133  
134      def _process_date_section(self, date: str, content: str):
135          """Process a single date's sessions."""
136          session_pattern = re.compile(r'\*\*(\d{2}:\d{2}:\d{2})\*\*: (.+?)(?=\n\n\*\*\d{2}:\d{2}:\d{2}\*\*:|$)', re.DOTALL)
137  
138          for match in session_pattern.finditer(content):
139              timestamp = match.group(1)
140              text = match.group(2).strip()
141  
142              if len(text) < 20:  # Skip very short utterances
143                  continue
144  
145              # Score for foundational content
146              score, themes, axioms = self._score_content(text)
147  
148              if score > 0:  # Only keep scored content
149                  insight_type = self._classify_type(score, themes)
150                  is_core = score >= 0.5 and insight_type == 'foundational'
151  
152                  insight = VoiceInsight(
153                      id=f"VOICE-{hashlib.md5(f'{date}-{timestamp}-{text[:50]}'.encode()).hexdigest()[:12]}",
154                      content=text[:500],  # Truncate long content
155                      timestamp=timestamp,
156                      date=date,
157                      insight_type=insight_type,
158                      score=score,
159                      themes=themes,
160                      axioms=axioms,
161                      is_core=is_core
162                  )
163                  self.insights.append(insight)
164                  self.daily_sessions[date].append(asdict(insight))
165  
166      def _score_content(self, text: str) -> Tuple[float, List[str], List[str]]:
167          """Score content for foundational importance."""
168          text_lower = text.lower()
169          score = 0.0
170          themes = []
171          axioms = []
172  
173          # Check Torah markers
174          for marker in self.TORAH_MARKERS:
175              if marker in text_lower:
176                  score += 0.1
177                  if marker not in themes:
178                      themes.append(marker)
179  
180          # Check core phrases (higher weight)
181          for phrase in self.CORE_PHRASES:
182              if phrase in text_lower:
183                  score += 0.2
184  
185          # Check axiom patterns
186          for axiom, patterns in self.AXIOM_PATTERNS.items():
187              if any(p in text_lower for p in patterns):
188                  if axiom not in axioms:
189                      axioms.append(axiom)
190                  score += 0.1
191  
192          # Bonus for multiple themes (compound insight)
193          if len(themes) >= 3:
194              score += 0.15
195          if len(axioms) >= 2:
196              score += 0.15
197  
198          # Cap at 1.0
199          score = min(score, 1.0)
200  
201          return (score, themes[:5], axioms)
202  
203      def _classify_type(self, score: float, themes: List[str]) -> str:
204          """Classify insight type."""
205          theological = ['satan', 'god', 'jesus', 'soul', 'angel', 'heaven', 'evil']
206          philosophical = ['consciousness', 'free will', 'being', 'phenomenology', 'ouroboros']
207  
208          if any(t in themes for t in theological):
209              return 'foundational'
210          elif any(t in themes for t in philosophical):
211              return 'foundational'
212          elif score >= 0.4:
213              return 'philosophical'
214          else:
215              return 'operational'
216  
217      def get_core_insights(self) -> List[VoiceInsight]:
218          """Get Torah core insights only."""
219          return [i for i in self.insights if i.is_core]
220  
221      def get_by_date(self, date: str) -> List[VoiceInsight]:
222          """Get insights for a specific date."""
223          return [i for i in self.insights if i.date == date]
224  
225      def ingest_to_graph(self):
226          """Ingest insights to graph sink."""
227          if not HAS_GRAPH_SINK:
228              print("[voice-backhaul] Graph sink not available - skipping ingest")
229              return
230  
231          sink = get_sink()
232          core_count = 0
233  
234          for insight in self.insights:
235              # Only ingest significant insights
236              if insight.score < 0.3:
237                  continue
238  
239              node_type = "torah_core" if insight.is_core else "voice_insight"
240  
241              sink.ingest(
242                  content=insight.content,
243                  node_type=node_type,
244                  source=f"voice:{insight.date}",
245                  metadata={
246                      "voice_id": insight.id,
247                      "timestamp": insight.timestamp,
248                      "date": insight.date,
249                      "score": insight.score,
250                      "themes": insight.themes,
251                      "axioms": insight.axioms,
252                      "is_core": insight.is_core,
253                  }
254              )
255  
256              if insight.is_core:
257                  core_count += 1
258  
259          result = sink.flush()
260          print(f"[voice-backhaul] Ingested {result.get('nodes_added', 0)} nodes ({core_count} core)")
261          return result
262  
263  
def find_voice_exports() -> List[Path]:
    """Locate claude-sessions export files across the known roots.

    Roots are searched in priority order (newer vault first); a file that
    appears under more than one root is kept only once, keyed by filename.
    """
    # Check known locations (prefer newer vault)
    candidate_roots = (
        OBSIDIAN_PATH / "Sovereign_Estate 2" / "research" / "cognitive-capture" / "exports",
        OBSIDIAN_PATH / "Sovereign_Estate" / "research" / "cognitive-capture" / "exports",
        REPO_ROOT / "sessions" / "exports",
    )

    found: List[Path] = []
    names_taken = set()

    for root in candidate_roots:
        if not root.exists():
            continue
        for candidate in root.glob("claude-sessions*.md"):
            # Deduplicate by filename across roots.
            if candidate.name in names_taken:
                continue
            names_taken.add(candidate.name)
            found.append(candidate)

    return found
285  
286  
def main():
    """CLI entry point.

    Flags (parsed permissively from sys.argv; unknown flags are ignored):
      --file PATH        process a single export file
      --date YYYY-MM-DD  restrict the displayed insights to one date
      --foundational     display Torah-core insights and skip graph ingest
    """
    specific_file = None
    specific_date = None
    show_foundational = "--foundational" in sys.argv

    # Parse arguments
    for i, arg in enumerate(sys.argv):
        if arg == "--file" and i + 1 < len(sys.argv):
            specific_file = Path(sys.argv[i + 1])
        elif arg == "--date" and i + 1 < len(sys.argv):
            specific_date = sys.argv[i + 1]

    print("=" * 60)
    print("VOICE SESSIONS BACKHAUL")
    print("Extracting Torah Core from Claude.ai Voice Sessions")
    print("=" * 60)
    print()

    # Find export files
    if specific_file:
        # Fail with a clear message instead of an unhandled read_text()
        # FileNotFoundError traceback later in parse_file().
        if not specific_file.exists():
            print(f"File not found: {specific_file}")
            return
        exports = [specific_file]
    else:
        exports = find_voice_exports()

    if not exports:
        print("No voice session exports found.")
        print("Expected location: Sovereign_Estate/research/cognitive-capture/exports/")
        return

    print(f"Found {len(exports)} export file(s)")

    parser = VoiceSessionsParser()

    for export_path in exports:
        print(f"\nProcessing: {export_path.name}")
        parser.parse_file(export_path)

    print(f"\nExtracted {len(parser.insights)} insights")

    # Filter by date if specified
    if specific_date:
        insights = parser.get_by_date(specific_date)
        print(f"  {len(insights)} from {specific_date}")
    else:
        insights = parser.insights

    # Show foundational insights
    core = parser.get_core_insights()
    print(f"\nTorah Core insights: {len(core)}")

    if show_foundational or specific_date:
        print("\n" + "-" * 60)
        print("TORAH CORE (Foundational Insights)")
        print("-" * 60)

        # --foundational shows all core insights; --date alone shows only
        # the core insights within the selected date. Cap display at 20.
        display = core if show_foundational else [i for i in insights if i.is_core]
        for i, insight in enumerate(display[:20]):
            print(f"\n{i+1}. [{insight.date} {insight.timestamp}] Score: {insight.score:.2f}")
            print(f"   Themes: {', '.join(insight.themes[:5])}")
            print(f"   Axioms: {', '.join(insight.axioms)}")
            print(f"   {insight.content[:150]}...")

    # Summary by date
    print("\n" + "-" * 60)
    print("INSIGHTS BY DATE")
    print("-" * 60)
    for date in sorted(parser.daily_sessions.keys()):
        sessions = parser.daily_sessions[date]
        core_for_date = len([s for s in sessions if s['is_core']])
        print(f"  {date}: {len(sessions)} insights ({core_for_date} core)")

    # Ingest to graph (skipped in --foundational preview mode)
    if HAS_GRAPH_SINK and not show_foundational:
        print("\n" + "-" * 60)
        print("INGESTING TO GRAPH")
        print("-" * 60)
        parser.ingest_to_graph()

    print("\n" + "=" * 60)
    print("BACKHAUL COMPLETE")
    print("=" * 60)
368  
369  
370  if __name__ == "__main__":
371      main()