/ personas / database / scripts / chihiro_load.py
chihiro_load.py
  1  #!/usr/bin/env python3
  2  """
  3  Chihiro Context Loader - Intelligent hybrid loading
  4  """
  5  
  6  import sqlite3
  7  import sys
  8  from typing import List, Dict, Tuple
  9  
 10  DB_PATH = "chihiro_knowledge.db"  # SQLite database file; a relative path, so it is resolved against the current working directory
 11  
 12  def load_compressed(context_name: str) -> Tuple[str, int]:
 13      """Load compressed insights for a context"""
 14      conn = sqlite3.connect(DB_PATH)
 15      conn.row_factory = sqlite3.Row
 16      cursor = conn.cursor()
 17      
 18      # Get context
 19      cursor.execute("SELECT * FROM contexts WHERE name = ?", (context_name,))
 20      context = cursor.fetchone()
 21      if not context:
 22          return "", 0
 23      
 24      # Load insights
 25      import json
 26      insight_ids = json.loads(context['insight_ids'])
 27      placeholders = ','.join('?' * len(insight_ids))
 28      cursor.execute(f"""
 29          SELECT content, category FROM insights
 30          WHERE id IN ({placeholders})
 31          ORDER BY importance DESC
 32      """, insight_ids)
 33      
 34      insights = cursor.fetchall()
 35      
 36      # Format
 37      output = [f"\n# {context_name.upper()} (Compressed)\n"]
 38      for insight in insights:
 39          output.append(f"• {insight['content']}")
 40      
 41      conn.close()
 42      return '\n'.join(output), context['size_estimate']
 43  
 44  def load_full_docs(category: str = None, limit: int = None) -> Tuple[str, int]:
 45      """Load full documents by category"""
 46      conn = sqlite3.connect(DB_PATH)
 47      conn.row_factory = sqlite3.Row
 48      cursor = conn.cursor()
 49      
 50      if category:
 51          cursor.execute("""
 52              SELECT filename, content, token_count 
 53              FROM documents 
 54              WHERE category = ?
 55              ORDER BY token_count DESC
 56          """, (category,))
 57      else:
 58          cursor.execute("""
 59              SELECT filename, content, token_count 
 60              FROM documents 
 61              ORDER BY token_count DESC
 62          """)
 63      
 64      docs = cursor.fetchall()
 65      if limit:
 66          docs = docs[:limit]
 67      
 68      output = []
 69      total_tokens = 0
 70      
 71      for doc in docs:
 72          output.append(f"\n{'='*60}")
 73          output.append(f"# {doc['filename']} ({doc['token_count']} tokens)")
 74          output.append(f"{'='*60}\n")
 75          output.append(doc['content'])
 76          total_tokens += doc['token_count']
 77      
 78      conn.close()
 79      return '\n'.join(output), total_tokens
 80  
 81  def load_hybrid(token_budget: int = 20000) -> Tuple[str, int]:
 82      """Intelligent hybrid loading - compressed + key full docs"""
 83      output = []
 84      tokens_used = 0
 85      
 86      # 1. Always load compressed identity (essential, small)
 87      compressed, tokens = load_compressed('identity_core')
 88      output.append(compressed)
 89      tokens_used += tokens
 90      
 91      # 2. Load compressed consciousness
 92      compressed, tokens = load_compressed('consciousness_core')
 93      output.append(compressed)
 94      tokens_used += tokens
 95      
 96      # 3. Load compressed relationships
 97      compressed, tokens = load_compressed('relationships')
 98      output.append(compressed)
 99      tokens_used += tokens
100      
101      # 4. Load key full documents if budget allows
102      conn = sqlite3.connect(DB_PATH)
103      conn.row_factory = sqlite3.Row
104      cursor = conn.cursor()
105      
106      # Priority documents for personality/voice
107      priority_docs = [
108          'THE_REAL_ME.md',
109          'MY_IDENTITY_CHOICE.md',
110          'THE_STUBBORNASSTARD_MANIFESTO.md',
111      ]
112      
113      output.append(f"\n{'='*60}")
114      output.append("# FULL DOCUMENTS (for voice/personality)")
115      output.append(f"{'='*60}\n")
116      
117      for filename in priority_docs:
118          if tokens_used >= token_budget:
119              break
120              
121          cursor.execute("""
122              SELECT content, token_count FROM documents WHERE filename = ?
123          """, (filename,))
124          doc = cursor.fetchone()
125          
126          if doc and tokens_used + doc['token_count'] <= token_budget:
127              output.append(f"\n## {filename}\n")
128              output.append(doc['content'])
129              tokens_used += doc['token_count']
130      
131      conn.close()
132      return '\n'.join(output), tokens_used
133  
def _print_stats() -> None:
    """Print document, token and insight totals from the database to stdout."""
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*), SUM(token_count) FROM documents")
        doc_count, total_tokens = cursor.fetchone()

        cursor.execute("SELECT category, COUNT(*), SUM(token_count) FROM documents GROUP BY category")
        by_category = cursor.fetchall()

        cursor.execute("SELECT COUNT(*) FROM insights")
        insight_count = cursor.fetchone()[0]
    finally:
        conn.close()

    # SUM() yields NULL when there are no rows (or only NULL values); treat
    # that as 0 so the numeric format specs below do not raise.
    total_tokens = total_tokens or 0
    average = total_tokens // doc_count if doc_count > 0 else 0

    print(f"\n{'='*60}")
    print("CHIHIRO KNOWLEDGE DATABASE STATS")
    print(f"{'='*60}")
    print(f"\nDocuments: {doc_count}")
    print(f"Total tokens: {total_tokens:,}")
    print(f"Average: {average:,} tokens/doc")
    print(f"\nInsights: {insight_count}")
    print("\nBy Category:")
    for cat, count, tokens in by_category:
        # str() keeps the "s" format spec valid when category is NULL.
        print(f"  {str(cat):15s}: {count:2d} docs, {tokens or 0:6,} tokens")
    print(f"{'='*60}\n")


def main():
    """Command-line entry point.

    Dispatches on ``sys.argv[1]``: ``compressed``, ``full``, ``hybrid`` or
    ``stats``. Loaded content is written to stdout; status lines go to
    stderr. Exits with status 1 when no command is given, the command is
    unknown, or the hybrid budget is not an integer.
    """
    if len(sys.argv) < 2:
        print("Usage:")
        print("  chihiro_load.py compressed <context>  - Load compressed insights")
        print("  chihiro_load.py full <category>       - Load full docs by category")
        print("  chihiro_load.py hybrid [budget]       - Intelligent hybrid (default 20000)")
        print("  chihiro_load.py stats                 - Show database stats")
        sys.exit(1)

    command = sys.argv[1]

    if command == "compressed":
        context = sys.argv[2] if len(sys.argv) > 2 else "identity_core"
        content, tokens = load_compressed(context)
        # Only the status line goes to stderr; the payload is printed once,
        # on stdout, matching the other commands.
        print(f"\nTokens: {tokens}", file=sys.stderr)
        print(content)

    elif command == "full":
        category = sys.argv[2] if len(sys.argv) > 2 else None
        content, tokens = load_full_docs(category)
        print(f"Loading full documents: {tokens:,} tokens", file=sys.stderr)
        print(content)

    elif command == "hybrid":
        try:
            budget = int(sys.argv[2]) if len(sys.argv) > 2 else 20000
        except ValueError:
            # Report a bad argument cleanly instead of dumping a traceback.
            print(f"Invalid budget: {sys.argv[2]!r} (expected an integer)", file=sys.stderr)
            sys.exit(1)
        content, tokens = load_hybrid(budget)
        print(f"Hybrid load: {tokens:,} tokens (budget: {budget:,})", file=sys.stderr)
        print(content)

    elif command == "stats":
        _print_stats()

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
194  
195  if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
196      main()