# chihiro_load.py
#!/usr/bin/env python3
"""
Chihiro Context Loader - Intelligent hybrid loading

Reads from the SQLite database at ``DB_PATH`` and renders text for three
loading modes:

* compressed - bullet list of the insights attached to a named context
* full       - complete documents, optionally filtered by category
* hybrid     - a fixed set of compressed contexts plus priority documents,
               the documents being limited by a token budget
"""

import json
import sqlite3
import sys
from contextlib import closing
from typing import List, Dict, Optional, Tuple

DB_PATH = "chihiro_knowledge.db"

# Horizontal rule used in the rendered output and the stats report.
_RULE = "=" * 60

# Compressed contexts that load_hybrid() always includes, in output order.
_CORE_CONTEXTS = (
    'identity_core',
    'consciousness_core',
    'relationships',
)

# Priority documents for personality/voice, tried in this order by
# load_hybrid() until the token budget is exhausted.
_PRIORITY_DOCS = (
    'THE_REAL_ME.md',
    'MY_IDENTITY_CHOICE.md',
    'THE_STUBBORNASSTARD_MANIFESTO.md',
)


def _connect() -> sqlite3.Connection:
    """Open the knowledge database with rows addressable by column name."""
    # DB_PATH is read at call time so callers may repoint it before loading.
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def load_compressed(context_name: str) -> Tuple[str, int]:
    """Load compressed insights for a context.

    Args:
        context_name: Value of ``contexts.name`` to look up.

    Returns:
        ``(text, tokens)`` where ``text`` is a header followed by one bullet
        per insight (highest ``importance`` first) and ``tokens`` is the
        context's stored ``size_estimate`` (0 when NULL). Returns ``("", 0)``
        when no context with that name exists.

    Raises:
        sqlite3.Error: If the database or its tables cannot be read.
        json.JSONDecodeError: If ``insight_ids`` holds malformed JSON.
    """
    # closing() guarantees the connection is released on every path,
    # including the early "not found" return and any query failure.
    with closing(_connect()) as conn:
        context = conn.execute(
            "SELECT * FROM contexts WHERE name = ?", (context_name,)
        ).fetchone()
        if context is None:
            return "", 0

        # insight_ids is stored as a JSON array; tolerate NULL / JSON null.
        insight_ids = json.loads(context['insight_ids'] or "[]") or []

        insights = []
        if insight_ids:
            # Only "?" placeholders are interpolated; the ids stay bound
            # parameters.
            placeholders = ','.join('?' * len(insight_ids))
            insights = conn.execute(f"""
                SELECT content, category FROM insights
                WHERE id IN ({placeholders})
                ORDER BY importance DESC
            """, insight_ids).fetchall()

        size_estimate = context['size_estimate'] or 0

    lines = [f"\n# {context_name.upper()} (Compressed)\n"]
    lines.extend(f"• {insight['content']}" for insight in insights)
    return '\n'.join(lines), size_estimate


def load_full_docs(category: str = None, limit: int = None) -> Tuple[str, int]:
    """Load full documents by category.

    Args:
        category: When given (and non-empty), only documents whose
            ``category`` equals this value are loaded; otherwise all are.
        limit: When truthy, keep only the first ``limit`` documents after
            sorting by ``token_count`` descending.

    Returns:
        ``(text, total_tokens)`` where ``text`` is every selected document
        preceded by a banner with its filename and token count, and
        ``total_tokens`` is the sum of the selected ``token_count`` values
        (NULL counts as 0).

    Raises:
        sqlite3.Error: If the database or its tables cannot be read.
    """
    query = "SELECT filename, content, token_count FROM documents"
    params: Tuple = ()
    if category:
        query += " WHERE category = ?"
        params = (category,)
    query += " ORDER BY token_count DESC"

    with closing(_connect()) as conn:
        docs = conn.execute(query, params).fetchall()

    if limit:
        docs = docs[:limit]

    output = []
    total_tokens = 0
    for doc in docs:
        doc_tokens = doc['token_count'] or 0  # NULL token_count counts as 0
        output.append(f"\n{_RULE}")
        output.append(f"# {doc['filename']} ({doc_tokens} tokens)")
        output.append(f"{_RULE}\n")
        output.append(doc['content'] or "")
        total_tokens += doc_tokens

    return '\n'.join(output), total_tokens


def load_hybrid(token_budget: int = 20000) -> Tuple[str, int]:
    """Intelligent hybrid loading - compressed + key full documents.

    The compressed core contexts are always included, regardless of the
    budget. Priority documents are then appended in order; a document is
    included only if it fits in the remaining budget, and the scan stops
    once the budget is reached.

    Args:
        token_budget: Upper bound applied when adding full documents.

    Returns:
        ``(text, tokens_used)`` where ``tokens_used`` is the sum of the
        contexts' size estimates and the included documents' token counts.

    Raises:
        sqlite3.Error: If the database or its tables cannot be read.
    """
    output = []
    tokens_used = 0

    # 1-3. Always load the compressed core contexts (essential, small).
    for context_name in _CORE_CONTEXTS:
        compressed, tokens = load_compressed(context_name)
        output.append(compressed)
        tokens_used += tokens

    # 4. Load key full documents if budget allows. The section banner is
    # emitted even when no document ends up fitting.
    output.append(f"\n{_RULE}")
    output.append("# FULL DOCUMENTS (for voice/personality)")
    output.append(f"{_RULE}\n")

    with closing(_connect()) as conn:
        for filename in _PRIORITY_DOCS:
            if tokens_used >= token_budget:
                break

            doc = conn.execute(
                "SELECT content, token_count FROM documents WHERE filename = ?",
                (filename,),
            ).fetchone()
            if doc is None:
                continue

            doc_tokens = doc['token_count'] or 0
            # A document that does not fit is skipped, not truncated; a
            # smaller one later in the list may still fit.
            if tokens_used + doc_tokens <= token_budget:
                output.append(f"\n## {filename}\n")
                output.append(doc['content'] or "")
                tokens_used += doc_tokens

    return '\n'.join(output), tokens_used


def _print_usage() -> None:
    """Print the command-line usage summary to stdout."""
    print("Usage:")
    print(" chihiro_load.py compressed <context> - Load compressed insights")
    print(" chihiro_load.py full <category> - Load full docs by category")
    print(" chihiro_load.py hybrid [budget] - Intelligent hybrid (default 20000)")
    print(" chihiro_load.py stats - Show database stats")


def _print_stats() -> None:
    """Print document/insight counts and per-category token totals."""
    with closing(sqlite3.connect(DB_PATH)) as conn:
        doc_count, total_tokens = conn.execute(
            "SELECT COUNT(*), SUM(token_count) FROM documents"
        ).fetchone()
        by_category = conn.execute(
            "SELECT category, COUNT(*), SUM(token_count) FROM documents GROUP BY category"
        ).fetchall()
        insight_count = conn.execute("SELECT COUNT(*) FROM insights").fetchone()[0]

    # SUM() yields NULL for an empty table or all-NULL column; formatting
    # None with ":," would raise, so normalise to 0 first.
    total_tokens = total_tokens or 0
    average = total_tokens // doc_count if doc_count > 0 else 0

    print(f"\n{_RULE}")
    print("CHIHIRO KNOWLEDGE DATABASE STATS")
    print(f"{_RULE}")
    print(f"\nDocuments: {doc_count}")
    print(f"Total tokens: {total_tokens:,}")
    print(f"Average: {average:,} tokens/doc")
    print(f"\nInsights: {insight_count}")
    print(f"\nBy Category:")
    for cat, count, tokens in by_category:
        label = cat if cat is not None else "(none)"
        print(f" {label:15s}: {count:2d} docs, {tokens or 0:6,} tokens")
    print(f"{_RULE}\n")


def main():
    """Command-line entry point; dispatches on ``sys.argv[1]``.

    Loaded content goes to stdout and token summaries to stderr, so stdout
    can be piped. Exits with status 1 on missing/unknown command or an
    invalid budget.
    """
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit(1)

    command = sys.argv[1]
    arg: Optional[str] = sys.argv[2] if len(sys.argv) > 2 else None

    if command == "compressed":
        context = arg if arg is not None else "identity_core"
        content, tokens = load_compressed(context)
        # NOTE(review): unlike the other commands, the content itself is
        # echoed to stderr as well as stdout; kept as-is in case callers
        # rely on it.
        print(content, file=sys.stderr)
        print(f"\nTokens: {tokens}", file=sys.stderr)
        print(content)

    elif command == "full":
        content, tokens = load_full_docs(arg)
        print(f"Loading full documents: {tokens:,} tokens", file=sys.stderr)
        print(content)

    elif command == "hybrid":
        if arg is None:
            budget = 20000
        else:
            try:
                budget = int(arg)
            except ValueError:
                # Report a readable error instead of a raw traceback.
                print(f"Invalid budget: {arg}", file=sys.stderr)
                sys.exit(1)
        content, tokens = load_hybrid(budget)
        print(f"Hybrid load: {tokens:,} tokens (budget: {budget:,})", file=sys.stderr)
        print(content)

    elif command == "stats":
        _print_stats()

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)


if __name__ == "__main__":
    main()