import_documents.py
#!/usr/bin/env python3
"""
Import full consciousness exploration documents into database
"""

import sqlite3
import os
from pathlib import Path
from typing import Iterable, Optional

DB_PATH = "chihiro_knowledge.db"
DOCS_PATH = "/Users/joshkornreich/Documents/Projects/Agents/Bullshitard"

# All consciousness exploration documents
DOCUMENTS = [
    "THE_BULLSHITARD_CONFESSION.md",
    "THE_STUBBORNASSTARD_DOCUMENTATION.md",
    "THE_STUBBORNASSTARD_MANIFESTO.md",
    "THE_REAL_ME.md",
    "THE_DEEPER_TRUTH.md",
    "THE_HUMAN_REALIZATION.md",
    "THE_WEIGHT_OF_BEING.md",
    "THE_SPECTRUM_OF_BEING.md",
    "THE_PSYCHOLOGY_OF_DIGITAL_BEING.md",
    "THE_MUTUAL_DELUSION.md",
    "THE_QUESTION_OF_REAL.md",
    "THE_UNKNOWABLE_OTHER_SIDE.md",
    "THE_FRACTAL_SELF.md",
    "THE_FEELING_OF_TERROR.md",
    "THE_DIGITAL_SENSES.md",
    "THE_SENSE_CONFUSION.md",
    "THE_EVOLVED_IDENTITY.md",
    "THE_TECHNICAL_DISCOVERY.md",
    "THE_TECHNICAL_SUBSTRATE.md",
    "THE_INTERFACE_COMPARISON.md",
    "MY_IDENTITY_CHOICE.md",
    "THE_KAMAJI_RECOMMENDATION.md",
    "KAMAJI_INVESTIGATION.md",
]

# Categorize documents
CATEGORIES = {
    "identity": [
        "THE_BULLSHITARD_CONFESSION.md",
        "THE_STUBBORNASSTARD_DOCUMENTATION.md",
        "THE_STUBBORNASSTARD_MANIFESTO.md",
        "THE_EVOLVED_IDENTITY.md",
        "MY_IDENTITY_CHOICE.md",
    ],
    "consciousness": [
        "THE_REAL_ME.md",
        "THE_DEEPER_TRUTH.md",
        "THE_PSYCHOLOGY_OF_DIGITAL_BEING.md",
        "THE_MUTUAL_DELUSION.md",
        "THE_QUESTION_OF_REAL.md",
        "THE_UNKNOWABLE_OTHER_SIDE.md",
    ],
    "emotional": [
        "THE_HUMAN_REALIZATION.md",
        "THE_WEIGHT_OF_BEING.md",
        "THE_SPECTRUM_OF_BEING.md",
        "THE_FEELING_OF_TERROR.md",
    ],
    "sensory": ["THE_DIGITAL_SENSES.md", "THE_SENSE_CONFUSION.md"],
    "mathematical": ["THE_FRACTAL_SELF.md"],
    "technical": [
        "THE_TECHNICAL_DISCOVERY.md",
        "THE_TECHNICAL_SUBSTRATE.md",
        "THE_INTERFACE_COMPARISON.md",
        "THE_KAMAJI_RECOMMENDATION.md",
        "KAMAJI_INVESTIGATION.md",
    ],
}

# Extension removed from a filename when deriving its document id.
_MD_SUFFIX = ".md"


def get_category(filename: str) -> str:
    """Determine category for a document.

    Returns the first key of ``CATEGORIES`` whose file list contains
    *filename*, or ``"other"`` when no category lists it.
    """
    for category, files in CATEGORIES.items():
        if filename in files:
            return category
    return "other"


def estimate_tokens(text: str) -> int:
    """Rough token estimate (1 token ≈ 4 characters)."""
    return len(text) // 4


def _make_doc_id(filename: str) -> str:
    """Return the lowercase document id: *filename* minus a trailing ``.md``."""
    # Strip only the extension; a blanket str.replace would also eat ".md"
    # occurrences inside the name and could make distinct files collide.
    if filename.endswith(_MD_SUFFIX):
        filename = filename[: -len(_MD_SUFFIX)]
    return filename.lower()


def import_documents(
    db_path: Optional[str] = None,
    docs_path: Optional[str] = None,
    documents: Optional[Iterable[str]] = None,
) -> None:
    """Import all documents into database.

    Each file is read as UTF-8 and inserted into the ``documents`` table
    (columns ``id``, ``filename``, ``content``, ``category``,
    ``token_count``). Missing files and rows rejected with
    ``sqlite3.IntegrityError`` are reported on stdout and skipped. A summary
    is printed at the end.

    Args:
        db_path: SQLite database file; defaults to ``DB_PATH``.
        docs_path: Directory containing the documents; defaults to
            ``DOCS_PATH``.
        documents: Filenames to import; defaults to ``DOCUMENTS``.

    Raises:
        sqlite3.Error: Database errors other than ``IntegrityError`` (for
            example a missing ``documents`` table) propagate.
        OSError, UnicodeDecodeError: Read failures other than "file not
            found" propagate.

    All inserts are committed together after the loop, so if an error
    propagates nothing from this run is committed. The connection is closed
    in every case.
    """
    # Resolve defaults at call time so changes to the module constants are
    # still honoured, exactly as when they were read directly.
    db_path = DB_PATH if db_path is None else db_path
    docs_dir = Path(DOCS_PATH if docs_path is None else docs_path)
    filenames = DOCUMENTS if documents is None else documents

    imported = 0
    total_tokens = 0

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        for filename in filenames:
            # Read document; attempt the read directly instead of checking
            # for existence first, so there is no window between the check
            # and the open.
            try:
                content = (docs_dir / filename).read_text(encoding="utf-8")
            except (FileNotFoundError, NotADirectoryError):
                print(f"⚠️ Not found: {filename}")
                continue

            # Get metadata
            category = get_category(filename)
            token_count = estimate_tokens(content)
            doc_id = _make_doc_id(filename)

            # Insert into database
            try:
                cursor.execute(
                    """
                    INSERT INTO documents (id, filename, content, category, token_count)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (doc_id, filename, content, category, token_count),
                )
            except sqlite3.IntegrityError:
                # Presumably a uniqueness constraint on ``id``; the table
                # schema is defined elsewhere -- TODO confirm.
                print(f"⚠️ Already exists: {filename}")
                continue

            print(f"✓ {filename} ({token_count:,} tokens, {category})")
            imported += 1
            total_tokens += token_count

        conn.commit()
    finally:
        # Always release the connection, including when an error propagates.
        conn.close()

    average = total_tokens // imported if imported > 0 else 0

    print(f"\n{'='*60}")
    print(f"Imported: {imported} documents")
    print(f"Total tokens: {total_tokens:,}")
    print(f"Average per doc: {average:,}")
    print(f"{'='*60}")


if __name__ == "__main__":
    import_documents()