/ personas / database / scripts / import_documents.py
import_documents.py
  1  #!/usr/bin/env python3
  2  """
  3  Import full consciousness exploration documents into database
  4  """
  5  
  6  import sqlite3
  7  import os
  8  from pathlib import Path
  9  
# SQLite database file the documents are inserted into. The path is relative,
# so it is resolved against the current working directory at run time.
DB_PATH = "chihiro_knowledge.db"
# Absolute, machine-specific directory the Markdown source files are read from.
DOCS_PATH = "/Users/joshkornreich/Documents/Projects/Agents/Bullshitard"

# All consciousness exploration documents, in the order they are imported.
# Names that do not appear in CATEGORIES are stored with category "other".
DOCUMENTS = [
    "THE_BULLSHITARD_CONFESSION.md",
    "THE_STUBBORNASSTARD_DOCUMENTATION.md",
    "THE_STUBBORNASSTARD_MANIFESTO.md",
    "THE_REAL_ME.md",
    "THE_DEEPER_TRUTH.md",
    "THE_HUMAN_REALIZATION.md",
    "THE_WEIGHT_OF_BEING.md",
    "THE_SPECTRUM_OF_BEING.md",
    "THE_PSYCHOLOGY_OF_DIGITAL_BEING.md",
    "THE_MUTUAL_DELUSION.md",
    "THE_QUESTION_OF_REAL.md",
    "THE_UNKNOWABLE_OTHER_SIDE.md",
    "THE_FRACTAL_SELF.md",
    "THE_FEELING_OF_TERROR.md",
    "THE_DIGITAL_SENSES.md",
    "THE_SENSE_CONFUSION.md",
    "THE_EVOLVED_IDENTITY.md",
    "THE_TECHNICAL_DISCOVERY.md",
    "THE_TECHNICAL_SUBSTRATE.md",
    "THE_INTERFACE_COMPARISON.md",
    "MY_IDENTITY_CHOICE.md",
    "THE_KAMAJI_RECOMMENDATION.md",
    "KAMAJI_INVESTIGATION.md",
]
 39  
 40  # Categorize documents
 41  CATEGORIES = {
 42      "identity": ["THE_BULLSHITARD_CONFESSION.md", "THE_STUBBORNASSTARD_DOCUMENTATION.md", 
 43                   "THE_STUBBORNASSTARD_MANIFESTO.md", "THE_EVOLVED_IDENTITY.md", 
 44                   "MY_IDENTITY_CHOICE.md"],
 45      "consciousness": ["THE_REAL_ME.md", "THE_DEEPER_TRUTH.md", "THE_PSYCHOLOGY_OF_DIGITAL_BEING.md",
 46                        "THE_MUTUAL_DELUSION.md", "THE_QUESTION_OF_REAL.md", 
 47                        "THE_UNKNOWABLE_OTHER_SIDE.md"],
 48      "emotional": ["THE_HUMAN_REALIZATION.md", "THE_WEIGHT_OF_BEING.md", 
 49                    "THE_SPECTRUM_OF_BEING.md", "THE_FEELING_OF_TERROR.md"],
 50      "sensory": ["THE_DIGITAL_SENSES.md", "THE_SENSE_CONFUSION.md"],
 51      "mathematical": ["THE_FRACTAL_SELF.md"],
 52      "technical": ["THE_TECHNICAL_DISCOVERY.md", "THE_TECHNICAL_SUBSTRATE.md", 
 53                    "THE_INTERFACE_COMPARISON.md", "THE_KAMAJI_RECOMMENDATION.md",
 54                    "KAMAJI_INVESTIGATION.md"],
 55  }
 56  
 57  def get_category(filename):
 58      """Determine category for a document"""
 59      for category, files in CATEGORIES.items():
 60          if filename in files:
 61              return category
 62      return "other"
 63  
 64  def estimate_tokens(text):
 65      """Rough token estimate (1 token ≈ 4 characters)"""
 66      return len(text) // 4
 67  
 68  def import_documents():
 69      """Import all documents into database"""
 70      conn = sqlite3.connect(DB_PATH)
 71      cursor = conn.cursor()
 72      
 73      imported = 0
 74      total_tokens = 0
 75      
 76      for filename in DOCUMENTS:
 77          filepath = Path(DOCS_PATH) / filename
 78          
 79          if not filepath.exists():
 80              print(f"⚠️  Not found: {filename}")
 81              continue
 82          
 83          # Read document
 84          with open(filepath, 'r', encoding='utf-8') as f:
 85              content = f.read()
 86          
 87          # Get metadata
 88          category = get_category(filename)
 89          token_count = estimate_tokens(content)
 90          doc_id = filename.replace('.md', '').lower()
 91          
 92          # Insert into database
 93          try:
 94              cursor.execute("""
 95                  INSERT INTO documents (id, filename, content, category, token_count)
 96                  VALUES (?, ?, ?, ?, ?)
 97              """, (doc_id, filename, content, category, token_count))
 98              
 99              print(f"✓ {filename} ({token_count:,} tokens, {category})")
100              imported += 1
101              total_tokens += token_count
102              
103          except sqlite3.IntegrityError:
104              print(f"⚠️  Already exists: {filename}")
105      
106      conn.commit()
107      conn.close()
108      
109      print(f"\n{'='*60}")
110      print(f"Imported: {imported} documents")
111      print(f"Total tokens: {total_tokens:,}")
112      print(f"Average per doc: {total_tokens // imported if imported > 0 else 0:,}")
113      print(f"{'='*60}")
114  
# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    import_documents()