Cradicle Explorer

/ scripts / concept_stubber.py
concept_stubber.py
  1  #!/usr/bin/env python3
  2  """
  3  Concept Stubber - Auto-create missing concept pages from broken links
  4  
  5  Finds all [[broken-links]] that point to non-existent files and creates
  6  stub pages for them. Overbuilds - creates everything, trim later.
  7  
  8  Usage:
  9      python scripts/concept_stubber.py [--dry-run]
 10  """
 11  
 12  import re
 13  import sys
 14  from pathlib import Path
 15  from datetime import datetime
 16  from collections import defaultdict
 17  import argparse
 18  
# Sub-directories of the repo path that are searched for markdown files,
# both when collecting existing pages and when collecting wiki-links.
SCAN_DIRS = ['docs', 'patterns', 'sessions', 'dashboards']
# A markdown file is ignored when any component of its path is one of these.
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', 'templates'}
# Location (relative to the repo path) where main() writes generated stubs.
OUTPUT_DIR = 'sessions/backfill'
 22  
 23  
 24  def normalize_link(link: str) -> str:
 25      """Normalize wiki-link."""
 26      name = link.split('/')[-1].lower()
 27      if name.endswith('.md'):
 28          name = name[:-3]
 29      return name.replace(' ', '-')
 30  
 31  
 32  def extract_wiki_links(content: str) -> set:
 33      """Extract all [[wiki-links]] from content."""
 34      pattern = r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]'
 35      return {normalize_link(m) for m in re.findall(pattern, content)}
 36  
 37  
 38  def scan_existing_files(base_path: Path) -> set:
 39      """Get all existing file names."""
 40      existing = set()
 41      for scan_dir in SCAN_DIRS:
 42          dir_path = base_path / scan_dir
 43          if not dir_path.exists():
 44              continue
 45          for filepath in dir_path.rglob('*.md'):
 46              if any(skip in filepath.parts for skip in SKIP_DIRS):
 47                  continue
 48              name = filepath.stem.lower().replace(' ', '-')
 49              existing.add(name)
 50      return existing
 51  
 52  
 53  def scan_all_links(base_path: Path) -> dict:
 54      """Get all links and their sources."""
 55      links = defaultdict(list)
 56      for scan_dir in SCAN_DIRS:
 57          dir_path = base_path / scan_dir
 58          if not dir_path.exists():
 59              continue
 60          for filepath in dir_path.rglob('*.md'):
 61              if any(skip in filepath.parts for skip in SKIP_DIRS):
 62                  continue
 63              try:
 64                  content = filepath.read_text(encoding='utf-8')
 65                  for link in extract_wiki_links(content):
 66                      links[link].append(filepath)
 67              except Exception:
 68                  continue
 69      return links
 70  
 71  
 72  def title_from_slug(slug: str) -> str:
 73      """Convert slug to title."""
 74      return slug.replace('-', ' ').title()
 75  
 76  
 77  def create_stub(name: str, sources: list, output_path: Path) -> str:
 78      """Create a stub page."""
 79      title = title_from_slug(name)
 80      sources_str = ', '.join(f"[[{s.stem}]]" for s in sources[:3])
 81      if len(sources) > 3:
 82          sources_str += f" (+{len(sources) - 3} more)"
 83  
 84      return f"""# {title}
 85  
 86  *Stub page - needs content*
 87  
 88  ---
 89  
 90  - **principle**
 91    - "[TODO: Core insight]"
 92  
 93  - **shape**
 94    - [TODO: What this IS]
 95    - [TODO: How it works]
 96  
 97  ---
 98  
 99  ## Definition
100  
101  [TODO: Main content here]
102  
103  ---
104  
105  ## Related
106  
107  - Referenced from: {sources_str}
108  
109  ---
110  
111  *Auto-generated stub | {datetime.now().strftime('%Y-%m-%d')} | Needs curation*
112  """
113  
114  
def main():
    """Command-line entry point: find broken wiki-links and create stubs.

    Scans the repository for markdown pages and wiki-links, keeps the links
    that point to no existing page, have at least ``--min-refs`` references
    and do not look like noise, then writes one stub per remaining link into
    ``OUTPUT_DIR``. With ``--dry-run`` nothing is created on disk (not even
    the output directory); the stubs that would be written are only listed.
    """
    parser = argparse.ArgumentParser(
        description='Concept Stubber - Create missing concept pages'
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='Show what would be created'
    )
    parser.add_argument(
        '--path', '-p',
        type=Path,
        default=Path(__file__).parent.parent,
        help='Path to Sovereign_OS repo'
    )
    parser.add_argument(
        '--min-refs',
        type=int,
        default=1,
        help='Minimum references to create stub (default: 1)'
    )

    args = parser.parse_args()

    existing = scan_existing_files(args.path)
    all_links = scan_all_links(args.path)

    # Link names that are never real concepts (templates, dates, etc.)
    noise_patterns = [
        r'^\d{4}-\d{2}-\d{2}',  # Dates
        r'^done$',
        r'^todo$',
        r'^\[\[',  # Malformed
        r'^{',  # Template vars
    ]

    # Broken links: referenced often enough, not an existing page, not noise.
    # Empty names are dropped too; they would produce a file called ".md".
    broken = {
        name: sources
        for name, sources in all_links.items()
        if name
        and name not in existing
        and len(sources) >= args.min_refs
        and not any(re.match(pattern, name) for pattern in noise_patterns)
    }

    print(f"Found {len(broken)} missing concepts")
    print()

    output_dir = args.path / OUTPUT_DIR
    if not args.dry_run:
        # A dry run must leave the filesystem untouched.
        output_dir.mkdir(parents=True, exist_ok=True)

    # Most-referenced first; ties broken by name so output order is stable.
    ordered = sorted(broken.items(), key=lambda item: (-len(item[1]), item[0]))

    pending = 0  # stubs created, or that would be created in a dry run
    for name, sources in ordered:
        output_path = output_dir / f"{name}.md"

        if output_path.exists():
            print(f"  Skip: {name} (already exists)")
            continue

        print(f"  [{len(sources):>2} refs] {name}")
        pending += 1

        if not args.dry_run:
            content = create_stub(name, sources, output_path)
            output_path.write_text(content, encoding='utf-8')

    print()
    if args.dry_run:
        # Report only the stubs that were not skipped above.
        print(f"[DRY RUN] Would create {pending} stubs")
    else:
        print(f"✓ Created {pending} stub pages in {output_dir}")
186  
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()