concept_stubber.py
#!/usr/bin/env python3
"""
Concept Stubber - Auto-create missing concept pages from broken links

Finds all [[broken-links]] that point to non-existent files and creates
stub pages for them. Overbuilds - creates everything, trim later.

Usage:
    python scripts/concept_stubber.py [--dry-run]
"""

import re
import sys
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import argparse

SCAN_DIRS = ['docs', 'patterns', 'sessions', 'dashboards']
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', 'templates'}
OUTPUT_DIR = 'sessions/backfill'

# Matches [[target]] and [[target|alias]]; group 1 captures the target only.
_WIKI_LINK_RE = re.compile(r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]')


def normalize_link(link: str) -> str:
    """Normalize a wiki-link target to the slug used for page lookup.

    Drops any ``#heading`` fragment, keeps only the last ``/``-separated
    path component, trims surrounding whitespace, lower-cases the result,
    removes a trailing ``.md`` and replaces spaces with hyphens.

    Args:
        link: Raw link target, i.e. the text between ``[[`` and ``]]``
            (without any ``|alias`` part).

    Returns:
        The normalized slug. May be the empty string when the link has no
        page component (for example ``#heading`` or ``folder/``).
    """
    # A fragment addresses a section inside a page, not a separate page.
    target = link.split('#', 1)[0]
    name = target.split('/')[-1].strip().lower()
    if name.endswith('.md'):
        name = name[:-3]
    return name.replace(' ', '-')


def extract_wiki_links(content: str) -> set:
    """Extract all [[wiki-links]] from content.

    Args:
        content: Text to search.

    Returns:
        Set of normalized link slugs (see ``normalize_link``). Links that
        normalize to an empty slug are left out because they cannot name
        a page.
    """
    slugs = {normalize_link(m) for m in _WIKI_LINK_RE.findall(content)}
    slugs.discard('')
    return slugs


def _iter_markdown_files(base_path: Path):
    """Yield each ``*.md`` file under SCAN_DIRS that is not inside a SKIP_DIRS folder."""
    for scan_dir in SCAN_DIRS:
        dir_path = base_path / scan_dir
        if not dir_path.is_dir():
            continue
        # Sorted so results do not depend on filesystem enumeration order.
        for filepath in sorted(dir_path.rglob('*.md')):
            # Compare only the parts below base_path: a checkout that lives
            # under e.g. ".../templates/repo" must not have every file skipped.
            rel_parts = filepath.relative_to(base_path).parts
            if SKIP_DIRS.isdisjoint(rel_parts):
                yield filepath


def scan_existing_files(base_path: Path) -> set:
    """Get the slugs of all existing markdown pages.

    Args:
        base_path: Repository root containing the SCAN_DIRS folders.

    Returns:
        Set of page slugs: each file stem lower-cased with spaces replaced
        by hyphens.
    """
    return {
        filepath.stem.lower().replace(' ', '-')
        for filepath in _iter_markdown_files(base_path)
    }


def scan_all_links(base_path: Path) -> dict:
    """Get all wiki-links and the files that reference them.

    Files that cannot be read or decoded as UTF-8 are skipped with a
    warning on stderr so one bad file does not abort the scan.

    Args:
        base_path: Repository root containing the SCAN_DIRS folders.

    Returns:
        Mapping of link slug to the list of file paths containing that
        link (one entry per file).
    """
    links = defaultdict(list)
    for filepath in _iter_markdown_files(base_path):
        try:
            content = filepath.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as err:
            print(f"Warning: skipping unreadable file {filepath}: {err}",
                  file=sys.stderr)
            continue
        for link in extract_wiki_links(content):
            links[link].append(filepath)
    return links
def title_from_slug(slug: str) -> str:
    """Convert a hyphenated slug to a title, e.g. ``my-page`` -> ``My Page``."""
    return slug.replace('-', ' ').title()


def create_stub(name: str, sources: list, output_path: Path) -> str:
    """Build the markdown text of a stub page.

    Args:
        name: Slug of the missing concept; used to derive the page title.
        sources: Paths of the files that link to the concept. At most the
            first three are listed by stem; the rest are summarized as
            ``(+N more)``.
        output_path: Destination of the stub. Not used when rendering;
            kept so existing callers keep working.

    Returns:
        The stub page content. Nothing is written to disk here.
    """
    title = title_from_slug(name)
    sources_str = ', '.join(f"[[{s.stem}]]" for s in sources[:3])
    if len(sources) > 3:
        sources_str += f" (+{len(sources) - 3} more)"

    return f"""# {title}

*Stub page - needs content*

---

- **principle**
  - "[TODO: Core insight]"

- **shape**
  - [TODO: What this IS]
  - [TODO: How it works]

---

## Definition

[TODO: Main content here]

---

## Related

- Referenced from: {sources_str}

---

*Auto-generated stub | {datetime.now().strftime('%Y-%m-%d')} | Needs curation*
"""


# Link names that never denote a real concept page.
_NOISE_PATTERNS = [
    re.compile(r'^\d{4}-\d{2}-\d{2}'),  # Dates
    re.compile(r'^done$'),
    re.compile(r'^todo$'),
    re.compile(r'^\[\['),  # Malformed
    re.compile(r'^{'),  # Template vars
]


def _is_noise(name: str) -> bool:
    """Return True when *name* is empty or matches one of the noise patterns."""
    return not name or any(p.match(name) for p in _NOISE_PATTERNS)


def _find_missing(all_links: dict, existing: set, min_refs: int) -> dict:
    """Return ``{slug: sources}`` for links with no page, enough references and no noise."""
    return {
        name: sources
        for name, sources in all_links.items()
        if name not in existing
        and len(sources) >= min_refs
        and not _is_noise(name)
    }


def main(argv=None):
    """Find broken wiki-links and create a stub page for each one.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    With ``--dry-run`` nothing is written: no stub files and no output
    directory are created, and the summary counts only the stubs that a
    real run would create.
    """
    parser = argparse.ArgumentParser(
        description='Concept Stubber - Create missing concept pages'
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='Show what would be created'
    )
    parser.add_argument(
        '--path', '-p',
        type=Path,
        default=Path(__file__).parent.parent,
        help='Path to Sovereign_OS repo'
    )
    parser.add_argument(
        '--min-refs',
        type=int,
        default=1,
        help='Minimum references to create stub (default: 1)'
    )

    args = parser.parse_args(argv)

    existing = scan_existing_files(args.path)
    all_links = scan_all_links(args.path)

    # Broken links: referenced but without a page, filtered by reference
    # count and by the noise patterns (dates, task markers, ...).
    broken = _find_missing(all_links, existing, args.min_refs)

    print(f"Found {len(broken)} missing concepts")
    print()

    output_dir = args.path / OUTPUT_DIR
    if not args.dry_run:
        # A dry run must leave the filesystem untouched.
        output_dir.mkdir(parents=True, exist_ok=True)

    # Counts stubs written, or in a dry run the stubs that would be written.
    created = 0
    # Most-referenced first; ties broken by name for a stable order.
    ordered = sorted(broken.items(), key=lambda item: (-len(item[1]), item[0]))
    for name, sources in ordered:
        output_path = output_dir / f"{name}.md"

        if output_path.exists():
            print(f" Skip: {name} (already exists)")
            continue

        print(f" [{len(sources):>2} refs] {name}")

        if not args.dry_run:
            content = create_stub(name, sources, output_path)
            try:
                output_path.write_text(content, encoding='utf-8')
            except OSError as err:
                # One unwritable name should not abort the remaining stubs.
                print(f"Warning: could not write {output_path}: {err}",
                      file=sys.stderr)
                continue
        created += 1

    print()
    if args.dry_run:
        print(f"[DRY RUN] Would create {created} stubs")
    else:
        print(f"✓ Created {created} stub pages in {output_dir}")


if __name__ == '__main__':
    main()