test_documentation.py
"""
Verify documentation quality and freshness.

These tests ensure CLAUDE.md stays healthy and all internal references resolve.
Part of the Harness Engineering documentation quality gates.
"""
import os
import re
import subprocess

import pytest

PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


class TestDocumentation:
    """Quality gates for the CLAUDE.md file located at PROJECT_ROOT.

    Every test skips (rather than fails) when CLAUDE.md does not exist.
    """

    # Maximum number of lines CLAUDE.md may contain.
    MAX_CLAUDE_MD_LINES = 250

    # Maximum number of lines a single numbered gotcha may span (header included).
    MAX_GOTCHA_LINES = 3

    # Documentation paths that may be gitignored / excluded from Docker images
    # but present on dev machines. When git is unavailable (Docker), missing
    # links under these prefixes are silently skipped.
    GITIGNORED_DOC_PREFIXES = ("docs/", "docs\\")

    # Inline markdown links: [text](target)
    _LINK_PATTERN = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

    # Link targets that never refer to a file inside the repository:
    # external URLs, mail links and in-page anchors.
    _NON_FILE_PREFIXES = ("http://", "https://", "mailto:", "#")

    # ------------------------------------------------------------------
    # Helpers (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------

    @staticmethod
    def _claude_md_path():
        """Return the path of CLAUDE.md, skipping the running test if it is absent."""
        path = os.path.join(PROJECT_ROOT, "CLAUDE.md")
        if not os.path.exists(path):
            pytest.skip("CLAUDE.md not found")
        return path

    @classmethod
    def _read_claude_md(cls):
        """Return the full text of CLAUDE.md (skips the test if the file is absent)."""
        # Explicit UTF-8: the platform default encoding is locale dependent and
        # can fail on non-ASCII characters such as em dashes.
        with open(cls._claude_md_path(), encoding="utf-8") as f:
            return f.read()

    @classmethod
    def _local_links(cls, content):
        """Extract the markdown links in *content* that should resolve to files.

        Returns a list of ``(text, target, file_path)`` tuples in document
        order. ``target`` is the link destination exactly as written;
        ``file_path`` is the same destination with any ``#fragment`` removed,
        i.e. the part that names a file. External URLs, ``mailto:`` links and
        pure in-page anchors are omitted.
        """
        links = []
        for match in cls._LINK_PATTERN.finditer(content):
            text, target = match.group(1), match.group(2)
            if target.startswith(cls._NON_FILE_PREFIXES):
                continue
            # "docs/guide.md#setup" refers to the file "docs/guide.md".
            file_path = target.split("#", 1)[0]
            links.append((text, target, file_path))
        return links

    @staticmethod
    def _git_tracked_files():
        """Return the set of paths reported by ``git ls-files``.

        Returns ``None`` when git cannot be used: the executable is missing,
        the call fails or times out, or it reports no files.
        """
        try:
            result = subprocess.run(
                # -z: NUL-separated, unquoted paths (plain output C-quotes
                # paths containing non-ASCII or special characters).
                ["git", "ls-files", "-z"],
                cwd=PROJECT_ROOT,
                capture_output=True, encoding="utf-8", errors="replace", timeout=5,
            )
        except (subprocess.SubprocessError, OSError):
            return None
        if result.returncode != 0 or not result.stdout:
            return None
        return {path for path in result.stdout.split("\0") if path}

    @staticmethod
    def _is_gitignored(path):
        """Return True if ``git check-ignore`` reports *path* as ignored.

        Any failure to run git (including a timeout) is treated as "not
        ignored" so the link is reported instead of crashing the test.
        """
        try:
            result = subprocess.run(
                # "--" keeps a path starting with "-" from being parsed as an option.
                ["git", "check-ignore", "-q", "--", path],
                cwd=PROJECT_ROOT,
                capture_output=True, timeout=5,
            )
        except (subprocess.SubprocessError, OSError):
            return False
        return result.returncode == 0

    @staticmethod
    def _gotcha_items(content):
        """Return the numbered items of the ``## Gotchas`` section of *content*.

        Each item is the stripped text from one ``N. `` line start up to the
        next one (or the end of the section). Returns ``None`` when there is
        no Gotchas section and ``[]`` when the section has no numbered items.
        """
        # The section ends at the next level-2 heading or at end of text. The
        # line-anchored lookahead also handles a heading that directly follows
        # the "## Gotchas" line (empty section).
        section = re.search(
            r'## Gotchas\n(.*?)(?=^## |\Z)', content, re.DOTALL | re.MULTILINE
        )
        if not section:
            return None
        body = section.group(1).strip()
        # Each gotcha is a numbered item: "N. **title** — description"
        starts = [m.start() for m in re.finditer(r'^\d+\.\s+', body, re.MULTILINE)]
        ends = starts[1:] + [len(body)]
        return [body[start:end].strip() for start, end in zip(starts, ends)]

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @pytest.mark.unit
    def test_claude_md_size_limit(self):
        """CLAUDE.md must stay under 250 lines (per self-improvement protocol)."""
        with open(self._claude_md_path(), encoding="utf-8") as f:
            line_count = sum(1 for _ in f)
        assert line_count <= self.MAX_CLAUDE_MD_LINES, (
            f"CLAUDE.md is {line_count} lines (limit: {self.MAX_CLAUDE_MD_LINES}).\n"
            f"Extract verbose content to docs/ and link from CLAUDE.md.\n"
            f"See the Self-Improvement Protocol in CLAUDE.md for guidance."
        )

    @pytest.mark.unit
    def test_see_references_resolve(self):
        """All markdown link references in CLAUDE.md must point to existing files.

        A link is accepted when its file part (the target without any
        ``#fragment``) is tracked by git or exists on disk. Missing files are
        tolerated when git reports them as ignored or, if git is unavailable,
        when they fall under GITIGNORED_DOC_PREFIXES.
        """
        content = self._read_claude_md()
        tracked_files = self._git_tracked_files()
        has_git = tracked_files is not None

        broken = []
        for text, target, file_path in self._local_links(content):
            resolved = os.path.join(PROJECT_ROOT, file_path)
            # Check: tracked in git OR exists on filesystem
            if (has_git and file_path in tracked_files) or os.path.exists(resolved):
                continue
            if has_git:
                if self._is_gitignored(file_path):
                    continue  # path is gitignored, skip
            elif file_path.startswith(self.GITIGNORED_DOC_PREFIXES):
                # No git available (Docker): skip known gitignored prefixes
                continue
            broken.append(
                f" [{text}]({target}) -> FILE NOT FOUND\n"
                f" Expected at: {resolved}"
            )

        assert not broken, (
            "\nBroken references in CLAUDE.md:\n"
            + "\n".join(broken)
            + "\n\nFix: Update the path or create the missing file."
        )

    @pytest.mark.unit
    def test_gotchas_max_length(self):
        """Each gotcha in CLAUDE.md should be concise (max 3 lines including header)."""
        gotchas = self._gotcha_items(self._read_claude_md())
        if gotchas is None:
            pytest.skip("No Gotchas section found in CLAUDE.md")

        long_gotchas = []
        for gotcha in gotchas:
            lines = gotcha.split("\n")
            if len(lines) > self.MAX_GOTCHA_LINES:
                first_line = lines[0][:80]
                long_gotchas.append(
                    f" Gotcha starting with: {first_line}...\n"
                    f" Has {len(lines)} lines (max: {self.MAX_GOTCHA_LINES}). "
                    "Compress or move details to docs/."
                )

        assert not long_gotchas, (
            "\nGotchas exceeding max length:\n"
            + "\n".join(long_gotchas)
            + "\n\nFix: Keep each gotcha to max 2 lines (cause + prevention)."
        )