/ tests / structural / test_documentation.py
test_documentation.py
  1  """
  2  Verify documentation quality and freshness.
  3  
  4  These tests ensure CLAUDE.md stays healthy and all internal references resolve.
  5  Part of the Harness Engineering documentation quality gates.
  6  """
  7  import os
  8  import re
  9  import pytest
 10  
# Project root: three directory levels up from this file's absolute path.
# The tests below look for CLAUDE.md directly inside this directory.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 12  
 13  
 14  class TestDocumentation:
 15  
 16      @pytest.mark.unit
 17      def test_claude_md_size_limit(self):
 18          """CLAUDE.md must stay under 250 lines (per self-improvement protocol)."""
 19          claude_md = os.path.join(PROJECT_ROOT, "CLAUDE.md")
 20          if not os.path.exists(claude_md):
 21              pytest.skip("CLAUDE.md not found")
 22          with open(claude_md) as f:
 23              lines = f.readlines()
 24          assert len(lines) <= 250, (
 25              f"CLAUDE.md is {len(lines)} lines (limit: 250).\n"
 26              f"Extract verbose content to docs/ and link from CLAUDE.md.\n"
 27              f"See the Self-Improvement Protocol in CLAUDE.md for guidance."
 28          )
 29  
 30      # Paths gitignored in .dockerignore but present on dev machines.
 31      # When git is unavailable (Docker), these are silently skipped.
 32      GITIGNORED_DOC_PREFIXES = ("docs/", "docs\\")
 33  
 34      @pytest.mark.unit
 35      def test_see_references_resolve(self):
 36          """All markdown link references in CLAUDE.md must point to existing files.
 37  
 38          Uses git ls-files to check tracked files (handles gitignored docs).
 39          Falls back to filesystem check if git is unavailable.
 40          """
 41          import subprocess
 42  
 43          claude_md = os.path.join(PROJECT_ROOT, "CLAUDE.md")
 44          if not os.path.exists(claude_md):
 45              pytest.skip("CLAUDE.md not found")
 46  
 47          with open(claude_md) as f:
 48              content = f.read()
 49  
 50          # Get tracked files from git
 51          tracked_files = None
 52          has_git = False
 53          try:
 54              result = subprocess.run(
 55                  ["git", "ls-files"],
 56                  cwd=PROJECT_ROOT,
 57                  capture_output=True, text=True, timeout=5,
 58              )
 59              if result.returncode == 0 and result.stdout.strip():
 60                  tracked_files = set(result.stdout.strip().split("\n"))
 61                  has_git = True
 62          except (subprocess.SubprocessError, FileNotFoundError):
 63              pass
 64  
 65          # Match markdown links: [text](path)
 66          link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
 67          broken = []
 68  
 69          for match in link_pattern.finditer(content):
 70              text, path = match.group(1), match.group(2)
 71              # Skip external URLs
 72              if path.startswith("http://") or path.startswith("https://"):
 73                  continue
 74              # Skip anchors
 75              if path.startswith("#"):
 76                  continue
 77              # Check: tracked in git OR exists on filesystem
 78              resolved = os.path.join(PROJECT_ROOT, path)
 79              in_git = tracked_files is not None and path in tracked_files
 80              on_disk = os.path.exists(resolved)
 81              if in_git or on_disk:
 82                  continue
 83              # Skip gitignored paths via git check-ignore
 84              if has_git:
 85                  ign = subprocess.run(
 86                      ["git", "check-ignore", "-q", path],
 87                      cwd=PROJECT_ROOT,
 88                      capture_output=True, timeout=5,
 89                  )
 90                  if ign.returncode == 0:
 91                      continue  # path is gitignored, skip
 92              else:
 93                  # No git available (Docker): skip known gitignored prefixes
 94                  if path.startswith(self.GITIGNORED_DOC_PREFIXES):
 95                      continue
 96              broken.append(
 97                  f"  [{text}]({path}) -> FILE NOT FOUND\n"
 98                  f"    Expected at: {resolved}"
 99              )
100  
101          assert not broken, (
102              f"\nBroken references in CLAUDE.md:\n"
103              + "\n".join(broken)
104              + "\n\nFix: Update the path or create the missing file."
105          )
106  
107      @pytest.mark.unit
108      def test_gotchas_max_length(self):
109          """Each gotcha in CLAUDE.md should be concise (max 3 lines including header)."""
110          claude_md = os.path.join(PROJECT_ROOT, "CLAUDE.md")
111          if not os.path.exists(claude_md):
112              pytest.skip("CLAUDE.md not found")
113  
114          with open(claude_md) as f:
115              content = f.read()
116  
117          # Find the Gotchas section
118          gotchas_match = re.search(r'## Gotchas\n(.*?)(?=\n## |\Z)', content, re.DOTALL)
119          if not gotchas_match:
120              pytest.skip("No Gotchas section found in CLAUDE.md")
121  
122          gotchas_text = gotchas_match.group(1).strip()
123          # Each gotcha is a numbered item: "N. **title** — description"
124          gotcha_pattern = re.compile(r'^\d+\.\s+', re.MULTILINE)
125          gotcha_starts = [m.start() for m in gotcha_pattern.finditer(gotchas_text)]
126  
127          long_gotchas = []
128          for i, start in enumerate(gotcha_starts):
129              end = gotcha_starts[i + 1] if i + 1 < len(gotcha_starts) else len(gotchas_text)
130              gotcha = gotchas_text[start:end].strip()
131              lines = gotcha.split("\n")
132              if len(lines) > 3:
133                  first_line = lines[0][:80]
134                  long_gotchas.append(
135                      f"  Gotcha starting with: {first_line}...\n"
136                      f"    Has {len(lines)} lines (max: 3). Compress or move details to docs/."
137                  )
138  
139          assert not long_gotchas, (
140              f"\nGotchas exceeding max length:\n"
141              + "\n".join(long_gotchas)
142              + "\n\nFix: Keep each gotcha to max 2 lines (cause + prevention)."
143          )