test_skill_kb_consistency.py
1 # -*- coding: utf-8 -*- 2 """KB consistency tests for packaged pyod skill content. 3 4 Enforces three invariants: 5 6 1. Every backtick-wrapped token in hand-written skill prose that looks like 7 a detector name must exist in `pyod.utils.knowledge.algorithms`. Author 8 prose must use backtick convention for detector names. 9 10 2. Every KB-DERIVED block in every skill .md file must have a matching 11 BEGIN / END marker pair with the same section name. Malformed or 12 unmatched markers fail loudly. 13 14 3. Every KB-DERIVED block's body is byte-identical to what 15 scripts/regen_skill.py would produce (re-run the generator to fix). 16 """ 17 import importlib.util 18 import re 19 from pathlib import Path 20 21 REPO_ROOT = Path(__file__).resolve().parents[2] 22 SKILLS_DIR = REPO_ROOT / "pyod" / "skills" 23 SCRIPT_PATH = REPO_ROOT / "scripts" / "regen_skill.py" 24 25 # Allowlist of backtick-wrapped tokens that look like detector names but 26 # are actually legitimate non-detector references (Python symbols, env 27 # vars, CLI commands, etc.). Add to this list when a false positive 28 # surfaces, with a one-line reason. 29 # 30 # IMPORTANT: do NOT add any token here that is also a live KB key. The 31 # allowlist must not shadow real detector names — that would let a typo 32 # in `IForset` slip through if `IForest` were also allowlisted. The 33 # `test_allowlist_does_not_shadow_kb_keys` test enforces this invariant. 34 _BACKTICK_ALLOWLIST = { 35 # Python stdlib / pyod module names that look detector-like but 36 # are orchestration objects, not KB-registered detectors. 37 "ADEngine", "ADEngine.start", "ADEngine.plan", "ADEngine.run", 38 "ADEngine.analyze", "ADEngine.iterate", "ADEngine.report", 39 "ADEngine.investigate", "ADEngine.profile_data", 40 "ADEngine.list_detectors", "ADEngine.explain_detector", 41 "ADEngine.compare_detectors", "ADEngine.get_benchmarks", 42 "BaseDetector", "MultiModalEncoder", 43 # CLI commands and flags 44 "pyod", "pyod-install-skill", "pip", "pyod[graph]", "pyod[mcp]", 45 "pyod info", "pyod install skill", "pyod install skill --project", 46 "pyod mcp serve", "python -m pyod.mcp_server", 47 # Benchmark and paper names — valid references that aren't KB keys 48 "ADBench", "TSB-AD", "BOND", "NLP-ADBench", 49 # Common Python state / attribute refs 50 "state", "state.profile", "state.plan", "state.scores", "state.labels", 51 "state.quality", "state.next_action", "state.best_detector", 52 "state.top_cases", "state.feature_importance", "state.consensus_scores", 53 "decision_scores_", "decision_function", "predict", "predict_proba", 54 "predict_confidence", "predict_with_rejection", "labels_", "fit", 55 "fit_predict", 56 # Sklearn utilities commonly mentioned in prose 57 # (NOTE: PCA is a live KB detector, NOT allowlisted — see comment above.) 58 "StandardScaler", "RobustScaler", 59 # Test runner / stdlib references 60 "True", "False", "None", "HOME", "USERPROFILE", "PYTHONPATH", 61 # Filesystem paths often wrapped in backticks 62 "~/.claude", "~/.codex", "~/.claude/skills", 63 "./skills/", "./skills/od-expert/", 64 # Skill file references (capital-S start, look detector-like to the heuristic) 65 "SKILL.md", 66 # Vendor / encoder backend names referenced in text/image docs 67 "HuggingFace", "OpenAI", "OPENAI_API_KEY", "PIL.Image", 68 } 69 70 # Backtick-wrapped token pattern. Captures the inside of a single-backtick 71 # span. Example: `IForest` → captures "IForest". We deliberately do NOT 72 # match triple-backtick code fences (which are whole-line delimiters) — 73 # the regex only matches inline spans. 74 _BACKTICK_RE = re.compile(r"(?<!`)`([^`\n]+?)`(?!`)") 75 76 # Exact valid marker forms. A line that mentions a KB marker MUST 77 # fullmatch one of these after strip(); anything else is an error. 78 _BEGIN_MARKER_RE = re.compile(r"<!-- BEGIN KB-DERIVED: ([a-z0-9_\-]+) -->") 79 _END_MARKER_RE = re.compile(r"<!-- END KB-DERIVED: ([a-z0-9_\-]+) -->") 80 81 # "Suspicious marker" detector — any line containing an HTML comment 82 # with BEGIN/END + KB- is a candidate for validation, case-insensitive. 83 # This deliberately matches typo'd sentinels like `KB-DEREIVED`, 84 # `KB-DERIVD`, or `Kb-derived` so that the validator can flag them. 85 # A literal substring check for "KB-DERIVED" would silently miss those 86 # typos — that was the hole flagged in the Round 3 review. 87 _SUSPICIOUS_MARKER_RE = re.compile( 88 r"<!--\s*(BEGIN|END)\s+KB-", 89 re.IGNORECASE, 90 ) 91 92 # Paired marker regex (matches well-formed blocks only — same as the 93 # generator's _BLOCK_RE). Used to strip KB-DERIVED bodies before 94 # backtick scanning, and to identify the section body for regen check. 95 _PAIRED_BLOCK_RE = re.compile( 96 r"<!-- BEGIN KB-DERIVED: ([a-z0-9_\-]+) -->\n(.*?)<!-- END KB-DERIVED: \1 -->", 97 re.DOTALL, 98 ) 99 100 101 def _import_regen_skill(): 102 """Import scripts/regen_skill.py as a module by file path.""" 103 spec = importlib.util.spec_from_file_location("regen_skill", SCRIPT_PATH) 104 mod = importlib.util.module_from_spec(spec) 105 spec.loader.exec_module(mod) 106 return mod 107 108 109 def _load_kb(): 110 """Load pyod.utils.knowledge.algorithms.""" 111 from pyod.utils.ad_engine import ADEngine 112 return ADEngine().kb.algorithms 113 114 115 def _all_skill_files(): 116 """Return sorted list of all *.md files under pyod/skills/.""" 117 return sorted(SKILLS_DIR.rglob("*.md")) 118 119 120 def _strip_paired_kb_derived_blocks(text): 121 """Remove content inside well-formed KB-DERIVED blocks. 122 123 KB-DERIVED blocks already came from the KB by definition; we only 124 want to validate hand-written prose. Malformed blocks are left in 125 place so the marker-pairing test can still flag them. 126 """ 127 return _PAIRED_BLOCK_RE.sub("", text) 128 129 130 def test_skill_files_exist(): 131 """At least the od-expert SKILL.md must exist.""" 132 files = _all_skill_files() 133 paths = {f.relative_to(REPO_ROOT) for f in files} 134 assert any("od_expert" in str(p) and p.name == "SKILL.md" for p in paths), ( 135 f"od_expert/SKILL.md not found among {sorted(paths)}" 136 ) 137 138 139 def test_kb_derived_markers_are_well_formed(): 140 """Every line mentioning KB-DERIVED must be a valid BEGIN or END marker, 141 and every BEGIN must match a later END with the same section name. 142 143 This is a line-oriented parser that catches everything the regex-based 144 approach missed in Round 1: 145 - Typo'd markers that don't match either valid form (e.g. 146 ``<!-- BEGIN KB-DERIVED foo -->`` — missing colon, or 147 ``<!-- BEGIN KB-DEREIVED: foo -->`` — typo) 148 - END markers that appear before any BEGIN (depth would go negative) 149 - Unclosed BEGIN markers at end-of-file 150 - Nested BEGIN-inside-BEGIN 151 - Section name mismatch between BEGIN and its corresponding END 152 - Stray lines containing the literal "KB-DERIVED" that are not 153 actual markers (e.g. a prose reference that would otherwise be 154 invisible to the generator but visible to a human reader) 155 156 The generator's regex-based replacement is only safe if every 157 KB-DERIVED marker in the file matches one of the two exact forms. 158 This test enforces that invariant at build time. 159 """ 160 files = _all_skill_files() 161 problems = [] 162 for path in files: 163 text = path.read_text(encoding="utf-8") 164 lines = text.splitlines() 165 open_name = None # None = no block open; str = name of open block 166 for lineno, line in enumerate(lines, start=1): 167 # Only inspect lines that look like a KB-DERIVED marker 168 # (BEGIN/END + KB-, case-insensitive). This is broader than 169 # the literal "KB-DERIVED" substring check so typo'd 170 # sentinels like KB-DEREIVED still land in the validator. 171 if not _SUSPICIOUS_MARKER_RE.search(line): 172 continue 173 begin_m = _BEGIN_MARKER_RE.fullmatch(line.strip()) 174 end_m = _END_MARKER_RE.fullmatch(line.strip()) 175 if begin_m is None and end_m is None: 176 problems.append( 177 f"{path.relative_to(REPO_ROOT)}:{lineno}: line looks " 178 f"like a KB-DERIVED marker but does not match the exact " 179 f"syntax (typo in sentinel or section name, extra text, " 180 f"wrong spacing, etc.). Line: {line!r}" 181 ) 182 continue 183 if begin_m is not None and end_m is not None: 184 # Shouldn't happen given distinct regexes, but guard anyway. 185 problems.append( 186 f"{path.relative_to(REPO_ROOT)}:{lineno}: line matches " 187 f"both BEGIN and END patterns. Line: {line!r}" 188 ) 189 continue 190 if begin_m is not None: 191 name = begin_m.group(1) 192 if open_name is not None: 193 problems.append( 194 f"{path.relative_to(REPO_ROOT)}:{lineno}: nested " 195 f"BEGIN {name!r} while {open_name!r} still open" 196 ) 197 # Keep parsing; treat the new BEGIN as the open block. 198 open_name = name 199 else: # end_m is not None 200 name = end_m.group(1) 201 if open_name is None: 202 problems.append( 203 f"{path.relative_to(REPO_ROOT)}:{lineno}: END " 204 f"{name!r} without matching BEGIN" 205 ) 206 elif open_name != name: 207 problems.append( 208 f"{path.relative_to(REPO_ROOT)}:{lineno}: END " 209 f"{name!r} does not match open BEGIN {open_name!r}" 210 ) 211 open_name = None 212 else: 213 open_name = None # clean close 214 if open_name is not None: 215 problems.append( 216 f"{path.relative_to(REPO_ROOT)}: unclosed BEGIN {open_name!r} " 217 f"at end of file" 218 ) 219 assert not problems, ( 220 "Malformed KB-DERIVED markers in skill files:\n " 221 + "\n ".join(problems) 222 + "\n\nFix the marker pairing manually. Every KB-DERIVED line must " 223 + "match one of:\n" 224 + " <!-- BEGIN KB-DERIVED: section-name -->\n" 225 + " <!-- END KB-DERIVED: section-name -->\n" 226 + "with matching section names and no nesting." 227 ) 228 229 230 def test_kb_derived_sections_are_up_to_date(): 231 """Every KB-DERIVED block in every skill file must match the live KB. 232 233 If this test fails, run `python scripts/regen_skill.py` and commit 234 the result. 235 """ 236 regen = _import_regen_skill() 237 files = _all_skill_files() 238 rc = regen.check_files(files) 239 assert rc == 0, ( 240 "KB-DERIVED sections in skill files are stale. " 241 "Run `python scripts/regen_skill.py` and commit the result." 242 ) 243 244 245 def _extract_backtick_tokens(text): 246 """Return the set of inline backtick-wrapped tokens in `text`. 247 248 Only matches single-backtick spans (inline code), not triple-backtick 249 fences. Splits on spaces and commas to handle things like 250 ``pyod install skill`` (multi-word) — those count as one token, not 251 three. 252 """ 253 matches = _BACKTICK_RE.findall(text) 254 return set(matches) 255 256 257 def test_backtick_detector_names_exist_in_kb(): 258 """Every backtick-wrapped detector-like token in prose must exist in the KB. 259 260 The design enforces a convention: detector names in hand-written 261 skill prose must be wrapped in backticks (inline code style). For 262 example: 263 264 Use `IForest` for large tabular data. # validated 265 Use Isolation Forest for large data. # NOT validated (free prose) 266 267 This lets maintainers write expository prose freely while keeping 268 the canonical detector references type-checked against the KB. 269 270 Failure modes this catches: 271 - Typo (`IForset` instead of `IForest`) 272 - Renamed detector (`Deep_SVDD` after KB rename to `DeepSVDD`) 273 - Stale reference to a removed detector 274 275 Allowlist: tokens that are legitimate backtick references but not 276 KB keys (Python symbols, CLI commands, file paths, benchmark names) 277 live in `_BACKTICK_ALLOWLIST` at the top of this file. Add to that 278 list when a false positive surfaces, with a one-line reason. 279 """ 280 kb = _load_kb() 281 known = set(kb.keys()) 282 files = _all_skill_files() 283 unknown_per_file = {} 284 for path in files: 285 text = path.read_text(encoding="utf-8") 286 text = _strip_paired_kb_derived_blocks(text) 287 tokens = _extract_backtick_tokens(text) 288 # Heuristic: a token is "detector-like" if it starts with a 289 # capital letter and contains no spaces beyond method-call dots. 290 detector_like = { 291 t for t in tokens 292 if t and t[0].isupper() and " " not in t 293 } 294 unknown = detector_like - known - _BACKTICK_ALLOWLIST 295 if unknown: 296 unknown_per_file[path.relative_to(REPO_ROOT)] = sorted(unknown) 297 assert not unknown_per_file, ( 298 "Unknown detector-like backtick tokens in skill hand-written content:\n" 299 + "\n".join( 300 f" {path}: {names}" 301 for path, names in unknown_per_file.items() 302 ) 303 + "\n\nEither:\n" 304 + " 1. Fix the skill — this detector is not in the KB.\n" 305 + " 2. Use free prose instead (remove the backticks).\n" 306 + " 3. Add the token to _BACKTICK_ALLOWLIST with a one-line reason." 307 ) 308 309 310 def test_kb_loads_cleanly(): 311 """Sanity check: the KB must load without errors.""" 312 kb = _load_kb() 313 assert kb, "KB is empty" 314 assert isinstance(kb, dict) 315 # Spot-check that the KB has the expected schema 316 sample_name, sample_algo = next(iter(kb.items())) 317 assert "data_types" in sample_algo, ( 318 f"KB algorithm {sample_name} missing 'data_types' field" 319 ) 320 321 322 def test_allowlist_does_not_shadow_kb_keys(): 323 """The backtick allowlist must not contain any live KB detector name. 324 325 Allowing a real detector name here defeats the safety net's purpose: 326 if `IForest` is in the allowlist and the maintainer typos it as 327 `IForset`, the typo would fail the KB lookup but the allowlist 328 fallback would let it through silently. This test enforces that the 329 allowlist and the live KB are disjoint sets, so every detector name 330 in skill prose is validated against the KB on every CI run. 331 """ 332 kb = _load_kb() 333 overlap = _BACKTICK_ALLOWLIST & set(kb.keys()) 334 assert not overlap, ( 335 f"_BACKTICK_ALLOWLIST shadows live KB detector names: {sorted(overlap)}.\n" 336 f"Remove these entries — they should be validated against the KB, " 337 f"not allowlisted. Allowlisting them would let typos slip past the " 338 f"safety net." 339 )