run_mock_parity_diff.py
#!/usr/bin/env python3
"""Cross-check the mock parity scenario manifest against PARITY.md and,
optionally, run the Rust mock parity harness to print a diff checklist.

Pass --no-run to skip the cargo invocation and only validate the mapping.
"""
from __future__ import annotations

import json
import os
import subprocess
import sys
import tempfile
from collections import defaultdict
from pathlib import Path


def load_manifest(path: Path) -> list[dict]:
    return json.loads(path.read_text())


def load_parity_text(path: Path) -> str:
    return path.read_text()


def ensure_refs_exist(manifest: list[dict], parity_text: str) -> list[tuple[str, str]]:
    """Return (scenario name, ref) pairs whose parity refs never appear in PARITY.md."""
    missing: list[tuple[str, str]] = []
    for entry in manifest:
        for ref in entry.get("parity_refs", []):
            if ref not in parity_text:
                missing.append((entry["name"], ref))
    return missing


def run_harness(rust_root: Path) -> dict:
    """Run the cargo test harness and return the JSON report it writes.

    The report path is handed to the harness via MOCK_PARITY_REPORT_PATH and
    read back before the temporary directory is cleaned up.
    """
    with tempfile.TemporaryDirectory(prefix="mock-parity-report-") as temp_dir:
        report_path = Path(temp_dir) / "report.json"
        env = os.environ.copy()
        env["MOCK_PARITY_REPORT_PATH"] = str(report_path)
        subprocess.run(
            [
                "cargo",
                "test",
                "-p",
                "rusty-claude-cli",
                "--test",
                "mock_parity_harness",
                "--",
                "--nocapture",
            ],
            cwd=rust_root,
            check=True,
            env=env,
        )
        return json.loads(report_path.read_text())


def main() -> int:
    script_path = Path(__file__).resolve()
    rust_root = script_path.parent.parent
    repo_root = rust_root.parent
    manifest = load_manifest(rust_root / "mock_parity_scenarios.json")
    parity_text = load_parity_text(repo_root / "PARITY.md")

    # Fail fast if any manifest entry cites a PARITY.md passage that no
    # longer exists.
    missing_refs = ensure_refs_exist(manifest, parity_text)
    if missing_refs:
        print("Missing PARITY.md references:", file=sys.stderr)
        for scenario_name, ref in missing_refs:
            print(f"  - {scenario_name}: {ref}", file=sys.stderr)
        return 1

    should_run = "--no-run" not in sys.argv[1:]
    report = run_harness(rust_root) if should_run else None
    report_by_name = {
        entry["name"]: entry for entry in report.get("scenarios", [])
    } if report else {}

    print("Mock parity diff checklist")
    print(f"Repo root: {repo_root}")
    print(f"Scenario manifest: {rust_root / 'mock_parity_scenarios.json'}")
    print(f"PARITY source: {repo_root / 'PARITY.md'}")
    print()

    for entry in manifest:
        scenario_name = entry["name"]
        scenario_report = report_by_name.get(scenario_name)
        # PASS: the harness reported this scenario. MAPPED: the harness was
        # skipped (--no-run). MISSING: the harness ran but produced no entry.
        status = "PASS" if scenario_report else ("MAPPED" if not should_run else "MISSING")
        print(f"[{status}] {scenario_name} ({entry['category']})")
        print(f"  description: {entry['description']}")
        print(f"  parity refs: {' | '.join(entry['parity_refs'])}")
        if scenario_report:
            print(
                "  result: iterations={iterations} requests={requests} tool_uses={tool_uses} tool_errors={tool_errors}".format(
                    iterations=scenario_report["iterations"],
                    requests=scenario_report["request_count"],
                    tool_uses=", ".join(scenario_report["tool_uses"]) or "none",
                    tool_errors=scenario_report["tool_error_count"],
                )
            )
            print(f"  final: {scenario_report['final_message']}")
        print()

    # Invert the manifest: for each PARITY.md reference, list the scenarios
    # that exercise it.
    coverage = defaultdict(list)
    for entry in manifest:
        for ref in entry["parity_refs"]:
            coverage[ref].append(entry["name"])

    print("PARITY coverage map")
    for ref, scenarios in coverage.items():
        print(f"- {ref}")
        print(f"  scenarios: {', '.join(scenarios)}")

    if report and report.get("scenarios"):
        first = report["scenarios"][0]
        print()
        print("First scenario result")
        print(f"- name: {first['name']}")
        print(f"- iterations: {first['iterations']}")
        print(f"- requests: {first['request_count']}")
        print(f"- tool_uses: {', '.join(first['tool_uses']) or 'none'}")
        print(f"- tool_errors: {first['tool_error_count']}")
        print(f"- final_message: {first['final_message']}")
        print()
        print(
            "Harness summary: {scenario_count} scenarios, {request_count} requests".format(
                scenario_count=report["scenario_count"],
                request_count=report["request_count"],
            )
        )

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
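
# ---------------------------------------------------------------------------
# Reference shapes, inferred from the field accesses above. This is a sketch
# of the minimum each file must contain for this script to run; the actual
# files may carry additional keys, and all values below are hypothetical
# examples, not output from the real harness.
#
# mock_parity_scenarios.json -- a list of scenario entries:
#   [
#     {
#       "name": "read_then_edit",
#       "category": "tools",
#       "description": "Read a file, then edit it via the mock backend.",
#       "parity_refs": ["Tools / Read", "Tools / Edit"]
#     }
#   ]
#
# report.json -- written by the harness to MOCK_PARITY_REPORT_PATH:
#   {
#     "scenario_count": 1,
#     "request_count": 3,
#     "scenarios": [
#       {
#         "name": "read_then_edit",
#         "iterations": 2,
#         "request_count": 3,
#         "tool_uses": ["read", "edit"],
#         "tool_error_count": 0,
#         "final_message": "Edit applied."
#       }
#     ]
#   }
# ---------------------------------------------------------------------------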