check_init_py.py
"""
Pre-commit hook to check for missing `__init__.py` files in mlflow and tests directories.

This script ensures that all directories under the mlflow package and tests directory that contain
Python files also have an `__init__.py` file. This prevents `setuptools` from excluding these
directories during package build and ensures test modules are properly structured.

Usage:
    uv run dev/check_init_py.py

Requirements:
- If `mlflow/foo/bar.py` exists, `mlflow/foo/__init__.py` must exist.
- If `tests/foo/test_bar.py` exists, `tests/foo/__init__.py` must exist.
- Only test files (starting with `test_`) in the tests directory are checked.
- All parent directories of Python files are checked recursively for `__init__.py`.
- Ignore directories that do not contain any Python files (e.g., `mlflow/server/js`).
"""

import os
import subprocess
import sys
from pathlib import Path


def get_tracked_python_files() -> list[Path]:
    """Return the git-tracked Python files whose directories must be packages.

    Runs `git ls-files` for the `mlflow/**/*.py` and `tests/**/*.py` pathspecs in the
    current working directory. Every matching path outside `tests` is kept; paths under
    `tests` are kept only when the file name starts with `test_`.

    Returns:
        The selected paths, relative to the directory `git` was run in.

    Raises:
        SystemExit: With exit code 1 (after printing a message to stderr) when `git`
            cannot be executed or exits with a non-zero status.
    """
    try:
        # `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths that
        # contain non-ASCII or special characters are C-quoted (e.g. "caf\303\251/x.py")
        # and would be misread as literal file names.
        raw = subprocess.check_output(
            ["git", "ls-files", "-z", "mlflow/**/*.py", "tests/**/*.py"],
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable `git` binary, which would
        # otherwise surface as an unhandled traceback.
        print(f"Error running git ls-files: {e}", file=sys.stderr)
        sys.exit(1)

    # Decode with the filesystem encoding so the resulting paths refer to the same
    # on-disk names that git reported.
    paths = (Path(f) for f in os.fsdecode(raw).split("\0") if f)
    return [p for p in paths if not p.is_relative_to("tests") or p.name.startswith("test_")]


def main() -> int:
    """Report directories that hold tracked Python files but have no `__init__.py`.

    Every ancestor directory of each selected file (excluding the repository root `.`)
    is checked on disk for an `__init__.py`.

    Returns:
        0 when no directory is missing `__init__.py` (or no Python files were found),
        1 otherwise, after printing the offending directories to stdout.
    """
    python_files = get_tracked_python_files()
    if not python_files:
        return 0

    # `Path.parents` yields every ancestor, so intermediate directories that contain
    # only sub-packages are checked as well.
    python_dirs = {p for f in python_files for p in f.parents if p != Path(".")}
    if missing_init_files := [d for d in python_dirs if not (d / "__init__.py").exists()]:
        print("Error: The following directories contain Python files but lack __init__.py:")
        for d in sorted(missing_init_files):
            print(f"  {d.as_posix()}/")
        print("Please add __init__.py files to the directories listed above.")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())