# log_model.py
"""
Example: Using uv for dependency management with MLflow models.

This script demonstrates three ways to use uv lockfile-based dependencies
when logging MLflow models:

1. Auto-detection: MLflow detects uv.lock + pyproject.toml in the current
   working directory and uses ``uv export`` to capture pinned dependencies.

2. Explicit path (uv_project_path): Point to a uv project directory when
   logging from a different working directory or in a monorepo layout.

3. Dependency groups and extras (uv_groups, uv_extras): Include additional
   dependency groups or optional extras defined in pyproject.toml.

Prerequisites:
    - uv >= 0.5.0 installed (``pip install uv`` or https://docs.astral.sh/uv/)
    - Run from this directory so auto-detection finds uv.lock and pyproject.toml

Usage:
    cd examples/uv-dependency-management
    uv run python log_model.py
"""

from pathlib import Path

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

import mlflow

# Number of requirement lines printed before the listing is truncated.
_PREVIEW_LIMIT = 10
# Width of the "=" rule printed above and below each example title.
_RULE_WIDTH = 60


def read_requirements(run_id, artifact_name="model"):
    """Read the model's requirements.txt from local run artifacts.

    Args:
        run_id: ID of the run whose artifacts contain the model.
        artifact_name: Artifact directory of the model within the run.

    Returns:
        The requirement lines with surrounding whitespace stripped. Blank
        lines and ``#`` comment lines are dropped so that the result can be
        counted and searched as a list of requirements.
    """
    client = mlflow.tracking.MlflowClient()
    local_path = client.download_artifacts(run_id, f"{artifact_name}/requirements.txt")
    # Explicit encoding: do not depend on the platform's default locale.
    with open(local_path, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        # Comment lines are not requirements; keeping them would inflate the
        # package count and could falsely match substring checks.
        return [line for line in stripped_lines if line and not line.startswith("#")]


def check_uv_artifacts(run_id, artifact_name="model"):
    """Check if uv project files were saved as model artifacts.

    Args:
        run_id: ID of the run whose artifacts contain the model.
        artifact_name: Artifact directory of the model within the run.

    Returns:
        A dict with the keys ``"uv.lock"`` and ``"pyproject.toml"``, each
        mapped to whether that file exists in the downloaded model directory.
    """
    client = mlflow.tracking.MlflowClient()
    model_dir = client.download_artifacts(run_id, artifact_name)
    model_path = Path(model_dir)
    return {
        "uv.lock": (model_path / "uv.lock").exists(),
        "pyproject.toml": (model_path / "pyproject.toml").exists(),
    }


def train_model():
    """Train a simple RandomForest on the Iris dataset.

    Returns:
        A ``(model, X_test, accuracy)`` tuple: the fitted classifier, the
        held-out feature rows, and the classifier's score on the held-out split.
    """
    iris = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42
    )
    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    return model, X_test, accuracy


class SklearnWrapper(mlflow.pyfunc.PythonModel):
    """Wrap a scikit-learn model as a PythonModel for pyfunc logging."""

    def __init__(self, sklearn_model):
        self._model = sklearn_model

    def predict(self, context, model_input, params=None):
        """Delegate prediction to the wrapped model; context and params are unused."""
        return self._model.predict(model_input)


def _print_banner(title):
    """Print an example title framed by two "=" rules."""
    rule = "=" * _RULE_WIDTH
    print(rule)
    print(title)
    print(rule)


def _print_requirements_preview(reqs, limit=_PREVIEW_LIMIT):
    """Print the requirement count, the first ``limit`` entries, and how many were omitted."""
    print(f"Requirements ({len(reqs)} packages):")
    for req in reqs[:limit]:
        print(f" {req}")
    if len(reqs) > limit:
        print(f" ... and {len(reqs) - limit} more")


def example_auto_detection(model, input_example):
    """
    Example 1: Auto-detection.

    When run from a directory containing uv.lock and pyproject.toml,
    MLflow automatically uses uv export to capture pinned dependencies.
    No extra parameters needed.

    Args:
        model: Fitted scikit-learn model to wrap and log.
        input_example: Sample input passed to ``log_model``.

    Returns:
        The model info object returned by ``mlflow.pyfunc.log_model``.
    """
    _print_banner("Example 1: Auto-detection")

    with mlflow.start_run(run_name="uv-auto-detection") as run:
        model_info = mlflow.pyfunc.log_model(
            python_model=SklearnWrapper(model),
            name="model",
            input_example=input_example,
        )

    run_id = run.info.run_id
    reqs = read_requirements(run_id)
    print(f"Logged model: {model_info.model_uri}")
    _print_requirements_preview(reqs)

    # Verify uv artifacts were saved
    uv_files = check_uv_artifacts(run_id)
    print(f"uv.lock saved as artifact: {uv_files['uv.lock']}")
    print(f"pyproject.toml saved as artifact: {uv_files['pyproject.toml']}")
    print()

    return model_info


def example_explicit_path(model, input_example):
    """
    Example 2: Explicit uv_project_path.

    Use uv_project_path to point to a uv project when logging from
    a different working directory. Useful in monorepos.

    Args:
        model: Fitted scikit-learn model to wrap and log.
        input_example: Sample input passed to ``log_model``.

    Returns:
        The model info object returned by ``mlflow.pyfunc.log_model``.
    """
    _print_banner("Example 2: Explicit uv_project_path")

    # The uv project is the directory that contains this script.
    project_dir = Path(__file__).parent.resolve()

    with mlflow.start_run(run_name="uv-explicit-path") as run:
        model_info = mlflow.pyfunc.log_model(
            python_model=SklearnWrapper(model),
            name="model",
            input_example=input_example,
            uv_project_path=project_dir,
        )

    run_id = run.info.run_id
    reqs = read_requirements(run_id)
    print(f"Logged model: {model_info.model_uri}")
    print(f"uv_project_path: {project_dir}")
    _print_requirements_preview(reqs)
    print()

    return model_info


def example_groups_and_extras(model, input_example):
    """
    Example 3: Dependency groups and extras.

    Include the 'ml' dependency group (xgboost) and 'serving' optional
    extra (flask) in the exported requirements.

    Args:
        model: Fitted scikit-learn model to wrap and log.
        input_example: Sample input passed to ``log_model``.

    Returns:
        The model info object returned by ``mlflow.pyfunc.log_model``.
    """
    _print_banner("Example 3: uv_groups and uv_extras")

    # The uv project is the directory that contains this script.
    project_dir = Path(__file__).parent.resolve()

    with mlflow.start_run(run_name="uv-groups-and-extras") as run:
        model_info = mlflow.pyfunc.log_model(
            python_model=SklearnWrapper(model),
            name="model",
            input_example=input_example,
            uv_project_path=project_dir,
            uv_groups=["ml"],
            uv_extras=["serving"],
        )

    run_id = run.info.run_id
    reqs = read_requirements(run_id)
    print(f"Logged model: {model_info.model_uri}")
    print("uv_groups: ['ml'] (adds xgboost)")
    print("uv_extras: ['serving'] (adds flask)")
    print(f"Requirements ({len(reqs)} packages):")

    # Check that group and extra deps were included. Both checks compare
    # case-insensitively so they treat package names the same way.
    lowered = [req.lower() for req in reqs]
    has_xgboost = any("xgboost" in req for req in lowered)
    has_flask = any("flask" in req for req in lowered)
    print(f" xgboost included (from 'ml' group): {has_xgboost}")
    print(f" flask included (from 'serving' extra): {has_flask}")
    print()

    for req in sorted(reqs):
        print(f" {req}")
    print()

    return model_info


def main():
    """Train a model, point MLflow at a local SQLite store, and run all three examples."""
    print("MLflow uv Dependency Management Example")
    print()

    # Train a model
    model, X_test, accuracy = train_model()
    input_example = X_test[:2]
    print(f"Trained RandomForestClassifier (accuracy: {accuracy:.2f})")
    print()

    # Set up a local tracking URI for the example.
    # Use an absolute path so it works regardless of working directory.
    example_dir = Path(__file__).parent.resolve()
    db_path = example_dir / "mlflow.db"
    mlflow.set_tracking_uri(f"sqlite:///{db_path}")
    mlflow.set_experiment("uv-dependency-management")

    # Run all three examples
    example_auto_detection(model, input_example)
    example_explicit_path(model, input_example)
    example_groups_and_extras(model, input_example)

    print("All examples completed successfully.")


if __name__ == "__main__":
    main()