log_model.py
  1  """
  2  Example: Using uv for dependency management with MLflow models.
  3  
  4  This script demonstrates three ways to use uv lockfile-based dependencies
  5  when logging MLflow models:
  6  
  7  1. Auto-detection: MLflow detects uv.lock + pyproject.toml in the current
  8     working directory and uses ``uv export`` to capture pinned dependencies.
  9  
 10  2. Explicit path (uv_project_path): Point to a uv project directory when
 11     logging from a different working directory or in a monorepo layout.
 12  
 13  3. Dependency groups and extras (uv_groups, uv_extras): Include additional
 14     dependency groups or optional extras defined in pyproject.toml.
 15  
 16  Prerequisites:
 17      - uv >= 0.5.0 installed (``pip install uv`` or https://docs.astral.sh/uv/)
 18      - Run from this directory so auto-detection finds uv.lock and pyproject.toml
 19  
 20  Usage:
 21      cd examples/uv-dependency-management
 22      uv run python log_model.py
 23  """
 24  
 25  from pathlib import Path
 26  
 27  from sklearn.datasets import load_iris
 28  from sklearn.ensemble import RandomForestClassifier
 29  from sklearn.model_selection import train_test_split
 30  
 31  import mlflow
 32  
 33  
 34  def read_requirements(run_id, artifact_name="model"):
 35      """Read the model's requirements.txt from local run artifacts."""
 36      client = mlflow.tracking.MlflowClient()
 37      local_path = client.download_artifacts(run_id, f"{artifact_name}/requirements.txt")
 38      with open(local_path) as f:
 39          return [line.strip() for line in f if line.strip()]
 40  
 41  
 42  def check_uv_artifacts(run_id, artifact_name="model"):
 43      """Check if uv project files were saved as model artifacts."""
 44      client = mlflow.tracking.MlflowClient()
 45      model_dir = client.download_artifacts(run_id, artifact_name)
 46      model_path = Path(model_dir)
 47      return {
 48          "uv.lock": (model_path / "uv.lock").exists(),
 49          "pyproject.toml": (model_path / "pyproject.toml").exists(),
 50      }
 51  
 52  
 53  def train_model():
 54      """Train a simple RandomForest on the Iris dataset."""
 55      iris = load_iris()
 56      X_train, X_test, y_train, y_test = train_test_split(
 57          iris.data, iris.target, test_size=0.2, random_state=42
 58      )
 59      model = RandomForestClassifier(n_estimators=10, random_state=42)
 60      model.fit(X_train, y_train)
 61      accuracy = model.score(X_test, y_test)
 62      return model, X_test, accuracy
 63  
 64  
 65  class SklearnWrapper(mlflow.pyfunc.PythonModel):
 66      """Wrap a scikit-learn model as a PythonModel for pyfunc logging."""
 67  
 68      def __init__(self, sklearn_model):
 69          self._model = sklearn_model
 70  
 71      def predict(self, context, model_input, params=None):
 72          return self._model.predict(model_input)
 73  
 74  
 75  def example_auto_detection(model, input_example):
 76      """
 77      Example 1: Auto-detection.
 78  
 79      When run from a directory containing uv.lock and pyproject.toml,
 80      MLflow automatically uses uv export to capture pinned dependencies.
 81      No extra parameters needed.
 82      """
 83      print("=" * 60)
 84      print("Example 1: Auto-detection")
 85      print("=" * 60)
 86  
 87      with mlflow.start_run(run_name="uv-auto-detection") as run:
 88          model_info = mlflow.pyfunc.log_model(
 89              python_model=SklearnWrapper(model),
 90              name="model",
 91              input_example=input_example,
 92          )
 93  
 94          run_id = run.info.run_id
 95          reqs = read_requirements(run_id)
 96          print(f"Logged model: {model_info.model_uri}")
 97          print(f"Requirements ({len(reqs)} packages):")
 98          for req in reqs[:10]:
 99              print(f"  {req}")
100          if len(reqs) > 10:
101              print(f"  ... and {len(reqs) - 10} more")
102  
103          # Verify uv artifacts were saved
104          uv_files = check_uv_artifacts(run_id)
105          print(f"uv.lock saved as artifact: {uv_files['uv.lock']}")
106          print(f"pyproject.toml saved as artifact: {uv_files['pyproject.toml']}")
107          print()
108  
109      return model_info
110  
111  
def example_explicit_path(model, input_example):
    """
    Example 2: Explicit uv_project_path.

    Logs the wrapped model while passing ``uv_project_path`` set to the
    directory that contains this script, which is how a uv project is
    selected when logging from some other working directory (for instance
    inside a monorepo).

    Prints the logged model URI, the project path used, and at most ten
    lines of the saved requirements. Returns the value produced by
    ``mlflow.pyfunc.log_model``.
    """
    banner = "=" * 60
    print(banner)
    print("Example 2: Explicit uv_project_path")
    print(banner)

    # Absolute directory of this script, independent of the current cwd.
    script_dir = Path(__file__).parent.resolve()

    with mlflow.start_run(run_name="uv-explicit-path") as active_run:
        log_kwargs = {
            "python_model": SklearnWrapper(model),
            "name": "model",
            "input_example": input_example,
            "uv_project_path": script_dir,
        }
        model_info = mlflow.pyfunc.log_model(**log_kwargs)

        requirement_lines = read_requirements(active_run.info.run_id)
        total = len(requirement_lines)
        print(f"Logged model: {model_info.model_uri}")
        print(f"uv_project_path: {script_dir}")
        print(f"Requirements ({total} packages):")

        # Show only a short preview; summarize whatever is left over.
        for line in requirement_lines[:10]:
            print(f"  {line}")
        hidden = total - 10
        if hidden > 0:
            print(f"  ... and {hidden} more")
        print()

    return model_info
145  
146  
def example_groups_and_extras(model, input_example):
    """
    Example 3: Dependency groups and extras.

    Logs the wrapped model with ``uv_groups=["ml"]`` and
    ``uv_extras=["serving"]`` so that the 'ml' dependency group (xgboost)
    and the 'serving' optional extra (flask) are part of the exported
    requirements.

    Prints the logged model URI, whether an xgboost and a flask entry show
    up in the saved requirements, and then every requirement line in sorted
    order. Returns the value produced by ``mlflow.pyfunc.log_model``.
    """
    banner = "=" * 60
    print(banner)
    print("Example 3: uv_groups and uv_extras")
    print(banner)

    # Absolute directory of this script, independent of the current cwd.
    script_dir = Path(__file__).parent.resolve()

    with mlflow.start_run(run_name="uv-groups-and-extras") as active_run:
        wrapped = SklearnWrapper(model)
        model_info = mlflow.pyfunc.log_model(
            python_model=wrapped,
            name="model",
            input_example=input_example,
            uv_project_path=script_dir,
            uv_groups=["ml"],
            uv_extras=["serving"],
        )

        requirement_lines = read_requirements(active_run.info.run_id)
        print(f"Logged model: {model_info.model_uri}")
        print("uv_groups: ['ml']  (adds xgboost)")
        print("uv_extras: ['serving']  (adds flask)")
        print(f"Requirements ({len(requirement_lines)} packages):")

        # Substring checks for the packages the group and the extra pull in.
        xgboost_found = any("xgboost" in line for line in requirement_lines)
        flask_found = any("flask" in line.lower() for line in requirement_lines)
        print(f"  xgboost included (from 'ml' group): {xgboost_found}")
        print(f"  flask included (from 'serving' extra): {flask_found}")
        print()

        # Dump the full list on a sorted copy, leaving the original order alone.
        ordered = list(requirement_lines)
        ordered.sort()
        for line in ordered:
            print(f"  {line}")
        print()

    return model_info
189  
190  
def main():
    """Train the demo model, configure local tracking, and run all three examples."""
    print("MLflow uv Dependency Management Example")
    print()

    # Fit the classifier and keep two held-out rows as the input example.
    model, X_test, accuracy = train_model()
    input_example = X_test[:2]
    print(f"Trained RandomForestClassifier (accuracy: {accuracy:.2f})")
    print()

    # Track into a SQLite file that lives next to this script. The path is
    # absolute so the store location does not depend on the working directory.
    db_path = Path(__file__).parent.resolve() / "mlflow.db"
    mlflow.set_tracking_uri(f"sqlite:///{db_path}")
    mlflow.set_experiment("uv-dependency-management")

    # Execute the examples one after another, in their documented order.
    examples = (
        example_auto_detection,
        example_explicit_path,
        example_groups_and_extras,
    )
    for run_example in examples:
        run_example(model, input_example)

    print("All examples completed successfully.")
214  
215  
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()