# ml.py
# MLflow 3 Traditional ML Example
# In this example, we will first run a model training job, which is tracked as
# an MLflow Run, to produce a trained model, which is tracked as an MLflow Logged Model.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import mlflow
import mlflow.sklearn
from mlflow.entities import Dataset


def compute_metrics(actual, predicted):
    """Return (rmse, mae, r2) regression metrics for the given targets and predictions.

    Note: sklearn's ``mean_squared_error`` returns the *mean* squared error, so
    we take the square root explicitly to obtain a true RMSE. (The original
    code logged plain MSE under the metric name "rmse".)
    """
    rmse = mean_squared_error(actual, predicted) ** 0.5  # fix: sqrt of MSE -> RMSE
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    return rmse, mae, r2


# Load Iris dataset and prepare the DataFrame
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df["quality"] = (iris.target == 2).astype(int)  # Create a binary target for simplicity

# Split into training and testing datasets
train_df, test_df = train_test_split(iris_df, test_size=0.2, random_state=42)

# Start a run to represent the training job
with mlflow.start_run() as training_run:
    # Load the training dataset with MLflow. We will link training metrics to this dataset.
    train_dataset: Dataset = mlflow.data.from_pandas(train_df, name="train")
    train_x = train_dataset.df.drop(["quality"], axis=1)
    train_y = train_dataset.df[["quality"]]

    # Fit a model to the training dataset
    lr = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)
    lr.fit(train_x, train_y)

    # Log the model, specifying its ElasticNet parameters (alpha, l1_ratio)
    # As a new feature, the LoggedModel entity is linked to its name and params
    logged_model = mlflow.sklearn.log_model(
        sk_model=lr,
        name="elasticnet",
        params={
            "alpha": 0.5,
            "l1_ratio": 0.5,
        },
        input_example=train_x,
    )

    # Inspect the LoggedModel and its properties
    print(logged_model.model_id, logged_model.params)
    # m-fa4e1bca8cb64971bce2322a8fd427d3, {'alpha': '0.5', 'l1_ratio': '0.5'}

    # Evaluate the model on the training dataset and log metrics
    # These metrics are now linked to the LoggedModel entity
    predictions = lr.predict(train_x)
    (rmse, mae, r2) = compute_metrics(train_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        model_id=logged_model.model_id,
        dataset=train_dataset,
    )

    # Inspect the LoggedModel, now with metrics
    logged_model = mlflow.get_logged_model(logged_model.model_id)
    print(logged_model.model_id, logged_model.metrics)
    # m-fa4e1bca8cb64971bce2322a8fd427d3, [<Metric: dataset_name='train', key='rmse', model_id='m-fa4e1bca8cb64971bce2322a8fd427d3, value=0.7538635773139717, ...>, ...]


# Some time later, when we get a new evaluation dataset based on the latest production data,
# we will run a new model evaluation job, which is tracked as a new MLflow Run,
# to measure the performance of the model on this new dataset.
# This example will produce two MLflow Runs (training_run and evaluation_run) and
# one MLflow Logged Model (elasticnet). From the resulting Logged Model,
# we can see all of the parameters and metadata. We can also see all of the metrics linked
# from the training and evaluation runs.

# Start a run to represent the test dataset evaluation job
with mlflow.start_run() as evaluation_run:
    # Load the test dataset with MLflow. We will link test metrics to this dataset.
    test_dataset: mlflow.entities.Dataset = mlflow.data.from_pandas(test_df, name="test")
    test_x = test_dataset.df.drop(["quality"], axis=1)
    test_y = test_dataset.df[["quality"]]

    # Load the model
    model = mlflow.sklearn.load_model(f"models:/{logged_model.model_id}")

    # Evaluate the model on the test dataset and log metrics, linking to model
    predictions = model.predict(test_x)
    (rmse, mae, r2) = compute_metrics(test_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=test_dataset,
        model_id=logged_model.model_id,
    )


print(mlflow.get_logged_model(logged_model.model_id).to_dictionary())

# Now register the model.
mlflow.register_model(logged_model.model_uri, name="my_ml_model")