# sklearn_example.py
# ruff: noqa
"""
python examples/demo.py

End-to-end MLflow example: download the wine-quality dataset, train an
ElasticNet regressor inside a training run, evaluate it on a held-out test
set inside a second run, then register the logged model.
"""

import logging

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import mlflow


logger = logging.getLogger(__name__)

# Read the wine-quality csv file from the URL
csv_url = (
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
)
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception:
    # logger.exception already records the traceback; re-raise so the script
    # stops here instead of crashing later with a confusing NameError on `data`.
    logger.exception(
        "Unable to download training & test CSV, check your internet connection."
    )
    raise

# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)


def eval_metrics(actual, pred):
    """Return (rmse, mae, r2) regression metrics for predictions vs. truth."""
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


alpha = 0.5
l1_ratio = 0.5

# Start a run to represent the training job
with mlflow.start_run() as training_run:
    # Load the training dataset with MLflow. We will link training metrics to this dataset.
    train_dataset: mlflow.data.pandas_dataset.PandasDataset = mlflow.data.from_pandas(
        train, name="train_dataset"
    )
    train_x = train_dataset.df.drop(["quality"], axis=1)
    train_y = train_dataset.df[["quality"]]

    # Fit a model to the training dataset
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Evaluate the model on the training dataset and log metrics
    predictions = lr.predict(train_x)
    (rmse, mae, r2) = eval_metrics(train_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=train_dataset,
    )

    # Log the model, specifying its ElasticNet parameters (alpha, l1_ratio)
    model = mlflow.sklearn.log_model(
        sk_model=lr,
        name="elasticnet",
        params={
            "alpha": alpha,
            "l1_ratio": l1_ratio,
        },
    )

    # Fetch the model ID, and print the model
    model_id = model.model_id
    print("\n")
    print(model)
    print("\n")
    print(model_id)

# Start a run to represent the test dataset evaluation job
with mlflow.start_run() as evaluation_run:
    # Load the test dataset with MLflow. We will link test metrics to this dataset.
    test_dataset: mlflow.data.pandas_dataset.PandasDataset = mlflow.data.from_pandas(
        test, name="test_dataset"
    )
    test_x = test_dataset.df.drop(["quality"], axis=1)
    test_y = test_dataset.df[["quality"]]

    # Load the model back from the tracking server so the evaluation exercises
    # the persisted artifact, not the in-memory `lr` estimator.
    model = mlflow.sklearn.load_model(f"models:/{model_id}")

    # Evaluate the loaded model on the test dataset and log metrics
    predicted_qualities = model.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=test_dataset,
        # Specify the ID of the model logged above
        model_id=model_id,
    )

model = mlflow.get_logged_model(model_id)

training_run = mlflow.get_run(training_run.info.run_id)
print(training_run)
print("\n")
print(training_run.outputs)

evaluation_run = mlflow.get_run(evaluation_run.info.run_id)
print(evaluation_run)
print("\n")
print(evaluation_run.inputs)

print(f"models:/{model_id}")
mlflow.register_model(model_uri=f"models:/{model_id}", name="registered_elasticnet")
mlflow.MlflowClient().get_model_version("registered_elasticnet", 1)