# sklearn_example.py
# ruff: noqa
"""
python examples/demo.py

End-to-end MLflow example: download the wine-quality dataset, train an
ElasticNet regressor inside a training run, evaluate it on a held-out test
set inside a second run, then register the logged model.
"""

import logging

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import mlflow


logger = logging.getLogger(__name__)

# Read the wine-quality csv file from the URL
csv_url = (
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
)
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception:
    # logger.exception already records the traceback; re-raise so the script
    # stops here instead of crashing later with a confusing NameError on `data`.
    logger.exception(
        "Unable to download training & test CSV, check your internet connection."
    )
    raise

# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)


def eval_metrics(actual, pred):
    """Return (rmse, mae, r2) regression metrics for predictions vs. truth."""
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


alpha = 0.5
l1_ratio = 0.5

# Start a run to represent the training job
with mlflow.start_run() as training_run:
    # Load the training dataset with MLflow. We will link training metrics to this dataset.
    train_dataset: mlflow.data.pandas_dataset.PandasDataset = mlflow.data.from_pandas(
        train, name="train_dataset"
    )
    train_x = train_dataset.df.drop(["quality"], axis=1)
    train_y = train_dataset.df[["quality"]]

    # Fit a model to the training dataset
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Evaluate the model on the training dataset and log metrics
    predictions = lr.predict(train_x)
    (rmse, mae, r2) = eval_metrics(train_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=train_dataset,
    )

    # Log the model, specifying its ElasticNet parameters (alpha, l1_ratio)
    model = mlflow.sklearn.log_model(
        sk_model=lr,
        name="elasticnet",
        params={
            "alpha": alpha,
            "l1_ratio": l1_ratio,
        },
    )

    # Fetch the model ID, and print the model
    model_id = model.model_id
    print("\n")
    print(model)
    print("\n")
    print(model_id)

# Start a run to represent the test dataset evaluation job
with mlflow.start_run() as evaluation_run:
    # Load the test dataset with MLflow. We will link test metrics to this dataset.
    test_dataset: mlflow.data.pandas_dataset.PandasDataset = mlflow.data.from_pandas(
        test, name="test_dataset"
    )
    test_x = test_dataset.df.drop(["quality"], axis=1)
    test_y = test_dataset.df[["quality"]]

    # Load the model back from the tracking server so the evaluation exercises
    # the persisted artifact, not the in-memory `lr` estimator.
    model = mlflow.sklearn.load_model(f"models:/{model_id}")

    # Evaluate the loaded model on the test dataset and log metrics
    predicted_qualities = model.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=test_dataset,
        # Specify the ID of the model logged above
        model_id=model_id,
    )

model = mlflow.get_logged_model(model_id)

training_run = mlflow.get_run(training_run.info.run_id)
print(training_run)
print("\n")
print(training_run.outputs)

evaluation_run = mlflow.get_run(evaluation_run.info.run_id)
print(evaluation_run)
print("\n")
print(evaluation_run.inputs)

print(f"models:/{model_id}")
mlflow.register_model(model_uri=f"models:/{model_id}", name="registered_elasticnet")
mlflow.MlflowClient().get_model_version("registered_elasticnet", 1)