# ml.py
# MLflow 3 Traditional ML Example
# In this example, we will first run a model training job, which is tracked as
# an MLflow Run, to produce a trained model, which is tracked as an MLflow Logged Model.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import mlflow
import mlflow.sklearn
from mlflow.entities import Dataset


def compute_metrics(actual, predicted):
    """Return (rmse, mae, r2) regression metrics for the given targets and predictions.

    Note: sklearn's ``mean_squared_error`` returns the *mean* squared error, so
    we take the square root explicitly to obtain a true RMSE. (The original
    code logged plain MSE under the metric name "rmse".)
    """
    rmse = mean_squared_error(actual, predicted) ** 0.5  # fix: sqrt of MSE -> RMSE
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    return rmse, mae, r2


# Load Iris dataset and prepare the DataFrame
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df["quality"] = (iris.target == 2).astype(int)  # Create a binary target for simplicity

# Split into training and testing datasets
train_df, test_df = train_test_split(iris_df, test_size=0.2, random_state=42)

# Start a run to represent the training job
with mlflow.start_run() as training_run:
    # Load the training dataset with MLflow. We will link training metrics to this dataset.
    train_dataset: Dataset = mlflow.data.from_pandas(train_df, name="train")
    train_x = train_dataset.df.drop(["quality"], axis=1)
    train_y = train_dataset.df[["quality"]]

    # Fit a model to the training dataset
    lr = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)
    lr.fit(train_x, train_y)

    # Log the model, specifying its ElasticNet parameters (alpha, l1_ratio)
    # As a new feature, the LoggedModel entity is linked to its name and params
    logged_model = mlflow.sklearn.log_model(
        sk_model=lr,
        name="elasticnet",
        params={
            "alpha": 0.5,
            "l1_ratio": 0.5,
        },
        input_example=train_x,
    )

    # Inspect the LoggedModel and its properties
    print(logged_model.model_id, logged_model.params)
    # m-fa4e1bca8cb64971bce2322a8fd427d3, {'alpha': '0.5', 'l1_ratio': '0.5'}

    # Evaluate the model on the training dataset and log metrics
    # These metrics are now linked to the LoggedModel entity
    predictions = lr.predict(train_x)
    (rmse, mae, r2) = compute_metrics(train_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        model_id=logged_model.model_id,
        dataset=train_dataset,
    )

    # Inspect the LoggedModel, now with metrics
    logged_model = mlflow.get_logged_model(logged_model.model_id)
    print(logged_model.model_id, logged_model.metrics)
    # m-fa4e1bca8cb64971bce2322a8fd427d3, [<Metric: dataset_name='train', key='rmse', model_id='m-fa4e1bca8cb64971bce2322a8fd427d3, value=0.7538635773139717, ...>, ...]


# Some time later, when we get a new evaluation dataset based on the latest production data,
# we will run a new model evaluation job, which is tracked as a new MLflow Run,
# to measure the performance of the model on this new dataset.
# This example will produce two MLflow Runs (training_run and evaluation_run) and
# one MLflow Logged Model (elasticnet). From the resulting Logged Model,
# we can see all of the parameters and metadata. We can also see all of the metrics linked
# from the training and evaluation runs.

# Start a run to represent the test dataset evaluation job
with mlflow.start_run() as evaluation_run:
    # Load the test dataset with MLflow. We will link test metrics to this dataset.
    test_dataset: mlflow.entities.Dataset = mlflow.data.from_pandas(test_df, name="test")
    test_x = test_dataset.df.drop(["quality"], axis=1)
    test_y = test_dataset.df[["quality"]]

    # Load the model
    model = mlflow.sklearn.load_model(f"models:/{logged_model.model_id}")

    # Evaluate the model on the test dataset and log metrics, linking to model
    predictions = model.predict(test_x)
    (rmse, mae, r2) = compute_metrics(test_y, predictions)
    mlflow.log_metrics(
        metrics={
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
        },
        dataset=test_dataset,
        model_id=logged_model.model_id,
    )


print(mlflow.get_logged_model(logged_model.model_id).to_dictionary())

# Now register the model.
mlflow.register_model(logged_model.model_uri, name="my_ml_model")