train.ipynb
  1  {
  2   "cells": [
  3    {
  4     "attachments": {},
  5     "cell_type": "markdown",
  6     "metadata": {},
  7     "source": [
  8      "# MLflow Training Tutorial\n",
  9      "\n",
 10      "This `train.ipynb` Jupyter notebook predicts the quality of wine using [sklearn.linear_model.ElasticNet](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html).  \n",
 11      "\n",
 12      "> This is the Jupyter notebook version of the `train.py` example\n",
 13      "\n",
 14      "Attribution\n",
 15      "* The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality\n",
 16      "* P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.\n",
 17      "* Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.\n"
 18     ]
 19    },
 20    {
 21     "cell_type": "code",
 22     "execution_count": 1,
 23     "metadata": {},
 24     "outputs": [],
 25     "source": [
 26      "import logging\n",
 27      "import warnings\n",
 28      "\n",
 29      "\n",
 30      "# Wine Quality Sample\n",
 31      "def train(in_alpha, in_l1_ratio):\n",
 32      "    import numpy as np\n",
 33      "    import pandas as pd\n",
 34      "    from sklearn.linear_model import ElasticNet\n",
 35      "    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
 36      "    from sklearn.model_selection import train_test_split\n",
 37      "\n",
 38      "    import mlflow\n",
 39      "    import mlflow.sklearn\n",
 40      "    from mlflow.models import infer_signature\n",
 41      "\n",
 42      "    logging.basicConfig(level=logging.WARN)\n",
 43      "    logger = logging.getLogger(__name__)\n",
 44      "\n",
 45      "    def eval_metrics(actual, pred):\n",
 46      "        rmse = np.sqrt(mean_squared_error(actual, pred))\n",
 47      "        mae = mean_absolute_error(actual, pred)\n",
 48      "        r2 = r2_score(actual, pred)\n",
 49      "        return rmse, mae, r2\n",
 50      "\n",
 51      "    warnings.filterwarnings(\"ignore\")\n",
 52      "    np.random.seed(40)\n",
 53      "\n",
 54      "    # Read the wine-quality csv file from the URL\n",
 55      "    csv_url = (\n",
 56      "        \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n",
 57      "    )\n",
 58      "    try:\n",
 59      "        data = pd.read_csv(csv_url, sep=\";\")\n",
 60      "    except Exception as e:\n",
 61      "        logger.exception(\n",
 62      "            f\"Unable to download training & test CSV, check your internet connection. Error: {e}\"\n",
 63      "        )\n",
 64      "\n",
 65      "    # Split the data into training and test sets. (0.75, 0.25) split.\n",
 66      "    train, test = train_test_split(data)\n",
 67      "\n",
 68      "    # The predicted column is \"quality\" which is a scalar from [3, 9]\n",
 69      "    train_x = train.drop([\"quality\"], axis=1)\n",
 70      "    test_x = test.drop([\"quality\"], axis=1)\n",
 71      "    train_y = train[[\"quality\"]]\n",
 72      "    test_y = test[[\"quality\"]]\n",
 73      "\n",
 74      "    # Set default values if no alpha is provided\n",
 75      "    alpha = 0.5 if float(in_alpha) is None else float(in_alpha)\n",
 76      "\n",
 77      "    # Set default values if no l1_ratio is provided\n",
 78      "    l1_ratio = 0.5 if float(in_l1_ratio) is None else float(in_l1_ratio)\n",
 79      "\n",
 80      "    # Useful for multiple runs (only doing one run in this sample notebook)\n",
 81      "    with mlflow.start_run():\n",
 82      "        # Execute ElasticNet\n",
 83      "        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n",
 84      "        lr.fit(train_x, train_y)\n",
 85      "\n",
 86      "        # Evaluate Metrics\n",
 87      "        predicted_qualities = lr.predict(test_x)\n",
 88      "        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)\n",
 89      "\n",
 90      "        # Print out metrics\n",
 91      "        print(f\"Elasticnet model (alpha={alpha:f}, l1_ratio={l1_ratio:f}):\")\n",
 92      "        print(f\"  RMSE: {rmse}\")\n",
 93      "        print(f\"  MAE: {mae}\")\n",
 94      "        print(f\"  R2: {r2}\")\n",
 95      "\n",
 96      "        # Infer model signature\n",
 97      "        predictions = lr.predict(train_x)\n",
 98      "        signature = infer_signature(train_x, predictions)\n",
 99      "\n",
100      "        # Log parameter, metrics, and model to MLflow\n",
101      "        mlflow.log_param(\"alpha\", alpha)\n",
102      "        mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
103      "        mlflow.log_metric(\"rmse\", rmse)\n",
104      "        mlflow.log_metric(\"r2\", r2)\n",
105      "        mlflow.log_metric(\"mae\", mae)\n",
106      "\n",
107      "        mlflow.sklearn.log_model(lr, name=\"model\", signature=signature)"
108     ]
109    },
110    {
111     "cell_type": "code",
112     "execution_count": 2,
113     "metadata": {},
114     "outputs": [
115      {
116       "name": "stdout",
117       "output_type": "stream",
118       "text": [
119        "Elasticnet model (alpha=0.500000, l1_ratio=0.500000):\n",
120        "  RMSE: 0.82224284975954\n",
121        "  MAE: 0.6278761410160691\n",
122        "  R2: 0.12678721972772689\n"
123       ]
124      }
125     ],
126     "source": [
127      "train(0.5, 0.5)"
128     ]
129    },
130    {
131     "cell_type": "code",
132     "execution_count": 3,
133     "metadata": {},
134     "outputs": [
135      {
136       "name": "stdout",
137       "output_type": "stream",
138       "text": [
139        "Elasticnet model (alpha=0.200000, l1_ratio=0.200000):\n",
140        "  RMSE: 0.7859129997062342\n",
141        "  MAE: 0.6155290394093894\n",
142        "  R2: 0.20224631822892092\n"
143       ]
144      }
145     ],
146     "source": [
147      "train(0.2, 0.2)"
148     ]
149    },
150    {
151     "cell_type": "code",
152     "execution_count": 4,
153     "metadata": {},
154     "outputs": [
155      {
156       "name": "stdout",
157       "output_type": "stream",
158       "text": [
159        "Elasticnet model (alpha=0.100000, l1_ratio=0.100000):\n",
160        "  RMSE: 0.7792546522251949\n",
161        "  MAE: 0.6112547988118587\n",
162        "  R2: 0.2157063843066196\n"
163       ]
164      }
165     ],
166     "source": [
167      "train(0.1, 0.1)"
168     ]
169    }
170   ],
171   "metadata": {
172    "kernelspec": {
173     "display_name": "Python 3",
174     "language": "python",
175     "name": "python3"
176    },
177    "language_info": {
178     "codemirror_mode": {
179      "name": "ipython",
180      "version": 3
181     },
182     "file_extension": ".py",
183     "mimetype": "text/x-python",
184     "name": "python",
185     "nbconvert_exporter": "python",
186     "pygments_lexer": "ipython3",
187     "version": "3.6.5"
188    }
189   },
190   "nbformat": 4,
191   "nbformat_minor": 2
192  }