train.R
# The data set used in this example is from
# http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties.
# In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
#
# Script overview: fits a glmnet elastic-net regression predicting the
# "quality" column of wine-quality.csv, evaluates it on a held-out 25% split,
# and logs the parameters, metrics, and a crated predictor to MLflow.

library(mlflow)
library(glmnet)
library(carrier)

# Fixed seed so the random train/test split below is reproducible.
set.seed(40)

# Read the wine-quality csv file (path is relative to the working directory).
data <- read.csv("wine-quality.csv")

# Fail early with a clear message instead of an obscure downstream error.
stopifnot(nrow(data) > 0, "quality" %in% names(data))

# Split the data into training and test sets. (0.75, 0.25) split.
# seq_len() is used instead of 1:nrow(data), which misbehaves for zero rows.
sampled <- sample(seq_len(nrow(data)), 0.75 * nrow(data))
train <- data[sampled, ]
test <- data[-sampled, ]

# The predicted column is "quality" which is a scalar from [3, 9].
# Each feature matrix is selected using its own frame's column names.
train_x <- as.matrix(train[, names(train) != "quality"])
test_x <- as.matrix(test[, names(test) != "quality"])
train_y <- train[, "quality"]
test_y <- test[, "quality"]

# Hyperparameters declared through mlflow_param(): name, default value 0.5,
# and declared type "numeric".
alpha <- mlflow_param("alpha", 0.5, "numeric")
lambda <- mlflow_param("lambda", 0.5, "numeric")

with(mlflow_start_run(), {
  # Fit the elastic-net model on the unstandardized training features.
  model <- glmnet(
    train_x, train_y,
    alpha = alpha,
    lambda = lambda,
    family = "gaussian",
    standardize = FALSE
  )

  # Wrap prediction in a crate with the fitted model unquoted into it, so the
  # predictor can be logged as a model artifact below.
  predictor <- crate(~ glmnet::predict.glmnet(!!model, as.matrix(.x)), !!model)
  predicted <- predictor(test_x)

  # Evaluation metrics on the held-out test set.
  rmse <- sqrt(mean((predicted - test_y) ^ 2))
  mae <- mean(abs(predicted - test_y))
  # as.numeric() flattens the result of cor() to a plain number for logging.
  r2 <- as.numeric(cor(predicted, test_y) ^ 2)

  message("Elasticnet model (alpha=", alpha, ", lambda=", lambda, "):")
  message(" RMSE: ", rmse)
  message(" MAE: ", mae)
  message(" R2: ", r2)

  # Record hyperparameters and metrics on the active MLflow run.
  mlflow_log_param("alpha", alpha)
  mlflow_log_param("lambda", lambda)
  mlflow_log_metric("rmse", rmse)
  mlflow_log_metric("r2", r2)
  mlflow_log_metric("mae", mae)

  # Log the crated predictor under the artifact path "model".
  mlflow_log_model(predictor, "model")
})