# examples/r_wine/train.R
 1  # The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
 2  # P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
 3  # Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
 4  
 5  library(mlflow)
 6  library(glmnet)
 7  library(carrier)
 8  
 9  set.seed(40)
10  
11  # Read the wine-quality csv file
12  data <- read.csv("wine-quality.csv")
13  
14  # Split the data into training and test sets. (0.75, 0.25) split.
15  sampled <- sample(1:nrow(data), 0.75 * nrow(data))
16  train <- data[sampled, ]
17  test <- data[-sampled, ]
18  
19  # The predicted column is "quality" which is a scalar from [3, 9]
20  train_x <- as.matrix(train[, !(names(train) == "quality")])
21  test_x <- as.matrix(test[, !(names(train) == "quality")])
22  train_y <- train[, "quality"]
23  test_y <- test[, "quality"]
24  
25  alpha <- mlflow_param("alpha", 0.5, "numeric")
26  lambda <- mlflow_param("lambda", 0.5, "numeric")
27  
28  with(mlflow_start_run(), {
29      model <- glmnet(train_x, train_y, alpha = alpha, lambda = lambda, family= "gaussian", standardize = FALSE)
30      predictor <- crate(~ glmnet::predict.glmnet(!!model, as.matrix(.x)), !!model)
31      predicted <- predictor(test_x)
32  
33      rmse <- sqrt(mean((predicted - test_y) ^ 2))
34      mae <- mean(abs(predicted - test_y))
35      r2 <- as.numeric(cor(predicted, test_y) ^ 2)
36  
37      message("Elasticnet model (alpha=", alpha, ", lambda=", lambda, "):")
38      message("  RMSE: ", rmse)
39      message("  MAE: ", mae)
40      message("  R2: ", r2)
41  
42      mlflow_log_param("alpha", alpha)
43      mlflow_log_param("lambda", lambda)
44      mlflow_log_metric("rmse", rmse)
45      mlflow_log_metric("r2", r2)
46      mlflow_log_metric("mae", mae)
47  
48      mlflow_log_model(predictor, "model")
49  })