# test_paddle_autolog.py
"""Tests for MLflow autologging with PaddlePaddle high-level ``paddle.Model`` API.

Trains a small linear-regression model on the UCI Housing dataset and checks
that ``mlflow.paddle.autolog`` records the expected params, metrics,
artifacts, model objects, tags, and early-stopping information.
"""

import paddle
import pytest

import mlflow
from mlflow import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_AUTOLOGGING

# Number of training epochs; per-epoch metrics are asserted against this.
NUM_EPOCHS = 6


class LinearRegression(paddle.nn.Layer):
    """Minimal single-layer model: 13 UCI Housing features -> 1 target value."""

    def __init__(self):
        super().__init__()
        self.fc = paddle.nn.Linear(13, 1)

    def forward(self, feature):
        return self.fc(feature)


def get_datasets():
    """Return the (train, eval) UCI Housing datasets shipped with Paddle."""
    train_dataset = paddle.text.datasets.UCIHousing(mode="train")
    eval_dataset = paddle.text.datasets.UCIHousing(mode="test")
    return train_dataset, eval_dataset


def train_model(**fit_kwargs):
    """Train the regression model for ``NUM_EPOCHS`` epochs and return it.

    Extra keyword arguments (e.g. ``callbacks``) are forwarded to
    ``paddle.Model.fit``.
    """
    model = paddle.Model(LinearRegression())
    optim = paddle.optimizer.Adam(learning_rate=0.01, parameters=model.parameters())
    model.prepare(optim, paddle.nn.MSELoss())
    train_dataset, eval_dataset = get_datasets()
    model.fit(
        train_dataset, eval_dataset, batch_size=16, epochs=NUM_EPOCHS, verbose=1, **fit_kwargs
    )
    return model


def _assert_logged_model_metrics(data, log_models):
    """Assert model-logging behavior matches the ``log_models`` flag.

    When ``log_models`` is True, a model must have been logged and its metrics
    must mirror the run's metrics; otherwise no model may be logged.
    """
    logged_model = mlflow.last_logged_model()
    if log_models:
        assert logged_model is not None
        assert data.metrics == {m.key: m.value for m in logged_model.metrics}
    else:
        assert logged_model is None


@pytest.mark.parametrize("log_models", [True, False])
def test_autolog_logs_expected_data(log_models):
    """Autologging records optimizer params, per-epoch metrics, and artifacts."""
    mlflow.paddle.autolog(log_models=log_models)

    with mlflow.start_run() as run:
        train_model()

    client = MlflowClient()
    data = client.get_run(run.info.run_id).data

    # Testing params are logged
    for param_key, expected_param_value in [("optimizer_name", "Adam"), ("learning_rate", "0.01")]:
        assert param_key in data.params
        assert data.params[param_key] == expected_param_value

    # Testing metrics are logged once per epoch, for both train and eval phases
    for metric_key in ["batch_size", "loss", "step", "eval_batch_size", "eval_loss", "eval_step"]:
        assert metric_key in data.metrics
        metric_history = client.get_metric_history(run.info.run_id, metric_key)
        assert len(metric_history) == NUM_EPOCHS

    # Testing model_summary.txt is saved
    artifacts = client.list_artifacts(run.info.run_id)
    assert any(x.path == "model_summary.txt" for x in artifacts)

    # Testing metrics are logged to the model
    _assert_logged_model_metrics(data, log_models)


@pytest.mark.parametrize("log_models", [True, False])
def test_autolog_early_stopping_callback(log_models):
    """EarlyStopping callback params and metrics are captured by autologging."""
    mlflow.paddle.autolog(log_models=log_models)

    early_stopping = paddle.callbacks.EarlyStopping("loss", mode="min", patience=1, min_delta=0)
    with mlflow.start_run() as run:
        train_model(callbacks=[early_stopping])

    client = MlflowClient()
    data = client.get_run(run.info.run_id).data

    # Callback configuration is logged as params, matching the callback's own state.
    for param_key in ["monitor", "patience", "min_delta", "baseline"]:
        assert param_key in data.params
        assert data.params[param_key] == str(getattr(early_stopping, param_key))

    # Early-stopping outcome metrics mirror the callback attributes.
    for metric_key in ["stopped_epoch", "best_value"]:
        assert metric_key in data.metrics
        assert float(data.metrics[metric_key]) == getattr(early_stopping, metric_key)

    # patience=1 with min_delta=0 does not stop this run early, so the full
    # NUM_EPOCHS history is still expected.
    for metric_key in ["loss", "step"]:
        assert metric_key in data.metrics
        metric_history = client.get_metric_history(run.info.run_id, metric_key)
        assert len(metric_history) == NUM_EPOCHS

    _assert_logged_model_metrics(data, log_models)


@pytest.mark.parametrize("log_models", [True, False])
def test_autolog_log_models_configuration(log_models):
    """The ``log_models`` flag alone controls whether a model is logged."""
    mlflow.paddle.autolog(log_models=log_models)

    with mlflow.start_run() as run:
        train_model()

    MlflowClient().list_artifacts(run.info.run_id)
    assert (mlflow.last_logged_model() is not None) == log_models


def test_autolog_registering_model():
    """``registered_model_name`` causes autolog to register the trained model."""
    registered_model_name = "test_autolog_registered_model"
    mlflow.paddle.autolog(registered_model_name=registered_model_name)

    with mlflow.start_run():
        train_model()

    registered_model = MlflowClient().get_registered_model(registered_model_name)
    assert registered_model.name == registered_model_name


def test_extra_tags_paddle_autolog():
    """``extra_tags`` are set on the autologged run alongside the framework tag."""
    mlflow.paddle.autolog(extra_tags={"test_tag": "paddle_autolog"})
    train_model()

    run = mlflow.last_active_run()
    assert run.data.tags["test_tag"] == "paddle_autolog"
    # Use the explicitly imported constant rather than reaching through
    # mlflow.utils.mlflow_tags, which is only available as an attribute of
    # ``mlflow`` by import side effect.
    assert run.data.tags[MLFLOW_AUTOLOGGING] == "paddle"