# mlflow/paddle/__init__.py
"""
The ``mlflow.paddle`` module provides an API for logging and loading paddle models.
This module exports paddle models with the following flavors:

Paddle (native) format
    This is the main flavor that can be loaded back into paddle.

:py:mod:`mlflow.pyfunc`
    Produced for use by generic pyfunc-based deployment tools and batch inference.
    NOTE: The `mlflow.pyfunc` flavor is only added for paddle models that define `predict()`,
    since `predict()` is required for pyfunc model inference.
"""

import logging
import os
from typing import Any

import yaml

import mlflow
from mlflow import pyfunc
from mlflow.models import Model, ModelInputExample, ModelSignature
from mlflow.models.model import MLMODEL_FILE_NAME
from mlflow.models.signature import _infer_signature_from_input_example
from mlflow.models.utils import _save_example
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.autologging_utils import autologging_integration, safe_patch
from mlflow.utils.docstring_utils import LOG_MODEL_PARAM_DOCS, format_docstring
from mlflow.utils.environment import (
    _CONDA_ENV_FILE_NAME,
    _CONSTRAINTS_FILE_NAME,
    _PYTHON_ENV_FILE_NAME,
    _REQUIREMENTS_FILE_NAME,
    _mlflow_conda_env,
    _process_conda_env,
    _process_pip_requirements,
    _PythonEnv,
    _validate_env_arguments,
)
from mlflow.utils.file_utils import write_to
from mlflow.utils.model_utils import (
    _add_code_from_conf_to_system_path,
    _copy_extra_files,
    _get_flavor_configuration,
    _validate_and_copy_code_paths,
    _validate_and_prepare_target_save_path,
)
from mlflow.utils.requirements_utils import _get_pinned_requirement

FLAVOR_NAME = "paddle"

# Sub-directory (inside the model artifact directory) where the serialized
# paddle model data is written.
_MODEL_DATA_SUBPATH = "model"

_logger = logging.getLogger(__name__)


def get_default_pip_requirements():
    """
    Returns:
        A list of default pip requirements for MLflow Models produced by this flavor.
        Calls to :func:`save_model()` and :func:`log_model()` produce a pip environment
        that, at minimum, contains these requirements.
    """
    return [_get_pinned_requirement("paddlepaddle", module="paddle")]


def get_default_conda_env():
    """
    Returns:
        The default Conda environment for MLflow Models produced by calls to
        :func:`save_model()` and :func:`log_model()`.
    """
    return _mlflow_conda_env(additional_pip_deps=get_default_pip_requirements())


@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
def save_model(
    pd_model,
    path,
    training=False,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
    metadata=None,
    extra_files=None,
    **kwargs,
):
    """
    Save a paddle model to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

    - :py:mod:`mlflow.paddle`
    - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for paddle models
      that define `predict()`, since `predict()` is required for pyfunc model inference.

    Args:
        pd_model: paddle model to be saved.
        path: Local path where the model is to be saved.
        training: Only valid when saving a model trained using the PaddlePaddle high level API.
            If set to True, the saved model supports both re-training and
            inference. If set to False, it only supports inference.
        conda_env: {{ conda_env }}
        code_paths: {{ code_paths }}
        mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
        signature: {{ signature }}
        input_example: {{ input_example }}
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        metadata: {{ metadata }}
        extra_files: {{ extra_files }}
        kwargs: {{ kwargs }}

    .. code-block:: python
        :caption: Example

        import mlflow.paddle
        import paddle
        from paddle.nn import Linear
        import paddle.nn.functional as F
        import numpy as np
        import os
        import random
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn import preprocessing


        def load_data():
            # diabetes regression dataset (10 input features)
            X, y = load_diabetes(return_X_y=True, as_frame=True)
            min_max_scaler = preprocessing.MinMaxScaler()
            X_min_max = min_max_scaler.fit_transform(X)
            X_normalized = preprocessing.scale(X_min_max, with_std=False)
            X_train, X_test, y_train, y_test = train_test_split(
                X_normalized, y, test_size=0.2, random_state=42
            )
            y_train = y_train.to_numpy().reshape(-1, 1)
            y_test = y_test.to_numpy().reshape(-1, 1)
            return np.concatenate((X_train, y_train), axis=1), np.concatenate(
                (X_test, y_test), axis=1
            )


        class Regressor(paddle.nn.Layer):
            def __init__(self):
                super().__init__()
                self.fc = Linear(in_features=10, out_features=1)

            @paddle.jit.to_static
            def forward(self, inputs):
                x = self.fc(inputs)
                return x


        model = Regressor()
        model.train()
        training_data, test_data = load_data()
        opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        EPOCH_NUM = 10
        BATCH_SIZE = 10
        for epoch_id in range(EPOCH_NUM):
            np.random.shuffle(training_data)
            mini_batches = [
                training_data[k : k + BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)
            ]
            for iter_id, mini_batch in enumerate(mini_batches):
                x = np.array(mini_batch[:, :-1]).astype("float32")
                y = np.array(mini_batch[:, -1:]).astype("float32")
                house_features = paddle.to_tensor(x)
                prices = paddle.to_tensor(y)
                predicts = model(house_features)
                loss = F.square_error_cost(predicts, label=prices)
                avg_loss = paddle.mean(loss)
                if iter_id % 20 == 0:
                    print(f"epoch: {epoch_id}, iter: {iter_id}, loss is: {avg_loss.numpy()}")
                avg_loss.backward()
                opt.step()
                opt.clear_grad()
        mlflow.log_param("learning_rate", 0.01)
        mlflow.paddle.log_model(model, name="model")
        sk_path_dir = "./test-out"
        mlflow.paddle.save_model(model, sk_path_dir)
        print("Model saved in run %s" % mlflow.active_run().info.run_id)
    """
    import paddle

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    _validate_and_prepare_target_save_path(path)
    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    saved_example = _save_example(mlflow_model, input_example, path)

    if signature is None and saved_example is not None:
        # Infer a signature by running the wrapped model's predict() on the saved example.
        wrapped_model = _PaddleWrapper(pd_model)
        signature = _infer_signature_from_input_example(saved_example, wrapped_model)
    elif signature is False:
        # `signature=False` is an explicit opt-out of signature inference.
        signature = None

    if signature is not None:
        mlflow_model.signature = signature
    if metadata is not None:
        mlflow_model.metadata = metadata

    model_data_subpath = _MODEL_DATA_SUBPATH
    output_path = os.path.join(path, model_data_subpath)

    # High-level `paddle.Model` objects use their own save API (which can keep
    # training state); everything else is serialized via `paddle.jit.save`.
    if isinstance(pd_model, paddle.Model):
        pd_model.save(output_path, training=training, **kwargs)
    else:
        paddle.jit.save(pd_model, output_path, **kwargs)

    # `PyFuncModel` only works for paddle models that define `predict()`.
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.paddle",
        model_path=model_data_subpath,
        conda_env=_CONDA_ENV_FILE_NAME,
        python_env=_PYTHON_ENV_FILE_NAME,
        code=code_dir_subpath,
    )

    extra_files_config = _copy_extra_files(extra_files, path)

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        pickled_model=model_data_subpath,
        paddle_version=paddle.__version__,
        code=code_dir_subpath,
        **extra_files_config,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))

    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))


def load_model(model_uri, model=None, dst_path=None, **kwargs):
    """
    Load a paddle model from a local file or a run.

    Args:
        model_uri: The location, in URI format, of the MLflow model, for example:

            - ``/Users/me/path/to/local/model``
            - ``relative/path/to/local/model``
            - ``s3://my_bucket/path/to/model``
            - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
            - ``models:/<model_name>/<model_version>``
            - ``models:/<model_name>/<stage>``
        model: Required when loading a `paddle.Model` model saved with `training=True`.
        dst_path: The local filesystem path to which to download the model artifact.
            This directory must already exist. If unspecified, a local output
            path will be created.
        kwargs: The keyword arguments to pass to `paddle.jit.load`
            or `model.load`.

    For more information about supported URI schemes, see
    `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
    artifact-locations>`_.

    Returns:
        A paddle model.

    .. code-block:: python
        :caption: Example

        import mlflow.paddle

        pd_model = mlflow.paddle.load_model("runs:/96771d893a5e46159d9f3b49bf9013e2/pd_models")
        # use Pandas DataFrame to make predictions
        np_array = ...
        predictions = pd_model(np_array)
    """
    import paddle

    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    pd_model_artifacts_path = os.path.join(local_model_path, flavor_conf["pickled_model"])
    if model is None:
        # Inference-only artifacts (saved via `paddle.jit.save` or `training=False`).
        return paddle.jit.load(pd_model_artifacts_path, **kwargs)
    elif not isinstance(model, paddle.Model):
        raise TypeError(f"Invalid object type `{type(model)}` for `model`, must be `paddle.Model`")
    else:
        # Re-trainable artifacts carry a '.pdparams' file; without it,
        # `model.load` cannot restore the model.
        contains_pdparams = _contains_pdparams(local_model_path)
        if not contains_pdparams:
            raise TypeError(
                "This model can't be loaded via `model.load` because a '.pdparams' file "
                "doesn't exist. Please leave `model` unspecified to load the model via "
                "`paddle.jit.load` or set `training` to True when saving a model."
            )

        model.load(pd_model_artifacts_path, **kwargs)
        return model


@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
def log_model(
    pd_model,
    artifact_path: str | None = None,
    training=False,
    conda_env=None,
    code_paths=None,
    registered_model_name=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
    metadata=None,
    extra_files=None,
    name: str | None = None,
    params: dict[str, Any] | None = None,
    tags: dict[str, Any] | None = None,
    model_type: str | None = None,
    step: int = 0,
    model_id: str | None = None,
    **kwargs,
):
    """
    Log a paddle model as an MLflow artifact for the current run. Produces an MLflow Model
    containing the following flavors:

    - :py:mod:`mlflow.paddle`
    - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for paddle models
      that define `predict()`, since `predict()` is required for pyfunc model inference.

    Args:
        pd_model: paddle model to be saved.
        artifact_path: Deprecated. Use `name` instead.
        training: Only valid when saving a model trained using the PaddlePaddle high level API.
            If set to True, the saved model supports both re-training and
            inference. If set to False, it only supports inference.
        conda_env: {{ conda_env }}
        code_paths: {{ code_paths }}
        registered_model_name: If given, create a model version under
            ``registered_model_name``, also creating a registered model if one
            with the given name does not exist.
        signature: {{ signature }}
        input_example: {{ input_example }}
        await_registration_for: Number of seconds to wait for the model version to finish
            being created and is in ``READY`` status. By default, the function
            waits for five minutes. Specify 0 or None to skip waiting.
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        metadata: {{ metadata }}
        extra_files: {{ extra_files }}
        name: {{ name }}
        params: {{ params }}
        tags: {{ tags }}
        model_type: {{ model_type }}
        step: {{ step }}
        model_id: {{ model_id }}
        kwargs: {{ kwargs }}

    Returns:
        A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
        metadata of the logged model.

    .. code-block:: python
        :caption: Example

        import mlflow.paddle


        def load_data(): ...


        class Regressor: ...


        model = Regressor()
        model.train()
        training_data, test_data = load_data()
        opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        EPOCH_NUM = 10
        BATCH_SIZE = 10
        for epoch_id in range(EPOCH_NUM):
            ...
        mlflow.log_param("learning_rate", 0.01)
        mlflow.paddle.log_model(model, name="model")
        sk_path_dir = ...
        mlflow.paddle.save_model(model, sk_path_dir)
    """
    # Delegates artifact logging to `Model.log`, which calls back into
    # `mlflow.paddle.save_model` with the flavor-specific arguments.
    return Model.log(
        artifact_path=artifact_path,
        name=name,
        flavor=mlflow.paddle,
        pd_model=pd_model,
        conda_env=conda_env,
        code_paths=code_paths,
        registered_model_name=registered_model_name,
        signature=signature,
        input_example=input_example,
        await_registration_for=await_registration_for,
        training=training,
        pip_requirements=pip_requirements,
        extra_pip_requirements=extra_pip_requirements,
        metadata=metadata,
        extra_files=extra_files,
        params=params,
        tags=tags,
        model_type=model_type,
        step=step,
        model_id=model_id,
        **kwargs,
    )


def _load_pyfunc(path):
    """
    Loads PyFunc implementation. Called by ``pyfunc.load_model``.

    Args:
        path: Local filesystem path to the MLflow Model with the ``paddle`` flavor.
    """
    return _PaddleWrapper(load_model(path))


class _PaddleWrapper:
    """
    Wrapper class that creates a predict function such that
    predict(data: pd.DataFrame) -> model's output as pd.DataFrame (pandas DataFrame)
    """

    def __init__(self, pd_model):
        self.pd_model = pd_model

    def get_raw_model(self):
        """
        Returns the underlying model.
        """
        return self.pd_model

    def predict(
        self,
        data,
        params: dict[str, Any] | None = None,
    ):
        """
        Args:
            data: Model input data.
            params: Additional parameters to pass to the model for inference.

        Returns:
            Model predictions.
        """
        import numpy as np
        import paddle
        import pandas as pd

        if isinstance(data, pd.DataFrame):
            inp_data = data.values.astype(np.float32)
        elif isinstance(data, np.ndarray):
            inp_data = data
        elif isinstance(data, (list, dict)):
            raise TypeError(
                "The paddle flavor does not support List or Dict input types. "
                "Please use a pandas.DataFrame or a numpy.ndarray"
            )
        else:
            raise TypeError("Input data should be pandas.DataFrame or numpy.ndarray")
        inp_data = np.squeeze(inp_data)

        self.pd_model.eval()

        predicted = self.pd_model(paddle.to_tensor(inp_data))
        return pd.DataFrame(predicted.numpy())


def _contains_pdparams(path):
    # True if any file directly under `path` looks like a paddle parameter
    # file ('.pdparams'), i.e. the model was saved in re-trainable form.
    file_list = os.listdir(path)
    return any(".pdparams" in file for file in file_list)


@autologging_integration(FLAVOR_NAME)
def autolog(
    log_every_n_epoch=1,
    log_models=True,
    disable=False,
    exclusive=False,
    silent=False,
    registered_model_name=None,
    extra_tags=None,
):
    """
    Enables (or disables) and configures autologging from PaddlePaddle to MLflow.

    Autologging is performed when the `fit` method of `paddle.Model`_ is called.

    .. _paddle.Model:
        https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/Model_en.html

    Args:
        log_every_n_epoch: If specified, logs metrics once every `n` epochs. By default, metrics
            are logged after every epoch.
        log_models: If ``True``, trained models are logged as MLflow model artifacts.
            If ``False``, trained models are not logged.
        disable: If ``True``, disables the PaddlePaddle autologging integration.
            If ``False``, enables the PaddlePaddle autologging integration.
        exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
            If ``False``, autologged content is logged to the active fluent run,
            which may be user-created.
        silent: If ``True``, suppress all event logs and warnings from MLflow during
            PaddlePaddle autologging. If ``False``, show all events and warnings during
            PaddlePaddle autologging.
        registered_model_name: If given, each time a model is trained, it is registered as a
            new model version of the registered model with this name.
            The registered model is created if it does not already exist.
        extra_tags: A dictionary of extra tags to set on each managed run created by autologging.

    .. code-block:: python
        :caption: Example

        import paddle
        import mlflow
        from mlflow import MlflowClient


        def show_run_data(run_id):
            run = mlflow.get_run(run_id)
            print(f"params: {run.data.params}")
            print(f"metrics: {run.data.metrics}")
            client = MlflowClient()
            artifacts = [f.path for f in client.list_artifacts(run.info.run_id, "model")]
            print(f"artifacts: {artifacts}")


        class LinearRegression(paddle.nn.Layer):
            def __init__(self):
                super().__init__()
                self.fc = paddle.nn.Linear(13, 1)

            def forward(self, feature):
                return self.fc(feature)


        train_dataset = paddle.text.datasets.UCIHousing(mode="train")
        eval_dataset = paddle.text.datasets.UCIHousing(mode="test")
        model = paddle.Model(LinearRegression())
        optim = paddle.optimizer.SGD(learning_rate=1e-2, parameters=model.parameters())
        model.prepare(optim, paddle.nn.MSELoss(), paddle.metric.Accuracy())
        mlflow.paddle.autolog()
        with mlflow.start_run() as run:
            model.fit(train_dataset, eval_dataset, batch_size=16, epochs=10)
        show_run_data(run.info.run_id)

    .. code-block:: text
        :caption: Output

        params: {
            "learning_rate": "0.01",
            "optimizer_name": "SGD",
        }
        metrics: {
            "loss": 17.482044,
            "step": 25.0,
            "acc": 0.0,
            "eval_step": 6.0,
            "eval_acc": 0.0,
            "eval_batch_size": 6.0,
            "batch_size": 4.0,
            "eval_loss": 24.717455,
        }
        artifacts: [
            "model/MLmodel",
            "model/conda.yaml",
            "model/model.pdiparams",
            "model/model.pdiparams.info",
            "model/model.pdmodel",
            "model/requirements.txt",
        ]
    """
    import paddle

    from mlflow.paddle._paddle_autolog import patched_fit

    # `@autologging_integration` consumes the configuration arguments above;
    # here we only need to install the patch on `paddle.Model.fit`.
    safe_patch(
        FLAVOR_NAME, paddle.Model, "fit", patched_fit, manage_run=True, extra_tags=extra_tags
    )