/ mlflow / paddle / __init__.py
__init__.py
  1  """
  2  The ``mlflow.paddle`` module provides an API for logging and loading paddle models.
  3  This module exports paddle models with the following flavors:
  4  
  5  Paddle (native) format
  6      This is the main flavor that can be loaded back into paddle.
  7  
  8  :py:mod:`mlflow.pyfunc`
  9      Produced for use by generic pyfunc-based deployment tools and batch inference.
 10      NOTE: The `mlflow.pyfunc` flavor is only added for paddle models that define `predict()`,
 11      since `predict()` is required for pyfunc model inference.
 12  """
 13  
 14  import logging
 15  import os
 16  from typing import Any
 17  
 18  import yaml
 19  
 20  import mlflow
 21  from mlflow import pyfunc
 22  from mlflow.models import Model, ModelInputExample, ModelSignature
 23  from mlflow.models.model import MLMODEL_FILE_NAME
 24  from mlflow.models.signature import _infer_signature_from_input_example
 25  from mlflow.models.utils import _save_example
 26  from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
 27  from mlflow.tracking.artifact_utils import _download_artifact_from_uri
 28  from mlflow.utils.autologging_utils import autologging_integration, safe_patch
 29  from mlflow.utils.docstring_utils import LOG_MODEL_PARAM_DOCS, format_docstring
 30  from mlflow.utils.environment import (
 31      _CONDA_ENV_FILE_NAME,
 32      _CONSTRAINTS_FILE_NAME,
 33      _PYTHON_ENV_FILE_NAME,
 34      _REQUIREMENTS_FILE_NAME,
 35      _mlflow_conda_env,
 36      _process_conda_env,
 37      _process_pip_requirements,
 38      _PythonEnv,
 39      _validate_env_arguments,
 40  )
 41  from mlflow.utils.file_utils import write_to
 42  from mlflow.utils.model_utils import (
 43      _add_code_from_conf_to_system_path,
 44      _copy_extra_files,
 45      _get_flavor_configuration,
 46      _validate_and_copy_code_paths,
 47      _validate_and_prepare_target_save_path,
 48  )
 49  from mlflow.utils.requirements_utils import _get_pinned_requirement
 50  
 51  FLAVOR_NAME = "paddle"
 52  
 53  _MODEL_DATA_SUBPATH = "model"
 54  
 55  _logger = logging.getLogger(__name__)
 56  
 57  
 58  def get_default_pip_requirements():
 59      """
 60      Returns:
 61          A list of default pip requirements for MLflow Models produced by this flavor.
 62          Calls to :func:`save_model()` and :func:`log_model()` produce a pip environment
 63          that, at minimum, contains these requirements.
 64      """
 65      return [_get_pinned_requirement("paddlepaddle", module="paddle")]
 66  
 67  
 68  def get_default_conda_env():
 69      """
 70      Returns:
 71          The default Conda environment for MLflow Models produced by calls to
 72          :func:`save_model()` and :func:`log_model()`.
 73      """
 74      return _mlflow_conda_env(additional_pip_deps=get_default_pip_requirements())
 75  
 76  
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
def save_model(
    pd_model,
    path,
    training=False,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
    metadata=None,
    extra_files=None,
    **kwargs,
):
    """
    Save a paddle model to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.paddle`
        - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for paddle models
          that define `predict()`, since `predict()` is required for pyfunc model inference.

    Besides the model data itself, this function writes the MLmodel file, the conda
    environment file, the python environment file, ``requirements.txt`` and, when pip
    constraints are present, the constraints file into ``path``.

    Args:
        pd_model: paddle model to be saved.
        path: Local path where the model is to be saved.
        training: Only valid when saving a model trained using the PaddlePaddle high level API.
            If set to True, the saved model supports both re-training and
            inference. If set to False, it only supports inference.
        conda_env: {{ conda_env }}
        code_paths: {{ code_paths }}
        mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
        signature: {{ signature }}
        input_example: {{ input_example }}
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        metadata: {{ metadata }}
        extra_files: {{ extra_files }}
        kwargs: {{ kwargs }}

    .. code-block:: python
        :caption: Example

        import mlflow.paddle
        import paddle
        from paddle.nn import Linear
        import paddle.nn.functional as F
        import numpy as np
        import os
        import random
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn import preprocessing


        def load_data():
            # diabetes regression dataset (10 numeric features, 1 target)
            X, y = load_diabetes(return_X_y=True, as_frame=True)
            min_max_scaler = preprocessing.MinMaxScaler()
            X_min_max = min_max_scaler.fit_transform(X)
            X_normalized = preprocessing.scale(X_min_max, with_std=False)
            X_train, X_test, y_train, y_test = train_test_split(
                X_normalized, y, test_size=0.2, random_state=42
            )
            # `y` is a pandas Series (as_frame=True); convert to numpy before reshaping
            y_train = y_train.to_numpy().reshape(-1, 1)
            y_test = y_test.to_numpy().reshape(-1, 1)
            return np.concatenate((X_train, y_train), axis=1), np.concatenate(
                (X_test, y_test), axis=1
            )


        class Regressor(paddle.nn.Layer):
            def __init__(self):
                super().__init__()
                # in_features must match the number of feature columns in the dataset
                self.fc = Linear(in_features=10, out_features=1)

            @paddle.jit.to_static
            def forward(self, inputs):
                x = self.fc(inputs)
                return x


        model = Regressor()
        model.train()
        training_data, test_data = load_data()
        opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        EPOCH_NUM = 10
        BATCH_SIZE = 10
        for epoch_id in range(EPOCH_NUM):
            np.random.shuffle(training_data)
            mini_batches = [
                training_data[k : k + BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)
            ]
            for iter_id, mini_batch in enumerate(mini_batches):
                # the last column is the target, everything before it is a feature
                x = np.array(mini_batch[:, :-1]).astype("float32")
                y = np.array(mini_batch[:, -1:]).astype("float32")
                features = paddle.to_tensor(x)
                targets = paddle.to_tensor(y)
                predicts = model(features)
                loss = F.square_error_cost(predicts, label=targets)
                avg_loss = paddle.mean(loss)
                if iter_id % 20 == 0:
                    print(f"epoch: {epoch_id}, iter: {iter_id}, loss is: {avg_loss.numpy()}")
                avg_loss.backward()
                opt.step()
                opt.clear_grad()
        mlflow.log_param("learning_rate", 0.01)
        mlflow.paddle.log_model(model, name="model")
        sk_path_dir = "./test-out"
        mlflow.paddle.save_model(model, sk_path_dir)
        print("Model saved in run %s" % mlflow.active_run().info.run_id)
    """
    # paddle is imported inside the function rather than at module import time.
    import paddle

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    _validate_and_prepare_target_save_path(path)
    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    saved_example = _save_example(mlflow_model, input_example, path)

    # Signature resolution:
    #   - no signature given but an input example was saved -> infer one by running the
    #     example through the pyfunc wrapper around the model;
    #   - `signature=False` -> treated as "no signature" (reset to None so nothing is stored).
    if signature is None and saved_example is not None:
        wrapped_model = _PaddleWrapper(pd_model)
        signature = _infer_signature_from_input_example(saved_example, wrapped_model)
    elif signature is False:
        signature = None

    if signature is not None:
        mlflow_model.signature = signature
    if metadata is not None:
        mlflow_model.metadata = metadata

    model_data_subpath = _MODEL_DATA_SUBPATH
    output_path = os.path.join(path, model_data_subpath)

    # `paddle.Model` instances are saved through their own `save` method, which is the
    # only path that receives `training`; any other object goes through `paddle.jit.save`.
    # Extra `kwargs` are forwarded to whichever save call is used.
    if isinstance(pd_model, paddle.Model):
        pd_model.save(output_path, training=training, **kwargs)
    else:
        paddle.jit.save(pd_model, output_path, **kwargs)

    # `PyFuncModel` only works for paddle models that define `predict()`.
    # NOTE(review): the pyfunc flavor is nevertheless added unconditionally here; no check
    # for `predict()` is performed in this function.
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.paddle",
        model_path=model_data_subpath,
        conda_env=_CONDA_ENV_FILE_NAME,
        python_env=_PYTHON_ENV_FILE_NAME,
        code=code_dir_subpath,
    )

    extra_files_config = _copy_extra_files(extra_files, path)

    # The flavor entry stores the model sub-path under the `pickled_model` key, which is
    # the key `load_model` reads back from the flavor configuration.
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        pickled_model=model_data_subpath,
        paddle_version=paddle.__version__,
        code=code_dir_subpath,
        **extra_files_config,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    # Environment resolution: an explicit `conda_env` takes precedence; otherwise the
    # environment is derived from pip requirements (inferred when none were supplied).
    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            # Union with the flavor defaults so they are always present; sorted for a
            # deterministic order.
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))

    # Record the current Python environment alongside the model.
    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
273  
274  
def load_model(model_uri, model=None, dst_path=None, **kwargs):
    """
    Load a paddle model from a local file or a run.

    When ``model`` is omitted the artifacts are loaded with ``paddle.jit.load``. When a
    ``paddle.Model`` instance is supplied, the saved state is loaded into that instance
    via ``model.load`` and the same instance is returned.

    Args:
        model_uri: The location, in URI format, of the MLflow model, for example:

            - ``/Users/me/path/to/local/model``
            - ``relative/path/to/local/model``
            - ``s3://my_bucket/path/to/model``
            - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
            - ``models:/<model_name>/<model_version>``
            - ``models:/<model_name>/<stage>``
        model: Required when loading a `paddle.Model` model saved with `training=True`.
        dst_path: The local filesystem path to which to download the model artifact.
            This directory must already exist. If unspecified, a local output
            path will be created.
        kwargs: The keyword arguments to pass to `paddle.jit.load`
            or `model.load`.

    For more information about supported URI schemes, see
    `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
    artifact-locations>`_.

    Returns:
        A paddle model.

    Raises:
        TypeError: If ``model`` is given but is not a ``paddle.Model``, or if ``model`` is
            given and the downloaded model directory holds no file whose name contains
            ``.pdparams``.

    .. code-block:: python
        :caption: Example

        import mlflow.paddle

        pd_model = mlflow.paddle.load_model("runs:/96771d893a5e46159d9f3b49bf9013e2/pd_models")
        # run inference on the loaded model
        np_array = ...
        predictions = pd_model(np_array)
    """
    import paddle

    model_root = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    paddle_conf = _get_flavor_configuration(model_path=model_root, flavor_name=FLAVOR_NAME)
    _add_code_from_conf_to_system_path(model_root, paddle_conf)
    artifacts_prefix = os.path.join(model_root, paddle_conf["pickled_model"])

    # No target instance supplied: load the saved artifacts directly.
    if model is None:
        return paddle.jit.load(artifacts_prefix, **kwargs)

    if not isinstance(model, paddle.Model):
        raise TypeError(f"Invalid object type `{type(model)}` for `model`, must be `paddle.Model`")

    # `model.load` is only attempted when a '.pdparams' file sits in the model directory.
    if not _contains_pdparams(model_root):
        raise TypeError(
            "This model can't be loaded via `model.load` because a '.pdparams' file "
            "doesn't exist. Please leave `model` unspecified to load the model via "
            "`paddle.jit.load` or set `training` to True when saving a model."
        )

    model.load(artifacts_prefix, **kwargs)
    return model
332  
333  
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
def log_model(
    pd_model,
    artifact_path: str | None = None,
    training=False,
    conda_env=None,
    code_paths=None,
    registered_model_name=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
    metadata=None,
    extra_files=None,
    name: str | None = None,
    params: dict[str, Any] | None = None,
    tags: dict[str, Any] | None = None,
    model_type: str | None = None,
    step: int = 0,
    model_id: str | None = None,
    **kwargs,
):
    """
    Log a paddle model as an MLflow artifact for the current run. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.paddle`
        - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for paddle models
          that define `predict()`, since `predict()` is required for pyfunc model inference.

    This function delegates to ``Model.log`` with ``mlflow.paddle`` as the flavor and
    returns whatever that call returns.

    Args:
        pd_model: paddle model to be saved.
        artifact_path: Deprecated. Use `name` instead.
        training: Only valid when saving a model trained using the PaddlePaddle high level API.
            If set to True, the saved model supports both re-training and
            inference. If set to False, it only supports inference.
        conda_env: {{ conda_env }}
        code_paths: {{ code_paths }}
        registered_model_name: If given, create a model version under
            ``registered_model_name``, also creating a registered model if one
            with the given name does not exist.
        signature: {{ signature }}
        input_example: {{ input_example }}
        await_registration_for: Number of seconds to wait for the model version to finish
            being created and is in ``READY`` status. By default, the function
            waits for five minutes. Specify 0 or None to skip waiting.
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        metadata: {{ metadata }}
        extra_files: {{ extra_files }}
        name: {{ name }}
        params: {{ params }}
        tags: {{ tags }}
        model_type: {{ model_type }}
        step: {{ step }}
        model_id: {{ model_id }}
        kwargs: {{ kwargs }}

    Returns:
        A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
        metadata of the logged model.

    .. code-block:: python
        :caption: Example

        import mlflow.paddle


        def load_data(): ...


        class Regressor: ...


        model = Regressor()
        model.train()
        training_data, test_data = load_data()
        opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
        EPOCH_NUM = 10
        BATCH_SIZE = 10
        for epoch_id in range(EPOCH_NUM):
            ...
        mlflow.log_param("learning_rate", 0.01)
        mlflow.paddle.log_model(model, name="model")
        sk_path_dir = ...
        mlflow.paddle.save_model(model, sk_path_dir)
    """
    # Arguments describing the model and its environment. They are grouped purely for
    # readability; each entry reaches `Model.log` as an ordinary keyword argument.
    model_args = {
        "pd_model": pd_model,
        "training": training,
        "conda_env": conda_env,
        "code_paths": code_paths,
        "signature": signature,
        "input_example": input_example,
        "pip_requirements": pip_requirements,
        "extra_pip_requirements": extra_pip_requirements,
        "metadata": metadata,
        "extra_files": extra_files,
    }

    logged_model_info = Model.log(
        artifact_path=artifact_path,
        name=name,
        flavor=mlflow.paddle,
        registered_model_name=registered_model_name,
        await_registration_for=await_registration_for,
        params=params,
        tags=tags,
        model_type=model_type,
        step=step,
        model_id=model_id,
        **model_args,
        **kwargs,
    )
    return logged_model_info
445  
446  
def _load_pyfunc(path):
    """
    Loads PyFunc implementation. Called by ``pyfunc.load_model``.

    Args:
        path: Local filesystem path to the MLflow Model with the ``paddle`` flavor.

    Returns:
        A ``_PaddleWrapper`` around the model returned by ``load_model(path)``.
    """
    native_model = load_model(path)
    return _PaddleWrapper(native_model)
455  
456  
457  class _PaddleWrapper:
458      """
459      Wrapper class that creates a predict function such that
460      predict(data: pd.DataFrame) -> model's output as pd.DataFrame (pandas DataFrame)
461      """
462  
463      def __init__(self, pd_model):
464          self.pd_model = pd_model
465  
466      def get_raw_model(self):
467          """
468          Returns the underlying model.
469          """
470          return self.pd_model
471  
472      def predict(
473          self,
474          data,
475          params: dict[str, Any] | None = None,
476      ):
477          """
478          Args:
479              data: Model input data.
480              params: Additional parameters to pass to the model for inference.
481  
482          Returns:
483              Model predictions.
484          """
485          import numpy as np
486          import paddle
487          import pandas as pd
488  
489          if isinstance(data, pd.DataFrame):
490              inp_data = data.values.astype(np.float32)
491          elif isinstance(data, np.ndarray):
492              inp_data = data
493          elif isinstance(data, (list, dict)):
494              raise TypeError(
495                  "The paddle flavor does not support List or Dict input types. "
496                  "Please use a pandas.DataFrame or a numpy.ndarray"
497              )
498          else:
499              raise TypeError("Input data should be pandas.DataFrame or numpy.ndarray")
500          inp_data = np.squeeze(inp_data)
501  
502          self.pd_model.eval()
503  
504          predicted = self.pd_model(paddle.to_tensor(inp_data))
505          return pd.DataFrame(predicted.numpy())
506  
507  
def _contains_pdparams(path):
    """
    Report whether the directory ``path`` holds an entry whose name contains ``.pdparams``.

    Args:
        path: Directory to inspect (only its immediate entries are examined).

    Returns:
        True if at least one entry name contains the substring ``.pdparams``, else False.
    """
    for entry_name in os.listdir(path):
        if ".pdparams" in entry_name:
            return True
    return False
511  
512  
@autologging_integration(FLAVOR_NAME)
def autolog(
    log_every_n_epoch=1,
    log_models=True,
    disable=False,
    exclusive=False,
    silent=False,
    registered_model_name=None,
    extra_tags=None,
):
    """
    Enables (or disables) and configures autologging from PaddlePaddle to MLflow.

    Autologging is performed when the `fit` method of `paddle.Model`_ is called.

    .. _paddle.Model:
        https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/Model_en.html

    Args:
        log_every_n_epoch: If specified, logs metrics once every `n` epochs. By default, metrics
            are logged after every epoch.
        log_models: If ``True``, trained models are logged as MLflow model artifacts.
            If ``False``, trained models are not logged.
        disable: If ``True``, disables the PaddlePaddle autologging integration.
            If ``False``, enables the PaddlePaddle autologging integration.
        exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
            If ``False``, autologged content is logged to the active fluent run,
            which may be user-created.
        silent: If ``True``, suppress all event logs and warnings from MLflow during
            PaddlePaddle autologging. If ``False``, show all events and warnings during
            PaddlePaddle autologging.
        registered_model_name: If given, each time a model is trained, it is registered as a
            new model version of the registered model with this name.
            The registered model is created if it does not already exist.
        extra_tags: A dictionary of extra tags to set on each managed run created by autologging.

    .. code-block:: python
        :caption: Example

        import paddle
        import mlflow
        from mlflow import MlflowClient


        def show_run_data(run_id):
            run = mlflow.get_run(run_id)
            print(f"params: {run.data.params}")
            print(f"metrics: {run.data.metrics}")
            client = MlflowClient()
            artifacts = [f.path for f in client.list_artifacts(run.info.run_id, "model")]
            print(f"artifacts: {artifacts}")


        class LinearRegression(paddle.nn.Layer):
            def __init__(self):
                super().__init__()
                self.fc = paddle.nn.Linear(13, 1)

            def forward(self, feature):
                return self.fc(feature)


        train_dataset = paddle.text.datasets.UCIHousing(mode="train")
        eval_dataset = paddle.text.datasets.UCIHousing(mode="test")
        model = paddle.Model(LinearRegression())
        optim = paddle.optimizer.SGD(learning_rate=1e-2, parameters=model.parameters())
        model.prepare(optim, paddle.nn.MSELoss(), paddle.metric.Accuracy())
        mlflow.paddle.autolog()
        with mlflow.start_run() as run:
            model.fit(train_dataset, eval_dataset, batch_size=16, epochs=10)
        show_run_data(run.info.run_id)

    .. code-block:: text
        :caption: Output

        params: {
            "learning_rate": "0.01",
            "optimizer_name": "SGD",
        }
        metrics: {
            "loss": 17.482044,
            "step": 25.0,
            "acc": 0.0,
            "eval_step": 6.0,
            "eval_acc": 0.0,
            "eval_batch_size": 6.0,
            "batch_size": 4.0,
            "eval_loss": 24.717455,
        }
        artifacts: [
            "model/MLmodel",
            "model/conda.yaml",
            "model/model.pdiparams",
            "model/model.pdiparams.info",
            "model/model.pdmodel",
            "model/requirements.txt",
        ]
    """
    import paddle

    from mlflow.paddle._paddle_autolog import patched_fit

    # Only `extra_tags` is referenced in this body. NOTE(review): the remaining arguments
    # are presumably consumed through the `autologging_integration` decorator and the
    # patched `fit` implementation; confirm against those modules.
    safe_patch(
        FLAVOR_NAME,
        paddle.Model,
        "fit",
        patched_fit,
        manage_run=True,
        extra_tags=extra_tags,
    )