Cradicle Explorer

/ mlflow / utils / validation.py
validation.py
  1  """
  2  Utilities for validating user inputs such as metric names and parameter names.
  3  """
  4  
  5  import ipaddress
  6  import json
  7  import logging
  8  import numbers
  9  import posixpath
 10  import re
 11  import socket
 12  import urllib.parse
 13  from typing import Any
 14  
 15  from mlflow.entities import Dataset, DatasetInput, InputTag, Param, RunTag
 16  from mlflow.entities.model_registry.prompt_version import PROMPT_TEXT_TAG_KEY
 17  from mlflow.entities.webhook import WebhookEvent
 18  from mlflow.environment_variables import (
 19      _MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS,
 20      _MLFLOW_WEBHOOK_ALLOWED_SCHEMES,
 21      MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH,
 22      MLFLOW_TRUNCATE_LONG_VALUES,
 23  )
 24  from mlflow.exceptions import MlflowException
 25  from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
 26  from mlflow.utils.os import is_windows
 27  from mlflow.utils.string_utils import is_string_type
 28  
# Module-level logger for this validation module.
_logger = logging.getLogger(__name__)

# Regex for valid run IDs: an alphanumeric character, optionally followed by up to 255
# word characters (``\w``, i.e. alphanumerics and underscores) or dashes, for a total
# length of 1 to 256.
_RUN_ID_REGEX = re.compile(r"^[a-zA-Z0-9][\w\-]{0,255}$")

# Regex for valid experiment IDs: starting with an alphanumeric, optionally followed by up
# to 63 characters including alphanumerics, underscores, or dashes.
_EXPERIMENT_ID_REGEX = re.compile(r"^[a-zA-Z0-9][\w\-]{0,63}$")

# Regex for valid registered model alias names: may only contain alphanumerics,
# underscores, and dashes.
_REGISTERED_MODEL_ALIAS_REGEX = re.compile(r"^[\w\-]*$")

# Regex matching version-style alias names ("v" or "V" followed by digits), which are
# reserved to prevent conflicts with version aliases.
_REGISTERED_MODEL_ALIAS_VERSION_REGEX = re.compile(r"^[vV]\d+$")

# The reserved "latest" alias name
_REGISTERED_MODEL_ALIAS_LATEST = "latest"

# Error text describing the characters allowed in alias names.
_BAD_ALIAS_CHARACTERS_MESSAGE = (
    "Names may only contain alphanumerics, underscores (_), and dashes (-)."
)

# Error text used when a metric or param key is missing.
_MISSING_KEY_NAME_MESSAGE = "A key name must be provided."

# Limits on the number of entities in a single batch logging request.
MAX_PARAMS_TAGS_PER_BATCH = 100
MAX_METRICS_PER_BATCH = 1000
MAX_DATASETS_PER_BATCH = 1000
MAX_ENTITIES_PER_BATCH = 1000
# Maximum size of a batch logging request, compared against ``len()`` of the request.
MAX_BATCH_LOG_REQUEST_SIZE = int(1e6)
# Length limits (as measured by ``len()``) for keys, values and names.
MAX_PARAM_VAL_LENGTH = 6000
MAX_TAG_VAL_LENGTH = 8000
MAX_EXPERIMENT_NAME_LENGTH = 500
MAX_EXPERIMENT_TAG_KEY_LENGTH = 250
MAX_EXPERIMENT_TAG_VAL_LENGTH = 5000
MAX_ENTITY_KEY_LENGTH = 250
MAX_MODEL_REGISTRY_TAG_KEY_LENGTH = 250
MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH = 100_000
MAX_EXPERIMENTS_LISTED_PER_PAGE = 50000
MAX_DATASET_NAME_SIZE = 500
MAX_DATASET_DIGEST_SIZE = 36
# 1MB -1, the db limit for MEDIUMTEXT column is 16MB, but
# we restrict to 1MB because user might log lots of datasets
# to a single run, 16MB increases burden on db
MAX_DATASET_SCHEMA_SIZE = 1048575
MAX_DATASET_SOURCE_SIZE = 65535  # 64KB -1 (the db limit for TEXT column)
MAX_DATASET_PROFILE_SIZE = 16777215  # 16MB -1 (the db limit for MEDIUMTEXT column)
MAX_INPUT_TAG_KEY_SIZE = 255
MAX_INPUT_TAG_VALUE_SIZE = 500
MAX_REGISTERED_MODEL_ALIAS_LENGTH = 255
MAX_TRACE_TAG_KEY_LENGTH = 250
MAX_TRACE_TAG_VAL_LENGTH = 8000
 81  
# Explanatory text about parameter key collisions: it shows how logging the same param twice
# in one run fails and how nested runs avoid the collision. It is not referenced in the
# visible part of this module; presumably other modules append it to param-overwrite
# errors -- confirm against callers.
PARAM_VALIDATION_MSG = """

The cause of this error is typically due to repeated calls
to an individual run_id event logging.

Incorrect Example:
---------------------------------------
with mlflow.start_run():
    mlflow.log_param("depth", 3)
    mlflow.log_param("depth", 5)
---------------------------------------

Which will throw an MlflowException for overwriting a
logged parameter.

Correct Example:
---------------------------------------
with mlflow.start_run():
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 3)
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 5)
---------------------------------------

Which will create a new nested run for each individual
model and prevent parameter key collisions within the
tracking store."""
109  
110  
def invalid_value(path, value, message=None):
    """
    Compose a standardized error message for invalid parameter values.

    Args:
        path: JSON-style path of the offending parameter (e.g. ``"metrics[0].value"``).
        value: The rejected value. It is rendered as compact JSON with sorted keys; a value
            that cannot be JSON-encoded falls back to its ``repr`` so that composing the
            error message never raises on its own and hides the validation error.
        message: Optional explanation appended after the standard prefix.

    Returns:
        The formatted error message string.
    """
    try:
        formatted_value = json.dumps(value, sort_keys=True, separators=(",", ":"))
    except (TypeError, ValueError):
        # TypeError: value (or a nested item) is not JSON serializable, or its dict keys
        # cannot be sorted. ValueError: circular reference.
        formatted_value = repr(value)

    if message:
        return f"Invalid value {formatted_value} for parameter '{path}' supplied: {message}"
    return f"Invalid value {formatted_value} for parameter '{path}' supplied."
121  
122  
def missing_value(path):
    """Return the error message used when the required parameter at `path` has no value."""
    message = f"Missing value for required parameter '{path}'."
    return message
125  
126  
def not_integer_value(path, value):
    """Return the error message used when the parameter at `path` is not an integer."""
    message = f"Parameter '{path}' must be an integer, got '{value}'."
    return message
129  
130  
def exceeds_maximum_length(path, limit):
    """Return the error message used when the value at `path` is longer than `limit`."""
    message = f"'{path}' exceeds the maximum length of {limit} characters"
    return message
133  
134  
def append_to_json_path(currentPath, value):
    """
    Extend a JSON-style path with one more segment.

    An empty `currentPath` yields `value` unchanged. Segments starting with ``[`` (index
    accessors) are appended directly; any other segment is joined with a dot.
    """
    if currentPath:
        separator = "" if value.startswith("[") else "."
        return f"{currentPath}{separator}{value}"
    return value
143  
144  
def bad_path_message(name):
    """
    Return the error message for a name that does not survive POSIX path normalization,
    including the normalized form the name would resolve to.
    """
    resolved = posixpath.normpath(name)
    return (
        "Names may be treated as files in certain cases, and must not resolve to other names"
        f" when treated as such. This name would resolve to {resolved!r}"
    )
150  
151  
def validate_param_and_metric_name(name):
    """
    Check whether `name` consists solely of characters allowed in param and metric names.

    On Windows, valid names may only contain slashes, alphanumerics, underscores, periods,
    dashes, and spaces. On other systems colons are allowed as well.

    Args:
        name: The candidate name (a string).

    Returns:
        A ``re.Match`` object (truthy) when the whole name is valid, otherwise ``None``.
    """
    pattern = r"[/\w.\- ]*" if is_windows() else r"[/\w.\- :]*"
    # Use fullmatch instead of match with a trailing "$": "$" also matches just before a
    # final newline, which would let names such as "loss\n" pass validation.
    return re.fullmatch(pattern, name)
161  
162  
def bad_character_message():
    """
    Return the error message listing the characters allowed in param and metric names.

    The colon is mentioned only on non-Windows systems, where it is an allowed character.
    """
    colon_clause = "" if is_windows() else ", colon(:)"
    return (
        "Names may only contain alphanumerics, underscores (_), dashes (-), periods (.),"
        f" spaces ( ){colon_clause} and slashes (/)."
    )
171  
172  
def path_not_unique(name):
    """
    Return True when `name`, treated as a POSIX path, is ambiguous or escapes its root.

    That is the case when normalization changes the name, or when the normalized form is
    ``"."``, starts with ``".."``, or starts with ``"/"``.
    """
    normalized = posixpath.normpath(name)
    if normalized != str(name):
        return True
    return normalized == "." or normalized.startswith(("..", "/"))
176  
177  
def _validate_metric_name(name, path="name"):
    """
    Ensure `name` is a valid metric name.

    Args:
        name: The metric name to check.
        path: JSON-style path reported in the error message.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if the name is None, contains
            disallowed characters, or is not unique when treated as a path.
    """
    if name is None:
        reason = f"Metric name cannot be None. {_MISSING_KEY_NAME_MESSAGE}"
    elif not validate_param_and_metric_name(name):
        reason = bad_character_message()
    elif path_not_unique(name):
        reason = bad_path_message(name)
    else:
        return

    raise MlflowException(
        invalid_value(path, name, reason),
        error_code=INVALID_PARAMETER_VALUE,
    )
195  
196  
197  def _is_numeric(value):
198      """
199      Returns True if the passed-in value is numeric.
200      """
201      # Note that `isinstance(bool_value, numbers.Number)` returns `True` because `bool` is a
202      # subclass of `int`.
203      return not isinstance(value, bool) and isinstance(value, numbers.Number)
204  
205  
def _validate_metric(key, value, timestamp, step, path=""):
    """
    Validate one metric made of `key`, `value`, `timestamp`, and `step`.

    Checks run in this order: metric name, presence of the value, numeric value,
    nonnegative numeric timestamp, numeric step, and finally the key length.

    Args:
        key: Metric name.
        value: Metric value; must be numeric and not a bool.
        timestamp: Metric timestamp; must be a nonnegative number.
        step: Metric step; must be a number.
        path: JSON-style path prefix used when reporting which field is invalid.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` when any check fails.
    """
    _validate_metric_name(key, append_to_json_path(path, "name"))

    value_path = append_to_json_path(path, "value")

    # When called through log_metric, nothing has verified yet that a value is present.
    if value is None:
        raise MlflowException(missing_value(value_path), INVALID_PARAMETER_VALUE)

    # bool is a numbers.Number too, so _is_numeric rules it out separately.
    if not _is_numeric(value):
        detail = (
            f"(timestamp={timestamp}). "
            f"Please specify value as a valid double (64-bit floating point)"
        )
        raise MlflowException(
            invalid_value(value_path, value, detail),
            INVALID_PARAMETER_VALUE,
        )

    if not isinstance(timestamp, numbers.Number) or timestamp < 0:
        detail = (
            f"metric '{key}' (value={value}). "
            f"Timestamp must be a nonnegative long (64-bit integer) "
        )
        raise MlflowException(
            invalid_value(append_to_json_path(path, "timestamp"), timestamp, detail),
            INVALID_PARAMETER_VALUE,
        )

    if not isinstance(step, numbers.Number):
        detail = f"metric '{key}' (value={value}). Step must be a valid long (64-bit integer)."
        raise MlflowException(
            invalid_value(append_to_json_path(path, "step"), step, detail),
            INVALID_PARAMETER_VALUE,
        )

    _validate_length_limit("Metric name", MAX_ENTITY_KEY_LENGTH, key)
255  
256  
def _validate_param(key, value, path=""):
    """
    Validate a param key/value pair and return it as a `Param`.

    The key must be a valid param name within the key length limit. A value over the
    length limit is passed to `_validate_length_limit` with ``truncate=True``.

    Raises:
        MlflowException: If the key or value is invalid.
    """
    _validate_param_name(key, append_to_json_path(path, "key"))
    checked_key = _validate_length_limit("Param key", MAX_ENTITY_KEY_LENGTH, key)
    checked_value = _validate_length_limit(
        "Param value", MAX_PARAM_VAL_LENGTH, value, truncate=True
    )
    return Param(checked_key, checked_value)
267  
268  
def _validate_tag(key, value, path=""):
    """
    Validate a tag key/value pair and return it as a `RunTag`.

    The key must be a valid tag name within the key length limit. A value over the length
    limit is passed to `_validate_length_limit` with ``truncate=True``.

    Raises:
        MlflowException: If the key or value is invalid.
    """
    key_path = append_to_json_path(path, "key")
    value_path = append_to_json_path(path, "value")

    _validate_tag_name(key, key_path)
    checked_key = _validate_length_limit(key_path, MAX_ENTITY_KEY_LENGTH, key)
    checked_value = _validate_length_limit(value_path, MAX_TAG_VAL_LENGTH, value, truncate=True)
    return RunTag(checked_key, checked_value)
280  
281  
def _validate_experiment_tag(key, value):
    """
    Validate an experiment tag: the key must be a valid tag name, and both key and value
    must respect the experiment tag length limits. Raises an exception otherwise.
    """
    _validate_tag_name(key)
    length_checks = (
        ("key", MAX_EXPERIMENT_TAG_KEY_LENGTH, key),
        ("value", MAX_EXPERIMENT_TAG_VAL_LENGTH, value),
    )
    for field, limit, field_value in length_checks:
        _validate_length_limit(field, limit, field_value)
289  
290  
def _validate_registered_model_tag(key, value):
    """
    Validate a registered model tag: the key must be a valid tag name, and both key and
    value must respect the model registry tag length limits. Raises an exception otherwise.
    """
    _validate_tag_name(key)
    length_checks = (
        ("key", MAX_MODEL_REGISTRY_TAG_KEY_LENGTH, key),
        ("value", MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH, value),
    )
    for field, limit, field_value in length_checks:
        _validate_length_limit(field, limit, field_value)
298  
299  
def _validate_model_version_tag(key, value):
    """
    Validate a model version tag: the key must be a valid tag name within the key length
    limit, and the value must be non-None and within the value length limit. Raises an
    exception otherwise.
    """
    _validate_tag_name(key)
    _validate_tag_value(value)
    _validate_length_limit("key", MAX_MODEL_REGISTRY_TAG_KEY_LENGTH, key)

    max_value_length = MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH

    # The prompt text tag gets its own, friendlier error message when it is too long.
    is_prompt_text = key == PROMPT_TEXT_TAG_KEY
    if is_prompt_text and len(value) > max_value_length:
        raise MlflowException.invalid_parameter_value(
            f"Prompt text exceeds max length of {max_value_length} characters.",
        )

    _validate_length_limit("value", max_value_length, value)
315  
316  
317  def _validate_param_keys_unique(params):
318      """Ensures that duplicate param keys are not present in the `log_batch()` params argument"""
319      unique_keys = []
320      dupe_keys = []
321      for param in params:
322          if param.key not in unique_keys:
323              unique_keys.append(param.key)
324          else:
325              dupe_keys.append(param.key)
326  
327      if dupe_keys:
328          raise MlflowException(
329              f"Duplicate parameter keys have been submitted: {dupe_keys}. Please ensure "
330              "the request contains only one param value per param key.",
331              INVALID_PARAMETER_VALUE,
332          )
333  
334  
def _validate_param_name(name, path="key"):
    """
    Ensure `name` is a valid parameter name.

    Args:
        name: The parameter name to check.
        path: JSON-style path reported in the error message.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if the name is None, contains
            disallowed characters, or is not unique when treated as a path.
    """
    if name is None:
        # A missing name is reported with an empty string as the offending value.
        error_text = invalid_value(path, "", _MISSING_KEY_NAME_MESSAGE)
    elif not validate_param_and_metric_name(name):
        error_text = invalid_value(path, name, bad_character_message())
    elif path_not_unique(name):
        error_text = invalid_value(path, name, bad_path_message(name))
    else:
        return

    raise MlflowException(error_text, error_code=INVALID_PARAMETER_VALUE)
352  
353  
def _validate_tag_name(name, path="key"):
    """
    Ensure `name` is a valid tag name, using the same character and path rules as params
    and metrics.

    Args:
        name: The tag name to check.
        path: JSON-style path reported in the error message.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if the name is None, contains
            disallowed characters, or is not unique when treated as a path.
    """
    if name is None:
        error_text = missing_value(path)
    elif not validate_param_and_metric_name(name):
        error_text = invalid_value(path, name, bad_character_message())
    elif path_not_unique(name):
        error_text = invalid_value(path, name, bad_path_message(name))
    else:
        return

    raise MlflowException(error_text, error_code=INVALID_PARAMETER_VALUE)
372  
373  
374  def _validate_length_limit(entity_name, limit, value, *, truncate=False):
375      if value is None:
376          return None
377  
378      if len(value) <= limit:
379          return value
380  
381      if truncate and MLFLOW_TRUNCATE_LONG_VALUES.get():
382          _logger.warning(
383              f"{entity_name} '{value[:100]}...' ({len(value)} characters) is truncated to "
384              f"{limit} characters to meet the length limit."
385          )
386          return value[:limit]
387  
388      raise MlflowException(
389          exceeds_maximum_length(entity_name, limit),
390          error_code=INVALID_PARAMETER_VALUE,
391      )
392  
393  
def _validate_run_id(run_id, path="run_id"):
    """
    Check that `run_id` is a valid run ID and raise an exception if it isn't.

    Args:
        run_id: The run ID string to validate against ``_RUN_ID_REGEX``.
        path: Parameter path reported in the error message.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if `run_id` does not match.
    """
    # fullmatch is required here: with match(), the pattern's trailing "$" also matches
    # just before a final newline, so an ID such as "abc\n" would be accepted.
    if _RUN_ID_REGEX.fullmatch(run_id) is None:
        raise MlflowException(invalid_value(path, run_id), error_code=INVALID_PARAMETER_VALUE)
398  
399  
def _validate_experiment_id(exp_id):
    """
    Check that `exp_id` is a valid experiment ID string or None, raise an exception if it
    isn't.

    Args:
        exp_id: The experiment ID to validate against ``_EXPERIMENT_ID_REGEX``; ``None`` is
            accepted without further checks.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if `exp_id` does not match.
    """
    # fullmatch is required here: with match(), the pattern's trailing "$" also matches
    # just before a final newline, so an ID such as "12\n" would be accepted.
    if exp_id is not None and _EXPERIMENT_ID_REGEX.fullmatch(exp_id) is None:
        raise MlflowException(
            f"Invalid experiment ID: '{exp_id}'", error_code=INVALID_PARAMETER_VALUE
        )
406  
407  
408  def _validate_batch_limit(entity_name, limit, length):
409      if length > limit:
410          error_msg = (
411              f"A batch logging request can contain at most {limit} {entity_name}. "
412              f"Got {length} {entity_name}. Please split up {entity_name} across multiple"
413              " requests and try again."
414          )
415          raise MlflowException(error_msg, error_code=INVALID_PARAMETER_VALUE)
416  
417  
def _validate_batch_log_limits(metrics, params, tags):
    """
    Validate that batched logging arguments stay within the per-kind limits for metrics,
    params, and tags, and within the overall entity limit for their combined count.
    """
    per_kind_limits = (
        ("metrics", MAX_METRICS_PER_BATCH, metrics),
        ("params", MAX_PARAMS_TAGS_PER_BATCH, params),
        ("tags", MAX_PARAMS_TAGS_PER_BATCH, tags),
    )
    for kind, limit, entities in per_kind_limits:
        _validate_batch_limit(entity_name=kind, limit=limit, length=len(entities))

    _validate_batch_limit(
        entity_name="metrics, params, and tags",
        limit=MAX_ENTITIES_PER_BATCH,
        length=len(metrics) + len(params) + len(tags),
    )
429  
430  
def _validate_batch_log_data(metrics, params, tags):
    """
    Validate every metric, param, and tag of a batch logging request.

    Returns:
        A tuple ``(metrics, params, tags)`` where `metrics` is the input object and the
        params and tags are new lists of the entities returned by `_validate_param` and
        `_validate_tag`.
    """
    for position, metric in enumerate(metrics):
        _validate_metric(
            metric.key,
            metric.value,
            metric.timestamp,
            metric.step,
            path=f"metrics[{position}]",
        )

    checked_params = []
    for position, param in enumerate(params):
        checked_params.append(_validate_param(param.key, param.value, path=f"params[{position}]"))

    checked_tags = []
    for position, tag in enumerate(tags):
        checked_tags.append(_validate_tag(tag.key, tag.value, path=f"tags[{position}]"))

    return metrics, checked_params, checked_tags
440  
441  
def _validate_batch_log_api_req(json_req):
    """
    Raise an MlflowException if ``len(json_req)`` exceeds ``MAX_BATCH_LOG_REQUEST_SIZE``.
    """
    request_size = len(json_req)
    if request_size > MAX_BATCH_LOG_REQUEST_SIZE:
        raise MlflowException(
            f"Batched logging API requests must be at most {MAX_BATCH_LOG_REQUEST_SIZE} bytes, "
            f"got a request of size {request_size}.",
            error_code=INVALID_PARAMETER_VALUE,
        )
449  
450  
def _validate_experiment_name(experiment_name):
    """
    Ensure `experiment_name` is a non-empty string no longer than
    ``MAX_EXPERIMENT_NAME_LENGTH``; raise an MlflowException otherwise.
    """
    if experiment_name == "" or experiment_name is None:
        problem = f"Invalid experiment name: '{experiment_name}'"
    elif not is_string_type(experiment_name):
        problem = f"Invalid experiment name: {experiment_name}. Expects a string."
    elif len(experiment_name) > MAX_EXPERIMENT_NAME_LENGTH:
        raise MlflowException.invalid_parameter_value(
            exceeds_maximum_length("name", MAX_EXPERIMENT_NAME_LENGTH)
        )
    else:
        return

    raise MlflowException(problem, error_code=INVALID_PARAMETER_VALUE)
469  
470  
471  def _validate_experiment_id_type(experiment_id):
472      """
473      Check that a user-provided experiment_id is either a string, int, or None and raise an
474      exception if it isn't.
475      """
476      if experiment_id is not None and not isinstance(experiment_id, (str, int)):
477          raise MlflowException(
478              f"Invalid experiment id: {experiment_id} of type {type(experiment_id)}. "
479              "Must be one of str, int, or None.",
480              error_code=INVALID_PARAMETER_VALUE,
481          )
482  
483  
484  def _validate_list_param(param_name: str, param_value: Any, allow_none: bool = False) -> None:
485      """
486      Validate that a parameter is a list and raise a helpful error if it isn't.
487  
488      Args:
489          param_name: Name of the parameter being validated (e.g., "experiment_ids")
490          param_value: The value to validate
491          allow_none: If True, None is allowed. If False, None is treated as invalid.
492  
493      Raises:
494          MlflowException: If the parameter is not a list (and not None when allow_none=True)
495      """
496      if allow_none and param_value is None:
497          return
498  
499      if not isinstance(param_value, list):
500          raise MlflowException.invalid_parameter_value(
501              f"{param_name} must be a list, got {type(param_value).__name__}. "
502              f"Did you mean to use {param_name}=[{param_value!r}]?"
503          )
504  
505  
def _validate_model_name(model_name: str) -> None:
    """
    Ensure `model_name` is non-blank, contains neither '/' nor ':', and is unique when
    treated as a path; raise an MlflowException otherwise.
    """
    if model_name is None or model_name.strip() == "":
        raise MlflowException(missing_value("name"), error_code=INVALID_PARAMETER_VALUE)

    if "/" in model_name or ":" in model_name:
        raise MlflowException(
            f"Invalid model name '{model_name}'. Names cannot contain '/' or ':'.",
            error_code=INVALID_PARAMETER_VALUE,
        )

    if path_not_unique(model_name):
        path_problem = bad_path_message(model_name)
        raise MlflowException(
            invalid_value("name", model_name, path_problem),
            INVALID_PARAMETER_VALUE,
        )
520  
521  
def _validate_model_renaming(model_new_name: str) -> None:
    """
    Ensure the new name for a model rename is present and is itself a valid model name;
    raise an MlflowException otherwise.
    """
    name_is_blank = model_new_name is None or str(model_new_name).strip() == ""
    if name_is_blank:
        raise MlflowException(missing_value("new_name"), error_code=INVALID_PARAMETER_VALUE)
    _validate_model_name(model_new_name)
526  
527  
528  def _validate_model_version(model_version):
529      try:
530          model_version = int(model_version)
531      except ValueError:
532          raise MlflowException(
533              not_integer_value("version", model_version), error_code=INVALID_PARAMETER_VALUE
534          )
535  
536  
def _validate_model_alias_name(model_alias_name):
    """
    Check that `model_alias_name` is a valid registered model alias name.

    A valid alias is non-empty, matches ``_REGISTERED_MODEL_ALIAS_REGEX`` in full, and is at
    most ``MAX_REGISTERED_MODEL_ALIAS_LENGTH`` characters long.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` if any of these checks fails.
    """
    if model_alias_name is None or model_alias_name == "":
        raise MlflowException(
            "Registered model alias name cannot be empty.", INVALID_PARAMETER_VALUE
        )
    # fullmatch is required here: with match(), the pattern's trailing "$" also matches
    # just before a final newline, so an alias such as "prod\n" would be accepted.
    if not _REGISTERED_MODEL_ALIAS_REGEX.fullmatch(model_alias_name):
        raise MlflowException(
            f"Invalid alias name: '{model_alias_name}'. {_BAD_ALIAS_CHARACTERS_MESSAGE}",
            INVALID_PARAMETER_VALUE,
        )
    _validate_length_limit(
        "Registered model alias name",
        MAX_REGISTERED_MODEL_ALIAS_LENGTH,
        model_alias_name,
    )
552  
553  
def _validate_model_alias_name_reserved(model_alias_name):
    """
    Raise an MlflowException if `model_alias_name` is reserved: either "latest" in any
    letter case, or a version-style alias matching ``_REGISTERED_MODEL_ALIAS_VERSION_REGEX``.
    """
    lowered = model_alias_name.lower()
    if lowered == "latest":
        raise MlflowException(
            "'latest' alias name (case insensitive) is reserved.",
            INVALID_PARAMETER_VALUE,
        )

    version_match = _REGISTERED_MODEL_ALIAS_VERSION_REGEX.match(model_alias_name)
    if version_match:
        raise MlflowException(
            f"Version alias name '{model_alias_name}' is reserved.",
            INVALID_PARAMETER_VALUE,
        )
565  
566  
567  def _validate_experiment_artifact_location(artifact_location):
568      if artifact_location is not None and artifact_location.startswith("runs:"):
569          raise MlflowException(
570              f"Artifact location cannot be a runs:/ URI. Given: '{artifact_location}'",
571              error_code=INVALID_PARAMETER_VALUE,
572          )
573  
574  
def _validate_db_type_string(db_type):
    """
    Ensure the db_type parsed from a DB URI is one of the supported ``DATABASE_ENGINES``;
    raise an MlflowException listing the supported engines otherwise.
    """
    from mlflow.store.db.db_types import DATABASE_ENGINES

    if db_type in DATABASE_ENGINES:
        return

    supported = ", ".join(DATABASE_ENGINES)
    raise MlflowException(
        f"Invalid database engine: '{db_type}'. Supported database engines are {supported}",
        INVALID_PARAMETER_VALUE,
    )
585  
586  
587  def _validate_model_version_or_stage_exists(version, stage):
588      if version and stage:
589          raise MlflowException("version and stage cannot be set together", INVALID_PARAMETER_VALUE)
590  
591      if not (version or stage):
592          raise MlflowException("version or stage must be set", INVALID_PARAMETER_VALUE)
593  
594  
595  def _validate_tag_value(value):
596      if value is None:
597          raise MlflowException("Tag value cannot be None", INVALID_PARAMETER_VALUE)
598  
599  
def _validate_dataset_inputs(dataset_inputs: list[DatasetInput]):
    """Validate the dataset and the input tags of every entry in `dataset_inputs`."""
    for entry in dataset_inputs:
        _validate_dataset(entry.dataset)
        _validate_input_tags(entry.tags)
604  
605  
def _validate_dataset(dataset: Dataset):
    """
    Validate a dataset entity.

    The dataset and its name, digest, source_type, and source must not be None. Name,
    digest, source, and (when present) schema and profile must respect their size limits.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` on the first failing check.
    """
    if dataset is None:
        raise MlflowException("Dataset cannot be None", INVALID_PARAMETER_VALUE)

    # Required fields, checked in this order.
    for field in ("name", "digest", "source_type", "source"):
        if getattr(dataset, field) is None:
            raise MlflowException(f"Dataset {field} cannot be None", INVALID_PARAMETER_VALUE)

    # Size limits, checked in this order; schema and profile are optional and skipped
    # when None.
    size_limits = (
        ("name", MAX_DATASET_NAME_SIZE),
        ("digest", MAX_DATASET_DIGEST_SIZE),
        ("source", MAX_DATASET_SOURCE_SIZE),
        ("schema", MAX_DATASET_SCHEMA_SIZE),
        ("profile", MAX_DATASET_PROFILE_SIZE),
    )
    for field, limit in size_limits:
        field_value = getattr(dataset, field)
        if field_value is not None and len(field_value) > limit:
            raise MlflowException(
                exceeds_maximum_length(field, limit),
                INVALID_PARAMETER_VALUE,
            )
642  
643  
def _validate_input_tags(input_tags: list[InputTag]):
    """Validate every tag in `input_tags`, raising on the first invalid one."""
    for tag in input_tags:
        _validate_input_tag(tag)
647  
648  
def _validate_input_tag(input_tag: InputTag):
    """
    Validate an input tag: the tag, its key, and its value must not be None, and key and
    value must respect their size limits.

    Raises:
        MlflowException: With ``INVALID_PARAMETER_VALUE`` on the first failing check.
    """
    if input_tag is None:
        raise MlflowException("InputTag cannot be None", INVALID_PARAMETER_VALUE)

    for field in ("key", "value"):
        if getattr(input_tag, field) is None:
            raise MlflowException(f"InputTag {field} cannot be None", INVALID_PARAMETER_VALUE)

    size_limits = (
        ("key", MAX_INPUT_TAG_KEY_SIZE),
        ("value", MAX_INPUT_TAG_VALUE_SIZE),
    )
    for field, limit in size_limits:
        if len(getattr(input_tag, field)) > limit:
            raise MlflowException(
                exceeds_maximum_length(field, limit),
                INVALID_PARAMETER_VALUE,
            )
666  
667  
668  def _validate_username(username):
669      if username is None or username == "":
670          raise MlflowException("Username cannot be empty.", INVALID_PARAMETER_VALUE)
671  
672  
673  def _validate_password(password) -> None:
674      if password is None or len(password) < 12:
675          raise MlflowException.invalid_parameter_value(
676              "Password must be a string longer than 12 characters."
677          )
678  
679  
def _validate_trace_tag(key, value):
    """
    Validate a trace tag and return the ``(key, value)`` pair to store.

    The key must be a valid tag name within ``MAX_TRACE_TAG_KEY_LENGTH``. The value is
    checked against ``MAX_TRACE_TAG_VAL_LENGTH`` with ``truncate=True``.
    """
    _validate_tag_name(key)
    return (
        _validate_length_limit("key", MAX_TRACE_TAG_KEY_LENGTH, key),
        _validate_length_limit("value", MAX_TRACE_TAG_VAL_LENGTH, value, truncate=True),
    )
685  
686  
def _validate_experiment_artifact_location_length(artifact_location: str):
    """
    Raise an MlflowException if `artifact_location` is longer than the limit read from the
    ``MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH`` environment variable.
    """
    max_length = MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH.get()
    if len(artifact_location) > max_length:
        problem = (
            "Invalid artifact path length. The length of the artifact path cannot be "
            f"greater than {max_length} characters. To configure this limit, please set the "
            "MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH environment variable."
        )
        raise MlflowException(problem, INVALID_PARAMETER_VALUE)
696  
697  
698  def _validate_logged_model_name(name: str | None) -> None:
699      if name is None:
700          return
701  
702      bad_chars = ("/", ":", ".", "%", '"', "'")
703      if not name or any(c in name for c in bad_chars):
704          raise MlflowException(
705              f"Invalid model name ({name!r}) provided. Model name must be a non-empty string "
706              f"and cannot contain the following characters: {bad_chars}",
707              INVALID_PARAMETER_VALUE,
708          )
709  
710  
711  _WEBHOOK_NAME_REGEX = re.compile(
712      r"^(?=.{1,63}$)"  # Total length between 1 and 63 characters
713      r"[a-z0-9]"  # Must start with letter or digit
714      r"([a-z0-9._-]*[a-z0-9])?$",  # Optional middle + end with letter/digit
715      re.IGNORECASE,
716  )
717  
718  
719  def _validate_webhook_name(name: str) -> None:
720      if not isinstance(name, str):
721          raise MlflowException.invalid_parameter_value(
722              f"Webhook name must be a string, got {type(name).__name__!r}"
723          )
724  
725      if not _WEBHOOK_NAME_REGEX.fullmatch(name):
726          raise MlflowException.invalid_parameter_value(
727              f"Webhook name {name!r} is invalid. It must start and end with a letter or digit, "
728              "be less than 63 characters long, and contain only letters, digits, dots (.), "
729              "underscores (_), and hyphens (-)."
730          )
731  
732  
def _validate_webhook_url(url: str) -> None:
    """
    Check that `url` is an acceptable webhook target.

    The URL must be a non-blank string that parses, uses one of the schemes returned by
    ``_MLFLOW_WEBHOOK_ALLOWED_SCHEMES``, and includes a hostname. Unless
    ``_MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS`` is enabled, the hostname is resolved with
    ``socket.getaddrinfo`` and every resolved address must be global
    (``ipaddress`` ``is_global``).

    Raises:
        MlflowException: If any of these checks fails.
    """
    if not isinstance(url, str):
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL must be a string, got {type(url).__name__!r}"
        )

    if not url.strip():
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL cannot be empty or just whitespace: {url!r}"
        )

    try:
        parts = urllib.parse.urlparse(url)
    except ValueError as parse_error:
        raise MlflowException.invalid_parameter_value(
            f"Invalid webhook URL {url!r}: {parse_error!r}"
        ) from parse_error

    allowed_schemes = _MLFLOW_WEBHOOK_ALLOWED_SCHEMES.get()
    scheme = parts.scheme
    if scheme not in allowed_schemes:
        raise MlflowException.invalid_parameter_value(
            f"Invalid webhook URL scheme: {scheme!r}. "
            f"Allowed schemes are: {', '.join(allowed_schemes)}."
        )

    hostname = parts.hostname
    if not hostname:
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL must include a hostname: {url!r}"
        )

    # Address checks are skipped entirely when private IPs are explicitly allowed.
    if _MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS.get():
        return

    try:
        resolved = socket.getaddrinfo(hostname, None)
    except socket.gaierror as lookup_error:
        raise MlflowException.invalid_parameter_value(
            f"Cannot resolve webhook URL hostname {hostname!r}: {lookup_error}"
        ) from lookup_error

    for entry in resolved:
        # Each getaddrinfo entry is (family, type, proto, canonname, sockaddr), and the
        # first item of sockaddr is the address string.
        raw_address = entry[4][0]
        try:
            ip = ipaddress.ip_address(raw_address)
        except ValueError as address_error:
            raise MlflowException.invalid_parameter_value(
                f"Webhook URL hostname {hostname!r} resolved to an invalid IP address: "
                f"{address_error}"
            ) from address_error
        if not ip.is_global:
            raise MlflowException.invalid_parameter_value(
                f"Webhook URL must not resolve to a non-public IP address. "
                f"{hostname!r} resolves to {ip}."
            )
781  
782  
def _validate_webhook_events(events: list[WebhookEvent]) -> None:
    """
    Ensure `events` is a non-empty list containing only `WebhookEvent` objects; raise an
    MlflowException otherwise.
    """
    is_valid = (
        bool(events)
        and isinstance(events, list)
        and all(isinstance(event, WebhookEvent) for event in events)
    )
    if not is_valid:
        raise MlflowException.invalid_parameter_value(
            f"Webhook events must be a non-empty list of WebhookEvent objects: {events}."
        )
792  
793  
794  def _resolve_experiment_ids_and_locations(
795      experiment_ids: list[str] | None, locations: list[str] | None
796  ) -> list[str]:
797      if experiment_ids:
798          if locations:
799              raise MlflowException.invalid_parameter_value(
800                  "`experiment_ids` is deprecated, use `locations` instead."
801              )
802          else:
803              locations = experiment_ids
804      if not locations:
805          return locations
806  
807      if invalid_experiment_ids := [location for location in locations if "." in location]:
808          invalid_exp_ids_str = ", ".join(invalid_experiment_ids)
809          if len(invalid_exp_ids_str) > 20:
810              invalid_exp_ids_str = invalid_exp_ids_str[:20] + "..."
811          raise MlflowException.invalid_parameter_value(
812              "Locations must be a list of experiment IDs. "
813              f"Found invalid experiment IDs: {invalid_exp_ids_str}."
814          )
815  
816      return locations