validation.py
"""
Utilities for validating user inputs such as metric names and parameter names.
"""

import ipaddress
import json
import logging
import numbers
import posixpath
import re
import socket
import urllib.parse
from typing import Any

from mlflow.entities import Dataset, DatasetInput, InputTag, Param, RunTag
from mlflow.entities.model_registry.prompt_version import PROMPT_TEXT_TAG_KEY
from mlflow.entities.webhook import WebhookEvent
from mlflow.environment_variables import (
    _MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS,
    _MLFLOW_WEBHOOK_ALLOWED_SCHEMES,
    MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH,
    MLFLOW_TRUNCATE_LONG_VALUES,
)
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.os import is_windows
from mlflow.utils.string_utils import is_string_type

_logger = logging.getLogger(__name__)

# Regex for valid run IDs: starting with an ASCII alphanumeric, optionally followed by up to
# 255 characters including alphanumerics, underscores, or dashes (total length 1 to 256).
_RUN_ID_REGEX = re.compile(r"^[a-zA-Z0-9][\w\-]{0,255}$")

# Regex: starting with an alphanumeric, optionally followed by up to 63 characters
# including alphanumerics, underscores, or dashes.
_EXPERIMENT_ID_REGEX = re.compile(r"^[a-zA-Z0-9][\w\-]{0,63}$")

# Regex for valid registered model alias names: may only contain alphanumerics,
# underscores, and dashes. The pattern itself also matches the empty string.
_REGISTERED_MODEL_ALIAS_REGEX = re.compile(r"^[\w\-]*$")

# Regex matching alias names of the form "v<digits>" / "V<digits>", which are reserved
# to prevent conflict with version aliases.
_REGISTERED_MODEL_ALIAS_VERSION_REGEX = re.compile(r"^[vV]\d+$")

# The reserved "latest" alias name
_REGISTERED_MODEL_ALIAS_LATEST = "latest"

# Message appended to errors about alias names containing disallowed characters.
_BAD_ALIAS_CHARACTERS_MESSAGE = (
    "Names may only contain alphanumerics, underscores (_), and dashes (-)."
)

# Message used when a metric/param key is missing (None).
_MISSING_KEY_NAME_MESSAGE = "A key name must be provided."
# Size and count limits enforced by the validators in this module.
MAX_PARAMS_TAGS_PER_BATCH = 100
MAX_METRICS_PER_BATCH = 1000
MAX_DATASETS_PER_BATCH = 1000
MAX_ENTITIES_PER_BATCH = 1000
MAX_BATCH_LOG_REQUEST_SIZE = int(1e6)
MAX_PARAM_VAL_LENGTH = 6000
MAX_TAG_VAL_LENGTH = 8000
MAX_EXPERIMENT_NAME_LENGTH = 500
MAX_EXPERIMENT_TAG_KEY_LENGTH = 250
MAX_EXPERIMENT_TAG_VAL_LENGTH = 5000
MAX_ENTITY_KEY_LENGTH = 250
MAX_MODEL_REGISTRY_TAG_KEY_LENGTH = 250
MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH = 100_000
MAX_EXPERIMENTS_LISTED_PER_PAGE = 50000
MAX_DATASET_NAME_SIZE = 500
MAX_DATASET_DIGEST_SIZE = 36
# 1MB -1, the db limit for MEDIUMTEXT column is 16MB, but
# we restrict to 1MB because user might log lots of datasets
# to a single run, 16MB increases burden on db
MAX_DATASET_SCHEMA_SIZE = 1048575
MAX_DATASET_SOURCE_SIZE = 65535  # 64KB -1 (the db limit for TEXT column)
MAX_DATASET_PROFILE_SIZE = 16777215  # 16MB -1 (the db limit for MEDIUMTEXT column)
MAX_INPUT_TAG_KEY_SIZE = 255
MAX_INPUT_TAG_VALUE_SIZE = 500
MAX_REGISTERED_MODEL_ALIAS_LENGTH = 255
MAX_TRACE_TAG_KEY_LENGTH = 250
MAX_TRACE_TAG_VAL_LENGTH = 8000

PARAM_VALIDATION_MSG = """

The cause of this error is typically due to repeated calls
to an individual run_id event logging.

Incorrect Example:
---------------------------------------
with mlflow.start_run():
    mlflow.log_param("depth", 3)
    mlflow.log_param("depth", 5)
---------------------------------------

Which will throw an MlflowException for overwriting a
logged parameter.

Correct Example:
---------------------------------------
with mlflow.start_run():
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 3)
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 5)
---------------------------------------

Which will create a new nested run for each individual
model and prevent parameter key collisions within the
tracking store."""


def invalid_value(path, value, message=None):
    """
    Compose a standardized error message for invalid parameter values.

    Args:
        path: Name (JSON-path style, e.g. ``metrics[0].value``) of the offending parameter.
        value: The rejected value. It is rendered as compact JSON when possible and
            with ``repr()`` otherwise.
        message: Optional explanation appended to the message.

    Returns:
        The formatted error message.
    """
    try:
        formatted_value = json.dumps(value, sort_keys=True, separators=(",", ":"))
    except (TypeError, ValueError):
        # Rejected values are often rejected precisely because they have an unexpected
        # type; failing to serialize them must not mask the validation error itself.
        formatted_value = repr(value)

    if message:
        return f"Invalid value {formatted_value} for parameter '{path}' supplied: {message}"
    else:
        return f"Invalid value {formatted_value} for parameter '{path}' supplied."


def missing_value(path):
    """Return the error message for a required parameter `path` that was not supplied."""
    return f"Missing value for required parameter '{path}'."


def not_integer_value(path, value):
    """Return the error message for a parameter `path` whose `value` is not an integer."""
    return f"Parameter '{path}' must be an integer, got '{value}'."


def exceeds_maximum_length(path, limit):
    """Return the error message for a parameter `path` longer than `limit` characters."""
    return f"'{path}' exceeds the maximum length of {limit} characters"


def append_to_json_path(currentPath, value):
    """
    Append `value` to the JSON-path-like string `currentPath`.

    Index segments (starting with ``[``) are appended directly, other segments are joined
    with a dot. If `currentPath` is empty, `value` is returned unchanged.
    """
    if not currentPath:
        return value

    if value.startswith("["):
        return f"{currentPath}{value}"

    return f"{currentPath}.{value}"


def bad_path_message(name):
    """Return the error message for a `name` that does not normalize to itself as a path."""
    return (
        "Names may be treated as files in certain cases, and must not resolve to other names"
        f" when treated as such. This name would resolve to {posixpath.normpath(name)!r}"
    )


def validate_param_and_metric_name(name):
    """
    Check the characters of a param or metric name.

    Returns:
        A truthy ``re.Match`` object if `name` only contains allowed characters,
        otherwise ``None``.
    """
    # NOTE(review): with `re.match`, `$` also matches just before a trailing newline, so a
    # name ending in "\n" passes this check -- confirm whether that is intended.

    # In windows system valid param and metric names: may only contain slashes, alphanumerics,
    # underscores, periods, dashes, and spaces.
    if is_windows():
        return re.match(r"^[/\w.\- ]*$", name)

    # For other system valid param and metric names: may only contain slashes, alphanumerics,
    # underscores, periods, dashes, colons, and spaces.
    return re.match(r"^[/\w.\- :]*$", name)


def bad_character_message():
    """Return the platform-specific message listing the characters allowed in names."""
    # Valid param and metric names may only contain slashes, alphanumerics, underscores,
    # periods, dashes, colons, and spaces. For windows param and metric names can not contain colon
    msg = (
        "Names may only contain alphanumerics, underscores (_), dashes (-), periods (.),"
        " spaces ( ){} and slashes (/)."
    )
    return msg.format("") if is_windows() else msg.format(", colon(:)")


def path_not_unique(name):
    """
    Return True if `name`, treated as a POSIX path, is not already in normalized relative form.

    That is the case when normalization changes the name, or when the normalized name is
    ``.``, starts with ``..`` or starts with ``/``.
    """
    norm = posixpath.normpath(name)
    return norm != str(name) or norm == "." or norm.startswith("..") or norm.startswith("/")


def _validate_metric_name(name, path="name"):
    """Check that `name` is a valid metric name and raise an exception if it isn't."""
    if name is None:
        raise MlflowException(
            invalid_value(path, name, f"Metric name cannot be None. {_MISSING_KEY_NAME_MESSAGE}"),
            error_code=INVALID_PARAMETER_VALUE,
        )
    if not validate_param_and_metric_name(name):
        raise MlflowException(
            invalid_value(path, name, bad_character_message()),
            INVALID_PARAMETER_VALUE,
        )
    if path_not_unique(name):
        raise MlflowException(
            invalid_value(path, name, bad_path_message(name)),
            INVALID_PARAMETER_VALUE,
        )


def _is_numeric(value):
    """
    Returns True if the passed-in value is numeric.
    """
    # Note that `isinstance(bool_value, numbers.Number)` returns `True` because `bool` is a
    # subclass of `int`.
    return not isinstance(value, bool) and isinstance(value, numbers.Number)


def _validate_metric(key, value, timestamp, step, path=""):
    """
    Check that a metric with the specified key, value, timestamp, and step is valid and raise an
    exception if it isn't.
    """
    _validate_metric_name(key, append_to_json_path(path, "name"))

    # If invocated via log_metric, no prior validation of the presence of the value was done.
    if value is None:
        raise MlflowException(
            missing_value(append_to_json_path(path, "value")),
            INVALID_PARAMETER_VALUE,
        )

    # value must be a Number
    # since bool is an instance of Number check for bool additionally
    if not _is_numeric(value):
        raise MlflowException(
            invalid_value(
                append_to_json_path(path, "value"),
                value,
                f"(timestamp={timestamp}). "
                f"Please specify value as a valid double (64-bit floating point)",
            ),
            INVALID_PARAMETER_VALUE,
        )

    if not isinstance(timestamp, numbers.Number) or timestamp < 0:
        raise MlflowException(
            invalid_value(
                append_to_json_path(path, "timestamp"),
                timestamp,
                f"metric '{key}' (value={value}). "
                f"Timestamp must be a nonnegative long (64-bit integer) ",
            ),
            INVALID_PARAMETER_VALUE,
        )

    if not isinstance(step, numbers.Number):
        raise MlflowException(
            invalid_value(
                append_to_json_path(path, "step"),
                step,
                f"metric '{key}' (value={value}). Step must be a valid long (64-bit integer).",
            ),
            INVALID_PARAMETER_VALUE,
        )

    _validate_length_limit("Metric name", MAX_ENTITY_KEY_LENGTH, key)


def _validate_param(key, value, path=""):
    """
    Check that a param with the specified key & value is valid and raise an exception if it
    isn't.

    Returns:
        A `Param` built from the key and the (possibly truncated) value.
    """
    _validate_param_name(key, append_to_json_path(path, "key"))
    return Param(
        _validate_length_limit("Param key", MAX_ENTITY_KEY_LENGTH, key),
        _validate_length_limit("Param value", MAX_PARAM_VAL_LENGTH, value, truncate=True),
    )


def _validate_tag(key, value, path=""):
    """
    Check that a tag with the specified key & value is valid and raise an exception if it isn't.

    Returns:
        A `RunTag` built from the key and the (possibly truncated) value.
    """
    _validate_tag_name(key, append_to_json_path(path, "key"))
    return RunTag(
        _validate_length_limit(append_to_json_path(path, "key"), MAX_ENTITY_KEY_LENGTH, key),
        _validate_length_limit(
            append_to_json_path(path, "value"), MAX_TAG_VAL_LENGTH, value, truncate=True
        ),
    )


def _validate_experiment_tag(key, value):
    """
    Check that an experiment tag with the specified key & value is valid and raise an
    exception if it isn't.
    """
    _validate_tag_name(key)
    _validate_length_limit("key", MAX_EXPERIMENT_TAG_KEY_LENGTH, key)
    _validate_length_limit("value", MAX_EXPERIMENT_TAG_VAL_LENGTH, value)


def _validate_registered_model_tag(key, value):
    """
    Check that a registered model tag with the specified key & value is valid and raise an
    exception if it isn't.
    """
    _validate_tag_name(key)
    _validate_length_limit("key", MAX_MODEL_REGISTRY_TAG_KEY_LENGTH, key)
    _validate_length_limit("value", MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH, value)


def _validate_model_version_tag(key, value):
    """
    Check that a model version tag with the specified key & value is valid and raise an
    exception if it isn't. Unlike the other tag validators, the value must not be None.
    """
    _validate_tag_name(key)
    _validate_tag_value(value)
    _validate_length_limit("key", MAX_MODEL_REGISTRY_TAG_KEY_LENGTH, key)

    # Check prompt text tag particularly for showing friendly error message
    if key == PROMPT_TEXT_TAG_KEY and len(value) > MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH:
        raise MlflowException.invalid_parameter_value(
            f"Prompt text exceeds max length of {MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH} characters.",
        )

    _validate_length_limit("value", MAX_MODEL_REGISTRY_TAG_VALUE_LENGTH, value)


def _validate_param_keys_unique(params):
    """Ensures that duplicate param keys are not present in the `log_batch()` params argument"""
    # A set keeps the membership test O(1); `dupe_keys` stays a list so the error message
    # reports duplicates in the order they were encountered.
    seen_keys = set()
    dupe_keys = []
    for param in params:
        if param.key in seen_keys:
            dupe_keys.append(param.key)
        else:
            seen_keys.add(param.key)

    if dupe_keys:
        raise MlflowException(
            f"Duplicate parameter keys have been submitted: {dupe_keys}. Please ensure "
            "the request contains only one param value per param key.",
            INVALID_PARAMETER_VALUE,
        )


def _validate_param_name(name, path="key"):
    """Check that `name` is a valid parameter name and raise an exception if it isn't."""
    if name is None:
        raise MlflowException(
            invalid_value(path, "", _MISSING_KEY_NAME_MESSAGE),
            error_code=INVALID_PARAMETER_VALUE,
        )
    if not validate_param_and_metric_name(name):
        raise MlflowException(
            invalid_value(path, name, bad_character_message()),
            INVALID_PARAMETER_VALUE,
        )
    if path_not_unique(name):
        raise MlflowException(
            invalid_value(path, name, bad_path_message(name)),
            INVALID_PARAMETER_VALUE,
        )


def _validate_tag_name(name, path="key"):
    """Check that `name` is a valid tag name and raise an exception if it isn't."""
    # Reuse param & metric check.
    if name is None:
        raise MlflowException(
            missing_value(path),
            error_code=INVALID_PARAMETER_VALUE,
        )
    if not validate_param_and_metric_name(name):
        raise MlflowException(
            invalid_value(path, name, bad_character_message()),
            INVALID_PARAMETER_VALUE,
        )
    if path_not_unique(name):
        raise MlflowException(
            invalid_value(path, name, bad_path_message(name)),
            INVALID_PARAMETER_VALUE,
        )


def _validate_length_limit(entity_name, limit, value, *, truncate=False):
    """
    Enforce a maximum length on `value`.

    Args:
        entity_name: Name used for the value in warnings and error messages.
        limit: Maximum allowed length.
        value: The value to check. ``None`` is passed through unchanged.
        truncate: If True and `MLFLOW_TRUNCATE_LONG_VALUES` is enabled, an over-long value
            is truncated to `limit` (with a warning) instead of raising.

    Returns:
        `value`, possibly truncated, or ``None`` if `value` is ``None``.

    Raises:
        MlflowException: If `value` exceeds `limit` and is not truncated.
    """
    if value is None:
        return None

    if len(value) <= limit:
        return value

    if truncate and MLFLOW_TRUNCATE_LONG_VALUES.get():
        _logger.warning(
            f"{entity_name} '{value[:100]}...' ({len(value)} characters) is truncated to "
            f"{limit} characters to meet the length limit."
        )
        return value[:limit]

    raise MlflowException(
        exceeds_maximum_length(entity_name, limit),
        error_code=INVALID_PARAMETER_VALUE,
    )


def _validate_run_id(run_id, path="run_id"):
    """Check that `run_id` is a valid run ID and raise an exception if it isn't."""
    # `fullmatch` rather than `match`: with `match`, the trailing `$` of the pattern also
    # accepts an ID that ends in a newline.
    if _RUN_ID_REGEX.fullmatch(run_id) is None:
        raise MlflowException(invalid_value(path, run_id), error_code=INVALID_PARAMETER_VALUE)


def _validate_experiment_id(exp_id):
    """Check that `experiment_id` is a valid string or None, raise an exception if it isn't."""
    # `fullmatch` rather than `match` so that an ID ending in a newline is rejected.
    if exp_id is not None and _EXPERIMENT_ID_REGEX.fullmatch(exp_id) is None:
        raise MlflowException(
            f"Invalid experiment ID: '{exp_id}'", error_code=INVALID_PARAMETER_VALUE
        )


def _validate_batch_limit(entity_name, limit, length):
    """Raise an exception if `length` entities of kind `entity_name` exceed `limit`."""
    if length > limit:
        error_msg = (
            f"A batch logging request can contain at most {limit} {entity_name}. "
            f"Got {length} {entity_name}. Please split up {entity_name} across multiple"
            " requests and try again."
        )
        raise MlflowException(error_msg, error_code=INVALID_PARAMETER_VALUE)


def _validate_batch_log_limits(metrics, params, tags):
    """Validate that the provided batched logging arguments are within expected limits."""
    _validate_batch_limit(entity_name="metrics", limit=MAX_METRICS_PER_BATCH, length=len(metrics))
    _validate_batch_limit(entity_name="params", limit=MAX_PARAMS_TAGS_PER_BATCH, length=len(params))
    _validate_batch_limit(entity_name="tags", limit=MAX_PARAMS_TAGS_PER_BATCH, length=len(tags))
    total_length = len(metrics) + len(params) + len(tags)
    _validate_batch_limit(
        entity_name="metrics, params, and tags",
        limit=MAX_ENTITIES_PER_BATCH,
        length=total_length,
    )


def _validate_batch_log_data(metrics, params, tags):
    """
    Validate every metric, param and tag of a batch logging request.

    Returns:
        A tuple ``(metrics, params, tags)`` where `metrics` is the input unchanged and
        `params` / `tags` are the entities returned by `_validate_param` / `_validate_tag`
        (whose values may have been truncated).
    """
    for index, metric in enumerate(metrics):
        path = f"metrics[{index}]"
        _validate_metric(metric.key, metric.value, metric.timestamp, metric.step, path=path)
    return (
        metrics,
        [_validate_param(p.key, p.value, path=f"params[{idx}]") for (idx, p) in enumerate(params)],
        [_validate_tag(t.key, t.value, path=f"tags[{idx}]") for (idx, t) in enumerate(tags)],
    )


def _validate_batch_log_api_req(json_req):
    """Raise an exception if the serialized request exceeds `MAX_BATCH_LOG_REQUEST_SIZE`."""
    if len(json_req) > MAX_BATCH_LOG_REQUEST_SIZE:
        error_msg = (
            f"Batched logging API requests must be at most {MAX_BATCH_LOG_REQUEST_SIZE} bytes, "
            f"got a request of size {len(json_req)}."
        )
        raise MlflowException(error_msg, error_code=INVALID_PARAMETER_VALUE)


def _validate_experiment_name(experiment_name):
    """Check that `experiment_name` is a valid string and raise an exception if it isn't."""
    if experiment_name == "" or experiment_name is None:
        raise MlflowException(
            f"Invalid experiment name: '{experiment_name}'",
            error_code=INVALID_PARAMETER_VALUE,
        )

    if not is_string_type(experiment_name):
        raise MlflowException(
            f"Invalid experiment name: {experiment_name}. Expects a string.",
            error_code=INVALID_PARAMETER_VALUE,
        )

    if len(experiment_name) > MAX_EXPERIMENT_NAME_LENGTH:
        raise MlflowException.invalid_parameter_value(
            exceeds_maximum_length("name", MAX_EXPERIMENT_NAME_LENGTH)
        )


def _validate_experiment_id_type(experiment_id):
    """
    Check that a user-provided experiment_id is either a string, int, or None and raise an
    exception if it isn't.
    """
    if experiment_id is not None and not isinstance(experiment_id, (str, int)):
        raise MlflowException(
            f"Invalid experiment id: {experiment_id} of type {type(experiment_id)}. "
            "Must be one of str, int, or None.",
            error_code=INVALID_PARAMETER_VALUE,
        )


def _validate_list_param(param_name: str, param_value: Any, allow_none: bool = False) -> None:
    """
    Validate that a parameter is a list and raise a helpful error if it isn't.

    Args:
        param_name: Name of the parameter being validated (e.g., "experiment_ids")
        param_value: The value to validate
        allow_none: If True, None is allowed. If False, None is treated as invalid.

    Raises:
        MlflowException: If the parameter is not a list (and not None when allow_none=True)
    """
    if allow_none and param_value is None:
        return

    if not isinstance(param_value, list):
        raise MlflowException.invalid_parameter_value(
            f"{param_name} must be a list, got {type(param_value).__name__}. "
            f"Did you mean to use {param_name}=[{param_value!r}]?"
        )


def _validate_model_name(model_name: str) -> None:
    """
    Check that `model_name` is a valid registered model name and raise an exception if it isn't.

    The name must be non-blank, must not contain ``/`` or ``:``, and must not change when
    normalized as a path.
    """
    if model_name is None or model_name.strip() == "":
        raise MlflowException(missing_value("name"), error_code=INVALID_PARAMETER_VALUE)
    invalid_chars = ("/", ":")
    if any(c in model_name for c in invalid_chars):
        raise MlflowException(
            f"Invalid model name '{model_name}'. Names cannot contain '/' or ':'.",
            error_code=INVALID_PARAMETER_VALUE,
        )
    if path_not_unique(model_name):
        raise MlflowException(
            invalid_value("name", model_name, bad_path_message(model_name)),
            INVALID_PARAMETER_VALUE,
        )


def _validate_model_renaming(model_new_name: str) -> None:
    """Check that `model_new_name` is a valid new name for a registered model."""
    # Checked here first so that the error refers to 'new_name' rather than 'name'.
    if model_new_name is None or str(model_new_name).strip() == "":
        raise MlflowException(missing_value("new_name"), error_code=INVALID_PARAMETER_VALUE)
    _validate_model_name(model_new_name)


def _validate_model_version(model_version):
    """Check that `model_version` can be converted to an integer, raise an exception if not."""
    try:
        int(model_version)
    except (TypeError, ValueError) as e:
        # `int()` raises TypeError (not ValueError) for inputs such as None or a list.
        raise MlflowException(
            not_integer_value("version", model_version), error_code=INVALID_PARAMETER_VALUE
        ) from e


def _validate_model_alias_name(model_alias_name):
    """
    Check that `model_alias_name` is a valid registered model alias and raise an exception if
    it isn't: non-empty, only allowed characters, and within the length limit.
    """
    if model_alias_name is None or model_alias_name == "":
        raise MlflowException(
            "Registered model alias name cannot be empty.", INVALID_PARAMETER_VALUE
        )
    # `fullmatch` rather than `match` so that an alias ending in a newline is rejected.
    if not _REGISTERED_MODEL_ALIAS_REGEX.fullmatch(model_alias_name):
        raise MlflowException(
            f"Invalid alias name: '{model_alias_name}'. {_BAD_ALIAS_CHARACTERS_MESSAGE}",
            INVALID_PARAMETER_VALUE,
        )
    _validate_length_limit(
        "Registered model alias name",
        MAX_REGISTERED_MODEL_ALIAS_LENGTH,
        model_alias_name,
    )


def _validate_model_alias_name_reserved(model_alias_name):
    """Raise an exception if `model_alias_name` is a reserved alias ('latest' or 'v<number>')."""
    if model_alias_name.lower() == _REGISTERED_MODEL_ALIAS_LATEST:
        raise MlflowException(
            "'latest' alias name (case insensitive) is reserved.",
            INVALID_PARAMETER_VALUE,
        )
    if _REGISTERED_MODEL_ALIAS_VERSION_REGEX.match(model_alias_name):
        raise MlflowException(
            f"Version alias name '{model_alias_name}' is reserved.",
            INVALID_PARAMETER_VALUE,
        )


def _validate_experiment_artifact_location(artifact_location):
    """Raise an exception if `artifact_location` is a ``runs:`` URI. None is accepted."""
    if artifact_location is not None and artifact_location.startswith("runs:"):
        raise MlflowException(
            f"Artifact location cannot be a runs:/ URI. Given: '{artifact_location}'",
            error_code=INVALID_PARAMETER_VALUE,
        )


def _validate_db_type_string(db_type):
    """validates db_type parsed from DB URI is supported"""
    from mlflow.store.db.db_types import DATABASE_ENGINES

    if db_type not in DATABASE_ENGINES:
        error_msg = (
            f"Invalid database engine: '{db_type}'. "
            f"Supported database engines are {', '.join(DATABASE_ENGINES)}"
        )
        raise MlflowException(error_msg, INVALID_PARAMETER_VALUE)


def _validate_model_version_or_stage_exists(version, stage):
    """Raise an exception unless exactly one of `version` and `stage` is set (truthy)."""
    if version and stage:
        raise MlflowException("version and stage cannot be set together", INVALID_PARAMETER_VALUE)

    if not (version or stage):
        raise MlflowException("version or stage must be set", INVALID_PARAMETER_VALUE)


def _validate_tag_value(value):
    """Raise an exception if the tag `value` is None."""
    if value is None:
        raise MlflowException("Tag value cannot be None", INVALID_PARAMETER_VALUE)


def _validate_dataset_inputs(dataset_inputs: list[DatasetInput]):
    """Validate the dataset and the tags of every entry in `dataset_inputs`."""
    for dataset_input in dataset_inputs:
        _validate_dataset(dataset_input.dataset)
        _validate_input_tags(dataset_input.tags)


def _validate_dataset(dataset: Dataset):
    """
    Check that `dataset` has all required fields and that its fields are within their size
    limits; raise an exception otherwise.

    `name`, `digest`, `source_type` and `source` are required; `schema` and `profile` are
    only length-checked when present.
    """
    if dataset is None:
        raise MlflowException("Dataset cannot be None", INVALID_PARAMETER_VALUE)
    if dataset.name is None:
        raise MlflowException("Dataset name cannot be None", INVALID_PARAMETER_VALUE)
    if dataset.digest is None:
        raise MlflowException("Dataset digest cannot be None", INVALID_PARAMETER_VALUE)
    if dataset.source_type is None:
        raise MlflowException("Dataset source_type cannot be None", INVALID_PARAMETER_VALUE)
    if dataset.source is None:
        raise MlflowException("Dataset source cannot be None", INVALID_PARAMETER_VALUE)
    if len(dataset.name) > MAX_DATASET_NAME_SIZE:
        raise MlflowException(
            exceeds_maximum_length("name", MAX_DATASET_NAME_SIZE),
            INVALID_PARAMETER_VALUE,
        )
    if len(dataset.digest) > MAX_DATASET_DIGEST_SIZE:
        raise MlflowException(
            exceeds_maximum_length("digest", MAX_DATASET_DIGEST_SIZE),
            INVALID_PARAMETER_VALUE,
        )
    if len(dataset.source) > MAX_DATASET_SOURCE_SIZE:
        raise MlflowException(
            exceeds_maximum_length("source", MAX_DATASET_SOURCE_SIZE),
            INVALID_PARAMETER_VALUE,
        )
    if dataset.schema is not None and len(dataset.schema) > MAX_DATASET_SCHEMA_SIZE:
        raise MlflowException(
            exceeds_maximum_length("schema", MAX_DATASET_SCHEMA_SIZE),
            INVALID_PARAMETER_VALUE,
        )
    if dataset.profile is not None and len(dataset.profile) > MAX_DATASET_PROFILE_SIZE:
        raise MlflowException(
            exceeds_maximum_length("profile", MAX_DATASET_PROFILE_SIZE),
            INVALID_PARAMETER_VALUE,
        )


def _validate_input_tags(input_tags: list[InputTag]):
    """Validate every tag in `input_tags`."""
    for input_tag in input_tags:
        _validate_input_tag(input_tag)


def _validate_input_tag(input_tag: InputTag):
    """
    Check that `input_tag` has a non-None key and value within their size limits; raise an
    exception otherwise.
    """
    if input_tag is None:
        raise MlflowException("InputTag cannot be None", INVALID_PARAMETER_VALUE)
    if input_tag.key is None:
        raise MlflowException("InputTag key cannot be None", INVALID_PARAMETER_VALUE)
    if input_tag.value is None:
        raise MlflowException("InputTag value cannot be None", INVALID_PARAMETER_VALUE)
    if len(input_tag.key) > MAX_INPUT_TAG_KEY_SIZE:
        raise MlflowException(
            exceeds_maximum_length("key", MAX_INPUT_TAG_KEY_SIZE),
            INVALID_PARAMETER_VALUE,
        )
    if len(input_tag.value) > MAX_INPUT_TAG_VALUE_SIZE:
        raise MlflowException(
            exceeds_maximum_length("value", MAX_INPUT_TAG_VALUE_SIZE),
            INVALID_PARAMETER_VALUE,
        )


def _validate_username(username):
    """Raise an exception if `username` is None or empty."""
    if username is None or username == "":
        raise MlflowException("Username cannot be empty.", INVALID_PARAMETER_VALUE)


def _validate_password(password) -> None:
    """Raise an exception unless `password` is a string of at least 12 characters."""
    # The type check also covers None and prevents `len()` from failing on non-sized input.
    if not isinstance(password, str) or len(password) < 12:
        raise MlflowException.invalid_parameter_value(
            "Password must be a string of at least 12 characters."
        )


def _validate_trace_tag(key, value):
    """
    Check that a trace tag with the specified key & value is valid and raise an exception if
    it isn't.

    Returns:
        A ``(key, value)`` tuple; the value may have been truncated to the length limit.
    """
    _validate_tag_name(key)
    key = _validate_length_limit("key", MAX_TRACE_TAG_KEY_LENGTH, key)
    value = _validate_length_limit("value", MAX_TRACE_TAG_VAL_LENGTH, value, truncate=True)
    return key, value


def _validate_experiment_artifact_location_length(artifact_location: str):
    """
    Raise an exception if `artifact_location` is longer than the limit configured via
    `MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH`.
    """
    max_length = MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH.get()
    if len(artifact_location) > max_length:
        raise MlflowException(
            "Invalid artifact path length. The length of the artifact path cannot be "
            f"greater than {max_length} characters. To configure this limit, please set the "
            "MLFLOW_ARTIFACT_LOCATION_MAX_LENGTH environment variable.",
            INVALID_PARAMETER_VALUE,
        )


def _validate_logged_model_name(name: str | None) -> None:
    """
    Check that `name` is a valid logged model name and raise an exception if it isn't.

    ``None`` is accepted. Otherwise the name must be non-empty and must not contain any of
    ``/ : . % " '``.
    """
    if name is None:
        return

    bad_chars = ("/", ":", ".", "%", '"', "'")
    if not name or any(c in name for c in bad_chars):
        raise MlflowException(
            f"Invalid model name ({name!r}) provided. Model name must be a non-empty string "
            f"and cannot contain the following characters: {bad_chars}",
            INVALID_PARAMETER_VALUE,
        )


_WEBHOOK_NAME_REGEX = re.compile(
    r"^(?=.{1,63}$)"  # Total length between 1 and 63 characters
    r"[a-z0-9]"  # Must start with letter or digit
    r"([a-z0-9._-]*[a-z0-9])?$",  # Optional middle + end with letter/digit
    re.IGNORECASE,
)


def _validate_webhook_name(name: str) -> None:
    """Check that `name` is a valid webhook name and raise an exception if it isn't."""
    if not isinstance(name, str):
        raise MlflowException.invalid_parameter_value(
            f"Webhook name must be a string, got {type(name).__name__!r}"
        )

    if not _WEBHOOK_NAME_REGEX.fullmatch(name):
        raise MlflowException.invalid_parameter_value(
            f"Webhook name {name!r} is invalid. It must start and end with a letter or digit, "
            "be at most 63 characters long, and contain only letters, digits, dots (.), "
            "underscores (_), and hyphens (-)."
        )


def _validate_webhook_url(url: str) -> None:
    """
    Check that `url` is an acceptable webhook URL and raise an exception if it isn't.

    The URL must be a non-blank string, use one of the schemes from
    `_MLFLOW_WEBHOOK_ALLOWED_SCHEMES`, and include a hostname. Unless
    `_MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS` is enabled, the hostname is resolved and every
    resolved address must be a global (public) IP address.

    Raises:
        MlflowException: If any of the checks above fails or the hostname cannot be resolved.
    """
    if not isinstance(url, str):
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL must be a string, got {type(url).__name__!r}"
        )

    if not url.strip():
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL cannot be empty or just whitespace: {url!r}"
        )

    try:
        parsed_url = urllib.parse.urlparse(url)
    except ValueError as e:
        raise MlflowException.invalid_parameter_value(f"Invalid webhook URL {url!r}: {e!r}") from e
    schemes = _MLFLOW_WEBHOOK_ALLOWED_SCHEMES.get()
    if parsed_url.scheme not in schemes:
        raise MlflowException.invalid_parameter_value(
            f"Invalid webhook URL scheme: {parsed_url.scheme!r}. "
            f"Allowed schemes are: {', '.join(schemes)}."
        )

    hostname = parsed_url.hostname
    if not hostname:
        raise MlflowException.invalid_parameter_value(
            f"Webhook URL must include a hostname: {url!r}"
        )

    if not _MLFLOW_WEBHOOK_ALLOW_PRIVATE_IPS.get():
        try:
            addr_infos = socket.getaddrinfo(hostname, None)
        except socket.gaierror as e:
            raise MlflowException.invalid_parameter_value(
                f"Cannot resolve webhook URL hostname {hostname!r}: {e}"
            ) from e

        # Reject the URL if ANY resolved address is non-public.
        for addr_info in addr_infos:
            try:
                # addr_info is (family, type, proto, canonname, sockaddr); sockaddr[0] is the
                # address string.
                ip = ipaddress.ip_address(addr_info[4][0])
            except ValueError as e:
                raise MlflowException.invalid_parameter_value(
                    f"Webhook URL hostname {hostname!r} resolved to an invalid IP address: {e}"
                ) from e
            if not ip.is_global:
                raise MlflowException.invalid_parameter_value(
                    f"Webhook URL must not resolve to a non-public IP address. "
                    f"{hostname!r} resolves to {ip}."
                )


def _validate_webhook_events(events: list[WebhookEvent]) -> None:
    """Raise an exception unless `events` is a non-empty list of `WebhookEvent` objects."""
    if (
        not events
        or not isinstance(events, list)
        or not all(isinstance(e, WebhookEvent) for e in events)
    ):
        raise MlflowException.invalid_parameter_value(
            f"Webhook events must be a non-empty list of WebhookEvent objects: {events}."
        )


def _resolve_experiment_ids_and_locations(
    experiment_ids: list[str] | None, locations: list[str] | None
) -> list[str] | None:
    """
    Merge the deprecated `experiment_ids` argument with `locations` and validate the result.

    Args:
        experiment_ids: Deprecated way of passing experiment IDs.
        locations: Experiment IDs to use.

    Returns:
        `locations` if given, otherwise `experiment_ids`. If neither holds any entry, the
        falsy `locations` value (e.g. ``None`` or an empty list) is returned unchanged.

    Raises:
        MlflowException: If both arguments are non-empty, or if any entry contains a ``.``.
    """
    if experiment_ids:
        if locations:
            raise MlflowException.invalid_parameter_value(
                "`experiment_ids` is deprecated, use `locations` instead."
            )
        else:
            locations = experiment_ids
    if not locations:
        return locations

    if invalid_experiment_ids := [location for location in locations if "." in location]:
        invalid_exp_ids_str = ", ".join(invalid_experiment_ids)
        # Keep the error message short when many / long IDs are invalid.
        if len(invalid_exp_ids_str) > 20:
            invalid_exp_ids_str = invalid_exp_ids_str[:20] + "..."
        raise MlflowException.invalid_parameter_value(
            "Locations must be a list of experiment IDs. "
            f"Found invalid experiment IDs: {invalid_exp_ids_str}."
        )

    return locations