# test_datasets.py
import json

import pytest
from click.testing import CliRunner

import mlflow
from mlflow.cli.datasets import commands
from mlflow.genai.datasets import create_dataset


@pytest.fixture
def runner():
    """Return a Click test runner that lets exceptions propagate to the test."""
    return CliRunner(catch_exceptions=False)


@pytest.fixture
def experiment():
    """Create the ``test_datasets_cli`` experiment, yield its ID, then delete it."""
    exp_id = mlflow.create_experiment("test_datasets_cli")
    yield exp_id
    mlflow.delete_experiment(exp_id)


@pytest.fixture
def dataset_a(experiment):
    """Create ``dataset_a`` in the test experiment, tagged ``env=production``."""
    return create_dataset(
        name="dataset_a",
        experiment_id=experiment,
        tags={"env": "production"},
    )


@pytest.fixture
def dataset_b(experiment):
    """Create ``dataset_b`` in the test experiment, tagged ``env=staging``."""
    return create_dataset(
        name="dataset_b",
        experiment_id=experiment,
        tags={"env": "staging"},
    )


def test_commands_group_exists():
    """The command group is named ``datasets`` and carries help text."""
    assert commands.name == "datasets"
    assert commands.help is not None


def test_list_command_params():
    """The ``list`` subcommand exposes exactly the expected set of parameters."""
    list_cmd = next((cmd for cmd in commands.commands.values() if cmd.name == "list"), None)
    assert list_cmd is not None
    param_names = {p.name for p in list_cmd.params}
    expected_params = {
        "experiment_id",
        "filter_string",
        "max_results",
        "order_by",
        "page_token",
        "output",
    }
    assert param_names == expected_params


def test_list_datasets_table_output(runner: CliRunner, experiment: str, dataset_a):
    """Without ``--output``, the listing contains the dataset's ID and name."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment])

    assert result.exit_code == 0
    assert dataset_a.dataset_id in result.output
    assert "dataset_a" in result.output


def test_list_datasets_json_output(runner: CliRunner, experiment: str, dataset_a):
    """``--output json`` emits the full dataset record plus a null page token."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment, "--output", "json"])

    assert result.exit_code == 0

    expected = {
        "datasets": [
            {
                "dataset_id": dataset_a.dataset_id,
                "name": "dataset_a",
                "digest": dataset_a.digest,
                "created_time": dataset_a.created_time,
                "last_update_time": dataset_a.last_update_time,
                "created_by": dataset_a.created_by,
                "last_updated_by": dataset_a.last_updated_by,
                "tags": dataset_a.tags,
            }
        ],
        "next_page_token": None,
    }
    assert json.loads(result.output) == expected


def test_list_datasets_empty_results(runner: CliRunner, experiment: str):
    """Listing an experiment with no datasets still exits successfully."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment])

    assert result.exit_code == 0


def test_list_datasets_json_empty_results(runner: CliRunner, experiment: str):
    """JSON output for an experiment with no datasets is an empty list and a null token."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment, "--output", "json"])

    assert result.exit_code == 0
    output_json = json.loads(result.output)
    assert output_json == {"datasets": [], "next_page_token": None}


def test_list_datasets_with_experiment_id_env_var(runner: CliRunner, experiment: str, dataset_a):
    """``MLFLOW_EXPERIMENT_ID`` supplies the experiment when the option is omitted."""
    result = runner.invoke(commands, ["list"], env={"MLFLOW_EXPERIMENT_ID": experiment})

    assert result.exit_code == 0
    assert dataset_a.dataset_id in result.output


def test_list_datasets_missing_experiment_id(runner: CliRunner):
    """Omitting the experiment ID entirely fails with a "Missing option" error."""
    # A value of None removes the variable for this invocation, so an
    # MLFLOW_EXPERIMENT_ID exported in the developer's shell or CI cannot
    # silently satisfy the option and make this test fail.
    result = runner.invoke(commands, ["list"], env={"MLFLOW_EXPERIMENT_ID": None})

    assert result.exit_code != 0
    assert "Missing option '--experiment-id' / '-x'" in result.output


def test_list_datasets_invalid_output_format(runner: CliRunner, experiment: str):
    """An unsupported ``--output`` value is rejected with the list of valid choices."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment, "--output", "invalid"])

    assert result.exit_code != 0
    assert "'invalid' is not one of 'table', 'json'" in result.output


def test_list_datasets_with_filter_string(runner: CliRunner, experiment: str, dataset_a, dataset_b):
    """``--filter-string`` on the name keeps ``dataset_a`` and drops ``dataset_b``."""
    result = runner.invoke(
        commands,
        ["list", "--experiment-id", experiment, "--filter-string", "name = 'dataset_a'"],
    )

    assert result.exit_code == 0
    assert "dataset_a" in result.output
    assert "dataset_b" not in result.output


def test_list_datasets_with_max_results(runner: CliRunner, experiment: str, dataset_a, dataset_b):
    """``--max-results 1`` prints only one dataset row although two exist."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment, "--max-results", "1"])

    assert result.exit_code == 0
    # Count rows by the shared "dataset_" name prefix of the two fixtures.
    output_lines = [line for line in result.output.split("\n") if "dataset_" in line]
    assert len(output_lines) == 1


def test_list_datasets_with_order_by(runner: CliRunner, experiment: str, dataset_a, dataset_b):
    """``--order-by "name ASC"`` prints ``dataset_a`` before ``dataset_b``."""
    result = runner.invoke(
        commands, ["list", "--experiment-id", experiment, "--order-by", "name ASC"]
    )

    assert result.exit_code == 0
    a_pos = result.output.find("dataset_a")
    b_pos = result.output.find("dataset_b")
    # str.find() returns -1 on a miss; without the lower bound a missing
    # "dataset_a" would satisfy `a_pos < b_pos` and pass vacuously.
    assert 0 <= a_pos < b_pos


def test_list_datasets_short_option_x(runner: CliRunner, experiment: str, dataset_a):
    """The short ``-x`` option is accepted in place of ``--experiment-id``."""
    result = runner.invoke(commands, ["list", "-x", experiment])

    assert result.exit_code == 0
    assert dataset_a.dataset_id in result.output


def test_list_datasets_multiple_datasets(runner: CliRunner, experiment: str, dataset_a, dataset_b):
    """With two datasets, the listing contains the ID and name of each."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment])

    assert result.exit_code == 0
    assert dataset_a.dataset_id in result.output
    assert "dataset_a" in result.output
    assert dataset_b.dataset_id in result.output
    assert "dataset_b" in result.output


def test_list_datasets_json_multiple_datasets(
    runner: CliRunner, experiment: str, dataset_a, dataset_b
):
    """JSON output for two datasets contains exactly those two names."""
    result = runner.invoke(commands, ["list", "--experiment-id", experiment, "--output", "json"])

    assert result.exit_code == 0
    output_json = json.loads(result.output)
    assert len(output_json["datasets"]) == 2

    names = {d["name"] for d in output_json["datasets"]}
    assert names == {"dataset_a", "dataset_b"}