generate_ui_test_data.py
"""
Small script used to generate mock data to test the UI.
"""

import argparse
import itertools
import random
import string
from random import random as rand

import mlflow
from mlflow import MlflowClient

# Names reused as parameter keys in one child run and as metric keys in another,
# so the UI can be checked against a run carrying many entries.
_MANY_ENTRY_NAMES = (
    "lot",
    "of",
    "parameters",
    "in",
    "this",
    "experiment",
    "run",
    "because",
    "we",
    "need",
    "to",
    "check",
    "how",
    "it",
    "handles",
)


def log_metrics(metrics):
    """Log every value of every metric to the active MLflow run.

    Args:
        metrics: Mapping of metric name to an iterable of values. Each value is
            logged with its own ``mlflow.log_metric`` call, in iteration order.
    """
    for k, values in metrics.items():
        for v in values:
            mlflow.log_metric(k, v)


def log_params(parameters):
    """Log each ``name -> value`` pair as a parameter of the active MLflow run.

    Args:
        parameters: Mapping of parameter name to parameter value.
    """
    for k, v in parameters.items():
        mlflow.log_param(k, v)


def rand_str(max_len=40):
    """Return a random string of ASCII letters with length in ``[1, max_len]``.

    Letters are drawn with replacement, so ``max_len`` may exceed the 52
    available letters (``random.sample`` would raise ``ValueError`` there).

    Args:
        max_len: Upper bound (inclusive) on the length of the result.
    """
    length = random.randint(1, max_len)
    return "".join(random.choices(string.ascii_letters, k=length))


def _log_simple_runs():
    """Create one "ipython" run per (l1, alpha) combination with three random metrics."""
    for l1, alpha in itertools.product([0, 0.25, 0.5, 0.75, 1], [0, 0.5, 1]):
        with mlflow.start_run(run_name="ipython"):
            parameters = {
                "l1": str(l1),
                "alpha": str(alpha),
            }
            metrics = {
                "MAE": [rand()],
                "R2": [rand()],
                "RMSE": [rand()],
            }
            log_params(parameters)
            log_metrics(metrics)


def _log_timeseries_runs():
    """Create three runs whose metrics have several values each.

    Runs with multiple values for a single metric so that we can QA the
    time-series metric plot.
    """
    for i in range(3):
        with mlflow.start_run():
            for _ in range(10):
                sign = random.choice([-1, 1])
                mlflow.log_metric(
                    "myReallyLongTimeSeriesMetricName-abcdefghijklmnopqrstuvwxyz",
                    random.random() * sign,
                )
                mlflow.log_metric("Another Timeseries Metric", rand() * sign)
                mlflow.log_metric("Yet Another Timeseries Metric", rand() * sign)
            # NOTE(review): block nesting was reconstructed from a flattened source;
            # these two calls are placed after the inner loop, so the "special"
            # metric reuses the last sign drawn above -- confirm against upstream.
            if i == 0:
                mlflow.log_metric("Special Timeseries Metric", rand() * sign)
            mlflow.log_metric("Bar chart metric", rand())


def _log_big_param_run():
    """Create a run with a long parameter name/value and a ten-value metric."""
    with mlflow.start_run(run_name="ipython"):
        parameters = {
            "this is a pretty long parameter name": "NA10921-test_file_2018-08-10.txt",
        }
        metrics = {"grower": [i**1.2 for i in range(10)]}
        log_params(parameters)
        log_metrics(metrics)


def _log_nested_runs():
    """Create a "multirun.py" parent run with four nested child runs."""
    with mlflow.start_run(run_name="multirun.py"):
        l1 = 0.5
        alpha = 0.5
        parameters = {
            "l1": str(l1),
            "alpha": str(alpha),
        }
        metrics = {
            "MAE": [rand()],
            "R2": [rand()],
            "RMSE": [rand()],
        }
        log_params(parameters)
        log_metrics(metrics)

        with mlflow.start_run(run_name="child_params.py", nested=True):
            parameters = {name: str(rand()) for name in _MANY_ENTRY_NAMES}
            log_params(parameters)
            mlflow.log_metric("test_metric", 1)

        with mlflow.start_run(run_name="child_metrics.py", nested=True):
            metrics = {name: [rand()] for name in _MANY_ENTRY_NAMES}
            log_metrics(metrics)

        # Two children sharing a name but with different metric/param values.
        with mlflow.start_run(run_name="sort_child.py", nested=True):
            mlflow.log_metric("test_metric", 1)
            mlflow.log_param("test_param", 1)

        with mlflow.start_run(run_name="sort_child.py", nested=True):
            mlflow.log_metric("test_metric", 2)
            mlflow.log_param("test_param", 2)


def _log_run_hierarchies(client):
    """Create grandchild, parent-loop and many-children run hierarchies.

    Args:
        client: ``MlflowClient`` used to overwrite the parent-run tag of an
            existing run.
    """
    # Grandchildren
    with mlflow.start_run(run_name="parent"):
        with mlflow.start_run(run_name="child", nested=True):
            with mlflow.start_run(run_name="grandchild", nested=True):
                pass

    # Loop
    with mlflow.start_run(run_name="loop-1") as run_1:
        with mlflow.start_run(run_name="loop-2", nested=True) as run_2:
            loop_1_run_id = run_1.info.run_id
            loop_2_run_id = run_2.info.run_id
    # loop-2 was started nested under loop-1; tagging loop-1 with loop-2 as its
    # parent makes the two runs reference each other.
    client.set_tag(loop_1_run_id, "mlflow.parentRunId", loop_2_run_id)

    # Lots of children
    with mlflow.start_run(run_name="parent-with-lots-of-children"):
        for i in range(100):
            with mlflow.start_run(run_name=f"child-{i}", nested=True):
                pass


def _generate_empty_experiments():
    """Create an experiment with no runs and one with 100 empty runs."""
    mlflow.set_experiment("my-empty-experiment")
    mlflow.set_experiment("runs-but-no-metrics-params")
    for i in range(100):
        with mlflow.start_run(run_name=f"empty-run-{i}"):
            pass


def _generate_large_experiments():
    """Create the larger experiments that are only generated with ``--large``."""
    mlflow.set_experiment("med-size-experiment")
    # Experiment with a mix of nested runs & non-nested runs
    for i in range(3):
        with mlflow.start_run(run_name=f"parent-with-children-{i}"):
            params = {rand_str(): rand_str() for _ in range(5)}
            metrics = {rand_str(): [rand()] for _ in range(5)}
            log_params(params)
            log_metrics(metrics)
            for j in range(10):
                with mlflow.start_run(run_name=f"child-{j}", nested=True):
                    params = {rand_str(): rand_str() for _ in range(30)}
                    metrics = {rand_str(): [rand()] for _ in range(30)}
                    log_params(params)
                    log_metrics(metrics)
        for j in range(10):
            with mlflow.start_run(run_name=f"unnested-{i}-{j}"):
                params = {rand_str(): rand_str() for _ in range(5)}
                metrics = {rand_str(): [rand()] for _ in range(5)}
                # Previously these were built but never logged, leaving the
                # non-nested runs empty.
                log_params(params)
                log_metrics(metrics)

    mlflow.set_experiment("hitting-metric-param-limits")
    for i in range(50):
        with mlflow.start_run(run_name=f"big-run-{i}"):
            params = {str(j) + "a" * 250: "b" * 1000 for j in range(100)}
            metrics = {str(j) + "a" * 250: [rand()] for j in range(100)}
            log_metrics(metrics)
            log_params(params)


def main():
    """Parse command-line arguments and generate the mock tracking data."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--large",
        help="If true, will also generate larger datasets for testing UI performance.",
        action="store_true",
    )
    args = parser.parse_args()
    client = MlflowClient()

    # Everything below up to the first set_experiment() call is logged to
    # whichever experiment is active when the script starts.
    _log_simple_runs()
    _log_timeseries_runs()
    _log_big_param_run()
    _log_nested_runs()
    _log_run_hierarchies(client)

    _generate_empty_experiments()
    if args.large:
        _generate_large_experiments()


if __name__ == "__main__":
    main()