Cradicle Explorer

/ tests / generate_ui_test_data.py
generate_ui_test_data.py
  1  """
  2  Small script used to generate mock data to test the UI.
  3  """
  4  
  5  import argparse
  6  import itertools
  7  import random
  8  import string
  9  from random import random as rand
 10  
 11  import mlflow
 12  from mlflow import MlflowClient
 13  
 14  
 15  def log_metrics(metrics):
 16      for k, values in metrics.items():
 17          for v in values:
 18              mlflow.log_metric(k, v)
 19  
 20  
 21  def log_params(parameters):
 22      for k, v in parameters.items():
 23          mlflow.log_param(k, v)
 24  
 25  
 26  def rand_str(max_len=40):
 27      return "".join(random.sample(string.ascii_letters, random.randint(1, max_len)))
 28  
 29  
 30  if __name__ == "__main__":
 31      parser = argparse.ArgumentParser()
 32      parser.add_argument(
 33          "--large",
 34          help="If true, will also generate larger datasets for testing UI performance.",
 35          action="store_true",
 36      )
 37      args = parser.parse_args()
 38      client = MlflowClient()
 39      # Simple run
 40      for l1, alpha in itertools.product([0, 0.25, 0.5, 0.75, 1], [0, 0.5, 1]):
 41          with mlflow.start_run(run_name="ipython"):
 42              parameters = {
 43                  "l1": str(l1),
 44                  "alpha": str(alpha),
 45              }
 46              metrics = {
 47                  "MAE": [rand()],
 48                  "R2": [rand()],
 49                  "RMSE": [rand()],
 50              }
 51              log_params(parameters)
 52              log_metrics(metrics)
 53  
 54      # Runs with multiple values for a single metric so that we can QA the time-series metric
 55      # plot
 56      for i in range(3):
 57          with mlflow.start_run():
 58              for j in range(10):
 59                  sign = random.choice([-1, 1])
 60                  mlflow.log_metric(
 61                      "myReallyLongTimeSeriesMetricName-abcdefghijklmnopqrstuvwxyz",
 62                      random.random() * sign,
 63                  )
 64                  mlflow.log_metric("Another Timeseries Metric", rand() * sign)
 65                  mlflow.log_metric("Yet Another Timeseries Metric", rand() * sign)
 66              if i == 0:
 67                  mlflow.log_metric("Special Timeseries Metric", rand() * sign)
 68              mlflow.log_metric("Bar chart metric", rand())
 69  
 70      # Big parameter values
 71      with mlflow.start_run(run_name="ipython"):
 72          parameters = {
 73              "this is a pretty long parameter name": "NA10921-test_file_2018-08-10.txt",
 74          }
 75          metrics = {"grower": [i**1.2 for i in range(10)]}
 76          log_params(parameters)
 77          log_metrics(metrics)
 78  
 79      # Nested runs.
 80      with mlflow.start_run(run_name="multirun.py"):
 81          l1 = 0.5
 82          alpha = 0.5
 83          parameters = {
 84              "l1": str(l1),
 85              "alpha": str(alpha),
 86          }
 87          metrics = {
 88              "MAE": [rand()],
 89              "R2": [rand()],
 90              "RMSE": [rand()],
 91          }
 92          log_params(parameters)
 93          log_metrics(metrics)
 94  
 95          with mlflow.start_run(run_name="child_params.py", nested=True):
 96              parameters = {
 97                  "lot": str(rand()),
 98                  "of": str(rand()),
 99                  "parameters": str(rand()),
100                  "in": str(rand()),
101                  "this": str(rand()),
102                  "experiment": str(rand()),
103                  "run": str(rand()),
104                  "because": str(rand()),
105                  "we": str(rand()),
106                  "need": str(rand()),
107                  "to": str(rand()),
108                  "check": str(rand()),
109                  "how": str(rand()),
110                  "it": str(rand()),
111                  "handles": str(rand()),
112              }
113              log_params(parameters)
114              mlflow.log_metric("test_metric", 1)
115  
116          with mlflow.start_run(run_name="child_metrics.py", nested=True):
117              metrics = {
118                  "lot": [rand()],
119                  "of": [rand()],
120                  "parameters": [rand()],
121                  "in": [rand()],
122                  "this": [rand()],
123                  "experiment": [rand()],
124                  "run": [rand()],
125                  "because": [rand()],
126                  "we": [rand()],
127                  "need": [rand()],
128                  "to": [rand()],
129                  "check": [rand()],
130                  "how": [rand()],
131                  "it": [rand()],
132                  "handles": [rand()],
133              }
134              log_metrics(metrics)
135  
136          with mlflow.start_run(run_name="sort_child.py", nested=True):
137              mlflow.log_metric("test_metric", 1)
138              mlflow.log_param("test_param", 1)
139  
140          with mlflow.start_run(run_name="sort_child.py", nested=True):
141              mlflow.log_metric("test_metric", 2)
142              mlflow.log_param("test_param", 2)
143  
144      # Grandchildren
145      with mlflow.start_run(run_name="parent"):
146          with mlflow.start_run(run_name="child", nested=True):
147              with mlflow.start_run(run_name="grandchild", nested=True):
148                  pass
149  
150      # Loop
151      loop_1_run_id = None
152      loop_2_run_id = None
153      with mlflow.start_run(run_name="loop-1") as run_1:
154          with mlflow.start_run(run_name="loop-2", nested=True) as run_2:
155              loop_1_run_id = run_1.info.run_id
156              loop_2_run_id = run_2.info.run_id
157      client.set_tag(loop_1_run_id, "mlflow.parentRunId", loop_2_run_id)
158  
159      # Lot's of children
160      with mlflow.start_run(run_name="parent-with-lots-of-children"):
161          for i in range(100):
162              with mlflow.start_run(run_name=f"child-{i}", nested=True):
163                  pass
164      mlflow.set_experiment("my-empty-experiment")
165      mlflow.set_experiment("runs-but-no-metrics-params")
166      for i in range(100):
167          with mlflow.start_run(run_name=f"empty-run-{i}"):
168              pass
169      if args.large:
170          mlflow.set_experiment("med-size-experiment")
171          # Experiment with a mix of nested runs & non-nested runs
172          for i in range(3):
173              with mlflow.start_run(run_name=f"parent-with-children-{i}"):
174                  params = {rand_str(): rand_str() for _ in range(5)}
175                  metrics = {rand_str(): [rand()] for _ in range(5)}
176                  log_params(params)
177                  log_metrics(metrics)
178                  for j in range(10):
179                      with mlflow.start_run(run_name=f"child-{j}", nested=True):
180                          params = {rand_str(): rand_str() for _ in range(30)}
181                          metrics = {rand_str(): [rand()] for idx in range(30)}
182                          log_params(params)
183                          log_metrics(metrics)
184              for j in range(10):
185                  with mlflow.start_run(run_name=f"unnested-{i}-{j}"):
186                      params = {rand_str(): rand_str() for _ in range(5)}
187                      metrics = {rand_str(): [rand()] for _ in range(5)}
188          mlflow.set_experiment("hitting-metric-param-limits")
189          for i in range(50):
190              with mlflow.start_run(run_name=f"big-run-{i}"):
191                  params = {str(j) + "a" * 250: "b" * 1000 for j in range(100)}
192                  metrics = {str(j) + "a" * 250: [rand()] for j in range(100)}
193                  log_metrics(metrics)
194                  log_params(params)