/ examples / databricks / log_runs.py
log_runs.py
 1  """
 2  Logs MLflow runs in Databricks from an external host.
 3  
 4  How to run:
 5  $ python examples/databricks/log_runs.py --host <host> --token <token> --user <user> [--experiment-id 123]
 6  
 7  See also:
 8  https://docs.databricks.com/dev-tools/api/latest/authentication.html#generate-a-personal-access-token
 9  """
10  
11  import argparse
12  import os
13  import uuid
14  
15  from sklearn import datasets, svm
16  from sklearn.model_selection import GridSearchCV, ParameterGrid
17  
18  import mlflow
19  
20  
def main(argv=None):
    """Log example scikit-learn runs to MLflow in Databricks from an external host.

    Parses the command line, exports the Databricks credentials through the
    ``DATABRICKS_HOST`` and ``DATABRICKS_TOKEN`` environment variables, selects
    the target experiment, and then fits SVM classifiers on the iris dataset
    with scikit-learn autologging enabled: once through ``GridSearchCV`` and
    once per parameter combination as standalone fits.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default), ``argparse`` falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by ``argparse`` when ``--host`` or ``--token`` is
            missing, or when neither ``--experiment-id`` nor ``--user`` is
            given (the user name is needed to build the new experiment path).
    """
    parser = argparse.ArgumentParser()
    # Host and token are written to os.environ below, which rejects None, so
    # they must be supplied on the command line.
    parser.add_argument("--host", required=True, help="Databricks workspace URL")
    parser.add_argument("--token", required=True, help="Databricks personal access token")
    parser.add_argument("--user", help="Databricks username")
    parser.add_argument(
        "--experiment-id",
        default=None,
        help="ID of the experiment to log runs in. If unspecified, a new experiment will be created.",
    )
    args = parser.parse_args(argv)

    # Without an experiment ID the experiment path is derived from the user
    # name; fail early instead of silently creating "/Users/None/...".
    if not args.experiment_id and not args.user:
        parser.error("--user is required when --experiment-id is not specified")

    os.environ["DATABRICKS_HOST"] = args.host
    os.environ["DATABRICKS_TOKEN"] = args.token

    mlflow.set_tracking_uri("databricks")
    if args.experiment_id:
        experiment = mlflow.set_experiment(experiment_id=args.experiment_id)
    else:
        # A random suffix keeps every invocation in its own experiment.
        experiment = mlflow.set_experiment(f"/Users/{args.user}/{uuid.uuid4().hex}")

    print(f"Logging runs in {args.host}#/mlflow/experiments/{experiment.experiment_id}")
    # NOTE(review): max_tuning_runs=None is presumably "no cap on logged
    # tuning runs" -- confirm against the mlflow.sklearn.autolog documentation.
    mlflow.sklearn.autolog(max_tuning_runs=None)
    iris = datasets.load_iris()
    parameters = {"kernel": ("linear", "rbf"), "C": [1, 5, 10]}
    clf = GridSearchCV(svm.SVC(), parameters)
    clf.fit(iris.data, iris.target)

    # Log unnested runs
    for params in ParameterGrid(parameters):
        clf = svm.SVC(**params)
        clf.fit(iris.data, iris.target)
53  
54  
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()