log_runs.py
"""
Logs MLflow runs in Databricks from an external host.

How to run:
$ python examples/databricks/log_runs.py --host <host> --token <token> --user <user> [--experiment-id 123]

``--host`` and ``--token`` are always required. ``--user`` is only required
when ``--experiment-id`` is omitted, because it is used to build the path of
the newly created experiment.

See also:
https://docs.databricks.com/dev-tools/api/latest/authentication.html#generate-a-personal-access-token
"""

import argparse
import os
import uuid

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV, ParameterGrid

import mlflow


def _parse_args(argv=None):
    """Parse and validate the command-line arguments.

    Args:
        argv: Argument list to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``host``, ``token``, ``user``
        and ``experiment_id`` attributes.

    Exits with a usage error (via argparse) when ``--host`` or ``--token`` is
    missing, or when neither ``--user`` nor ``--experiment-id`` is given.
    """
    parser = argparse.ArgumentParser()
    # Both values are written into os.environ, which rejects None, so fail
    # early with a usage message instead of a TypeError later on.
    parser.add_argument("--host", required=True, help="Databricks workspace URL")
    parser.add_argument("--token", required=True, help="Databricks personal access token")
    parser.add_argument(
        "--user",
        help="Databricks username (required unless --experiment-id is given)",
    )
    parser.add_argument(
        "--experiment-id",
        default=None,
        help="ID of the experiment to log runs in. If unspecified, a new experiment will be created.",
    )
    args = parser.parse_args(argv)

    # Without a user the new experiment would be created under "/Users/None/...".
    if not args.experiment_id and not args.user:
        parser.error("--user is required when --experiment-id is not specified")
    return args


def main(argv=None):
    """Log a grid search and a set of standalone SVC fits to Databricks MLflow.

    Args:
        argv: Optional argument list; defaults to the process command line.
    """
    args = _parse_args(argv)

    # The "databricks" tracking URI picks its credentials up from these
    # environment variables.
    os.environ["DATABRICKS_HOST"] = args.host
    os.environ["DATABRICKS_TOKEN"] = args.token

    mlflow.set_tracking_uri("databricks")
    if args.experiment_id:
        experiment = mlflow.set_experiment(experiment_id=args.experiment_id)
    else:
        # A random suffix gives every invocation its own fresh experiment.
        experiment = mlflow.set_experiment(f"/Users/{args.user}/{uuid.uuid4().hex}")

    print(f"Logging runs in {args.host}#/mlflow/experiments/{experiment.experiment_id}")

    # NOTE(review): per the MLflow docs, max_tuning_runs=None should lift the
    # cap on child runs recorded for a parameter search -- confirm against the
    # installed MLflow version.
    mlflow.sklearn.autolog(max_tuning_runs=None)
    iris = datasets.load_iris()
    parameters = {"kernel": ("linear", "rbf"), "C": [1, 5, 10]}
    clf = GridSearchCV(svm.SVC(), parameters)
    clf.fit(iris.data, iris.target)

    # Log unnested runs: one standalone fit per parameter combination.
    for params in ParameterGrid(parameters):
        clf = svm.SVC(**params)
        clf.fit(iris.data, iris.target)


if __name__ == "__main__":
    main()