# initial_models.py
# Snapshot of MLflow DB models as of the 0.9.1 release, prior to the first database migration.
# This file corresponds to the first database schema that we can reasonably expect users to be
# running and exists to test that the oldest database schema can be brought up-to-date.
# Copied from https://github.com/mlflow/mlflow/blob/v0.9.1/mlflow/store/dbmodels/models.py, with
# modifications to substitute constants from MLflow with hard-coded values (e.g. replacing
# SourceType.to_string(SourceType.NOTEBOOK) with the constant "NOTEBOOK").
import time

from sqlalchemy import (
    BigInteger,
    CheckConstraint,
    Column,
    Float,
    ForeignKey,
    Integer,
    PrimaryKeyConstraint,
    String,
)
from sqlalchemy.orm import backref, declarative_base, relationship

Base = declarative_base()


# Allowed values for ``SqlRun.source_type`` (enforced by a CHECK constraint on ``runs``).
SourceTypes = [
    "NOTEBOOK",
    "JOB",
    "LOCAL",
    "UNKNOWN",
    "PROJECT",
]

# Allowed values for ``SqlRun.status`` (enforced by a CHECK constraint on ``runs``).
RunStatusTypes = [
    "SCHEDULED",
    "FAILED",
    "FINISHED",
    "RUNNING",
]


class SqlExperiment(Base):
    """
    DB model for :py:class:`mlflow.entities.Experiment`. These are recorded in ``experiments``
    table.
    """

    __tablename__ = "experiments"

    experiment_id = Column(Integer, autoincrement=True)
    """
    Experiment ID: `Integer`. *Primary Key* for ``experiments`` table.
    """
    name = Column(String(256), unique=True, nullable=False)
    """
    Experiment name: `String` (limit 256 characters). Defined as *Unique* and *Non null* in
    table schema.
    """
    artifact_location = Column(String(256), nullable=True)
    """
    Default artifact location for this experiment: `String` (limit 256 characters). Could be
    *null*.
    """
    lifecycle_stage = Column(String(32), default="active")
    """
    Lifecycle Stage of experiment: `String` (limit 32 characters).
    Can be either ``active`` (default) or ``deleted``.
    """

    __table_args__ = (
        CheckConstraint(lifecycle_stage.in_(["active", "deleted"]), name="lifecycle_stage"),
        PrimaryKeyConstraint("experiment_id", name="experiment_pk"),
    )

    def __repr__(self):
        return f"<SqlExperiment ({self.experiment_id}, {self.name})>"


class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """

    __tablename__ = "runs"

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default="LOCAL")
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
    ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default="SCHEDULED")
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
    ``FINISHED``, ``FAILED``.
    """
    # The default must be a callable: a bare ``int(time.time())`` would be evaluated once when
    # this module is imported, stamping every later row with the import time. This is a
    # Python-side default only, so the table definition emitted for the schema is unaffected.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    deleted_time = Column(BigInteger, nullable=True, default=None)
    """
    Run deleted time: `BigInteger`. Timestamp of when run is deleted, defaults to none.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default="active")
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
    Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey("experiments.experiment_id"))
    """
    Experiment ID to which this run belongs to: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship("SqlExperiment", backref=backref("runs", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name="source_type"),
        CheckConstraint(status.in_(RunStatusTypes), name="status"),
        CheckConstraint(lifecycle_stage.in_(["active", "deleted"]), name="lifecycle_stage"),
        PrimaryKeyConstraint("run_uuid", name="run_pk"),
    )


class SqlTag(Base):
    """
    DB model for :py:class:`mlflow.entities.RunTag`. These are recorded in ``tags`` table.
    """

    __tablename__ = "tags"

    key = Column(String(250))
    """
    Tag key: `String` (limit 250 characters). Part of *Primary Key* for ``tags`` table.
    """
    value = Column(String(250), nullable=True)
    """
    Value associated with tag: `String` (limit 250 characters). Could be *null*.
    """
    run_uuid = Column(String(32), ForeignKey("runs.run_uuid"))
    """
    Run UUID to which this tag belongs to: Part of *Primary Key* for ``tags`` table.
    *Foreign Key* into ``runs`` table.
    """
    run = relationship("SqlRun", backref=backref("tags", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlRun`.
    """

    __table_args__ = (PrimaryKeyConstraint("key", "run_uuid", name="tag_pk"),)

    def __repr__(self):
        return f"<SqlRunTag({self.key}, {self.value})>"


class SqlMetric(Base):
    """
    DB model for rows of the ``metrics`` table. Each row is identified by the combination of
    ``key``, ``timestamp`` and ``run_uuid``.
    """

    __tablename__ = "metrics"

    key = Column(String(250))
    """
    Metric key: `String` (limit 250 characters). Part of *Primary Key* for ``metrics`` table.
    """
    value = Column(Float, nullable=False)
    """
    Metric value: `Float`. Defined as *Non-null* in schema.
    """
    timestamp = Column(BigInteger, default=lambda: int(time.time()))
    """
    Timestamp recorded for this metric entry: `BigInteger`. Part of *Primary Key* for
    ``metrics`` table.
    """
    run_uuid = Column(String(32), ForeignKey("runs.run_uuid"))
    """
    Run UUID to which this metric belongs to: Part of *Primary Key* for ``metrics`` table.
    *Foreign Key* into ``runs`` table.
    """
    run = relationship("SqlRun", backref=backref("metrics", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlRun`.
    """

    __table_args__ = (PrimaryKeyConstraint("key", "timestamp", "run_uuid", name="metric_pk"),)

    def __repr__(self):
        return f"<SqlMetric({self.key}, {self.value}, {self.timestamp})>"


class SqlParam(Base):
    """
    DB model for rows of the ``params`` table. Each row is identified by the combination of
    ``key`` and ``run_uuid``.
    """

    __tablename__ = "params"

    key = Column(String(250))
    """
    Param key: `String` (limit 250 characters). Part of *Primary Key* for ``params`` table.
    """
    value = Column(String(250), nullable=False)
    """
    Param value: `String` (limit 250 characters). Defined as *Non-null* in schema.
    """
    run_uuid = Column(String(32), ForeignKey("runs.run_uuid"))
    """
    Run UUID to which this param belongs to: Part of *Primary Key* for ``params`` table.
    *Foreign Key* into ``runs`` table.
    """
    run = relationship("SqlRun", backref=backref("params", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlRun`.
    """

    __table_args__ = (PrimaryKeyConstraint("key", "run_uuid", name="param_pk"),)

    def __repr__(self):
        return f"<SqlParam({self.key}, {self.value})>"