test_evaluation_entity.py
from mlflow.entities import Metric
from mlflow.evaluation.assessment import AssessmentEntity, AssessmentSource
from mlflow.evaluation.evaluation import EvaluationEntity
from mlflow.evaluation.evaluation_tag import EvaluationTag


def test_evaluation_equality():
    source_1 = AssessmentSource(source_type="HUMAN", source_id="user_1")
    metric_1 = Metric(key="metric1", value=1.1, timestamp=123, step=0)
    tag_1 = EvaluationTag(key="tag1", value="value1")

    # Valid evaluations
    evaluation_1 = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
        outputs={"prediction": 0.5},
        request_id="req1",
        targets={"actual": 0.6},
        assessments=[
            AssessmentEntity(
                evaluation_id="eval1",
                name="relevance",
                source=source_1,
                timestamp=123456789,
                numeric_value=0.9,
            )
        ],
        metrics=[metric_1],
        tags=[tag_1],
        error_code="E001",
        error_message="An error occurred",
    )
    evaluation_2 = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
        outputs={"prediction": 0.5},
        request_id="req1",
        targets={"actual": 0.6},
        assessments=[
            AssessmentEntity(
                evaluation_id="eval1",
                name="relevance",
                source=source_1,
                timestamp=123456789,
                numeric_value=0.9,
            )
        ],
        metrics=[metric_1],
        tags=[tag_1],
        error_code="E001",
        error_message="An error occurred",
    )
    evaluation_3 = EvaluationEntity(
        evaluation_id="eval2",
        run_id="run2",
        inputs_id="inputs2",
        inputs={"feature1": 1.0, "feature2": 2.0},
        outputs={"prediction": 0.5},
        request_id="req2",
        targets={"actual": 0.7},
        assessments=[
            AssessmentEntity(
                evaluation_id="eval2",
                name="relevance",
                source=source_1,
                timestamp=123456789,
                numeric_value=0.8,
            )
        ],
        metrics=[Metric(key="metric1", value=1.2, timestamp=123, step=0)],
        tags=[EvaluationTag(key="tag2", value="value2")],
        error_code="E002",
        error_message="Another error occurred",
    )

    assert evaluation_1 == evaluation_2  # Same evaluation data
    assert evaluation_1 != evaluation_3  # Different evaluation data


def test_evaluation_properties():
    source = AssessmentSource(source_type="HUMAN", source_id="user_1")
    metric = Metric(key="metric1", value=1.1, timestamp=123, step=0)
    tag = EvaluationTag(key="tag1", value="value1")
    assessment = AssessmentEntity(
        evaluation_id="eval1",
        name="relevance",
        source=source,
        timestamp=123456789,
        numeric_value=0.9,
        rationale="Rationale text",
        metadata={"key1": "value1"},
    )
    evaluation = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
        outputs={"prediction": 0.5},
        request_id="req1",
        targets={"actual": 0.6},
        assessments=[assessment],
        metrics=[metric],
        tags=[tag],
        error_code="E001",
        error_message="An error occurred",
    )

    assert evaluation.evaluation_id == "eval1"
    assert evaluation.run_id == "run1"
    assert evaluation.inputs_id == "inputs1"
    assert evaluation.inputs == {"feature1": 1.0, "feature2": 2.0}
    assert evaluation.outputs == {"prediction": 0.5}
    assert evaluation.request_id == "req1"
    assert evaluation.targets == {"actual": 0.6}
    assert evaluation.error_code == "E001"
    assert evaluation.error_message == "An error occurred"
    assert evaluation.assessments == [assessment]
    assert evaluation.metrics == [metric]
    assert evaluation.tags == [tag]


def test_evaluation_to_from_dictionary():
    source = AssessmentSource(source_type="HUMAN", source_id="user_1")
    metric = Metric(key="metric1", value=1.1, timestamp=123, step=0)
    tag = EvaluationTag(key="tag1", value="value1")
    assessment = AssessmentEntity(
        evaluation_id="eval1",
        name="relevance",
        source=source,
        timestamp=123456789,
        numeric_value=0.9,
        rationale="Rationale text",
        metadata={"key1": "value1"},
    )
    evaluation = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
        outputs={"prediction": 0.5},
        request_id="req1",
        targets={"actual": 0.6},
        assessments=[assessment],
        metrics=[metric],
        tags=[tag],
        error_code="E001",
        error_message="An error occurred",
    )
    evaluation_dict = evaluation.to_dictionary()

    expected_dict = {
        "evaluation_id": "eval1",
        "run_id": "run1",
        "inputs_id": "inputs1",
        "inputs": {"feature1": 1.0, "feature2": 2.0},
        "outputs": {"prediction": 0.5},
        "request_id": "req1",
        "targets": {"actual": 0.6},
        "assessments": [assessment.to_dictionary()],
        "metrics": [metric.to_dictionary()],
        "tags": [tag.to_dictionary()],
        "error_code": "E001",
        "error_message": "An error occurred",
    }
    assert evaluation_dict == expected_dict

    recreated_evaluation = EvaluationEntity.from_dictionary(evaluation_dict)
    assert recreated_evaluation == evaluation


def test_evaluation_construction_with_minimal_required_fields():
    evaluation = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
    )
    evaluation_dict = evaluation.to_dictionary()
    recreated_evaluation = EvaluationEntity.from_dictionary(evaluation_dict)
    assert recreated_evaluation == evaluation
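

# Hedged sketch: assuming EvaluationEntity equality compares all constructor
# fields (consistent with test_evaluation_equality above, where fully identical
# entities compare equal), two evaluations that differ in only one field should
# compare unequal. The test name and the run_id-only variation are assumptions
# for illustration, built solely from constructor arguments exercised above.
def test_evaluation_inequality_when_single_field_differs():
    evaluation_a = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run1",
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
    )
    evaluation_b = EvaluationEntity(
        evaluation_id="eval1",
        run_id="run2",  # only run_id differs; assumed to participate in equality
        inputs_id="inputs1",
        inputs={"feature1": 1.0, "feature2": 2.0},
    )
    assert evaluation_a != evaluation_b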