test_data_drift_tests.py
import json

import pandas as pd

from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.test_suite import TestSuite
from evidently.legacy.tests import TestColumnDrift
from evidently.legacy.tests import TestNumberOfDriftedColumns
from evidently.legacy.tests import TestShareOfDriftedColumns


def _make_dataset(labels: list) -> pd.DataFrame:
    """Build the four-column frame shared by the drifted-columns tests.

    The two feature columns are fixed; ``labels`` fills both the ``target``
    and the ``prediction`` column (each column gets its own copy of the list).
    """
    return pd.DataFrame(
        {
            "category_feature": ["n", "d", "p", "n"],
            "numerical_feature": [0, 1, 2, 5],
            "target": list(labels),
            "prediction": list(labels),
        }
    )


def test_data_drift_test_number_of_drifted_features() -> None:
    """Run TestNumberOfDriftedColumns on identical current/reference data.

    The default condition and ``lt=1`` are expected to pass, while
    ``is_in=[234, 14]`` is expected to fail.
    """
    dataset = _make_dataset([0, 0, 0, 1])

    suite = TestSuite(tests=[TestNumberOfDriftedColumns()])
    suite.run(current_data=dataset, reference_data=dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    suite = TestSuite(tests=[TestNumberOfDriftedColumns(is_in=[234, 14])])
    suite.run(current_data=dataset, reference_data=dataset)
    # Surface execution errors first so the assertion below is satisfied by a
    # failed condition rather than by a crashed test.
    suite._inner_suite.raise_for_error()
    assert not suite

    suite = TestSuite(tests=[TestNumberOfDriftedColumns(lt=1)])
    suite.run(current_data=dataset, reference_data=dataset)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_data_drift_test_number_of_drifted_features_json_render() -> None:
    """Compare the JSON output of TestNumberOfDriftedColumns with the expected payload.

    Current and reference data differ only in the ``target`` and
    ``prediction`` columns.
    """
    current_dataset = _make_dataset([0, 0, 0, 1])
    reference_dataset = _make_dataset([1, 1, 0, 1])

    suite = TestSuite(tests=[TestNumberOfDriftedColumns()])
    suite.run(current_data=current_dataset, reference_data=reference_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The drift is detected for 0 out of 4 features. The test threshold is lt=1.",
        "group": "data_drift",
        "name": "Number of Drifted Features",
        "parameters": {
            "condition": {"lt": 1},
            "features": {
                "category_feature": {
                    "detected": False,
                    "score": 1.0,
                    "stattest": "chi-square p_value",
                    "threshold": 0.05,
                },
                "numerical_feature": {
                    "detected": False,
                    "score": 1.0,
                    "stattest": "chi-square p_value",
                    "threshold": 0.05,
                },
                "prediction": {
                    "detected": False,
                    "score": 0.157,
                    "stattest": "Z-test p_value",
                    "threshold": 0.05,
                },
                "target": {
                    "detected": False,
                    "score": 0.157,
                    "stattest": "Z-test p_value",
                    "threshold": 0.05,
                },
            },
        },
        "status": "SUCCESS",
    }


def test_data_drift_test_share_of_drifted_features() -> None:
    """Run TestShareOfDriftedColumns on identical current/reference data.

    The default condition and ``lte=0.5`` are expected to pass, while
    ``gt=0.6`` is expected to fail.
    """
    dataset = _make_dataset([1, 0, 0, 1])

    suite = TestSuite(tests=[TestShareOfDriftedColumns()])
    suite.run(current_data=dataset, reference_data=dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    suite = TestSuite(tests=[TestShareOfDriftedColumns(gt=0.6)])
    suite.run(current_data=dataset, reference_data=dataset)
    # Surface execution errors first so the assertion below is satisfied by a
    # failed condition rather than by a crashed test.
    suite._inner_suite.raise_for_error()
    assert not suite

    suite = TestSuite(tests=[TestShareOfDriftedColumns(lte=0.5)])
    suite.run(current_data=dataset, reference_data=dataset)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_data_drift_test_share_of_drifted_features_json_render() -> None:
    """Compare the JSON output of TestShareOfDriftedColumns with the expected payload.

    The same frame is used as both current and reference data.
    """
    dataset = _make_dataset([0, 0, 0, 1])

    suite = TestSuite(tests=[TestShareOfDriftedColumns()])
    suite.run(current_data=dataset, reference_data=dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The drift is detected for 0% features (0 out of 4). The test threshold is lt=0.3",
        "group": "data_drift",
        "name": "Share of Drifted Columns",
        "parameters": {
            "condition": {"lt": 0.3},
            "features": {
                "category_feature": {
                    "detected": False,
                    "score": 1.0,
                    "stattest": "chi-square p_value",
                    "threshold": 0.05,
                },
                "numerical_feature": {
                    "detected": False,
                    "score": 1.0,
                    "stattest": "chi-square p_value",
                    "threshold": 0.05,
                },
                "prediction": {
                    "detected": False,
                    "score": 1.0,
                    "stattest": "Z-test p_value",
                    "threshold": 0.05,
                },
                "target": {"detected": False, "score": 1.0, "stattest": "Z-test p_value", "threshold": 0.05},
            },
        },
        "status": "SUCCESS",
    }


def test_data_drift_test_feature_value_drift() -> None:
    """Run TestColumnDrift for ``feature_1`` with an explicit default ColumnMapping.

    The suite is expected to pass and to render both HTML and JSON output.
    """
    test_current_dataset = pd.DataFrame({"feature_1": [0, 0, 0, 1], "target": [0, 0, 0, 1], "prediction": [0, 0, 0, 1]})
    test_reference_dataset = pd.DataFrame(
        {"feature_1": [0, 1, 2, 0], "target": [0, 0, 0, 1], "prediction": [0, 0, 0, 1]}
    )

    suite = TestSuite(tests=[TestColumnDrift(column_name="feature_1")])
    suite.run(current_data=test_current_dataset, reference_data=test_reference_dataset, column_mapping=ColumnMapping())
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_data_drift_test_feature_value_drift_json_render() -> None:
    """Compare the JSON output of TestColumnDrift for ``feature_1`` with the expected payload."""
    test_current_dataset = pd.DataFrame({"feature_1": [0, 0, 0, 1], "target": [0, 0, 0, 1], "prediction": [0, 0, 0, 1]})
    test_reference_dataset = pd.DataFrame(
        {"feature_1": [1, 1, 2, 0], "target": [0, 0, 0, 1], "prediction": [0, 0, 0, 1]}
    )

    suite = TestSuite(tests=[TestColumnDrift(column_name="feature_1")])
    suite.run(current_data=test_current_dataset, reference_data=test_reference_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The drift score for the feature **feature_1** is 0.064."
        " The drift detection method is chi-square p_value. The drift detection threshold is 0.05.",
        "group": "data_drift",
        "name": "Drift per Column",
        "parameters": {
            "detected": False,
            "score": 0.064,
            "stattest": "chi-square p_value",
            "threshold": 0.05,
            "column_name": "feature_1",
        },
        "status": "SUCCESS",
    }