# test_data_quality.py
import json
from typing import Optional

import numpy as np
import pandas as pd
import pytest

from evidently.legacy.metric_preset import DataQualityPreset
from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.report import Report


@pytest.mark.parametrize(
    "current_data, reference_data, metric, column_mapping",
    [
        # Case 1: no reference data, preset limited to two feature columns,
        # mapping that names only the target column.
        (
            pd.DataFrame(
                {
                    "my_target": [1, 2, 3],
                    "prediction": [1, 2, 3],
                    "feature1": [1, 2, 3],
                    "feature2": ["a", "b", "c"],
                    "datetime": pd.date_range("2020-01-01", periods=3),
                }
            ),
            None,
            DataQualityPreset(columns=["feature1", "feature2"]),
            ColumnMapping(
                target="my_target",
            ),
        ),
        # Case 2: reference data is supplied; it contains NaN values and a
        # "feature3" column that the current data does not have.
        (
            pd.DataFrame(
                {
                    "myid": "some_id",
                    "my_target": [1, 2, 3],
                    "prediction": [1, 2, 3],
                    "feature1": [1, 2, 3],
                    "feature2": ["a", "b", "c"],
                    "datetime": pd.date_range("2020-01-01", periods=3),
                }
            ),
            pd.DataFrame(
                {
                    "myid": "some_id",
                    "my_target": [1, np.nan, 3, 3, 2, 1],
                    "prediction": [1, 2, 3, 3, 2, 1],
                    "feature1": [1, 2, 3, np.nan, 2, np.nan],
                    "feature2": [np.nan, "b", "c", "a", "b", "c"],
                    "feature3": [np.nan, "b", "c", "a", "b", "c"],
                    "datetime": pd.date_range("2020-01-01", periods=6),
                }
            ),
            DataQualityPreset(),
            ColumnMapping(
                target="my_target",
                id="myid",
                prediction="prediction",
                datetime="datetime",
            ),
        ),
        # Case 3: same current data and mapping as case 2, but without
        # reference data.
        (
            pd.DataFrame(
                {
                    "myid": "some_id",
                    "my_target": [1, 2, 3],
                    "prediction": [1, 2, 3],
                    "feature1": [1, 2, 3],
                    "feature2": ["a", "b", "c"],
                    "datetime": pd.date_range("2020-01-01", periods=3),
                }
            ),
            None,
            DataQualityPreset(),
            ColumnMapping(
                target="my_target",
                id="myid",
                prediction="prediction",
                datetime="datetime",
            ),
        ),
    ],
)
def test_data_quality_preset(
    current_data: pd.DataFrame,
    reference_data: Optional[pd.DataFrame],
    metric: DataQualityPreset,
    column_mapping: ColumnMapping,
) -> None:
    """Smoke-test ``DataQualityPreset`` inside a ``Report``.

    For each parametrized case the preset is wrapped in a report and run on
    the given data. The test then checks that ``show()`` returns a truthy
    value and that the report's JSON output decodes to a mapping containing
    a ``"metrics"`` key.
    """
    report = Report(metrics=[metric])
    report.run(
        current_data=current_data,
        reference_data=reference_data,
        column_mapping=column_mapping,
    )

    assert report.show()

    # Round-trip through the JSON string so that the serialized form itself
    # is what gets checked.
    payload = json.loads(report.json())
    assert "metrics" in payload