/ tests / metric_preset / test_data_quality.py
test_data_quality.py
 1  import json
 2  from typing import Optional
 3  
 4  import numpy as np
 5  import pandas as pd
 6  import pytest
 7  
 8  from evidently.legacy.metric_preset import DataQualityPreset
 9  from evidently.legacy.pipeline.column_mapping import ColumnMapping
10  from evidently.legacy.report import Report
11  
12  
def _three_row_frame(*, with_id: bool) -> pd.DataFrame:
    """Build the three-row current dataset used by every scenario.

    When ``with_id`` is true, a constant ``myid`` column is placed first,
    ahead of the target, prediction, feature and datetime columns.
    """
    columns: dict = {}
    if with_id:
        # A scalar value is broadcast by pandas to every row of the frame.
        columns["myid"] = "some_id"
    columns.update(
        {
            "my_target": [1, 2, 3],
            "prediction": [1, 2, 3],
            "feature1": [1, 2, 3],
            "feature2": ["a", "b", "c"],
            "datetime": pd.date_range("2020-01-01", periods=3),
        }
    )
    return pd.DataFrame(columns)


def _six_row_reference() -> pd.DataFrame:
    """Build the six-row reference dataset, which contains missing values
    and one extra column (``feature3``) absent from the current data."""
    return pd.DataFrame(
        {
            "myid": "some_id",
            "my_target": [1, np.nan, 3, 3, 2, 1],
            "prediction": [1, 2, 3, 3, 2, 1],
            "feature1": [1, 2, 3, np.nan, 2, np.nan],
            "feature2": [np.nan, "b", "c", "a", "b", "c"],
            "feature3": [np.nan, "b", "c", "a", "b", "c"],
            "datetime": pd.date_range("2020-01-01", periods=6),
        }
    )


def _fully_mapped_columns() -> ColumnMapping:
    """Return a fresh mapping naming the target, id, prediction and datetime columns."""
    return ColumnMapping(
        target="my_target",
        id="myid",
        prediction="prediction",
        datetime="datetime",
    )


@pytest.mark.parametrize(
    ("current_data", "reference_data", "metric", "column_mapping"),
    [
        # Explicit column selection, target-only mapping, no reference data.
        pytest.param(
            _three_row_frame(with_id=False),
            None,
            DataQualityPreset(columns=["feature1", "feature2"]),
            ColumnMapping(target="my_target"),
        ),
        # Default preset, full mapping, reference data with missing values.
        pytest.param(
            _three_row_frame(with_id=True),
            _six_row_reference(),
            DataQualityPreset(),
            _fully_mapped_columns(),
        ),
        # Default preset, full mapping, no reference data.
        pytest.param(
            _three_row_frame(with_id=True),
            None,
            DataQualityPreset(),
            _fully_mapped_columns(),
        ),
    ],
)
def test_data_quality_preset(
    current_data: pd.DataFrame,
    reference_data: Optional[pd.DataFrame],
    metric: DataQualityPreset,
    column_mapping: ColumnMapping,
) -> None:
    """Run a report holding only the preset and check that it renders and
    that its JSON output carries a top-level ``"metrics"`` key."""
    preset_report = Report(metrics=[metric])
    preset_report.run(
        current_data=current_data,
        reference_data=reference_data,
        column_mapping=column_mapping,
    )

    # Rendering is exercised first; it must yield a truthy result.
    rendered = preset_report.show()
    assert rendered

    # The serialized report must be valid JSON exposing the metrics section.
    payload = json.loads(preset_report.json())
    assert "metrics" in payload