/ tests / test_preset / test_data_drift.py
test_data_drift.py
 1  import pandas as pd
 2  
 3  from evidently.legacy.pipeline.column_mapping import ColumnMapping
 4  from evidently.legacy.test_preset import DataDriftTestPreset
 5  from evidently.legacy.test_suite import TestSuite
 6  
 7  
 8  def test_data_drift_preset():
 9      test_current_dataset = pd.DataFrame(
10          {
11              "category_feature_1": ["y", "y", "n", "y"],
12              "category_feature_2": [0, 1, 0, 4],
13              "numerical_feature_1": [0.4, -12, 7, 234],
14              "numerical_feature_2": [4, -2, 53, 23.4],
15              "target": [1, 1, 0, 1],
16              "prediction": [0, 0, 1, 0],
17          }
18      )
19      test_reference_dataset = pd.DataFrame(
20          {
21              "category_feature_1": ["y", "n", "n", "y"],
22              "category_feature_2": [0, 1, 4, 0],
23              "numerical_feature_1": [0, 1, 2, 5],
24              "numerical_feature_2": [0.1, 4.1, 1.2, 5],
25              "target": [0, 0, 0, 1],
26              "prediction": [0, 0, 0, 1],
27          }
28      )
29      data_quality_suite = TestSuite(
30          tests=[
31              DataDriftTestPreset(),
32          ]
33      )
34      column_mapping = ColumnMapping(
35          numerical_features=["numerical_feature_1", "numerical_feature_2"],
36          categorical_features=["category_feature_1", "category_feature_2"],
37      )
38      data_quality_suite.run(
39          current_data=test_current_dataset, reference_data=test_reference_dataset, column_mapping=column_mapping
40      )
41      data_quality_suite._inner_suite.raise_for_error()
42      assert data_quality_suite
43      assert len(data_quality_suite.as_dict()["tests"]) == 7