test_correlations.py
import numpy as np
import pandas as pd

from evidently import BinaryClassification
from evidently import DataDefinition
from evidently import Dataset
from evidently import Report
from evidently.core.metric_types import DataframeValue
from evidently.metrics import ColumnCorrelationMatrix
from evidently.metrics import ColumnCorrelations
from evidently.metrics.data_quality import CorrelationMatrix


def test_column_correlations():
    """Run ``ColumnCorrelations`` for column ``a`` and check the ``cramer_v`` matrix.

    The result is looked up in the run context through a
    ``ColumnCorrelationMatrix`` key and must be a ``DataframeValue`` holding a
    single row that describes column ``b``.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    dataset = Dataset.from_pandas(frame)

    correlations = ColumnCorrelations(column_name="a")
    snapshot = Report(metrics=[correlations]).run(dataset)

    # The result is fetched with a separate, per-kind metric key rather than
    # with the ``ColumnCorrelations`` instance that was passed to the report.
    lookup_key = ColumnCorrelationMatrix(column_name="a", kind="cramer_v")
    outcome = snapshot.context.get_metric_result(lookup_key)
    assert isinstance(outcome, DataframeValue)

    expected = pd.DataFrame([{"kind": "cramer_v", "column_name": "b", "value": 1.0}])
    pd.testing.assert_frame_equal(outcome.value, expected)


def test_dataset_correlations():
    """Run ``CorrelationMatrix`` on a dataset with a binary classification definition.

    The input has 3000 rows with missing values in the target, the prediction
    and one feature column. The resulting ``DataframeValue`` is compared with a
    2x2 frame indexed and labelled by ``my_target`` / ``my_prediction``.
    """
    frame = pd.DataFrame(
        {
            "my_target": [1, np.nan, 3] * 1000,
            "my_prediction": [1, 2, np.nan] * 1000,
            "feature_1": [1, 2, 3] * 1000,
            "feature_2": ["a", np.nan, "a"] * 1000,
        }
    )
    definition = DataDefinition(
        classification=[BinaryClassification(target="my_target", prediction_labels="my_prediction")]
    )
    dataset = Dataset.from_pandas(frame, data_definition=definition)

    # The same metric instance is used both to build the report and to look
    # up its result afterwards.
    matrix_metric = CorrelationMatrix()
    snapshot = Report(metrics=[matrix_metric]).run(dataset)

    outcome = snapshot.context.get_metric_result(matrix_metric)
    assert isinstance(outcome, DataframeValue)

    expected_rows = [
        {"my_target": 1, "my_prediction": np.nan},
        {"my_target": np.nan, "my_prediction": 1},
    ]
    expected = pd.DataFrame(expected_rows, index=["my_target", "my_prediction"])
    pd.testing.assert_frame_equal(outcome.value, expected)