# test_multicolumn.py
from typing import List
from typing import Optional

import pandas as pd

from evidently._pydantic_compat import PrivateAttr
from evidently.legacy.base_metric import ColumnName
from evidently.legacy.core import ColumnType
from evidently.legacy.features.feature_generator import FeatureGenerator
from evidently.legacy.features.generated_features import GeneratedFeatures
from evidently.legacy.metrics import ColumnSummaryMetric
from evidently.legacy.options.base import Options
from evidently.legacy.report import Report
from evidently.legacy.utils.data_preprocessing import DataDefinition


class MultiColumnFeature(GeneratedFeatures):
    """Test feature that emits two subcolumns, "+1" and "+5", from one source column.

    Each call to ``generate_features`` bumps a private counter so the tests
    can check how many times the feature was actually computed.
    """

    class Config:
        alias_required = False

    # Name of the input column the two subcolumns are computed from.
    source_column: str
    # Number of generate_features invocations on this instance.
    _called_count: int = PrivateAttr(0)

    def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
        """Return a frame with columns "+1" and "+5": the source column shifted by 1 and by 5."""
        self._called_count += 1
        base = data[self.source_column]
        shifted = {"+1": base + 1, "+5": base + 5}
        return pd.DataFrame(shifted)

    def list_columns(self) -> List["ColumnName"]:
        """Return one column reference per generated subcolumn, "+1" first, then "+5"."""
        return [self._create_column(subcolumn=name) for name in ("+1", "+5")]

    def get_type(self, subcolumn: Optional[str] = None):
        """Report every subcolumn as numerical; ``subcolumn`` is not inspected."""
        return ColumnType.Numerical


def _expected_columns(feature: MultiColumnFeature, plus_one: List[int], plus_five: List[int]) -> dict:
    """Map the fingerprint-prefixed "+1"/"+5" column names of ``feature`` to the expected values."""
    fingerprint = feature.get_fingerprint()
    return {f"{fingerprint}.+1": plus_one, f"{fingerprint}.+5": plus_five}


def test_feature_generator():
    """Run two multi-column features through FeatureGenerator and check per-feature and combined output."""
    feature_a = MultiColumnFeature(source_column="a")
    feature_b = MultiColumnFeature(source_column="b")
    generator = FeatureGenerator(features=[feature_a, feature_b])

    current = pd.DataFrame({"a": [1, 2], "b": [11, 12]})
    reference = pd.DataFrame({"a": [3, 4], "b": [13, 14]})
    generator.run(current_data=current, reference_data=reference)

    # Feature built on column "a".
    a_cur, a_ref = generator.get_features(feature_a)
    expected_a_cur = pd.DataFrame(_expected_columns(feature_a, [2, 3], [6, 7]))
    expected_a_ref = pd.DataFrame(_expected_columns(feature_a, [4, 5], [8, 9]))
    pd.testing.assert_frame_equal(a_cur, expected_a_cur)
    pd.testing.assert_frame_equal(a_ref, expected_a_ref)

    # Feature built on column "b".
    b_cur, b_ref = generator.get_features(feature_b)
    expected_b_cur = pd.DataFrame(_expected_columns(feature_b, [12, 13], [16, 17]))
    expected_b_ref = pd.DataFrame(_expected_columns(feature_b, [14, 15], [18, 19]))
    pd.testing.assert_frame_equal(b_cur, expected_b_cur)
    pd.testing.assert_frame_equal(b_ref, expected_b_ref)

    # Without an argument, get_features is expected to return both features' columns side by side.
    combined_cur, combined_ref = generator.get_features()
    pd.testing.assert_frame_equal(combined_cur, a_cur.join(b_cur))
    pd.testing.assert_frame_equal(combined_ref, a_ref.join(b_ref))

    # Each feature is computed exactly twice: one call for current, one for reference.
    assert feature_a._called_count == 2
    assert feature_b._called_count == 2


def test_multicolumn_in_report():
    """Use two subcolumns of equally-configured features in a Report and check the resulting datasets.

    The expected datasets hold a single "+1"/"+5" column pair named with the
    first feature's fingerprint, and the combined call count of both feature
    instances must be 2.
    """
    current = pd.DataFrame({"a": [1, 2]})
    reference = pd.DataFrame({"a": [3, 4]})

    first = MultiColumnFeature(source_column="a")
    second = MultiColumnFeature(source_column="a")
    summary_plus_one = ColumnSummaryMetric(column_name=first.as_column(subcolumn="+1"))
    summary_plus_five = ColumnSummaryMetric(column_name=second.as_column(subcolumn="+5"))
    report = Report(metrics=[summary_plus_one, summary_plus_five])
    report.run(current_data=current, reference_data=reference)

    result_cur, result_ref = report.datasets()
    expected_cur = pd.DataFrame({"a": [1, 2], **_expected_columns(first, [2, 3], [6, 7])})
    expected_ref = pd.DataFrame({"a": [3, 4], **_expected_columns(first, [4, 5], [8, 9])})
    pd.testing.assert_frame_equal(result_cur, expected_cur)
    pd.testing.assert_frame_equal(result_ref, expected_ref)

    # Across both instances: one call for current plus one for reference.
    # NOTE(review): presumably identically-configured features are deduplicated
    # by fingerprint inside the report - confirm against evidently.
    total_calls = first._called_count + second._called_count
    assert total_calls == 2