test_text_part_feature.py
"""Tests for the ``BeginsWith`` and ``EndsWith`` text-part feature generators."""

from typing import List

import pandas as pd
import pytest

from evidently.legacy.features.text_part_feature import BeginsWith
from evidently.legacy.features.text_part_feature import EndsWith
from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.utils.data_preprocessing import create_data_definition

# Input strings shared by every test case; each "expected" list below holds
# one boolean per entry, in this order.
test_data = [
    "abcdefg",
    "aBcdeFg",
    "cdefg",
    "abcde",
    "abfg",
]

# (substr, case_sensitive, expected) triples for the prefix check.
_BEGINS_WITH_CASES = [
    ("abc", True, [True, False, False, True, False]),
    ("abc", False, [True, True, False, True, False]),
    ("ABC", False, [True, True, False, True, False]),
]

# (substr, case_sensitive, expected) triples for the suffix check.
_ENDS_WITH_CASES = [
    ("efg", True, [True, False, True, False, False]),
    ("efg", False, [True, True, True, False, False]),
]


def _generate_for_test_data(feature_generator):
    """Run ``feature_generator`` over ``test_data`` placed in ``column_1``.

    The data definition is built from the frame with a default
    ``ColumnMapping`` and no reference data, then handed to
    ``generate_feature`` together with the frame.
    """
    frame = pd.DataFrame({"column_1": test_data})
    definition = create_data_definition(None, frame, ColumnMapping())
    return feature_generator.generate_feature(data=frame, data_definition=definition)


@pytest.mark.parametrize(("substr", "case", "expected"), _BEGINS_WITH_CASES)
def test_text_begins_feature(substr: str, case: bool, expected: List[bool]):
    """``BeginsWith`` must yield one column of flags equal to ``expected``."""
    generator = BeginsWith("column_1", substr, case_sensitive=case)
    actual = _generate_for_test_data(generator)

    # The generated frame is expected to hold exactly one column, named by
    # the generator itself.
    wanted = pd.DataFrame({generator._feature_column_name(): expected})
    assert actual.equals(wanted)


@pytest.mark.parametrize(("substr", "case", "expected"), _ENDS_WITH_CASES)
def test_text_ends_feature(substr: str, case: bool, expected: List[bool]):
    """``EndsWith`` must yield one column of flags equal to ``expected``."""
    generator = EndsWith("column_1", substr, case_sensitive=case)
    actual = _generate_for_test_data(generator)

    # The generated frame is expected to hold exactly one column, named by
    # the generator itself.
    wanted = pd.DataFrame({generator._feature_column_name(): expected})
    assert actual.equals(wanted)