/ tests / features / test_text_part_feature.py
test_text_part_feature.py
 1  from typing import List
 2  
 3  import pandas as pd
 4  import pytest
 5  
 6  from evidently.legacy.features.text_part_feature import BeginsWith
 7  from evidently.legacy.features.text_part_feature import EndsWith
 8  from evidently.legacy.pipeline.column_mapping import ColumnMapping
 9  from evidently.legacy.utils.data_preprocessing import create_data_definition
10  
# Shared input strings used as the "column_1" values in the tests below.
# Each parametrized `expected` list is positional: entry i is the result
# expected for test_data[i].
test_data = ["abcdefg", "aBcdeFg", "cdefg", "abcde", "abfg"]
18  
19  
@pytest.mark.parametrize(
    "substr, case, expected",
    [
        ("abc", True, [True, False, False, True, False]),
        ("abc", False, [True, True, False, True, False]),
        ("ABC", False, [True, True, False, True, False]),
    ],
)
def test_text_begins_feature(substr: str, case: bool, expected: List[bool]):
    """Check the column ``BeginsWith`` generates for ``test_data``.

    Args:
        substr: Prefix handed to ``BeginsWith``.
        case: Value passed as ``case_sensitive``.
        expected: Per-row booleans, in the same order as ``test_data``.
    """
    frame = pd.DataFrame({"column_1": test_data})
    definition = create_data_definition(None, frame, ColumnMapping())
    feature = BeginsWith("column_1", substr, case_sensitive=case)

    actual = feature.generate_feature(data=frame, data_definition=definition)

    # The output is expected to be a single column named by the feature itself.
    wanted = pd.DataFrame({feature._feature_column_name(): expected})
    assert actual.equals(wanted)
38  
39  
@pytest.mark.parametrize(
    "substr, case, expected",
    [
        ("efg", True, [True, False, True, False, False]),
        ("efg", False, [True, True, True, False, False]),
    ],
)
def test_text_ends_feature(substr: str, case: bool, expected: List[bool]):
    """Check the column ``EndsWith`` generates for ``test_data``.

    Args:
        substr: Suffix handed to ``EndsWith``.
        case: Value passed as ``case_sensitive``.
        expected: Per-row booleans, in the same order as ``test_data``.
    """
    frame = pd.DataFrame({"column_1": test_data})
    definition = create_data_definition(None, frame, ColumnMapping())
    feature = EndsWith("column_1", substr, case_sensitive=case)

    actual = feature.generate_feature(data=frame, data_definition=definition)

    # The output is expected to be a single column named by the feature itself.
    wanted = pd.DataFrame({feature._feature_column_name(): expected})
    assert actual.equals(wanted)