/ tests / future / test_ui / test_filters.py
test_filters.py
  1  import numpy as np
  2  import pandas as pd
  3  import pytest
  4  
  5  from evidently.ui.service.datasets.filters import ContainsStrFilter
  6  from evidently.ui.service.datasets.filters import EndsWithFilter
  7  from evidently.ui.service.datasets.filters import EqualFilter
  8  from evidently.ui.service.datasets.filters import GTEFilter
  9  from evidently.ui.service.datasets.filters import GTFilter
 10  from evidently.ui.service.datasets.filters import LTEFilter
 11  from evidently.ui.service.datasets.filters import LTFilter
 12  from evidently.ui.service.datasets.filters import NotEqualFilter
 13  from evidently.ui.service.datasets.filters import StartsWithFilter
 14  from evidently.ui.service.datasets.filters import filter_df
 15  
 16  
 17  @pytest.fixture
 18  def sample_dataframe():
 19      """Create a sample dataframe for testing."""
 20      return pd.DataFrame(
 21          {
 22              "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
 23              "age": [25, 30, 35, 40, 45],
 24              "score": [85.5, 90.0, 75.5, 95.0, 88.5],
 25              "city": ["New York", "London", "Paris", "Tokyo", "Berlin"],
 26          }
 27      )
 28  
 29  
 30  def test_contains_str_filter(sample_dataframe):
 31      """Test ContainsStrFilter."""
 32      filter_obj = ContainsStrFilter(column="name", value="li")
 33      condition = filter_obj.condition(sample_dataframe)
 34      assert condition.sum() == 2  # Alice, Charlie
 35      filtered = sample_dataframe[condition]
 36      assert len(filtered) == 2
 37      assert "Alice" in filtered["name"].values
 38      assert "Charlie" in filtered["name"].values
 39  
 40  
 41  def test_starts_with_filter(sample_dataframe):
 42      """Test StartsWithFilter."""
 43      filter_obj = StartsWithFilter(column="name", value="C")
 44      condition = filter_obj.condition(sample_dataframe)
 45      assert condition.sum() == 1  # Charlie
 46      filtered = sample_dataframe[condition]
 47      assert len(filtered) == 1
 48      assert filtered.iloc[0]["name"] == "Charlie"
 49  
 50  
 51  def test_ends_with_filter(sample_dataframe):
 52      """Test EndsWithFilter."""
 53      filter_obj = EndsWithFilter(column="name", value="e")
 54      condition = filter_obj.condition(sample_dataframe)
 55      assert condition.sum() == 3  # Alice, Charlie, Eve
 56      filtered = sample_dataframe[condition]
 57      assert len(filtered) == 3
 58      assert "Alice" in filtered["name"].values
 59      assert "Charlie" in filtered["name"].values
 60      assert "Eve" in filtered["name"].values
 61  
 62  
 63  def test_equal_filter(sample_dataframe):
 64      """Test EqualFilter."""
 65      filter_obj = EqualFilter(column="age", value=30)
 66      condition = filter_obj.condition(sample_dataframe)
 67      assert condition.sum() == 1  # Bob
 68      filtered = sample_dataframe[condition]
 69      assert len(filtered) == 1
 70      assert filtered.iloc[0]["name"] == "Bob"
 71      assert filtered.iloc[0]["age"] == 30
 72  
 73  
 74  def test_not_equal_filter(sample_dataframe):
 75      """Test NotEqualFilter."""
 76      filter_obj = NotEqualFilter(column="age", value=30)
 77      condition = filter_obj.condition(sample_dataframe)
 78      assert condition.sum() == 4  # Everyone except Bob
 79      filtered = sample_dataframe[condition]
 80      assert len(filtered) == 4
 81      assert "Bob" not in filtered["name"].values
 82  
 83  
 84  def test_gt_filter(sample_dataframe):
 85      """Test GTFilter."""
 86      filter_obj = GTFilter(column="age", value=35)
 87      condition = filter_obj.condition(sample_dataframe)
 88      assert condition.sum() == 2  # David (40), Eve (45)
 89      filtered = sample_dataframe[condition]
 90      assert len(filtered) == 2
 91      assert all(filtered["age"] > 35)
 92  
 93  
 94  def test_gte_filter(sample_dataframe):
 95      """Test GTEFilter."""
 96      filter_obj = GTEFilter(column="age", value=35)
 97      condition = filter_obj.condition(sample_dataframe)
 98      assert condition.sum() == 3  # Charlie (35), David (40), Eve (45)
 99      filtered = sample_dataframe[condition]
100      assert len(filtered) == 3
101      assert all(filtered["age"] >= 35)
102  
103  
def test_lt_filter(sample_dataframe):
    """LTFilter on ``age`` with 35 selects the rows strictly below 35."""
    mask = LTFilter(column="age", value=35).condition(sample_dataframe)
    assert mask.sum() == 2  # Alice (25), Bob (30)

    matched = sample_dataframe[mask]
    assert len(matched) == 2

    below_threshold = matched["age"] < 35
    assert below_threshold.all()
112  
113  
def test_lte_filter(sample_dataframe):
    """LTEFilter on ``age`` with 35 selects the rows at or below 35."""
    mask = LTEFilter(column="age", value=35).condition(sample_dataframe)
    assert mask.sum() == 3  # Alice (25), Bob (30), Charlie (35)

    matched = sample_dataframe[mask]
    assert len(matched) == 3

    at_or_below = matched["age"] <= 35
    assert at_or_below.all()
122  
123  
def test_filter_df_single_filter(sample_dataframe):
    """filter_df with a one-element filter list returns only the matching row."""
    age_is_30 = EqualFilter(column="age", value=30)
    result = filter_df(sample_dataframe, [age_is_30])

    assert len(result) == 1
    first_row = result.iloc[0]
    assert first_row["name"] == "Bob"
130  
131  
def test_filter_df_multiple_filters(sample_dataframe):
    """filter_df combines several filters so that a row must satisfy all of them."""
    lower_bound = GTEFilter(column="age", value=30)
    upper_bound = LTFilter(column="age", value=40)
    result = filter_df(sample_dataframe, [lower_bound, upper_bound])

    assert len(result) == 2  # Bob (30), Charlie (35)

    result_names = result["name"].values
    for expected in ("Bob", "Charlie"):
        assert expected in result_names

    # Every surviving age lies in the half-open interval [30, 40).
    ages = result["age"]
    assert ((ages >= 30) & (ages < 40)).all()
143  
144  
def test_filter_df_no_filters(sample_dataframe):
    """Passing ``None`` as the filter list leaves the dataframe unchanged."""
    result = filter_df(sample_dataframe, None)

    expected_rows = len(sample_dataframe)
    assert len(result) == expected_rows
    pd.testing.assert_frame_equal(result, sample_dataframe)
150  
151  
def test_filter_df_empty_list(sample_dataframe):
    """Passing an empty filter list leaves the dataframe unchanged."""
    no_filters = []
    result = filter_df(sample_dataframe, no_filters)

    expected_rows = len(sample_dataframe)
    assert len(result) == expected_rows
    pd.testing.assert_frame_equal(result, sample_dataframe)
157  
158  
def test_filter_df_combined_string_and_number(sample_dataframe):
    """filter_df combines a string filter with a numeric filter.

    How the expected single row is derived:

    * Names containing a lowercase "a": Charlie and David. "Alice" has only
      an uppercase "A", so it would match only under case-insensitive
      matching; the one-row expectation below therefore relies on
      ContainsStrFilter being case-sensitive.
    * Scores above 80: Alice (85.5), Bob (90.0), David (95.0), Eve (88.5).
    * Rows satisfying both: David only, because Charlie's score of 75.5
      fails the score filter.
    """
    filters = [
        ContainsStrFilter(column="name", value="a"),
        GTFilter(column="score", value=80.0),
    ]
    filtered = filter_df(sample_dataframe, filters)

    assert len(filtered) == 1
    # Pin the exact surviving row rather than mere membership.
    assert filtered["name"].tolist() == ["David"]
174  
175  
def test_filter_df_with_nan_values():
    """EqualFilter selects only the exact match when the column contains NaN.

    This test calls ``EqualFilter.condition`` directly and indexes the
    dataframe with the result; ``filter_df`` itself is not invoked.
    """
    names = ["Alice", "Bob", None, "David"]
    ages = [25, 30, np.nan, 40]
    frame = pd.DataFrame({"name": names, "age": ages})

    mask = EqualFilter(column="age", value=30).condition(frame)
    matched = frame[mask]

    assert len(matched) == 1
    first_row = matched.iloc[0]
    assert first_row["name"] == "Bob"
189  
190  
def test_filter_df_missing_column(sample_dataframe):
    """Evaluating a filter on a column absent from the dataframe raises KeyError.

    This test calls ``EqualFilter.condition`` directly; ``filter_df`` itself
    is not invoked.
    """
    missing_column_filter = EqualFilter(column="nonexistent", value=10)
    with pytest.raises(KeyError):
        missing_column_filter.condition(sample_dataframe)
196  
197  
def test_filter_df_empty_dataframe():
    """filter_df on a dataframe with columns but no rows returns no rows."""
    no_rows = pd.DataFrame({"col1": [], "col2": []})
    col1_is_one = EqualFilter(column="col1", value=1)

    result = filter_df(no_rows, [col1_is_one])

    assert len(result) == 0