# test_filters.py
"""Unit tests for the dataset filter classes and the ``filter_df`` helper.

Each filter class is exercised through its ``condition`` method, which is
used here as a boolean row mask for the dataframe. ``filter_df`` is
exercised with zero, one and several filters.
"""

import numpy as np
import pandas as pd
import pytest

from evidently.ui.service.datasets.filters import ContainsStrFilter
from evidently.ui.service.datasets.filters import EndsWithFilter
from evidently.ui.service.datasets.filters import EqualFilter
from evidently.ui.service.datasets.filters import GTEFilter
from evidently.ui.service.datasets.filters import GTFilter
from evidently.ui.service.datasets.filters import LTEFilter
from evidently.ui.service.datasets.filters import LTFilter
from evidently.ui.service.datasets.filters import NotEqualFilter
from evidently.ui.service.datasets.filters import StartsWithFilter
from evidently.ui.service.datasets.filters import filter_df


@pytest.fixture
def sample_dataframe():
    """Return a five-row dataframe with string, integer and float columns."""
    return pd.DataFrame(
        {
            "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
            "age": [25, 30, 35, 40, 45],
            "score": [85.5, 90.0, 75.5, 95.0, 88.5],
            "city": ["New York", "London", "Paris", "Tokyo", "Berlin"],
        }
    )


def test_contains_str_filter(sample_dataframe):
    """ContainsStrFilter keeps rows whose value contains the substring."""
    filter_obj = ContainsStrFilter(column="name", value="li")
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 2  # Alice, Charlie
    filtered = sample_dataframe[condition]
    assert len(filtered) == 2
    assert set(filtered["name"]) == {"Alice", "Charlie"}


def test_starts_with_filter(sample_dataframe):
    """StartsWithFilter keeps rows whose value starts with the prefix."""
    filter_obj = StartsWithFilter(column="name", value="C")
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 1  # Charlie
    filtered = sample_dataframe[condition]
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Charlie"


def test_ends_with_filter(sample_dataframe):
    """EndsWithFilter keeps rows whose value ends with the suffix."""
    filter_obj = EndsWithFilter(column="name", value="e")
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 3  # Alice, Charlie, Eve
    filtered = sample_dataframe[condition]
    assert len(filtered) == 3
    assert set(filtered["name"]) == {"Alice", "Charlie", "Eve"}


def test_equal_filter(sample_dataframe):
    """EqualFilter keeps rows whose value equals the target."""
    filter_obj = EqualFilter(column="age", value=30)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 1  # Bob
    filtered = sample_dataframe[condition]
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Bob"
    assert filtered.iloc[0]["age"] == 30


def test_not_equal_filter(sample_dataframe):
    """NotEqualFilter keeps rows whose value differs from the target."""
    filter_obj = NotEqualFilter(column="age", value=30)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 4  # Everyone except Bob
    filtered = sample_dataframe[condition]
    assert len(filtered) == 4
    assert "Bob" not in filtered["name"].values


def test_gt_filter(sample_dataframe):
    """GTFilter keeps rows strictly greater than the threshold."""
    filter_obj = GTFilter(column="age", value=35)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 2  # David (40), Eve (45)
    filtered = sample_dataframe[condition]
    assert len(filtered) == 2
    assert (filtered["age"] > 35).all()


def test_gte_filter(sample_dataframe):
    """GTEFilter keeps rows greater than or equal to the threshold."""
    filter_obj = GTEFilter(column="age", value=35)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 3  # Charlie (35), David (40), Eve (45)
    filtered = sample_dataframe[condition]
    assert len(filtered) == 3
    assert (filtered["age"] >= 35).all()


def test_lt_filter(sample_dataframe):
    """LTFilter keeps rows strictly less than the threshold."""
    filter_obj = LTFilter(column="age", value=35)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 2  # Alice (25), Bob (30)
    filtered = sample_dataframe[condition]
    assert len(filtered) == 2
    assert (filtered["age"] < 35).all()


def test_lte_filter(sample_dataframe):
    """LTEFilter keeps rows less than or equal to the threshold."""
    filter_obj = LTEFilter(column="age", value=35)
    condition = filter_obj.condition(sample_dataframe)
    assert condition.sum() == 3  # Alice (25), Bob (30), Charlie (35)
    filtered = sample_dataframe[condition]
    assert len(filtered) == 3
    assert (filtered["age"] <= 35).all()


def test_filter_df_single_filter(sample_dataframe):
    """filter_df with one filter returns only the matching row."""
    filters = [EqualFilter(column="age", value=30)]
    filtered = filter_df(sample_dataframe, filters)
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Bob"


def test_filter_df_multiple_filters(sample_dataframe):
    """filter_df combines several filters with AND logic."""
    filters = [
        GTEFilter(column="age", value=30),
        LTFilter(column="age", value=40),
    ]
    filtered = filter_df(sample_dataframe, filters)
    assert len(filtered) == 2  # Bob (30), Charlie (35)
    assert set(filtered["name"]) == {"Bob", "Charlie"}
    assert all(30 <= age < 40 for age in filtered["age"])


def test_filter_df_no_filters(sample_dataframe):
    """filter_df with ``None`` as the filter list returns the frame unchanged."""
    filtered = filter_df(sample_dataframe, None)
    assert len(filtered) == len(sample_dataframe)
    pd.testing.assert_frame_equal(filtered, sample_dataframe)


def test_filter_df_empty_list(sample_dataframe):
    """filter_df with an empty filter list returns the frame unchanged."""
    filtered = filter_df(sample_dataframe, [])
    assert len(filtered) == len(sample_dataframe)
    pd.testing.assert_frame_equal(filtered, sample_dataframe)


def test_filter_df_combined_string_and_number(sample_dataframe):
    """filter_df combines a string filter and a numeric filter."""
    filters = [
        ContainsStrFilter(column="name", value="a"),
        GTFilter(column="score", value=80.0),
    ]
    filtered = filter_df(sample_dataframe, filters)
    # The expected result encodes case-sensitive substring matching:
    #   names containing lowercase "a": Charlie, David
    #     ("Alice" only has an uppercase "A", so it is not expected to match)
    #   scores > 80: Alice (85.5), Bob (90.0), David (95.0), Eve (88.5)
    #   intersection: David only (Charlie's score is 75.5)
    assert len(filtered) == 1
    assert list(filtered["name"]) == ["David"]


def test_filter_df_with_nan_values():
    """EqualFilter's condition does not select rows whose value is NaN/None.

    Despite the test name, this applies the filter's ``condition`` mask
    directly and does not go through ``filter_df``.
    """
    df = pd.DataFrame(
        {
            "name": ["Alice", "Bob", None, "David"],
            "age": [25, 30, np.nan, 40],
        }
    )
    filter_obj = EqualFilter(column="age", value=30)
    condition = filter_obj.condition(df)
    filtered = df[condition]
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Bob"


def test_filter_df_missing_column(sample_dataframe):
    """Building a condition on a non-existent column raises ``KeyError``.

    Despite the test name, this calls the filter's ``condition`` directly
    and does not go through ``filter_df``.
    """
    filter_obj = EqualFilter(column="nonexistent", value=10)
    with pytest.raises(KeyError):
        filter_obj.condition(sample_dataframe)


def test_filter_df_empty_dataframe():
    """filter_df on a dataframe with no rows returns no rows."""
    empty_df = pd.DataFrame({"col1": [], "col2": []})
    filters = [EqualFilter(column="col1", value=1)]
    filtered = filter_df(empty_df, filters)
    assert len(filtered) == 0