test_filters.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import pytest

from haystack import Document
from haystack.errors import FilterError
from haystack.utils.filters import document_matches_filter

# Each case is (filters, document, expected_result): the filter dictionary to evaluate,
# the Document it is evaluated against, and the boolean `document_matches_filter` must return.
document_matches_filter_data = [
    # == operator params
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": "test"},
        Document(meta={"name": "test"}),
        True,
        id="== operator with equal values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": "test"},
        Document(meta={"name": "different value"}),
        False,
        id="== operator with different values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": "test"},
        Document(meta={"name": ["test"]}),
        False,
        id="== operator with different types values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": "test"},
        Document(),
        False,
        id="== operator with missing Document value",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": "test"},
        Document(meta={"name": None}),
        False,
        id="== operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "==", "value": None},
        Document(meta={"name": "test"}),
        False,
        id="== operator with None filter value",
    ),
    # != operator params
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": "test"},
        Document(meta={"name": "test"}),
        False,
        id="!= operator with equal values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": "test"},
        Document(meta={"name": "different value"}),
        True,
        id="!= operator with different values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": "test"},
        Document(meta={"name": ["test"]}),
        True,
        id="!= operator with different types values",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": "test"},
        Document(),
        True,
        id="!= operator with missing value",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": "test"},
        Document(meta={"name": None}),
        True,
        id="!= operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.name", "operator": "!=", "value": None},
        Document(meta={"name": "test"}),
        True,
        id="!= operator with None filter value",
    ),
    # > operator params
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": 10},
        Document(meta={"page": 10}),
        False,
        id="> operator with equal Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": 10},
        Document(meta={"page": 11}),
        True,
        id="> operator with greater Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": 10},
        Document(meta={"page": 9}),
        False,
        id="> operator with smaller Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        False,
        id="> operator with equal ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1972-12-11T19:54:58"}),
        True,
        id="> operator with greater ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">", "value": "1972-12-11T19:54:58"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        False,
        id="> operator with smaller ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": 10},
        Document(),
        False,
        id="> operator with missing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": 10},
        Document(meta={"page": None}),
        False,
        id="> operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": None},
        Document(meta={"page": 10}),
        False,
        id="> operator with None filter value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">", "value": None},
        Document(meta={"page": None}),
        False,
        id="> operator with None Document and filter value",
    ),
    # >= operator params
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": 10},
        Document(meta={"page": 10}),
        True,
        id=">= operator with equal Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": 10},
        Document(meta={"page": 11}),
        True,
        id=">= operator with greater Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": 10},
        Document(meta={"page": 9}),
        False,
        id=">= operator with smaller Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">=", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        True,
        id=">= operator with equal ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">=", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1972-12-11T19:54:58"}),
        True,
        id=">= operator with greater ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">=", "value": "1972-12-11T19:54:58"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        False,
        id=">= operator with smaller ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": 10},
        Document(),
        False,
        id=">= operator with missing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": 10},
        Document(meta={"page": None}),
        False,
        id=">= operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": None},
        Document(meta={"page": 10}),
        False,
        id=">= operator with None filter value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": ">=", "value": None},
        Document(meta={"page": None}),
        False,
        id=">= operator with None Document and filter value",
    ),
    # < operator params
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": 10},
        Document(meta={"page": 10}),
        False,
        id="< operator with equal Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": 10},
        Document(meta={"page": 11}),
        False,
        id="< operator with greater Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": 10},
        Document(meta={"page": 9}),
        True,
        id="< operator with smaller Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        False,
        id="< operator with equal ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1972-12-11T19:54:58"}),
        False,
        id="< operator with greater ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<", "value": "1972-12-11T19:54:58"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        True,
        id="< operator with smaller ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": 10},
        Document(),
        False,
        id="< operator with missing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": 10},
        Document(meta={"page": None}),
        False,
        id="< operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": None},
        Document(meta={"page": 10}),
        False,
        id="< operator with None filter value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<", "value": None},
        Document(meta={"page": None}),
        False,
        id="< operator with None Document and filter value",
    ),
    # <= operator params
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": 10},
        Document(meta={"page": 10}),
        True,
        id="<= operator with equal Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": 10},
        Document(meta={"page": 11}),
        False,
        id="<= operator with greater Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": 10},
        Document(meta={"page": 9}),
        True,
        id="<= operator with smaller Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<=", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        True,
        id="<= operator with equal ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<=", "value": "1969-07-21T20:17:40"},
        Document(meta={"date": "1972-12-11T19:54:58"}),
        False,
        id="<= operator with greater ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": "<=", "value": "1972-12-11T19:54:58"},
        Document(meta={"date": "1969-07-21T20:17:40"}),
        True,
        id="<= operator with smaller ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": 10},
        Document(),
        False,
        id="<= operator with missing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": 10},
        Document(meta={"page": None}),
        False,
        id="<= operator with None Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": None},
        Document(meta={"page": 10}),
        False,
        id="<= operator with None filter value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "<=", "value": None},
        Document(meta={"page": None}),
        False,
        id="<= operator with None Document and filter value",
    ),
    # in operator params
    pytest.param(
        {"field": "meta.page", "operator": "in", "value": [9, 10]},
        Document(meta={"page": 1}),
        False,
        id="in operator with filter value not containing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "in", "value": [9, 10]},
        Document(meta={"page": 10}),
        True,
        id="in operator with filter value containing Document value",
    ),
    # not in operator params
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": [9, 10]},
        Document(meta={"page": 1}),
        True,
        id="not in operator with filter value not containing Document value",
    ),
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": [9, 10]},
        Document(meta={"page": 10}),
        False,
        id="not in operator with filter value containing Document value",
    ),
    # AND operator params
    pytest.param(
        {
            "operator": "AND",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 10, "type": "article"}),
        True,
        id="AND operator with Document matching all conditions",
    ),
    pytest.param(
        {
            "operator": "AND",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 20, "type": "article"}),
        False,
        id="AND operator with Document matching a single condition",
    ),
    pytest.param(
        {
            "operator": "AND",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        # Both filtered fields are present and both hold non-matching values.
        Document(meta={"page": 11, "type": "blog post"}),
        False,
        id="AND operator with Document matching no condition",
    ),
    # OR operator params
    pytest.param(
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 10, "type": "article"}),
        True,
        id="OR operator with Document matching all conditions",
    ),
    pytest.param(
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 20, "type": "article"}),
        True,
        id="OR operator with Document matching a single condition",
    ),
    pytest.param(
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        # Both filtered fields are present and both hold non-matching values.
        Document(meta={"page": 11, "type": "blog post"}),
        False,
        id="OR operator with Document matching no condition",
    ),
    # NOT operator params
    pytest.param(
        {
            "operator": "NOT",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 10, "type": "article"}),
        False,
        id="NOT operator with Document matching all conditions",
    ),
    pytest.param(
        {
            "operator": "NOT",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        Document(meta={"page": 20, "type": "article"}),
        True,
        id="NOT operator with Document matching a single condition",
    ),
    pytest.param(
        {
            "operator": "NOT",
            "conditions": [
                {"field": "meta.page", "operator": "==", "value": 10},
                {"field": "meta.type", "operator": "==", "value": "article"},
            ],
        },
        # Both filtered fields are present and both hold non-matching values.
        Document(meta={"page": 11, "type": "blog post"}),
        True,
        id="NOT operator with Document matching no condition",
    ),
    # ISO 8601 datetime params with fractional seconds and a "Z" designator
    pytest.param(
        {"field": "meta.date", "operator": "==", "value": "2025-02-03T12:45:46.435816Z"},
        Document(meta={"date": "2025-02-03T12:45:46.435816Z"}),
        True,
        id="== operator with ISO 8601 datetime Document value",
    ),
    pytest.param(
        {"field": "meta.date", "operator": ">=", "value": "2025-02-01"},
        Document(meta={"date": "2025-02-03T12:45:46.435816Z"}),
        True,
        id=">= operator with naive and aware ISO 8601 datetime Document value",
    ),
]


@pytest.mark.parametrize("filters, document, expected_result", document_matches_filter_data)
def test_document_matches_filter(filters, document, expected_result):
    """Check that `document_matches_filter` returns the expected result for each filter/Document pair."""
    assert document_matches_filter(filters, document) == expected_result


# Each case is a single filter dictionary that must make `document_matches_filter` raise
# `FilterError` when evaluated against a Document with `meta={"page": 10}`.
document_matches_filter_raises_error_data = [
    # > operator params
    pytest.param({"field": "meta.page", "operator": ">", "value": "10"}, id="> operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": ">", "value": [10]}, id="> operator with list filter value"),
    # >= operator params
    pytest.param({"field": "meta.page", "operator": ">=", "value": "10"}, id=">= operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": ">=", "value": [10]}, id=">= operator with list filter value"),
    # < operator params
    pytest.param({"field": "meta.page", "operator": "<", "value": "10"}, id="< operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": "<", "value": [10]}, id="< operator with list filter value"),
    # <= operator params
    pytest.param({"field": "meta.page", "operator": "<=", "value": "10"}, id="<= operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": "<=", "value": [10]}, id="<= operator with list filter value"),
    # in operator params
    pytest.param({"field": "meta.page", "operator": "in", "value": 1}, id="in operator with non list filter value"),
    # At some point we might want to support any iterable; this case should then be moved to the matching tests.
    pytest.param(
        {"field": "meta.page", "operator": "in", "value": (10, 11)}, id="in operator with tuple filter value"
    ),
    # not in operator params
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": 1}, id="not in operator with non list filter value"
    ),
    # At some point we might want to support any iterable; this case should then be moved to the matching tests.
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": (10, 11)}, id="not in operator with tuple filter value"
    ),
    # Malformed filters
    pytest.param(
        {"conditions": [{"field": "meta.name", "operator": "==", "value": "test"}]}, id="Missing root operator key"
    ),
    pytest.param({"operator": "AND"}, id="Missing root conditions key"),
    pytest.param({"operator": "==", "value": "test"}, id="Missing condition field key"),
    pytest.param({"field": "meta.name", "value": "test"}, id="Missing condition operator key"),
    pytest.param({"field": "meta.name", "operator": "=="}, id="Missing condition value key"),
]


@pytest.mark.parametrize("filters", document_matches_filter_raises_error_data)
def test_document_matches_filter_raises_error(filters):
    """Check that `document_matches_filter` raises `FilterError` for each invalid filter."""
    # Build the fixture outside the `raises` block so that only the call under test can satisfy it.
    document = Document(meta={"page": 10})
    with pytest.raises(FilterError):
        document_matches_filter(filters, document)