/ test / utils / test_filters.py
test_filters.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  import pytest
  6  
  7  from haystack import Document
  8  from haystack.errors import FilterError
  9  from haystack.utils.filters import document_matches_filter
 10  
 11  document_matches_filter_data = [
 12      # == operator params
 13      pytest.param(
 14          {"field": "meta.name", "operator": "==", "value": "test"},
 15          Document(meta={"name": "test"}),
 16          True,
 17          id="== operator with equal values",
 18      ),
 19      pytest.param(
 20          {"field": "meta.name", "operator": "==", "value": "test"},
 21          Document(meta={"name": "different value"}),
 22          False,
 23          id="== operator with different values",
 24      ),
 25      pytest.param(
 26          {"field": "meta.name", "operator": "==", "value": "test"},
 27          Document(meta={"name": ["test"]}),
 28          False,
 29          id="== operator with different types values",
 30      ),
 31      pytest.param(
 32          {"field": "meta.name", "operator": "==", "value": "test"},
 33          Document(),
 34          False,
 35          id="== operator with missing Document value",
 36      ),
 37      pytest.param(
 38          {"field": "meta.name", "operator": "==", "value": "test"},
 39          Document(meta={"name": None}),
 40          False,
 41          id="== operator with None Document value",
 42      ),
 43      pytest.param(
 44          {"field": "meta.name", "operator": "==", "value": None},
 45          Document(meta={"name": "test"}),
 46          False,
 47          id="== operator with None filter value",
 48      ),
 49      # != operator params
 50      pytest.param(
 51          {"field": "meta.name", "operator": "!=", "value": "test"},
 52          Document(meta={"name": "test"}),
 53          False,
 54          id="!= operator with equal values",
 55      ),
 56      pytest.param(
 57          {"field": "meta.name", "operator": "!=", "value": "test"},
 58          Document(meta={"name": "different value"}),
 59          True,
 60          id="!= operator with different values",
 61      ),
 62      pytest.param(
 63          {"field": "meta.name", "operator": "!=", "value": "test"},
 64          Document(meta={"name": ["test"]}),
 65          True,
 66          id="!= operator with different types values",
 67      ),
 68      pytest.param(
 69          {"field": "meta.name", "operator": "!=", "value": "test"}, Document(), True, id="!= operator with missing value"
 70      ),
 71      pytest.param(
 72          {"field": "meta.name", "operator": "!=", "value": "test"},
 73          Document(meta={"name": None}),
 74          True,
 75          id="!= operator with None Document value",
 76      ),
 77      pytest.param(
 78          {"field": "meta.name", "operator": "!=", "value": None},
 79          Document(meta={"name": "test"}),
 80          True,
 81          id="!= operator with None filter value",
 82      ),
 83      # > operator params
 84      pytest.param(
 85          {"field": "meta.page", "operator": ">", "value": 10},
 86          Document(meta={"page": 10}),
 87          False,
 88          id="> operator with equal Document value",
 89      ),
 90      pytest.param(
 91          {"field": "meta.page", "operator": ">", "value": 10},
 92          Document(meta={"page": 11}),
 93          True,
 94          id="> operator with greater Document value",
 95      ),
 96      pytest.param(
 97          {"field": "meta.page", "operator": ">", "value": 10},
 98          Document(meta={"page": 9}),
 99          False,
100          id="> operator with smaller Document value",
101      ),
102      pytest.param(
103          {"field": "meta.date", "operator": ">", "value": "1969-07-21T20:17:40"},
104          Document(meta={"date": "1969-07-21T20:17:40"}),
105          False,
106          id="> operator with equal ISO 8601 datetime Document value",
107      ),
108      pytest.param(
109          {"field": "meta.date", "operator": ">", "value": "1969-07-21T20:17:40"},
110          Document(meta={"date": "1972-12-11T19:54:58"}),
111          True,
112          id="> operator with greater ISO 8601 datetime Document value",
113      ),
114      pytest.param(
115          {"field": "meta.date", "operator": ">", "value": "1972-12-11T19:54:58"},
116          Document(meta={"date": "1969-07-21T20:17:40"}),
117          False,
118          id="> operator with smaller ISO 8601 datetime Document value",
119      ),
120      pytest.param(
121          {"field": "meta.page", "operator": ">", "value": 10},
122          Document(),
123          False,
124          id="> operator with missing Document value",
125      ),
126      pytest.param(
127          {"field": "meta.page", "operator": ">", "value": 10},
128          Document(meta={"page": None}),
129          False,
130          id="> operator with None Document value",
131      ),
132      pytest.param(
133          {"field": "meta.page", "operator": ">", "value": None},
134          Document(meta={"page": 10}),
135          False,
136          id="> operator with None filter value",
137      ),
138      pytest.param(
139          {"field": "meta.page", "operator": ">", "value": None},
140          Document(meta={"page": None}),
141          False,
142          id="> operator with None Document and filter value",
143      ),
144      # >= operator params
145      pytest.param(
146          {"field": "meta.page", "operator": ">=", "value": 10},
147          Document(meta={"page": 10}),
148          True,
149          id=">= operator with equal Document value",
150      ),
151      pytest.param(
152          {"field": "meta.page", "operator": ">=", "value": 10},
153          Document(meta={"page": 11}),
154          True,
155          id=">= operator with greater Document value",
156      ),
157      pytest.param(
158          {"field": "meta.page", "operator": ">=", "value": 10},
159          Document(meta={"page": 9}),
160          False,
161          id=">= operator with smaller Document value",
162      ),
163      pytest.param(
164          {"field": "meta.date", "operator": ">=", "value": "1969-07-21T20:17:40"},
165          Document(meta={"date": "1969-07-21T20:17:40"}),
166          True,
167          id=">= operator with equal ISO 8601 datetime Document value",
168      ),
169      pytest.param(
170          {"field": "meta.date", "operator": ">=", "value": "1969-07-21T20:17:40"},
171          Document(meta={"date": "1972-12-11T19:54:58"}),
172          True,
173          id=">= operator with greater ISO 8601 datetime Document value",
174      ),
175      pytest.param(
176          {"field": "meta.date", "operator": ">=", "value": "1972-12-11T19:54:58"},
177          Document(meta={"date": "1969-07-21T20:17:40"}),
178          False,
179          id=">= operator with smaller ISO 8601 datetime Document value",
180      ),
181      pytest.param(
182          {"field": "meta.page", "operator": ">=", "value": 10},
183          Document(),
184          False,
185          id=">= operator with missing Document value",
186      ),
187      pytest.param(
188          {"field": "meta.page", "operator": ">=", "value": 10},
189          Document(meta={"page": None}),
190          False,
191          id=">= operator with None Document value",
192      ),
193      pytest.param(
194          {"field": "meta.page", "operator": ">=", "value": None},
195          Document(meta={"page": 10}),
196          False,
197          id=">= operator with None filter value",
198      ),
199      pytest.param(
200          {"field": "meta.page", "operator": ">=", "value": None},
201          Document(meta={"page": None}),
202          False,
203          id=">= operator with None Document and filter value",
204      ),
205      # < operator params
206      pytest.param(
207          {"field": "meta.page", "operator": "<", "value": 10},
208          Document(meta={"page": 10}),
209          False,
210          id="< operator with equal Document value",
211      ),
212      pytest.param(
213          {"field": "meta.page", "operator": "<", "value": 10},
214          Document(meta={"page": 11}),
215          False,
216          id="< operator with greater Document value",
217      ),
218      pytest.param(
219          {"field": "meta.page", "operator": "<", "value": 10},
220          Document(meta={"page": 9}),
221          True,
222          id="< operator with smaller Document value",
223      ),
224      pytest.param(
225          {"field": "meta.date", "operator": "<", "value": "1969-07-21T20:17:40"},
226          Document(meta={"date": "1969-07-21T20:17:40"}),
227          False,
228          id="< operator with equal ISO 8601 datetime Document value",
229      ),
230      pytest.param(
231          {"field": "meta.date", "operator": "<", "value": "1969-07-21T20:17:40"},
232          Document(meta={"date": "1972-12-11T19:54:58"}),
233          False,
234          id="< operator with greater ISO 8601 datetime Document value",
235      ),
236      pytest.param(
237          {"field": "meta.date", "operator": "<", "value": "1972-12-11T19:54:58"},
238          Document(meta={"date": "1969-07-21T20:17:40"}),
239          True,
240          id="< operator with smaller ISO 8601 datetime Document value",
241      ),
242      pytest.param(
243          {"field": "meta.page", "operator": "<", "value": 10},
244          Document(),
245          False,
246          id="< operator with missing Document value",
247      ),
248      pytest.param(
249          {"field": "meta.page", "operator": "<", "value": 10},
250          Document(meta={"page": None}),
251          False,
252          id="< operator with None Document value",
253      ),
254      pytest.param(
255          {"field": "meta.page", "operator": "<", "value": None},
256          Document(meta={"page": 10}),
257          False,
258          id="< operator with None filter value",
259      ),
260      pytest.param(
261          {"field": "meta.page", "operator": "<", "value": None},
262          Document(meta={"page": None}),
263          False,
264          id="< operator with None Document and filter value",
265      ),
266      # <= operator params
267      pytest.param(
268          {"field": "meta.page", "operator": "<=", "value": 10},
269          Document(meta={"page": 10}),
270          True,
271          id="<= operator with equal Document value",
272      ),
273      pytest.param(
274          {"field": "meta.page", "operator": "<=", "value": 10},
275          Document(meta={"page": 11}),
276          False,
277          id="<= operator with greater Document value",
278      ),
279      pytest.param(
280          {"field": "meta.page", "operator": "<=", "value": 10},
281          Document(meta={"page": 9}),
282          True,
283          id="<= operator with smaller Document value",
284      ),
285      pytest.param(
286          {"field": "meta.date", "operator": "<=", "value": "1969-07-21T20:17:40"},
287          Document(meta={"date": "1969-07-21T20:17:40"}),
288          True,
289          id="<= operator with equal ISO 8601 datetime Document value",
290      ),
291      pytest.param(
292          {"field": "meta.date", "operator": "<=", "value": "1969-07-21T20:17:40"},
293          Document(meta={"date": "1972-12-11T19:54:58"}),
294          False,
295          id="<= operator with greater ISO 8601 datetime Document value",
296      ),
297      pytest.param(
298          {"field": "meta.date", "operator": "<=", "value": "1972-12-11T19:54:58"},
299          Document(meta={"date": "1969-07-21T20:17:40"}),
300          True,
301          id="<= operator with smaller ISO 8601 datetime Document value",
302      ),
303      pytest.param(
304          {"field": "meta.page", "operator": "<=", "value": 10},
305          Document(),
306          False,
307          id="<= operator with missing Document value",
308      ),
309      pytest.param(
310          {"field": "meta.page", "operator": "<=", "value": 10},
311          Document(meta={"page": None}),
312          False,
313          id="<= operator with None Document value",
314      ),
315      pytest.param(
316          {"field": "meta.page", "operator": "<=", "value": None},
317          Document(meta={"page": 10}),
318          False,
319          id="<= operator with None filter value",
320      ),
321      pytest.param(
322          {"field": "meta.page", "operator": "<=", "value": None},
323          Document(meta={"page": None}),
324          False,
325          id="<= operator with None Document and filter value",
326      ),
327      # in operator params
328      pytest.param(
329          {"field": "meta.page", "operator": "in", "value": [9, 10]},
330          Document(meta={"page": 1}),
331          False,
332          id="in operator with filter value not containing Document value",
333      ),
334      pytest.param(
335          {"field": "meta.page", "operator": "in", "value": [9, 10]},
336          Document(meta={"page": 10}),
337          True,
338          id="in operator with filter value containing Document value",
339      ),
340      # not in operator params
341      pytest.param(
342          {"field": "meta.page", "operator": "not in", "value": [9, 10]},
343          Document(meta={"page": 1}),
344          True,
345          id="not in operator with filter value not containing Document value",
346      ),
347      pytest.param(
348          {"field": "meta.page", "operator": "not in", "value": [9, 10]},
349          Document(meta={"page": 10}),
350          False,
351          id="not in operator with filter value containing Document value",
352      ),
353      # AND operator params
354      pytest.param(
355          {
356              "operator": "AND",
357              "conditions": [
358                  {"field": "meta.page", "operator": "==", "value": 10},
359                  {"field": "meta.type", "operator": "==", "value": "article"},
360              ],
361          },
362          Document(meta={"page": 10, "type": "article"}),
363          True,
364          id="AND operator with Document matching all conditions",
365      ),
366      pytest.param(
367          {
368              "operator": "AND",
369              "conditions": [
370                  {"field": "meta.page", "operator": "==", "value": 10},
371                  {"field": "meta.type", "operator": "==", "value": "article"},
372              ],
373          },
374          Document(meta={"page": 20, "type": "article"}),
375          False,
376          id="AND operator with Document matching a single condition",
377      ),
378      pytest.param(
379          {
380              "operator": "AND",
381              "conditions": [
382                  {"field": "meta.page", "operator": "==", "value": 10},
383                  {"field": "meta.type", "operator": "==", "value": "article"},
384              ],
385          },
386          Document(meta={"page": 11, "value": "blog post"}),
387          False,
388          id="AND operator with Document matching no condition",
389      ),
390      # OR operator params
391      pytest.param(
392          {
393              "operator": "OR",
394              "conditions": [
395                  {"field": "meta.page", "operator": "==", "value": 10},
396                  {"field": "meta.type", "operator": "==", "value": "article"},
397              ],
398          },
399          Document(meta={"page": 10, "type": "article"}),
400          True,
401          id="OR operator with Document matching all conditions",
402      ),
403      pytest.param(
404          {
405              "operator": "OR",
406              "conditions": [
407                  {"field": "meta.page", "operator": "==", "value": 10},
408                  {"field": "meta.type", "operator": "==", "value": "article"},
409              ],
410          },
411          Document(meta={"page": 20, "type": "article"}),
412          True,
413          id="OR operator with Document matching a single condition",
414      ),
415      pytest.param(
416          {
417              "operator": "OR",
418              "conditions": [
419                  {"field": "meta.page", "operator": "==", "value": 10},
420                  {"field": "meta.type", "operator": "==", "value": "article"},
421              ],
422          },
423          Document(meta={"page": 11, "value": "blog post"}),
424          False,
425          id="OR operator with Document matching no condition",
426      ),
427      # NOT operator params
428      pytest.param(
429          {
430              "operator": "NOT",
431              "conditions": [
432                  {"field": "meta.page", "operator": "==", "value": 10},
433                  {"field": "meta.type", "operator": "==", "value": "article"},
434              ],
435          },
436          Document(meta={"page": 10, "type": "article"}),
437          False,
438          id="NOT operator with Document matching all conditions",
439      ),
440      pytest.param(
441          {
442              "operator": "NOT",
443              "conditions": [
444                  {"field": "meta.page", "operator": "==", "value": 10},
445                  {"field": "meta.type", "operator": "==", "value": "article"},
446              ],
447          },
448          Document(meta={"page": 20, "type": "article"}),
449          True,
450          id="NOT operator with Document matching a single condition",
451      ),
452      pytest.param(
453          {
454              "operator": "NOT",
455              "conditions": [
456                  {"field": "meta.page", "operator": "==", "value": 10},
457                  {"field": "meta.type", "operator": "==", "value": "article"},
458              ],
459          },
460          Document(meta={"page": 11, "value": "blog post"}),
461          True,
462          id="NOT operator with Document matching no condition",
463      ),
464      pytest.param(
465          {"field": "meta.date", "operator": "==", "value": "2025-02-03T12:45:46.435816Z"},
466          Document(meta={"date": "2025-02-03T12:45:46.435816Z"}),
467          True,
468          id="== operator with ISO 8601 datetime Document value",
469      ),
470      pytest.param(
471          {"field": "meta.date", "operator": ">=", "value": "2025-02-01"},
472          Document(meta={"date": "2025-02-03T12:45:46.435816Z"}),
473          True,
474          id=">= operator with naive and aware ISO 8601 datetime Document value",
475      ),
476  ]
477  
478  
@pytest.mark.parametrize("filters, document, expected_result", document_matches_filter_data)
def test_document_matches_filter(filters, document, expected_result):
    """Check that ``document_matches_filter`` returns the expected result for each case."""
    outcome = document_matches_filter(filters, document)
    assert outcome == expected_result
482  
483  
# Filters that ``test_document_matches_filter_raises_error`` expects to raise ``FilterError``.
# Every test id is unique so a failing case can be selected and identified unambiguously.
document_matches_filter_raises_error_data = [
    # > operator params
    pytest.param({"field": "meta.page", "operator": ">", "value": "10"}, id="> operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": ">", "value": [10]}, id="> operator with list filter value"),
    # >= operator params
    pytest.param({"field": "meta.page", "operator": ">=", "value": "10"}, id=">= operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": ">=", "value": [10]}, id=">= operator with list filter value"),
    # < operator params
    pytest.param({"field": "meta.page", "operator": "<", "value": "10"}, id="< operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": "<", "value": [10]}, id="< operator with list filter value"),
    # <= operator params
    pytest.param({"field": "meta.page", "operator": "<=", "value": "10"}, id="<= operator with string filter value"),
    pytest.param({"field": "meta.page", "operator": "<=", "value": [10]}, id="<= operator with list filter value"),
    # in operator params
    pytest.param({"field": "meta.page", "operator": "in", "value": 1}, id="in operator with non list filter value"),
    # If any iterable becomes supported at some point, this case will stop raising
    # and should then be moved out of this list.
    pytest.param({"field": "meta.page", "operator": "in", "value": (10, 11)}, id="in operator with tuple filter value"),
    # not in operator params
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": 1}, id="not in operator with non list filter value"
    ),
    # If any iterable becomes supported at some point, this case will stop raising
    # and should then be moved out of this list.
    pytest.param(
        {"field": "meta.page", "operator": "not in", "value": (10, 11)}, id="not in operator with tuple filter value"
    ),
    # Malformed filters
    pytest.param(
        {"conditions": [{"field": "meta.name", "operator": "==", "value": "test"}]}, id="Missing root operator key"
    ),
    pytest.param({"operator": "AND"}, id="Missing root conditions key"),
    pytest.param({"operator": "==", "value": "test"}, id="Missing condition field key"),
    pytest.param({"field": "meta.name", "value": "test"}, id="Missing condition operator key"),
    pytest.param({"field": "meta.name", "operator": "=="}, id="Missing condition value key"),
]
520  
521  
@pytest.mark.parametrize("filters", document_matches_filter_raises_error_data)
def test_document_matches_filter_raises_error(filters):
    """Check that ``document_matches_filter`` raises ``FilterError`` for each invalid filter.

    Every filter is evaluated against the same Document, whose meta holds ``page=10``.
    """
    # Build the Document outside the ``pytest.raises`` block so that only the
    # call under test can satisfy the expected exception.
    document = Document(meta={"page": 10})
    with pytest.raises(FilterError):
        document_matches_filter(filters, document)