document_store.py
1 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai> 2 # 3 # SPDX-License-Identifier: Apache-2.0 4 5 import inspect 6 import random 7 from datetime import datetime 8 9 from haystack.dataclasses import Document 10 from haystack.document_stores.errors import DuplicateDocumentError 11 from haystack.document_stores.types import DocumentStore, DuplicatePolicy 12 from haystack.errors import FilterError 13 from haystack.lazy_imports import LazyImport 14 15 with LazyImport("Run 'pip install pytest'") as pytest_import: 16 import pytest 17 18 19 def _random_embeddings(n): 20 return [random.random() for _ in range(n)] 21 22 23 # These are random embedding that are used to test filters. 24 # We declare them here as they're used both in the `filterable_docs` fixture 25 # and the body of several `filter_documents` tests. 26 TEST_EMBEDDING_1 = _random_embeddings(768) 27 TEST_EMBEDDING_2 = _random_embeddings(768) 28 29 30 class AssertDocumentsEqualMixin: 31 @staticmethod 32 def assert_documents_are_equal(received: list[Document], expected: list[Document]): 33 """ 34 Assert that two lists of Documents are equal. 35 36 This is used in every test, if a Document Store implementation has a different behaviour 37 it should override this method. This can happen for example when the Document Store sets 38 a score to returned Documents. Since we can't know what the score will be, we can't compare 39 the Documents reliably. 40 """ 41 assert received == expected 42 43 44 class CountDocumentsTest: 45 """ 46 Utility class to test a Document Store `count_documents` method. 47 48 To use it create a custom test class and override the `document_store` fixture to return your Document Store. 49 Example usage: 50 51 ```python 52 class MyDocumentStoreTest(CountDocumentsTest): 53 @pytest.fixture 54 def document_store(self): 55 return MyDocumentStore() 56 ``` 57 """ 58 59 @staticmethod 60 def test_count_empty(document_store: DocumentStore): 61 """Test count is zero for an empty document store""" 62 assert document_store.count_documents() == 0 63 64 @staticmethod 65 def test_count_not_empty(document_store: DocumentStore): 66 """Test count is greater than zero if the document store contains documents""" 67 document_store.write_documents( 68 [Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")] 69 ) 70 assert document_store.count_documents() == 3 71 72 73 class WriteDocumentsTest(AssertDocumentsEqualMixin): 74 """ 75 Utility class to test a Document Store `write_documents` method. 76 77 To use it create a custom test class and override the `document_store` fixture to return your Document Store. 78 The Document Store `filter_documents` method must be at least partly implemented to return all stored Documents 79 for these tests to work correctly. 80 Example usage: 81 82 ```python 83 class MyDocumentStoreTest(WriteDocumentsTest): 84 @pytest.fixture 85 def document_store(self): 86 return MyDocumentStore() 87 ``` 88 """ 89 90 def test_write_documents(self, document_store: DocumentStore): 91 """ 92 Test write_documents() default behaviour. 93 """ 94 msg = ( 95 "Default write_documents() behaviour depends on the Document Store implementation, " 96 "as we don't enforce a default behaviour when no policy is set. " 97 "Override this test in your custom test class." 98 ) 99 raise NotImplementedError(msg) 100 101 def test_write_documents_duplicate_fail(self, document_store: DocumentStore): 102 """Test write_documents() fails when writing documents with same id and `DuplicatePolicy.FAIL`.""" 103 doc = Document(content="test doc") 104 assert document_store.write_documents([doc], policy=DuplicatePolicy.FAIL) == 1 105 with pytest.raises(DuplicateDocumentError): 106 document_store.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL) 107 self.assert_documents_are_equal(document_store.filter_documents(), [doc]) 108 109 @staticmethod 110 def test_write_documents_duplicate_skip(document_store: DocumentStore): 111 """Test write_documents() skips writing when using DuplicatePolicy.SKIP.""" 112 doc = Document(content="test doc") 113 assert document_store.write_documents([doc], policy=DuplicatePolicy.SKIP) == 1 114 assert document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP) == 0 115 116 def test_write_documents_duplicate_overwrite(self, document_store: DocumentStore): 117 """Test write_documents() overwrites when using DuplicatePolicy.OVERWRITE.""" 118 doc1 = Document(id="1", content="test doc 1") 119 doc2 = Document(id="1", content="test doc 2") 120 121 assert document_store.write_documents([doc2], policy=DuplicatePolicy.OVERWRITE) == 1 122 self.assert_documents_are_equal(document_store.filter_documents(), [doc2]) 123 assert document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) == 1 124 self.assert_documents_are_equal(document_store.filter_documents(), [doc1]) 125 126 @staticmethod 127 def test_write_documents_invalid_input(document_store: DocumentStore): 128 """Test write_documents() fails when providing unexpected input.""" 129 with pytest.raises(ValueError): 130 document_store.write_documents(["not a document for sure"]) # type: ignore 131 with pytest.raises(ValueError): 132 document_store.write_documents("not a list actually") # type: ignore 133 134 135 class DeleteDocumentsTest: 136 """ 137 Utility class to test a Document Store `delete_documents` method. 138 139 To use it create a custom test class and override the `document_store` fixture to return your Document Store. 140 The Document Store `write_documents` and `count_documents` methods must be implemented for this tests to work 141 correctly. 142 Example usage: 143 144 ```python 145 class MyDocumentStoreTest(DeleteDocumentsTest): 146 @pytest.fixture 147 def document_store(self): 148 return MyDocumentStore() 149 ``` 150 """ 151 152 @staticmethod 153 def test_delete_documents(document_store: DocumentStore): 154 """Test delete_documents() normal behaviour.""" 155 doc = Document(content="test doc") 156 document_store.write_documents([doc]) 157 assert document_store.count_documents() == 1 158 159 document_store.delete_documents([doc.id]) 160 assert document_store.count_documents() == 0 161 162 @staticmethod 163 def test_delete_documents_empty_document_store(document_store: DocumentStore): 164 """Test delete_documents() doesn't fail when called using an empty Document Store.""" 165 document_store.delete_documents(["non_existing_id"]) 166 167 @staticmethod 168 def test_delete_documents_non_existing_document(document_store: DocumentStore): 169 """Test delete_documents() doesn't delete any Document when called with non-existing id.""" 170 doc = Document(content="test doc") 171 document_store.write_documents([doc]) 172 assert document_store.count_documents() == 1 173 174 document_store.delete_documents(["non_existing_id"]) 175 176 # No Document has been deleted 177 assert document_store.count_documents() == 1 178 179 180 def create_filterable_docs() -> list[Document]: 181 """ 182 Create a list of filterable documents to be used in the filterable_docs fixture. 183 """ 184 185 documents = [] 186 for i in range(3): 187 documents.append( 188 Document( 189 content=f"A Foo Document {i}", 190 meta={ 191 "name": f"name_{i}", 192 "page": "100", 193 "chapter": "intro", 194 "number": 2, 195 "date": "1969-07-21T20:17:40", 196 }, 197 embedding=_random_embeddings(768), 198 ) 199 ) 200 documents.append( 201 Document( 202 content=f"A Bar Document {i}", 203 meta={ 204 "name": f"name_{i}", 205 "page": "123", 206 "chapter": "abstract", 207 "number": -2, 208 "date": "1972-12-11T19:54:58", 209 }, 210 embedding=_random_embeddings(768), 211 ) 212 ) 213 documents.append( 214 Document( 215 content=f"A Foobar Document {i}", 216 meta={ 217 "name": f"name_{i}", 218 "page": "90", 219 "chapter": "conclusion", 220 "number": -10, 221 "date": "1989-11-09T17:53:00", 222 }, 223 embedding=_random_embeddings(768), 224 ) 225 ) 226 documents.append( 227 Document( 228 content=f"Document {i} without embedding", 229 meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"}, 230 ) 231 ) 232 documents.append( 233 Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1) 234 ) 235 documents.append( 236 Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2) 237 ) 238 return documents 239 240 241 class FilterableDocsFixtureMixin: 242 """ 243 Mixin class that adds a filterable_docs() fixture to a test class. 244 """ 245 246 @pytest.fixture 247 def filterable_docs(self) -> list[Document]: 248 """Fixture that returns a list of Documents that can be used to test filtering.""" 249 return create_filterable_docs() 250 251 252 class FilterDocumentsTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin): 253 """ 254 Utility class to test a Document Store `filter_documents` method using different types of filters. 255 256 To use it create a custom test class and override the `document_store` fixture to return your Document Store. 257 Example usage: 258 259 ```python 260 class MyDocumentStoreTest(FilterDocumentsTest): 261 @pytest.fixture 262 def document_store(self): 263 return MyDocumentStore() 264 ``` 265 """ 266 267 def test_no_filters(self, document_store): 268 """Test filter_documents() with empty filters""" 269 self.assert_documents_are_equal(document_store.filter_documents(), []) 270 self.assert_documents_are_equal(document_store.filter_documents(filters={}), []) 271 docs = [Document(content="test doc")] 272 document_store.write_documents(docs) 273 self.assert_documents_are_equal(document_store.filter_documents(), docs) 274 self.assert_documents_are_equal(document_store.filter_documents(filters={}), docs) 275 276 # == comparator 277 def test_comparison_equal(self, document_store, filterable_docs): 278 """Test filter_documents() with == comparator""" 279 document_store.write_documents(filterable_docs) 280 result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": 100}) 281 self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 100]) 282 283 def test_comparison_equal_with_none(self, document_store, filterable_docs): 284 """Test filter_documents() with == comparator and None""" 285 document_store.write_documents(filterable_docs) 286 result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": None}) 287 self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") is None]) 288 289 # != comparator 290 def test_comparison_not_equal(self, document_store, filterable_docs): 291 """Test filter_documents() with != comparator""" 292 document_store.write_documents(filterable_docs) 293 result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100}) 294 self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") != 100]) 295 296 def test_comparison_not_equal_with_none(self, document_store, filterable_docs): 297 """Test filter_documents() with != comparator and None""" 298 document_store.write_documents(filterable_docs) 299 result = document_store.filter_documents(filters={"field": "meta.number", "operator": "!=", "value": None}) 300 self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") is not None]) 301 302 # > comparator 303 def test_comparison_greater_than(self, document_store, filterable_docs): 304 """Test filter_documents() with > comparator""" 305 document_store.write_documents(filterable_docs) 306 result = document_store.filter_documents({"field": "meta.number", "operator": ">", "value": 0}) 307 self.assert_documents_are_equal( 308 result, [d for d in filterable_docs if d.meta.get("number") is not None and d.meta["number"] > 0] 309 ) 310 311 def test_comparison_greater_than_with_iso_date(self, document_store, filterable_docs): 312 """Test filter_documents() with > comparator and datetime""" 313 document_store.write_documents(filterable_docs) 314 result = document_store.filter_documents( 315 {"field": "meta.date", "operator": ">", "value": "1972-12-11T19:54:58"} 316 ) 317 self.assert_documents_are_equal( 318 result, 319 [ 320 d 321 for d in filterable_docs 322 if d.meta.get("date") is not None 323 and datetime.fromisoformat(d.meta["date"]) > datetime.fromisoformat("1972-12-11T19:54:58") 324 ], 325 ) 326 327 @staticmethod 328 def test_comparison_greater_than_with_string(document_store, filterable_docs): 329 """Test filter_documents() with > comparator and string""" 330 document_store.write_documents(filterable_docs) 331 with pytest.raises(FilterError): 332 document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": "1"}) 333 334 @staticmethod 335 def test_comparison_greater_than_with_list(document_store, filterable_docs): 336 """Test filter_documents() with > comparator and list""" 337 document_store.write_documents(filterable_docs) 338 with pytest.raises(FilterError): 339 document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": [1]}) 340 341 def test_comparison_greater_than_with_none(self, document_store, filterable_docs): 342 """Test filter_documents() with > comparator and None""" 343 document_store.write_documents(filterable_docs) 344 result = document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": None}) 345 self.assert_documents_are_equal(result, []) 346 347 # >= comparator 348 def test_comparison_greater_than_equal(self, document_store, filterable_docs): 349 """Test filter_documents() with >= comparator""" 350 document_store.write_documents(filterable_docs) 351 result = document_store.filter_documents({"field": "meta.number", "operator": ">=", "value": 0}) 352 self.assert_documents_are_equal( 353 result, [d for d in filterable_docs if d.meta.get("number") is not None and d.meta["number"] >= 0] 354 ) 355 356 def test_comparison_greater_than_equal_with_iso_date(self, document_store, filterable_docs): 357 """Test filter_documents() with >= comparator and datetime""" 358 document_store.write_documents(filterable_docs) 359 result = document_store.filter_documents( 360 {"field": "meta.date", "operator": ">=", "value": "1969-07-21T20:17:40"} 361 ) 362 self.assert_documents_are_equal( 363 result, 364 [ 365 d 366 for d in filterable_docs 367 if d.meta.get("date") is not None 368 and datetime.fromisoformat(d.meta["date"]) >= datetime.fromisoformat("1969-07-21T20:17:40") 369 ], 370 ) 371 372 @staticmethod 373 def test_comparison_greater_than_equal_with_string(document_store, filterable_docs): 374 """Test filter_documents() with >= comparator and string""" 375 document_store.write_documents(filterable_docs) 376 with pytest.raises(FilterError): 377 document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": "1"}) 378 379 @staticmethod 380 def test_comparison_greater_than_equal_with_list(document_store, filterable_docs): 381 """Test filter_documents() with >= comparator and list""" 382 document_store.write_documents(filterable_docs) 383 with pytest.raises(FilterError): 384 document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": [1]}) 385 386 def test_comparison_greater_than_equal_with_none(self, document_store, filterable_docs): 387 """Test filter_documents() with >= comparator and None""" 388 document_store.write_documents(filterable_docs) 389 result = document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": None}) 390 self.assert_documents_are_equal(result, []) 391 392 # < comparator 393 def test_comparison_less_than(self, document_store, filterable_docs): 394 """Test filter_documents() with < comparator""" 395 document_store.write_documents(filterable_docs) 396 result = document_store.filter_documents({"field": "meta.number", "operator": "<", "value": 0}) 397 self.assert_documents_are_equal( 398 result, [d for d in filterable_docs if d.meta.get("number") is not None and d.meta["number"] < 0] 399 ) 400 401 def test_comparison_less_than_with_iso_date(self, document_store, filterable_docs): 402 """Test filter_documents() with < comparator and datetime""" 403 document_store.write_documents(filterable_docs) 404 result = document_store.filter_documents( 405 {"field": "meta.date", "operator": "<", "value": "1969-07-21T20:17:40"} 406 ) 407 self.assert_documents_are_equal( 408 result, 409 [ 410 d 411 for d in filterable_docs 412 if d.meta.get("date") is not None 413 and datetime.fromisoformat(d.meta["date"]) < datetime.fromisoformat("1969-07-21T20:17:40") 414 ], 415 ) 416 417 @staticmethod 418 def test_comparison_less_than_with_string(document_store, filterable_docs): 419 """Test filter_documents() with < comparator and string""" 420 document_store.write_documents(filterable_docs) 421 with pytest.raises(FilterError): 422 document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": "1"}) 423 424 @staticmethod 425 def test_comparison_less_than_with_list(document_store, filterable_docs): 426 """Test filter_documents() with < comparator and list""" 427 document_store.write_documents(filterable_docs) 428 with pytest.raises(FilterError): 429 document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": [1]}) 430 431 def test_comparison_less_than_with_none(self, document_store, filterable_docs): 432 """Test filter_documents() with < comparator and None""" 433 document_store.write_documents(filterable_docs) 434 result = document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": None}) 435 self.assert_documents_are_equal(result, []) 436 437 # <= comparator 438 def test_comparison_less_than_equal(self, document_store, filterable_docs): 439 """Test filter_documents() with <=""" 440 document_store.write_documents(filterable_docs) 441 result = document_store.filter_documents({"field": "meta.number", "operator": "<=", "value": 0}) 442 self.assert_documents_are_equal( 443 result, [d for d in filterable_docs if d.meta.get("number") is not None and d.meta["number"] <= 0] 444 ) 445 446 def test_comparison_less_than_equal_with_iso_date(self, document_store, filterable_docs): 447 """Test filter_documents() with <= comparator and datetime""" 448 document_store.write_documents(filterable_docs) 449 result = document_store.filter_documents( 450 {"field": "meta.date", "operator": "<=", "value": "1969-07-21T20:17:40"} 451 ) 452 self.assert_documents_are_equal( 453 result, 454 [ 455 d 456 for d in filterable_docs 457 if d.meta.get("date") is not None 458 and datetime.fromisoformat(d.meta["date"]) <= datetime.fromisoformat("1969-07-21T20:17:40") 459 ], 460 ) 461 462 @staticmethod 463 def test_comparison_less_than_equal_with_string(document_store, filterable_docs): 464 """Test filter_documents() with <= comparator and string""" 465 document_store.write_documents(filterable_docs) 466 with pytest.raises(FilterError): 467 document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": "1"}) 468 469 @staticmethod 470 def test_comparison_less_than_equal_with_list(document_store, filterable_docs): 471 """Test filter_documents() with <= comparator and list""" 472 document_store.write_documents(filterable_docs) 473 with pytest.raises(FilterError): 474 document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": [1]}) 475 476 def test_comparison_less_than_equal_with_none(self, document_store, filterable_docs): 477 """Test filter_documents() with <= comparator and None""" 478 document_store.write_documents(filterable_docs) 479 result = document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": None}) 480 self.assert_documents_are_equal(result, []) 481 482 # in comparator 483 def test_comparison_in(self, document_store, filterable_docs): 484 """Test filter_documents() with 'in' comparator""" 485 document_store.write_documents(filterable_docs) 486 result = document_store.filter_documents({"field": "meta.number", "operator": "in", "value": [10, -10]}) 487 assert len(result) 488 expected = [d for d in filterable_docs if d.meta.get("number") is not None and d.meta["number"] in [10, -10]] 489 self.assert_documents_are_equal(result, expected) 490 491 @staticmethod 492 def test_comparison_in_with_with_non_list(document_store, filterable_docs): 493 """Test filter_documents() with 'in' comparator and non-iterable""" 494 document_store.write_documents(filterable_docs) 495 with pytest.raises(FilterError): 496 document_store.filter_documents({"field": "meta.number", "operator": "in", "value": 9}) 497 498 @staticmethod 499 def test_comparison_in_with_with_non_list_iterable(document_store, filterable_docs): 500 """Test filter_documents() with 'in' comparator and iterable""" 501 document_store.write_documents(filterable_docs) 502 with pytest.raises(FilterError): 503 document_store.filter_documents({"field": "meta.number", "operator": "in", "value": (10, 11)}) 504 505 # not in comparator 506 def test_comparison_not_in(self, document_store, filterable_docs): 507 """Test filter_documents() with 'not in' comparator""" 508 document_store.write_documents(filterable_docs) 509 result = document_store.filter_documents({"field": "meta.number", "operator": "not in", "value": [9, 10]}) 510 self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") not in [9, 10]]) 511 512 @staticmethod 513 def test_comparison_not_in_with_with_non_list(document_store, filterable_docs): 514 """Test filter_documents() with 'not in' comparator and non-iterable""" 515 document_store.write_documents(filterable_docs) 516 with pytest.raises(FilterError): 517 document_store.filter_documents({"field": "meta.number", "operator": "not in", "value": 9}) 518 519 @staticmethod 520 def test_comparison_not_in_with_with_non_list_iterable(document_store, filterable_docs): 521 """Test filter_documents() with 'not in' comparator and iterable""" 522 document_store.write_documents(filterable_docs) 523 with pytest.raises(FilterError): 524 document_store.filter_documents({"field": "meta.number", "operator": "not in", "value": (10, 11)}) 525 526 # Logical operator 527 def test_and_operator(self, document_store, filterable_docs): 528 """Test filter_documents() with 'AND' operator""" 529 document_store.write_documents(filterable_docs) 530 result = document_store.filter_documents( 531 filters={ 532 "operator": "AND", 533 "conditions": [ 534 {"field": "meta.number", "operator": "==", "value": 100}, 535 {"field": "meta.name", "operator": "==", "value": "name_0"}, 536 ], 537 } 538 ) 539 self.assert_documents_are_equal( 540 result, [d for d in filterable_docs if d.meta.get("number") == 100 and d.meta.get("name") == "name_0"] 541 ) 542 543 def test_or_operator(self, document_store, filterable_docs): 544 """Test filter_documents() with 'OR' operator""" 545 document_store.write_documents(filterable_docs) 546 result = document_store.filter_documents( 547 filters={ 548 "operator": "OR", 549 "conditions": [ 550 {"field": "meta.number", "operator": "==", "value": 100}, 551 {"field": "meta.name", "operator": "==", "value": "name_0"}, 552 ], 553 } 554 ) 555 self.assert_documents_are_equal( 556 result, [d for d in filterable_docs if d.meta.get("number") == 100 or d.meta.get("name") == "name_0"] 557 ) 558 559 def test_not_operator(self, document_store, filterable_docs): 560 """Test filter_documents() with 'NOT' operator""" 561 document_store.write_documents(filterable_docs) 562 result = document_store.filter_documents( 563 filters={ 564 "operator": "NOT", 565 "conditions": [ 566 {"field": "meta.number", "operator": "==", "value": 100}, 567 {"field": "meta.name", "operator": "==", "value": "name_0"}, 568 ], 569 } 570 ) 571 self.assert_documents_are_equal( 572 result, [d for d in filterable_docs if not (d.meta.get("number") == 100 and d.meta.get("name") == "name_0")] 573 ) 574 575 # Malformed filters 576 def test_missing_top_level_operator_key(self, document_store, filterable_docs): 577 """Test filter_documents() with top-level operator""" 578 document_store.write_documents(filterable_docs) 579 with pytest.raises(FilterError): 580 document_store.filter_documents( 581 filters={"conditions": [{"field": "meta.name", "operator": "==", "value": "test"}]} 582 ) 583 584 def test_missing_top_level_conditions_key(self, document_store, filterable_docs): 585 """Test filter_documents() with missing top-level condition key""" 586 document_store.write_documents(filterable_docs) 587 with pytest.raises(FilterError): 588 document_store.filter_documents(filters={"operator": "AND"}) 589 590 def test_missing_condition_field_key(self, document_store, filterable_docs): 591 """Test filter_documents() with missing condition key""" 592 document_store.write_documents(filterable_docs) 593 with pytest.raises(FilterError): 594 document_store.filter_documents( 595 filters={"operator": "AND", "conditions": [{"operator": "==", "value": "test"}]} 596 ) 597 598 def test_missing_condition_operator_key(self, document_store, filterable_docs): 599 """Test filter_documents() with missing operator key""" 600 document_store.write_documents(filterable_docs) 601 with pytest.raises(FilterError): 602 document_store.filter_documents( 603 filters={"operator": "AND", "conditions": [{"field": "meta.name", "value": "test"}]} 604 ) 605 606 def test_missing_condition_value_key(self, document_store, filterable_docs): 607 """Test filter_documents() with missing condition value""" 608 document_store.write_documents(filterable_docs) 609 with pytest.raises(FilterError): 610 document_store.filter_documents( 611 filters={"operator": "AND", "conditions": [{"field": "meta.name", "operator": "=="}]} 612 ) 613 614 615 class DeleteAllTest: 616 """ 617 Tests for Document Store delete_all_documents(). 618 619 To use it create a custom test class and override the `document_store` fixture. 620 Only mix in for stores that implement delete_all_documents. 621 """ 622 623 @staticmethod 624 def test_delete_all_documents(document_store: DocumentStore): 625 """ 626 Test delete_all_documents() normal behaviour. 627 628 This test verifies that delete_all_documents() removes all documents from the store 629 and that the store remains functional after deletion. 630 """ 631 docs = [Document(content="first doc", id="1"), Document(content="second doc", id="2")] 632 document_store.write_documents(docs) 633 assert document_store.count_documents() == 2 634 635 document_store.delete_all_documents() # type:ignore[attr-defined] 636 assert document_store.count_documents() == 0 637 638 new_doc = Document(content="new doc after delete all", id="3") 639 document_store.write_documents([new_doc]) 640 assert document_store.count_documents() == 1 641 642 @staticmethod 643 def test_delete_all_documents_empty_store(document_store: DocumentStore): 644 """ 645 Test delete_all_documents() on an empty store. 646 647 This should not raise an error and should leave the store empty. 648 """ 649 assert document_store.count_documents() == 0 650 document_store.delete_all_documents() # type:ignore[attr-defined] 651 assert document_store.count_documents() == 0 652 653 @staticmethod 654 def _delete_all_supports_recreate(document_store: DocumentStore) -> str | None: 655 """ 656 Return the recreate parameter name if delete_all_documents supports it, else None. 657 """ 658 sig = inspect.signature(document_store.delete_all_documents) # type:ignore[attr-defined] 659 if "recreate_index" in sig.parameters: 660 return "recreate_index" 661 if "recreate_collection" in sig.parameters: 662 return "recreate_collection" 663 return None 664 665 @staticmethod 666 def test_delete_all_documents_without_recreate_index(document_store: DocumentStore): 667 """ 668 Test delete_all_documents() with recreate_index/recreate_collection=False when supported. 669 670 Skipped if the store's delete_all_documents does not have recreate_index or recreate_collection. 671 """ 672 param_name = DeleteAllTest._delete_all_supports_recreate(document_store) 673 if param_name is None: 674 pytest.skip("delete_all_documents has no recreate_index or recreate_collection parameter") 675 676 docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")] 677 document_store.write_documents(docs) 678 assert document_store.count_documents() == 2 679 680 document_store.delete_all_documents(**{param_name: False}) # type:ignore[attr-defined] 681 assert document_store.count_documents() == 0 682 683 new_doc = Document(id="3", content="New document after delete all") 684 document_store.write_documents([new_doc]) 685 assert document_store.count_documents() == 1 686 687 @staticmethod 688 def test_delete_all_documents_with_recreate_index(document_store: DocumentStore): 689 """ 690 Test delete_all_documents() with recreate_index/recreate_collection=True when supported. 691 692 Skipped if the store's delete_all_documents does not have recreate_index or recreate_collection. 693 """ 694 param_name = DeleteAllTest._delete_all_supports_recreate(document_store) 695 if param_name is None: 696 pytest.skip("delete_all_documents has no recreate_index or recreate_collection parameter") 697 698 docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")] 699 document_store.write_documents(docs) 700 assert document_store.count_documents() == 2 701 702 document_store.delete_all_documents(**{param_name: True}) # type:ignore[attr-defined] 703 assert document_store.count_documents() == 0 704 705 new_doc = Document(id="3", content="New document after delete all with recreate") 706 document_store.write_documents([new_doc]) 707 assert document_store.count_documents() == 1 708 709 retrieved = document_store.filter_documents() 710 assert len(retrieved) == 1 711 assert retrieved[0].content == "New document after delete all with recreate" 712 713 714 class DeleteByFilterTest: 715 """ 716 Tests for Document Store delete_by_filter(). 717 """ 718 719 @staticmethod 720 def test_delete_by_filter(document_store: DocumentStore): 721 """Delete documents matching a filter and verify count and remaining docs.""" 722 docs = [ 723 Document(content="Doc 1", meta={"category": "Alpha"}), 724 Document(content="Doc 2", meta={"category": "Beta"}), 725 Document(content="Doc 3", meta={"category": "Alpha"}), 726 ] 727 document_store.write_documents(docs) 728 assert document_store.count_documents() == 3 729 730 # `delete_by_filter` is not part of the DocumentStore protocol 731 sig = inspect.signature(document_store.delete_by_filter) # type:ignore[attr-defined] 732 params = {"refresh": True} if "refresh" in sig.parameters else {} 733 deleted_count = document_store.delete_by_filter( # type:ignore[attr-defined] 734 filters={"field": "meta.category", "operator": "==", "value": "Alpha"}, **params 735 ) 736 assert deleted_count == 2 737 assert document_store.count_documents() == 1 738 739 remaining_docs = document_store.filter_documents() 740 assert len(remaining_docs) == 1 741 assert remaining_docs[0].meta["category"] == "Beta" 742 743 @staticmethod 744 def test_delete_by_filter_no_matches(document_store: DocumentStore): 745 """Delete with a filter that matches no documents returns 0 and leaves store unchanged.""" 746 docs = [ 747 Document(content="Doc 1", meta={"category": "Alpha"}), 748 Document(content="Doc 2", meta={"category": "Beta"}), 749 ] 750 document_store.write_documents(docs) 751 assert document_store.count_documents() == 2 752 753 deleted_count = document_store.delete_by_filter( # type:ignore[attr-defined] 754 filters={"field": "meta.category", "operator": "==", "value": "Gamma"} 755 ) 756 assert deleted_count == 0 757 assert document_store.count_documents() == 2 758 759 @staticmethod 760 def test_delete_by_filter_advanced_filters(document_store: DocumentStore): 761 """Delete with AND/OR filter combinations and verify remaining documents.""" 762 docs = [ 763 Document(content="Doc 1", meta={"category": "Alpha", "year": 2023, "status": "draft"}), 764 Document(content="Doc 2", meta={"category": "Alpha", "year": 2024, "status": "published"}), 765 Document(content="Doc 3", meta={"category": "Beta", "year": 2023, "status": "draft"}), 766 ] 767 document_store.write_documents(docs) 768 assert document_store.count_documents() == 3 769 770 # `delete_by_filter` is not part of the DocumentStore protocol 771 sig = inspect.signature(document_store.delete_by_filter) # type:ignore[attr-defined] 772 params = {"refresh": True} if "refresh" in sig.parameters else {} 773 deleted_count = document_store.delete_by_filter( # type:ignore[attr-defined] 774 filters={ 775 "operator": "AND", 776 "conditions": [ 777 {"field": "meta.category", "operator": "==", "value": "Alpha"}, 778 {"field": "meta.year", "operator": "==", "value": 2023}, 779 ], 780 }, 781 **params, 782 ) 783 assert deleted_count == 1 784 assert document_store.count_documents() == 2 785 786 deleted_count = document_store.delete_by_filter( # type:ignore[attr-defined] 787 filters={ 788 "operator": "OR", 789 "conditions": [ 790 {"field": "meta.category", "operator": "==", "value": "Beta"}, 791 {"field": "meta.status", "operator": "==", "value": "published"}, 792 ], 793 }, 794 **params, 795 ) 796 assert deleted_count == 2 797 assert document_store.count_documents() == 0 798 799 800 class UpdateByFilterTest: 801 """ 802 Tests for Document Store update_by_filter(). 803 """ 804 805 @staticmethod 806 def test_update_by_filter(document_store: DocumentStore, filterable_docs: list[Document]): 807 """Update documents matching a filter and verify count and meta changes.""" 808 document_store.write_documents(filterable_docs) 809 expected_count = len([d for d in filterable_docs if d.meta.get("chapter") == "intro"]) 810 assert document_store.count_documents() == len(filterable_docs) 811 812 # `update_by_filter` is not part of the DocumentStore protocol 813 sig = inspect.signature(document_store.update_by_filter) # type:ignore[attr-defined] 814 params = {"refresh": True} if "refresh" in sig.parameters else {} 815 updated_count = document_store.update_by_filter( # type:ignore[attr-defined] 816 filters={"field": "meta.chapter", "operator": "==", "value": "intro"}, meta={"updated": True}, **params 817 ) 818 assert updated_count == expected_count 819 820 updated_docs = document_store.filter_documents( 821 filters={"field": "meta.updated", "operator": "==", "value": True} 822 ) 823 assert len(updated_docs) == expected_count 824 for doc in updated_docs: 825 assert doc.meta["chapter"] == "intro" 826 assert doc.meta["updated"] is True 827 828 not_updated_docs = document_store.filter_documents( 829 filters={"field": "meta.chapter", "operator": "==", "value": "abstract"} 830 ) 831 for doc in not_updated_docs: 832 assert doc.meta.get("updated") is not True 833 834 @staticmethod 835 def test_update_by_filter_no_matches(document_store: DocumentStore, filterable_docs: list[Document]): 836 """Update with a filter that matches no documents returns 0 and leaves store unchanged.""" 837 document_store.write_documents(filterable_docs) 838 initial_count = len(filterable_docs) 839 assert document_store.count_documents() == initial_count 840 841 updated_count = document_store.update_by_filter( # type:ignore[attr-defined] 842 filters={"field": "meta.chapter", "operator": "==", "value": "nonexistent_chapter"}, meta={"updated": True} 843 ) 844 assert updated_count == 0 845 assert document_store.count_documents() == initial_count 846 847 @staticmethod 848 def test_update_by_filter_multiple_fields(document_store: DocumentStore, filterable_docs: list[Document]): 849 """Update matching documents with multiple meta fields and verify all are set.""" 850 document_store.write_documents(filterable_docs) 851 expected_count = len([d for d in filterable_docs if d.meta.get("chapter") == "intro"]) 852 assert document_store.count_documents() == len(filterable_docs) 853 854 # `update_by_filter` is not part of the DocumentStore protocol 855 sig = inspect.signature(document_store.update_by_filter) # type:ignore[attr-defined] 856 params = {"refresh": True} if "refresh" in sig.parameters else {} 857 updated_count = document_store.update_by_filter( # type:ignore[attr-defined] 858 filters={"field": "meta.chapter", "operator": "==", "value": "intro"}, 859 meta={"updated": True, "extra_field": "set"}, 860 **params, 861 ) 862 assert updated_count == expected_count 863 864 updated_docs = document_store.filter_documents( 865 filters={"field": "meta.extra_field", "operator": "==", "value": "set"} 866 ) 867 assert len(updated_docs) == expected_count 868 for doc in updated_docs: 869 assert doc.meta["updated"] is True 870 assert doc.meta["extra_field"] == "set" 871 assert doc.meta["chapter"] == "intro" 872 assert doc.meta.get("number") == 2 873 874 not_updated_docs = document_store.filter_documents( 875 filters={"field": "meta.chapter", "operator": "==", "value": "abstract"} 876 ) 877 for doc in not_updated_docs: 878 assert doc.meta.get("extra_field") != "set" 879 880 @staticmethod 881 def test_update_by_filter_advanced_filters(document_store: DocumentStore): 882 """Update with AND/OR filter combinations and verify updated documents.""" 883 docs = [ 884 Document(content="Doc 1", meta={"category": "Alpha", "year": 2023, "status": "draft"}), 885 Document(content="Doc 2", meta={"category": "Alpha", "year": 2024, "status": "draft"}), 886 Document(content="Doc 3", meta={"category": "Beta", "year": 2023, "status": "draft"}), 887 ] 888 document_store.write_documents(docs) 889 assert document_store.count_documents() == 3 890 891 # `update_by_filter` is not part of the DocumentStore protocol 892 sig = inspect.signature(document_store.update_by_filter) # type:ignore[attr-defined] 893 params = {"refresh": True} if "refresh" in sig.parameters else {} 894 updated_count = document_store.update_by_filter( # type:ignore[attr-defined] 895 filters={ 896 "operator": "AND", 897 "conditions": [ 898 {"field": "meta.category", "operator": "==", "value": "Alpha"}, 899 {"field": "meta.year", "operator": "==", "value": 2023}, 900 ], 901 }, 902 meta={"status": "published"}, 903 **params, 904 ) 905 assert updated_count == 1 906 907 published_docs = document_store.filter_documents( 908 filters={"field": "meta.status", "operator": "==", "value": "published"} 909 ) 910 assert len(published_docs) == 1 911 assert published_docs[0].meta["category"] == "Alpha" 912 assert published_docs[0].meta["year"] == 2023 913 914 updated_count = document_store.update_by_filter( # type:ignore[attr-defined] 915 filters={ 916 "operator": "OR", 917 "conditions": [ 918 {"field": "meta.category", "operator": "==", "value": "Beta"}, 919 {"field": "meta.year", "operator": "==", "value": 2024}, 920 ], 921 }, 922 meta={"featured": True}, 923 **params, 924 ) 925 assert updated_count == 2 926 927 featured_docs = document_store.filter_documents( 928 filters={"field": "meta.featured", "operator": "==", "value": True} 929 ) 930 assert len(featured_docs) == 2 931 932 933 class CountDocumentsByFilterTest: 934 """ 935 Tests for Document Store count_documents_by_filter(). 936 937 Only mix in for stores that implement count_documents_by_filter. 938 """ 939 940 @staticmethod 941 def test_count_documents_by_filter_simple(document_store: DocumentStore): 942 """Test count_documents_by_filter() with a simple equality filter.""" 943 docs = [ 944 Document(content="Doc 1", meta={"category": "A", "status": "active"}), 945 Document(content="Doc 2", meta={"category": "B", "status": "active"}), 946 Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), 947 Document(content="Doc 4", meta={"category": "A", "status": "active"}), 948 ] 949 document_store.write_documents(docs) 950 assert document_store.count_documents() == 4 951 952 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 953 filters={"field": "meta.category", "operator": "==", "value": "A"} 954 ) 955 assert count == 3 956 957 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 958 filters={"field": "meta.category", "operator": "==", "value": "B"} 959 ) 960 assert count == 1 961 962 @staticmethod 963 def test_count_documents_by_filter_compound(document_store: DocumentStore): 964 """Test count_documents_by_filter() with AND filter.""" 965 docs = [ 966 Document(content="Doc 1", meta={"category": "A", "status": "active"}), 967 Document(content="Doc 2", meta={"category": "B", "status": "active"}), 968 Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), 969 Document(content="Doc 4", meta={"category": "A", "status": "active"}), 970 ] 971 document_store.write_documents(docs) 972 assert document_store.count_documents() == 4 973 974 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 975 filters={ 976 "operator": "AND", 977 "conditions": [ 978 {"field": "meta.category", "operator": "==", "value": "A"}, 979 {"field": "meta.status", "operator": "==", "value": "active"}, 980 ], 981 } 982 ) 983 assert count == 2 984 985 @staticmethod 986 def test_count_documents_by_filter_no_matches(document_store: DocumentStore): 987 """Test count_documents_by_filter() when filter matches no documents.""" 988 docs = [Document(content="Doc 1", meta={"category": "A"}), Document(content="Doc 2", meta={"category": "B"})] 989 document_store.write_documents(docs) 990 assert document_store.count_documents() == 2 991 992 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 993 filters={"field": "meta.category", "operator": "==", "value": "Z"} 994 ) 995 assert count == 0 996 997 @staticmethod 998 def test_count_documents_by_filter_empty_collection(document_store: DocumentStore): 999 """Test count_documents_by_filter() on an empty store.""" 1000 assert document_store.count_documents() == 0 1001 1002 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 1003 filters={"field": "meta.category", "operator": "==", "value": "A"} 1004 ) 1005 assert count == 0 1006 1007 1008 class CountUniqueMetadataByFilterTest: 1009 """ 1010 Tests for Document Store count_unique_metadata_by_filter(). 1011 1012 Only mix in for stores that implement count_unique_metadata_by_filter. 1013 """ 1014 1015 @staticmethod 1016 def test_count_unique_metadata_by_filter_all_documents(document_store: DocumentStore): 1017 """Test count_unique_metadata_by_filter() with no filter returns distinct counts for all docs.""" 1018 docs = [ 1019 Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), 1020 Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), 1021 Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), 1022 Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), 1023 Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}), 1024 ] 1025 document_store.write_documents(docs) 1026 assert document_store.count_documents() == 5 1027 1028 counts = document_store.count_unique_metadata_by_filter( # type:ignore[attr-defined] 1029 filters={}, metadata_fields=["category", "status", "priority"] 1030 ) 1031 assert counts["category"] == 3 1032 assert counts["status"] == 2 1033 assert counts["priority"] == 3 1034 1035 @staticmethod 1036 def test_count_unique_metadata_by_filter_with_filter(document_store: DocumentStore): 1037 """Test count_unique_metadata_by_filter() with a filter.""" 1038 docs = [ 1039 Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), 1040 Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), 1041 Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), 1042 Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), 1043 ] 1044 document_store.write_documents(docs) 1045 assert document_store.count_documents() == 4 1046 1047 counts = document_store.count_unique_metadata_by_filter( # type:ignore[attr-defined] 1048 filters={"field": "meta.category", "operator": "==", "value": "A"}, metadata_fields=["status", "priority"] 1049 ) 1050 assert counts["status"] == 2 1051 assert counts["priority"] == 2 1052 1053 @staticmethod 1054 def test_count_unique_metadata_by_filter_with_multiple_filters(document_store: DocumentStore): 1055 """Test counting with multiple filters""" 1056 docs = [ 1057 Document(content="Doc 1", meta={"category": "A", "year": 2023}), 1058 Document(content="Doc 2", meta={"category": "A", "year": 2024}), 1059 Document(content="Doc 3", meta={"category": "B", "year": 2023}), 1060 Document(content="Doc 4", meta={"category": "B", "year": 2024}), 1061 ] 1062 document_store.write_documents(docs) 1063 count = document_store.count_documents_by_filter( # type:ignore[attr-defined] 1064 filters={ 1065 "operator": "AND", 1066 "conditions": [ 1067 {"field": "meta.category", "operator": "==", "value": "B"}, 1068 {"field": "meta.year", "operator": "==", "value": 2023}, 1069 ], 1070 } 1071 ) 1072 assert count == 1 1073 1074 1075 class GetMetadataFieldsInfoTest: 1076 """ 1077 Tests for Document Store get_metadata_fields_info(). 1078 1079 Only mix in for stores that implement get_metadata_fields_info. 1080 """ 1081 1082 @staticmethod 1083 def test_get_metadata_fields_info(document_store: DocumentStore): 1084 """Test get_metadata_fields_info() returns field names and types after writing documents.""" 1085 docs = [ 1086 Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), 1087 Document(content="Doc 2", meta={"category": "B", "status": "inactive", "rating": 0.5}), 1088 ] 1089 document_store.write_documents(docs) 1090 assert document_store.count_documents() == 2 1091 1092 fields_info = document_store.get_metadata_fields_info() # type:ignore[attr-defined] 1093 1094 assert "category" in fields_info 1095 assert "status" in fields_info 1096 assert "priority" in fields_info 1097 assert "rating" in fields_info 1098 for field_name, info in fields_info.items(): # noqa: B007, PERF102 1099 assert isinstance(info, dict) 1100 assert "type" in info 1101 1102 @staticmethod 1103 def test_get_metadata_fields_info_empty_collection(document_store: DocumentStore): 1104 """Test get_metadata_fields_info() on an empty store.""" 1105 assert document_store.count_documents() == 0 1106 1107 fields_info = document_store.get_metadata_fields_info() # type:ignore[attr-defined] 1108 assert fields_info == {} 1109 1110 1111 class GetMetadataFieldMinMaxTest: 1112 """ 1113 Tests for Document Store get_metadata_field_min_max(). 1114 1115 Only mix in for stores that implement get_metadata_field_min_max. 1116 """ 1117 1118 @staticmethod 1119 def test_get_metadata_field_min_max_numeric(document_store: DocumentStore): 1120 """Test get_metadata_field_min_max() with integer field.""" 1121 docs = [ 1122 Document(content="Doc 1", meta={"priority": 1}), 1123 Document(content="Doc 2", meta={"priority": 5}), 1124 Document(content="Doc 3", meta={"priority": 3}), 1125 Document(content="Doc 4", meta={"priority": 10}), 1126 ] 1127 document_store.write_documents(docs) 1128 assert document_store.count_documents() == 4 1129 1130 result = document_store.get_metadata_field_min_max("priority") # type:ignore[attr-defined] 1131 assert result["min"] == 1 1132 assert result["max"] == 10 1133 1134 @staticmethod 1135 def test_get_metadata_field_min_max_float(document_store: DocumentStore): 1136 """Test get_metadata_field_min_max() with float field.""" 1137 docs = [ 1138 Document(content="Doc 1", meta={"rating": 0.6}), 1139 Document(content="Doc 2", meta={"rating": 0.95}), 1140 Document(content="Doc 3", meta={"rating": 0.8}), 1141 ] 1142 document_store.write_documents(docs) 1143 assert document_store.count_documents() == 3 1144 1145 result = document_store.get_metadata_field_min_max("rating") # type:ignore[attr-defined] 1146 1147 assert result["min"] == pytest.approx(0.6) 1148 assert result["max"] == pytest.approx(0.95) 1149 1150 @staticmethod 1151 def test_get_metadata_field_min_max_single_value(document_store: DocumentStore): 1152 """Test get_metadata_field_min_max() when field has only one value.""" 1153 docs = [Document(content="Doc 1", meta={"priority": 42})] 1154 document_store.write_documents(docs) 1155 assert document_store.count_documents() == 1 1156 1157 result = document_store.get_metadata_field_min_max("priority") # type:ignore[attr-defined] 1158 assert result["min"] == 42 1159 assert result["max"] == 42 1160 1161 @staticmethod 1162 def test_get_metadata_field_min_max_empty_collection(document_store: DocumentStore): 1163 """Test get_metadata_field_min_max() on an empty store.""" 1164 assert document_store.count_documents() == 0 1165 1166 result = document_store.get_metadata_field_min_max("priority") # type:ignore[attr-defined] 1167 assert result["min"] is None 1168 assert result["max"] is None 1169 1170 @staticmethod 1171 def test_get_metadata_field_min_max_meta_prefix(document_store: DocumentStore): 1172 """Test get_metadata_field_min_max() with field names that include 'meta.' prefix.""" 1173 docs = [ 1174 Document(content="Doc 1", meta={"priority": 1, "age": 10}), 1175 Document(content="Doc 2", meta={"priority": 5, "age": 20}), 1176 Document(content="Doc 3", meta={"priority": 3, "age": 15}), 1177 Document(content="Doc 4", meta={"priority": 10, "age": 5}), 1178 Document(content="Doc 6", meta={"rating": 10.5}), 1179 Document(content="Doc 7", meta={"rating": 20.3}), 1180 Document(content="Doc 8", meta={"rating": 15.7}), 1181 Document(content="Doc 9", meta={"rating": 5.2}), 1182 ] 1183 document_store.write_documents(docs) 1184 1185 min_max_priority = document_store.get_metadata_field_min_max("meta.priority") # type:ignore[attr-defined] 1186 assert min_max_priority["min"] == 1 1187 assert min_max_priority["max"] == 10 1188 1189 # Test with float values and "meta." prefix 1190 min_max_score = document_store.get_metadata_field_min_max("meta.rating") # type:ignore[attr-defined] 1191 assert min_max_score["min"] == pytest.approx(5.2) 1192 assert min_max_score["max"] == pytest.approx(20.3) 1193 1194 1195 class GetMetadataFieldUniqueValuesTest: 1196 """ 1197 Tests for Document Store get_metadata_field_unique_values(). 1198 1199 Only mix in for stores that implement get_metadata_field_unique_values. 1200 Expects the method to return (values_list, total_count) or (values_list, pagination_key). 1201 """ 1202 1203 @staticmethod 1204 def test_get_metadata_field_unique_values_basic(document_store: DocumentStore): 1205 """Test get_metadata_field_unique_values() returns unique values and total count.""" 1206 docs = [ 1207 Document(content="Doc 1", meta={"category": "A"}), 1208 Document(content="Doc 2", meta={"category": "B"}), 1209 Document(content="Doc 3", meta={"category": "A"}), 1210 Document(content="Doc 4", meta={"category": "C"}), 1211 Document(content="Doc 5", meta={"category": "B"}), 1212 ] 1213 document_store.write_documents(docs) 1214 assert document_store.count_documents() == 5 1215 1216 sig = inspect.signature(document_store.get_metadata_field_unique_values) # type:ignore[attr-defined] 1217 params: dict = {} 1218 if "search_term" in sig.parameters: 1219 params["search_term"] = None 1220 if "from_" in sig.parameters: 1221 params["from_"] = 0 1222 elif "offset" in sig.parameters: 1223 params["offset"] = 0 1224 if "size" in sig.parameters: 1225 params["size"] = 10 1226 elif "limit" in sig.parameters: 1227 params["limit"] = 10 1228 1229 result = document_store.get_metadata_field_unique_values("category", **params) # type:ignore[attr-defined] 1230 1231 values = result[0] if isinstance(result, tuple) else result 1232 assert isinstance(values, list) 1233 assert set(values) == {"A", "B", "C"} 1234 if isinstance(result, tuple) and len(result) >= 2 and isinstance(result[1], int): 1235 assert result[1] == 3 1236 1237 1238 class DocumentStoreBaseTests(CountDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest, WriteDocumentsTest): 1239 @pytest.fixture 1240 def document_store(self) -> DocumentStore: 1241 """Base fixture, to be reimplemented when deriving from DocumentStoreBaseTests""" 1242 raise NotImplementedError() 1243 1244 1245 class DocumentStoreBaseExtendedTests(DocumentStoreBaseTests, DeleteAllTest, DeleteByFilterTest, UpdateByFilterTest): 1246 """ 1247 Extended tests for Document Stores. 1248 1249 Besides the base tests, it also tests for: 1250 - delete_all_documents() 1251 - delete_by_filter() 1252 - update_by_filter() 1253 """ 1254 1255 @pytest.fixture 1256 def document_store(self) -> DocumentStore: 1257 """Base fixture, to be reimplemented when deriving from DocumentStoreBaseTests""" 1258 raise NotImplementedError()