document_store_async.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import inspect
from typing import Any, Protocol

import pytest

from haystack.dataclasses import Document
from haystack.document_stores.errors import DuplicateDocumentError
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.testing.document_store import AssertDocumentsEqualMixin, FilterableDocsFixtureMixin


class AsyncDocumentStore(DocumentStore, Protocol):
    """
    Structural type for Document Stores that expose async counterparts of the core operations.

    Only the four methods declared here are assumed by the type annotations in this module; test classes
    that exercise additional optional methods call them with `# type:ignore[attr-defined]`.
    """

    async def count_documents_async(self) -> int:
        """
        Returns the number of documents stored.
        """
        ...

    async def filter_documents_async(self, filters: dict[str, Any] | None = None) -> list[Document]:
        """
        Returns the documents that match the filters provided.
        """
        ...

    async def write_documents_async(
        self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
    ) -> int:
        """
        Writes Documents into the DocumentStore.
        """
        ...

    async def delete_documents_async(self, document_ids: list[str]) -> None:
        """
        Deletes all documents with matching document_ids from the DocumentStore.
        """
        ...


class DeleteAllAsyncTest:
    """
    Tests for Document Store delete_all_documents_async().

    To use it create a custom test class and override the `document_store` fixture.
    Only mix in for stores that implement delete_all_documents_async.
    """

    @staticmethod
    def _delete_all_supports_recreate(document_store: AsyncDocumentStore) -> str | None:
        """
        Return the recreate parameter name if delete_all_documents_async supports it, else None.

        `recreate_index` takes precedence over `recreate_collection` when both are present.
        """
        sig = inspect.signature(document_store.delete_all_documents_async)  # type:ignore[attr-defined]
        if "recreate_index" in sig.parameters:
            return "recreate_index"
        if "recreate_collection" in sig.parameters:
            return "recreate_collection"
        return None

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_all_documents_async(document_store: AsyncDocumentStore):
        """
        Test delete_all_documents_async() normal behaviour.

        This test verifies that delete_all_documents_async() removes all documents from the store
        and that the store remains functional after deletion.
        """
        docs = [Document(content="first doc", id="1"), Document(content="second doc", id="2")]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        await document_store.delete_all_documents_async()  # type:ignore[attr-defined]
        assert await document_store.count_documents_async() == 0

        new_doc = Document(content="new doc after delete all", id="3")
        await document_store.write_documents_async([new_doc])
        assert await document_store.count_documents_async() == 1

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_all_documents_empty_store_async(document_store: AsyncDocumentStore):
        """
        Test delete_all_documents_async() on an empty store.

        This should not raise an error and should leave the store empty.
        """
        assert await document_store.count_documents_async() == 0
        await document_store.delete_all_documents_async()  # type:ignore[attr-defined]
        # Verify the documented post-condition, not just the absence of an exception.
        assert await document_store.count_documents_async() == 0

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_all_documents_without_recreate_index_async(document_store: AsyncDocumentStore):
        """
        Test delete_all_documents_async() with recreate_index/recreate_collection=False when supported.

        Skipped if the store's delete_all_documents_async does not have recreate_index or recreate_collection.
        """
        param_name = DeleteAllAsyncTest._delete_all_supports_recreate(document_store)
        if param_name is None:
            pytest.skip("delete_all_documents_async has no recreate_index or recreate_collection parameter")

        docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        await document_store.delete_all_documents_async(**{param_name: False})  # type:ignore[attr-defined]
        assert await document_store.count_documents_async() == 0

        new_doc = Document(id="3", content="New document after delete all")
        await document_store.write_documents_async([new_doc])
        assert await document_store.count_documents_async() == 1

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_all_documents_with_recreate_index_async(document_store: AsyncDocumentStore):
        """
        Test delete_all_documents_async() with recreate_index/recreate_collection=True when supported.

        Skipped if the store's delete_all_documents_async does not have recreate_index or recreate_collection.
        """
        param_name = DeleteAllAsyncTest._delete_all_supports_recreate(document_store)
        if param_name is None:
            pytest.skip("delete_all_documents_async has no recreate_index or recreate_collection parameter")

        docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        await document_store.delete_all_documents_async(**{param_name: True})  # type:ignore[attr-defined]
        assert await document_store.count_documents_async() == 0

        # The store must accept writes and reads again after the delete-with-recreate call.
        new_doc = Document(id="3", content="New document after delete all with recreate")
        await document_store.write_documents_async([new_doc])
        assert await document_store.count_documents_async() == 1

        retrieved = await document_store.filter_documents_async()
        assert len(retrieved) == 1
        assert retrieved[0].content == "New document after delete all with recreate"


class CountDocumentsAsyncTest:
    """
    Utility class to test a Document Store `count_documents_async` method.

    To use it create a custom test class and override the `document_store` fixture to return your Document Store.
    Example usage:

    ```python
    class MyDocumentStoreTest(CountDocumentsAsyncTest):
        @pytest.fixture
        def document_store(self):
            return MyDocumentStore()
    ```
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_empty_async(document_store: AsyncDocumentStore):
        """Test count is zero for an empty document store."""
        assert await document_store.count_documents_async() == 0

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_not_empty_async(document_store: AsyncDocumentStore):
        """Test count is greater than zero if the document store contains documents."""
        await document_store.write_documents_async(
            [Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")]
        )
        assert await document_store.count_documents_async() == 3


class CountDocumentsByFilterAsyncTest:
    """
    Tests for Document Store count_documents_by_filter_async().

    Only mix in for stores that implement count_documents_by_filter_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_documents_by_filter_async_simple(document_store: AsyncDocumentStore):
        """Test count_documents_by_filter_async() with a simple equality filter."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
            Document(content="Doc 4", meta={"category": "A", "status": "active"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 4

        count = await document_store.count_documents_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "A"}
        )
        assert count == 3

        count = await document_store.count_documents_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "B"}
        )
        assert count == 1

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_documents_by_filter_async_compound(document_store: AsyncDocumentStore):
        """Test count_documents_by_filter_async() with AND filter."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
            Document(content="Doc 4", meta={"category": "A", "status": "active"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 4

        count = await document_store.count_documents_by_filter_async(  # type:ignore[attr-defined]
            filters={
                "operator": "AND",
                "conditions": [
                    {"field": "meta.category", "operator": "==", "value": "A"},
                    {"field": "meta.status", "operator": "==", "value": "active"},
                ],
            }
        )
        assert count == 2

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_documents_by_filter_async_no_matches(document_store: AsyncDocumentStore):
        """Test count_documents_by_filter_async() when filter matches no documents."""
        docs = [Document(content="Doc 1", meta={"category": "A"}), Document(content="Doc 2", meta={"category": "B"})]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        count = await document_store.count_documents_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "Z"}
        )
        assert count == 0

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_documents_by_filter_async_empty_collection(document_store: AsyncDocumentStore):
        """Test count_documents_by_filter_async() on an empty store."""
        assert await document_store.count_documents_async() == 0

        count = await document_store.count_documents_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "A"}
        )
        assert count == 0


class CountUniqueMetadataByFilterAsyncTest:
    """
    Tests for Document Store count_unique_metadata_by_filter_async().

    Only mix in for stores that implement count_unique_metadata_by_filter_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_unique_metadata_by_filter_async_all_documents(document_store: AsyncDocumentStore):
        """Test count_unique_metadata_by_filter_async() with no filter returns distinct counts for all docs."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
            Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
            Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
            Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 5

        counts = await document_store.count_unique_metadata_by_filter_async(  # type:ignore[attr-defined]
            filters={}, metadata_fields=["category", "status", "priority"]
        )
        assert counts["category"] == 3
        assert counts["status"] == 2
        assert counts["priority"] == 3

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_unique_metadata_by_filter_async_with_filter(document_store: AsyncDocumentStore):
        """Test count_unique_metadata_by_filter_async() with a filter."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
            Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
            Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 4

        counts = await document_store.count_unique_metadata_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "A"}, metadata_fields=["status", "priority"]
        )
        assert counts["status"] == 2
        assert counts["priority"] == 2

    @staticmethod
    @pytest.mark.asyncio
    async def test_count_unique_metadata_by_filter_async_with_multiple_filters(document_store: AsyncDocumentStore):
        """Test counting unique metadata asynchronously with multiple filters."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "year": 2023}),
            Document(content="Doc 2", meta={"category": "A", "year": 2024}),
            Document(content="Doc 3", meta={"category": "B", "year": 2023}),
            Document(content="Doc 4", meta={"category": "B", "year": 2024}),
        ]
        await document_store.write_documents_async(docs)

        counts = await document_store.count_unique_metadata_by_filter_async(  # type:ignore[attr-defined]
            filters={
                "operator": "AND",
                "conditions": [
                    {"field": "meta.category", "operator": "==", "value": "B"},
                    {"field": "meta.year", "operator": "==", "value": 2023},
                ],
            },
            metadata_fields=["category", "year"],
        )
        assert counts == {"category": 1, "year": 1}


class DeleteByFilterAsyncTest:
    """
    Tests for Document Store delete_by_filter_async().

    Only mix in for stores that implement delete_by_filter_async.
    """

    @staticmethod
    def _delete_by_filter_params(document_store: AsyncDocumentStore) -> dict[str, bool]:
        """
        Return optional parameters supported by delete_by_filter_async.

        Yields `{"refresh": True}` when the method's signature has a `refresh` parameter, otherwise `{}`.
        """
        sig = inspect.signature(document_store.delete_by_filter_async)  # type:ignore[attr-defined]
        return {"refresh": True} if "refresh" in sig.parameters else {}

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_by_filter_async(document_store: AsyncDocumentStore):
        """Delete documents matching a filter and verify count and remaining docs."""
        docs = [
            Document(content="Doc 1", meta={"category": "Alpha"}),
            Document(content="Doc 2", meta={"category": "Beta"}),
            Document(content="Doc 3", meta={"category": "Alpha"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 3

        params = DeleteByFilterAsyncTest._delete_by_filter_params(document_store)
        deleted_count = await document_store.delete_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "Alpha"}, **params
        )
        assert deleted_count == 2
        assert await document_store.count_documents_async() == 1

        remaining_docs = await document_store.filter_documents_async()
        assert len(remaining_docs) == 1
        assert remaining_docs[0].meta["category"] == "Beta"

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_by_filter_no_matches_async(document_store: AsyncDocumentStore):
        """Delete with a filter that matches no documents returns 0 and leaves store unchanged."""
        docs = [
            Document(content="Doc 1", meta={"category": "Alpha"}),
            Document(content="Doc 2", meta={"category": "Beta"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        params = DeleteByFilterAsyncTest._delete_by_filter_params(document_store)
        deleted_count = await document_store.delete_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.category", "operator": "==", "value": "Gamma"}, **params
        )
        assert deleted_count == 0
        assert await document_store.count_documents_async() == 2

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_by_filter_advanced_filters_async(document_store: AsyncDocumentStore):
        """Delete with AND/OR filter combinations and verify remaining documents."""
        docs = [
            Document(content="Doc 1", meta={"category": "Alpha", "year": 2023, "status": "draft"}),
            Document(content="Doc 2", meta={"category": "Alpha", "year": 2024, "status": "published"}),
            Document(content="Doc 3", meta={"category": "Beta", "year": 2023, "status": "draft"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 3

        params = DeleteByFilterAsyncTest._delete_by_filter_params(document_store)
        # AND: only "Doc 1" is both Alpha and 2023.
        deleted_count = await document_store.delete_by_filter_async(  # type:ignore[attr-defined]
            filters={
                "operator": "AND",
                "conditions": [
                    {"field": "meta.category", "operator": "==", "value": "Alpha"},
                    {"field": "meta.year", "operator": "==", "value": 2023},
                ],
            },
            **params,
        )
        assert deleted_count == 1
        assert await document_store.count_documents_async() == 2

        # OR: the two remaining docs match ("Doc 3" is Beta, "Doc 2" is published).
        deleted_count = await document_store.delete_by_filter_async(  # type:ignore[attr-defined]
            filters={
                "operator": "OR",
                "conditions": [
                    {"field": "meta.category", "operator": "==", "value": "Beta"},
                    {"field": "meta.status", "operator": "==", "value": "published"},
                ],
            },
            **params,
        )
        assert deleted_count == 2
        assert await document_store.count_documents_async() == 0


class UpdateByFilterAsyncTest:
    """
    Tests for Document Store update_by_filter_async().

    Only mix in for stores that implement update_by_filter_async.
    The test requests a `filterable_docs` fixture, which this class does not define itself; the concrete
    test class must make it available (presumably via `FilterableDocsFixtureMixin` - confirm against
    haystack.testing.document_store).
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_update_by_filter_async(document_store: AsyncDocumentStore, filterable_docs: list[Document]):
        """Update documents matching a filter asynchronously and verify count and meta changes."""
        await document_store.write_documents_async(filterable_docs)
        expected_count = len([d for d in filterable_docs if d.meta.get("chapter") == "intro"])
        assert await document_store.count_documents_async() == len(filterable_docs)

        # Pass refresh=True only to stores whose update_by_filter_async accepts it.
        sig = inspect.signature(document_store.update_by_filter_async)  # type:ignore[attr-defined]
        params = {"refresh": True} if "refresh" in sig.parameters else {}
        updated_count = await document_store.update_by_filter_async(  # type:ignore[attr-defined]
            filters={"field": "meta.chapter", "operator": "==", "value": "intro"}, meta={"updated": True}, **params
        )
        assert updated_count == expected_count

        updated_docs = await document_store.filter_documents_async(
            filters={"field": "meta.updated", "operator": "==", "value": True}
        )
        assert len(updated_docs) == expected_count
        for doc in updated_docs:
            assert doc.meta["chapter"] == "intro"
            assert doc.meta["updated"] is True

        not_updated_docs = await document_store.filter_documents_async(
            filters={"field": "meta.chapter", "operator": "==", "value": "abstract"}
        )
        for doc in not_updated_docs:
            assert doc.meta.get("updated") is not True


class WriteDocumentsAsyncTest(AssertDocumentsEqualMixin):
    """
    Utility class to test a Document Store `write_documents_async` method.

    To use it create a custom test class and override the `document_store` fixture to return your Document Store.
    The Document Store `filter_documents_async` method must be at least partly implemented to return all stored
    Documents for these tests to work correctly.
    Example usage:

    ```python
    class MyDocumentStoreTest(WriteDocumentsAsyncTest):
        @pytest.fixture
        def document_store(self):
            return MyDocumentStore()
    ```
    """

    @pytest.mark.asyncio
    async def test_write_documents_async(self, document_store: AsyncDocumentStore):
        """
        Test write_documents_async() default behaviour.

        Always raises NotImplementedError here; concrete test classes must override this test.
        """
        msg = (
            "Default write_documents_async() behaviour depends on the Document Store implementation, "
            "as we don't enforce a default behaviour when no policy is set. "
            "Override this test in your custom test class."
        )
        raise NotImplementedError(msg)

    @pytest.mark.asyncio
    async def test_write_documents_duplicate_fail_async(self, document_store: AsyncDocumentStore):
        """Test write_documents_async() fails when writing documents with same id and `DuplicatePolicy.FAIL`."""
        doc = Document(content="test doc")
        assert await document_store.write_documents_async([doc], policy=DuplicatePolicy.FAIL) == 1
        with pytest.raises(DuplicateDocumentError):
            await document_store.write_documents_async(documents=[doc], policy=DuplicatePolicy.FAIL)
        self.assert_documents_are_equal(await document_store.filter_documents_async(), [doc])

    @staticmethod
    @pytest.mark.asyncio
    async def test_write_documents_duplicate_skip_async(document_store: AsyncDocumentStore):
        """Test write_documents_async() skips writing when using DuplicatePolicy.SKIP."""
        doc = Document(content="test doc")
        assert await document_store.write_documents_async([doc], policy=DuplicatePolicy.SKIP) == 1
        assert await document_store.write_documents_async(documents=[doc], policy=DuplicatePolicy.SKIP) == 0

    @pytest.mark.asyncio
    async def test_write_documents_duplicate_overwrite_async(self, document_store: AsyncDocumentStore):
        """Test write_documents_async() overwrites when using DuplicatePolicy.OVERWRITE."""
        # Both documents share id "1", so the second write must replace the first.
        doc1 = Document(id="1", content="test doc 1")
        doc2 = Document(id="1", content="test doc 2")

        assert await document_store.write_documents_async([doc2], policy=DuplicatePolicy.OVERWRITE) == 1
        self.assert_documents_are_equal(await document_store.filter_documents_async(), [doc2])
        assert await document_store.write_documents_async(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) == 1
        self.assert_documents_are_equal(await document_store.filter_documents_async(), [doc1])

    @staticmethod
    @pytest.mark.asyncio
    async def test_write_documents_invalid_input_async(document_store: AsyncDocumentStore):
        """Test write_documents_async() fails when providing unexpected input."""
        with pytest.raises(ValueError):
            await document_store.write_documents_async(["not a document for sure"])  # type: ignore
        with pytest.raises(ValueError):
            await document_store.write_documents_async("not a list actually")  # type: ignore


class DeleteDocumentsAsyncTest:
    """
    Utility class to test a Document Store `delete_documents_async` method.

    To use it create a custom test class and override the `document_store` fixture to return your Document Store.
    The Document Store `write_documents_async` and `count_documents_async` methods must be implemented for these tests
    to work correctly.
    Example usage:

    ```python
    class MyDocumentStoreTest(DeleteDocumentsAsyncTest):
        @pytest.fixture
        def document_store(self):
            return MyDocumentStore()
    ```
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_documents_async(document_store: AsyncDocumentStore):
        """Test delete_documents_async() normal behaviour."""
        doc = Document(content="test doc")
        await document_store.write_documents_async([doc])
        assert await document_store.count_documents_async() == 1

        await document_store.delete_documents_async([doc.id])
        assert await document_store.count_documents_async() == 0

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_documents_empty_document_store_async(document_store: AsyncDocumentStore):
        """Test delete_documents_async() doesn't fail when called using an empty Document Store."""
        await document_store.delete_documents_async(["non_existing_id"])

    @staticmethod
    @pytest.mark.asyncio
    async def test_delete_documents_non_existing_document_async(document_store: AsyncDocumentStore):
        """Test delete_documents_async() doesn't delete any Document when called with non-existing id."""
        doc = Document(content="test doc")
        await document_store.write_documents_async([doc])
        assert await document_store.count_documents_async() == 1

        await document_store.delete_documents_async(["non_existing_id"])

        # No Document has been deleted
        assert await document_store.count_documents_async() == 1


class GetMetadataFieldsInfoAsyncTest:
    """
    Tests for Document Store get_metadata_fields_info_async().

    Only mix in for stores that implement get_metadata_fields_info_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_fields_info_async(document_store: AsyncDocumentStore):
        """Test get_metadata_fields_info_async() returns field names and types after writing documents."""
        docs = [
            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
            Document(content="Doc 2", meta={"category": "B", "status": "inactive", "rating": 0.5}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 2

        fields_info = await document_store.get_metadata_fields_info_async()  # type:ignore[attr-defined]

        assert "category" in fields_info
        assert "status" in fields_info
        assert "priority" in fields_info
        assert "rating" in fields_info
        for info in fields_info.values():
            assert isinstance(info, dict)
            assert "type" in info

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_fields_info_empty_collection_async(document_store: AsyncDocumentStore):
        """Test get_metadata_fields_info_async() on an empty store."""
        assert await document_store.count_documents_async() == 0

        fields_info = await document_store.get_metadata_fields_info_async()  # type:ignore[attr-defined]
        assert fields_info == {}


class GetMetadataFieldMinMaxAsyncTest:
    """
    Tests for Document Store get_metadata_field_min_max_async().

    Only mix in for stores that implement get_metadata_field_min_max_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_numeric_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_min_max_async() with integer field."""
        docs = [
            Document(content="Doc 1", meta={"priority": 1}),
            Document(content="Doc 2", meta={"priority": 5}),
            Document(content="Doc 3", meta={"priority": 3}),
            Document(content="Doc 4", meta={"priority": 10}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 4

        result = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert result["min"] == 1
        assert result["max"] == 10

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_float_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_min_max_async() with float field."""
        docs = [
            Document(content="Doc 1", meta={"rating": 0.6}),
            Document(content="Doc 2", meta={"rating": 0.95}),
            Document(content="Doc 3", meta={"rating": 0.8}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 3

        result = await document_store.get_metadata_field_min_max_async("rating")  # type:ignore[attr-defined]

        # Compare approximately: the store may round-trip floats through its own representation.
        assert result["min"] == pytest.approx(0.6)
        assert result["max"] == pytest.approx(0.95)

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_single_value_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_min_max_async() when field has only one value."""
        docs = [Document(content="Doc 1", meta={"priority": 42})]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 1

        result = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert result["min"] == 42
        assert result["max"] == 42

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_empty_collection_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_min_max_async() on an empty store."""
        assert await document_store.count_documents_async() == 0

        result = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert result["min"] is None
        assert result["max"] is None

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_meta_prefix_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_min_max_async() with field names that include 'meta.' prefix."""
        docs = [
            Document(content="Doc 1", meta={"priority": 1, "age": 10}),
            Document(content="Doc 2", meta={"priority": 5, "age": 20}),
            Document(content="Doc 3", meta={"priority": 3, "age": 15}),
            Document(content="Doc 4", meta={"priority": 10, "age": 5}),
            Document(content="Doc 6", meta={"rating": 10.5}),
            Document(content="Doc 7", meta={"rating": 20.3}),
            Document(content="Doc 8", meta={"rating": 15.7}),
            Document(content="Doc 9", meta={"rating": 5.2}),
        ]
        await document_store.write_documents_async(docs)

        min_max_priority = await document_store.get_metadata_field_min_max_async("meta.priority")  # type:ignore[attr-defined]
        assert min_max_priority["min"] == 1
        assert min_max_priority["max"] == 10

        # Test with float values and "meta." prefix
        min_max_rating = await document_store.get_metadata_field_min_max_async("meta.rating")  # type:ignore[attr-defined]
        assert min_max_rating["min"] == pytest.approx(5.2)
        assert min_max_rating["max"] == pytest.approx(20.3)


class GetMetadataFieldUniqueValuesAsyncTest:
    """
    Tests for Document Store get_metadata_field_unique_values_async().

    Only mix in for stores that implement get_metadata_field_unique_values_async.
    Expects the method to return (values_list, total_count) or (values_list, pagination_key).
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_unique_values_basic_async(document_store: AsyncDocumentStore):
        """Test get_metadata_field_unique_values_async() returns unique values and total count."""
        docs = [
            Document(content="Doc 1", meta={"category": "A"}),
            Document(content="Doc 2", meta={"category": "B"}),
            Document(content="Doc 3", meta={"category": "A"}),
            Document(content="Doc 4", meta={"category": "C"}),
            Document(content="Doc 5", meta={"category": "B"}),
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 5

        # Stores name their search/pagination parameters differently; pass only the ones the signature declares.
        sig = inspect.signature(document_store.get_metadata_field_unique_values_async)  # type:ignore[attr-defined]
        params: dict[str, Any] = {}
        if "search_term" in sig.parameters:
            params["search_term"] = None
        if "from_" in sig.parameters:
            params["from_"] = 0
        elif "offset" in sig.parameters:
            params["offset"] = 0
        if "size" in sig.parameters:
            params["size"] = 10
        elif "limit" in sig.parameters:
            params["limit"] = 10

        result = await document_store.get_metadata_field_unique_values_async("category", **params)  # type:ignore[attr-defined]

        # Accept either a bare list or a tuple whose first element is the list of values.
        values = result[0] if isinstance(result, tuple) else result
        assert isinstance(values, list)
        assert set(values) == {"A", "B", "C"}
        # Only check the second element when it is an int total; it may instead be a pagination key.
        if isinstance(result, tuple) and len(result) >= 2 and isinstance(result[1], int):
            assert result[1] == 3


class FilterDocumentsAsyncTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin):
    """
    Smoke tests for the async filter_documents_async() path.

    These tests verify that the async plumbing works correctly with no filters,
    a simple equality filter, and a compound AND filter. Full filter logic correctness
    is covered by FilterDocumentsTest — the sync and async paths share the same
    filter translation layer, so only the async dispatch needs smoke-testing here.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_no_filters_async(document_store: AsyncDocumentStore):
        """Verify the async path returns all documents when no filter is applied."""
        docs = [Document(content="first doc"), Document(content="second doc"), Document(content="third doc")]
        await document_store.write_documents_async(docs)
        result = await document_store.filter_documents_async()
        assert len(result) == 3

    @pytest.mark.asyncio
    async def test_filter_simple_async(self, document_store: AsyncDocumentStore, filterable_docs: list[Document]):
        """One equality filter — confirms async plumbing works with a filter."""
        await document_store.write_documents_async(filterable_docs)
        result = await document_store.filter_documents_async(
            filters={"field": "meta.number", "operator": "==", "value": 2}
        )
        self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 2])

    @pytest.mark.asyncio
    async def test_filter_compound_async(self, document_store: AsyncDocumentStore, filterable_docs: list[Document]):
        """One AND filter — verifies compound filters aren't broken by the async path."""
        await document_store.write_documents_async(filterable_docs)
        result = await document_store.filter_documents_async(
            filters={
                "operator": "AND",
                "conditions": [
                    {"field": "meta.number", "operator": "==", "value": 2},
                    {"field": "meta.name", "operator": "==", "value": "name_0"},
                ],
            }
        )
        self.assert_documents_are_equal(
            result, [d for d in filterable_docs if d.meta.get("number") == 2 and d.meta.get("name") == "name_0"]
        )