factory.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack.core.component import Component, component
from haystack.core.serialization import default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.types import DocumentStore, DuplicatePolicy


def document_store_class(
    name: str,
    documents: list[Document] | None = None,
    documents_count: int | None = None,
    bases: tuple[type, ...] | None = None,
    extra_fields: dict[str, Any] | None = None,
) -> type[DocumentStore]:
    """
    Utility function to create a DocumentStore class with the given name and list of documents.

    If `documents` is set but `documents_count` is not, `documents_count` will be the length
    of `documents`.
    If both are set explicitly they don't influence each other.

    `write_documents()` and `delete_documents()` are no-op.
    You can override them using `extra_fields`.

    ### Usage

    Create a DocumentStore class that returns no documents:
    ```python
    MyFakeStore = document_store_class("MyFakeStore")
    document_store = MyFakeStore()
    assert document_store.count_documents() == 0
    assert document_store.filter_documents() == []
    ```

    Create a DocumentStore class that returns a single document:
    ```python
    doc = Document(id="fake_id", content="Fake content")
    MyFakeStore = document_store_class("MyFakeStore", documents=[doc])
    document_store = MyFakeStore()
    assert document_store.count_documents() == 1
    assert document_store.filter_documents() == [doc]
    ```

    Create a DocumentStore class that returns no document but returns a custom count:
    ```python
    MyFakeStore = document_store_class("MyFakeStore", documents_count=100)
    document_store = MyFakeStore()
    assert document_store.count_documents() == 100
    assert document_store.filter_documents() == []
    ```

    Create a DocumentStore class that returns a document and a custom count:
    ```python
    doc = Document(id="fake_id", content="Fake content")
    MyFakeStore = document_store_class("MyFakeStore", documents=[doc], documents_count=100)
    document_store = MyFakeStore()
    assert document_store.count_documents() == 100
    assert document_store.filter_documents() == [doc]
    ```

    Create a DocumentStore class with a custom base class:
    ```python
    MyFakeStore = document_store_class(
        "MyFakeStore",
        bases=(MyBaseClass,)
    )
    document_store = MyFakeStore()
    assert isinstance(document_store, MyBaseClass)
    ```

    Create a DocumentStore class with an extra field `my_field`:
    ```python
    MyFakeStore = document_store_class(
        "MyFakeStore",
        extra_fields={"my_field": 10}
    )
    document_store = MyFakeStore()
    assert document_store.my_field == 10
    ```

    Args:
        name: Name of the DocumentStore class.
        documents: Documents returned by `filter_documents()`, whatever filters are passed.
            The very same list object is returned on every call. If set to None,
            `filter_documents()` returns an empty list. Defaults to None.
        documents_count: Value returned by `count_documents()`. If set to None it is the length
            of `documents`, or 0 when `documents` is None too. Defaults to None.
        bases: Base classes for this DocumentStore, if set to None only base is object.
            Defaults to None.
        extra_fields: Extra fields for the DocumentStore. Entries named like one of the generated
            methods replace that method. Defaults to None.

    :return: A class definition that can be used as a DocumentStore.
    """
    if documents is not None and documents_count is None:
        documents_count = len(documents)
    elif documents_count is None:
        documents_count = 0

    # The methods below are closures: every instance of the generated class
    # shares the `documents` and `documents_count` captured here.
    def count_documents(self) -> int | None:  # noqa: ARG001
        return documents_count

    def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Document]:  # noqa: ARG001
        if documents is not None:
            return documents
        return []

    def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:  # noqa: ARG001
        return

    def delete_documents(self, document_ids: list[str]) -> None:  # noqa: ARG001
        return

    def to_dict(self) -> dict[str, Any]:
        return default_to_dict(self)

    fields: dict[str, Any] = {
        "count_documents": count_documents,
        "filter_documents": filter_documents,
        "write_documents": write_documents,
        "delete_documents": delete_documents,
        "to_dict": to_dict,
        "from_dict": classmethod(default_from_dict),
    }

    # Caller-supplied fields win over the generated defaults.
    if extra_fields is not None:
        fields.update(extra_fields)

    if bases is None:
        bases = (object,)

    return type(name, bases, fields)


def component_class(
    name: str,
    input_types: dict[str, Any] | None = None,
    output_types: dict[str, Any] | None = None,
    output: dict[str, Any] | None = None,
    bases: tuple[type, ...] | None = None,
    extra_fields: dict[str, Any] | None = None,
) -> type[Component]:
    """
    Utility function to create a Component class with the given name and input and output types.

    If `output` is set but `output_types` is not, `output_types` will be set to the types of the values in `output`.
    Though if `output_types` is set but `output` is not the component's `run` method will return a dictionary
    of the same keys as `output_types` all with a value of None.

    ### Usage

    Create a component class with default input and output types:
    ```python
    MyFakeComponent = component_class("MyFakeComponent")
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": None}
    ```

    Create a component class with a "value" input of type `int` and with a "value" output of `10`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        input_types={"value": int},
        output={"value": 10}
    )
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": 10}
    ```

    Create a component class with a custom base class:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        bases=(MyBaseClass,)
    )
    component = MyFakeComponent()
    assert isinstance(component, MyBaseClass)
    ```

    Create a component class with an extra field `my_field`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        extra_fields={"my_field": 10}
    )
    component = MyFakeComponent()
    assert component.my_field == 10
    ```

    Args:
        name: Name of the component class
        input_types: Dictionary of string and type that defines the inputs of the component,
            if set to None created component will expect a single input "value" of Any type.
            Defaults to None.
        output_types: Dictionary of string and type that defines the outputs of the component,
            if set to None created component will return a single output "value" of NoneType and None value.
            Defaults to None.
        output: Actual output dictionary returned by the created component run,
            if set to None it will return a dictionary of string and None values.
            Keys will be the same as the keys of output_types. Defaults to None.
        bases: Base classes for this component, if set to None only base is object. Defaults to None.
        extra_fields: Extra fields for the Component, defaults to None.

    :return: A class definition that can be used as a component.
    """
    if input_types is None:
        input_types = {"value": Any}
    if output_types is None and output is not None:
        output_types = {key: type(value) for key, value in output.items()}
    elif output_types is None:
        output_types = {"value": type(None)}

    def init(self):
        component.set_input_types(self, **input_types)
        component.set_output_types(self, **output_types)

    # Both arguments are necessary to correctly define
    # run but ruff doesn't like that we don't use them.
    # It's fine ignoring the warning here.
    def run(self, **kwargs):  # noqa: ARG001
        if output is not None:
            return output
        # No explicit output: one None value per declared output name.
        return dict.fromkeys(output_types)

    def to_dict(self):
        return default_to_dict(self)

    def from_dict(cls, data: dict[str, Any]):
        return default_from_dict(cls, data)

    fields: dict[str, Any] = {
        "__init__": init,
        "run": run,
        "to_dict": to_dict,
        "from_dict": classmethod(from_dict),
    }

    # Caller-supplied fields win over the generated defaults.
    if extra_fields is not None:
        fields.update(extra_fields)

    if bases is None:
        bases = (object,)

    cls = type(name, bases, fields)
    return component(cls)