/ haystack / testing / factory.py
factory.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  from typing import Any
  6  
  7  from haystack.core.component import Component, component
  8  from haystack.core.serialization import default_from_dict, default_to_dict
  9  from haystack.dataclasses import Document
 10  from haystack.document_stores.types import DocumentStore, DuplicatePolicy
 11  
 12  
 13  def document_store_class(
 14      name: str,
 15      documents: list[Document] | None = None,
 16      documents_count: int | None = None,
 17      bases: tuple[type, ...] | None = None,
 18      extra_fields: dict[str, Any] | None = None,
 19  ) -> type[DocumentStore]:
 20      """
 21      Utility function to create a DocumentStore class with the given name and list of documents.
 22  
 23      If `documents` is set but `documents_count` is not, `documents_count` will be the length
 24      of `documents`.
 25      If both are set explicitly they don't influence each other.
 26  
 27      `write_documents()` and `delete_documents()` are no-op.
 28      You can override them using `extra_fields`.
 29  
 30      ### Usage
 31  
 32      Create a DocumentStore class that returns no documents:
 33      ```python
 34      MyFakeStore = document_store_class("MyFakeComponent")
 35      document_store = MyFakeStore()
 36      assert document_store.documents_count() == 0
 37      assert document_store.filter_documents() == []
 38      ```
 39  
 40      Create a DocumentStore class that returns a single document:
 41      ```python
 42      doc = Document(id="fake_id", content="Fake content")
 43      MyFakeStore = document_store_class("MyFakeComponent", documents=[doc])
 44      document_store = MyFakeStore()
 45      assert document_store.documents_count() == 1
 46      assert document_store.filter_documents() == [doc]
 47      ```
 48  
 49      Create a DocumentStore class that returns no document but returns a custom count:
 50      ```python
 51      MyFakeStore = document_store_class("MyFakeComponent", documents_count=100)
 52      document_store = MyFakeStore()
 53      assert document_store.documents_count() == 100
 54      assert document_store.filter_documents() == []
 55      ```
 56  
 57      Create a DocumentStore class that returns a document and a custom count:
 58      ```python
 59      doc = Document(id="fake_id", content="Fake content")
 60      MyFakeStore = document_store_class("MyFakeComponent", documents=[doc], documents_count=100)
 61      document_store = MyFakeStore()
 62      assert document_store.documents_count() == 100
 63      assert document_store.filter_documents() == [doc]
 64      ```
 65  
 66      Create a DocumentStore class with a custom base class:
 67      ```python
 68      MyFakeStore = document_store_class(
 69          "MyFakeStore",
 70          bases=(MyBaseClass,)
 71      )
 72      document_store = MyFakeStore()
 73      assert isinstance(store, MyBaseClass)
 74      ```
 75  
 76      Create a DocumentStore class with an extra field `my_field`:
 77      ```python
 78      MyFakeStore = document_store_class(
 79          "MyFakeStore",
 80          extra_fields={"my_field": 10}
 81      )
 82      document_store = MyFakeStore()
 83      assert document_store.my_field == 10
 84      ```
 85      """
 86      if documents is not None and documents_count is None:
 87          documents_count = len(documents)
 88      elif documents_count is None:
 89          documents_count = 0
 90  
 91      def count_documents(self) -> int | None:  # noqa: ARG001
 92          return documents_count
 93  
 94      def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Document]:  # noqa: ARG001
 95          if documents is not None:
 96              return documents
 97          return []
 98  
 99      def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None:  # noqa: ARG001
100          return
101  
102      def delete_documents(self, document_ids: list[str]) -> None:  # noqa: ARG001
103          return
104  
105      def to_dict(self) -> dict[str, Any]:
106          return default_to_dict(self)
107  
108      fields = {
109          "count_documents": count_documents,
110          "filter_documents": filter_documents,
111          "write_documents": write_documents,
112          "delete_documents": delete_documents,
113          "to_dict": to_dict,
114          "from_dict": classmethod(default_from_dict),
115      }
116  
117      if extra_fields is not None:
118          fields = {**fields, **extra_fields}
119  
120      if bases is None:
121          bases = (object,)
122  
123      return type(name, bases, fields)
124  
125  
def component_class(
    name: str,
    input_types: dict[str, Any] | None = None,
    output_types: dict[str, Any] | None = None,
    output: dict[str, Any] | None = None,
    bases: tuple[type, ...] | None = None,
    extra_fields: dict[str, Any] | None = None,
) -> type[Component]:
    """
    Utility function to create a Component class with the given name and input and output types.

    If `output` is set but `output_types` is not, `output_types` will be set to the types of the values in `output`.
    Though if `output_types` is set but `output` is not the component's `run` method will return a dictionary
    of the same keys as `output_types` all with a value of None.

    The generated `__init__` takes no arguments and only registers the input and output types on the instance;
    it doesn't call the initializers of `bases`. Provide your own `__init__` through `extra_fields` if needed.

    ### Usage

    Create a component class with default input and output types:
    ```python
    MyFakeComponent = component_class("MyFakeComponent")
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": None}
    ```

    Create a component class with a "value" input of type `int` and with a "value" output of `10`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        input_types={"value": int},
        output={"value": 10}
    )
    component = MyFakeComponent()
    output = component.run(value=1)
    assert output == {"value": 10}
    ```

    Create a component class with a custom base class:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        bases=(MyBaseClass,)
    )
    component = MyFakeComponent()
    assert isinstance(component, MyBaseClass)
    ```

    Create a component class with an extra field `my_field`:
    ```python
    MyFakeComponent = component_class(
        "MyFakeComponent",
        extra_fields={"my_field": 10}
    )
    component = MyFakeComponent()
    assert component.my_field == 10
    ```

    :param name: Name of the component class.
    :param input_types: Dictionary of string and type that defines the inputs of the component,
        if set to None created component will expect a single input "value" of Any type.
        Defaults to None.
    :param output_types: Dictionary of string and type that defines the outputs of the component,
        if set to None and `output` is set, the types are taken from the values of `output`;
        if both are None created component will return a single output "value" of NoneType and None value.
        Defaults to None.
    :param output: Actual output dictionary returned by the created component run,
        if set to None it will return a dictionary of string and None values.
        Keys will be the same as the keys of output_types. Defaults to None.
    :param bases: Base classes for this component, if set to None only base is object. Defaults to None.
    :param extra_fields: Extra fields for the Component, merged after the default ones so that they
        can replace `__init__`, `run`, `to_dict` or `from_dict`. Defaults to None.

    :return: A class definition that can be used as a component.
    """
    # Resolve the defaults into dedicated locals that the closures below capture.
    resolved_input_types: dict[str, Any] = {"value": Any} if input_types is None else input_types

    if output_types is not None:
        resolved_output_types: dict[str, Any] = output_types
    elif output is not None:
        # Infer the declared output types from the concrete values that `run` will return.
        resolved_output_types = {key: type(value) for key, value in output.items()}
    else:
        resolved_output_types = {"value": type(None)}

    def init(self):
        component.set_input_types(self, **resolved_input_types)
        component.set_output_types(self, **resolved_output_types)

    # `self` and `**kwargs` are both necessary to correctly define run,
    # but ruff doesn't like that we don't use them.
    # It's fine ignoring the warning here.
    def run(self, **kwargs):  # noqa: ARG001
        if output is not None:
            # Return a shallow copy so that a consumer mutating the result can't alter what later runs return.
            return dict(output)
        return dict.fromkeys(resolved_output_types)

    def to_dict(self):
        return default_to_dict(self)

    def from_dict(cls, data: dict[str, Any]):
        return default_from_dict(cls, data)

    fields: dict[str, Any] = {
        "__init__": init,
        "run": run,
        "to_dict": to_dict,
        "from_dict": classmethod(from_dict),
    }
    # Extra fields are applied last so they take precedence over the defaults above.
    if extra_fields is not None:
        fields = {**fields, **extra_fields}

    if bases is None:
        bases = (object,)

    cls = type(name, bases, fields)
    return component(cls)