# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack import Document, component, default_to_dict


@component
class DocumentMAPEvaluator:
    """
    A Mean Average Precision (MAP) evaluator for documents.

    Evaluator that calculates the mean average precision of the retrieved documents, a metric
    that measures how highly the relevant retrieved documents are ranked.
    Each question can have multiple ground truth documents and multiple retrieved documents.

    `DocumentMAPEvaluator` doesn't normalize its inputs; use the `DocumentCleaner` component
    to clean and normalize the documents before passing them to this evaluator.

    Usage example:
    ```python
    from haystack import Document
    from haystack.components.evaluators import DocumentMAPEvaluator

    evaluator = DocumentMAPEvaluator()
    result = evaluator.run(
        ground_truth_documents=[
            [Document(content="France")],
            [Document(content="9th century"), Document(content="9th")],
        ],
        retrieved_documents=[
            [Document(content="France")],
            [Document(content="9th century"), Document(content="10th century"), Document(content="9th")],
        ],
    )

    print(result["individual_scores"])
    # [1.0, 0.8333333333333333]
    print(result["score"])
    # 0.9166666666666666
    ```
    """

    def __init__(self, document_comparison_field: str = "content") -> None:
        """
        Create a DocumentMAPEvaluator component.

        :param document_comparison_field:
            The Document field to use for comparison. Possible options:
            - `"content"`: uses `doc.content`
            - `"id"`: uses `doc.id`
            - A `meta.` prefix followed by a key name: uses `doc.meta["<key>"]`
              (e.g. `"meta.file_id"`, `"meta.page_number"`).
              Nested keys are supported (e.g. `"meta.source.url"`).
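
        For example, to compare documents by a hypothetical `file_id` metadata key:

        ```python
        evaluator = DocumentMAPEvaluator(document_comparison_field="meta.file_id")
        ```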
        """
        self.document_comparison_field = document_comparison_field

    def _get_comparison_value(self, doc: Document) -> Any:
        """
        Extract the comparison value from a document based on the configured field.

        Returns `None` when a `meta.` key is missing from the document.
        """
        if self.document_comparison_field == "content":
            return doc.content
        if self.document_comparison_field == "id":
            return doc.id
        if self.document_comparison_field.startswith("meta."):
            # Walk the dot-separated key path into the (possibly nested) meta dict.
            parts = self.document_comparison_field[5:].split(".")
            value = doc.meta
            for part in parts:
                if not isinstance(value, dict) or part not in value:
                    return None
                value = value[part]
            return value
        msg = (
            f"Unsupported document_comparison_field: '{self.document_comparison_field}'. "
            "Use 'content', 'id', or 'meta.<key>'."
        )
        raise ValueError(msg)

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
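
            For the default configuration, the serialized form produced by Haystack's
            `default_to_dict` looks like this (shown for illustration):

            ```python
            {
                "type": "haystack.components.evaluators.document_map.DocumentMAPEvaluator",
                "init_parameters": {"document_comparison_field": "content"},
            }
            ```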
        """
        return default_to_dict(self, document_comparison_field=self.document_comparison_field)

    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
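    # For one question, average precision (AP) is computed as
    #   AP = (1 / R) * sum over each relevant rank k of (relevant docs in top k) / k
    # with 1-based ranks, where R is the number of ground-truth documents found among
    # the retrieved ones. Worked example from the class docstring: ground truth
    # {"9th century", "9th"}, retrieved ["9th century", "10th century", "9th"]
    # -> hits at ranks 1 and 3 -> AP = (1/1 + 2/3) / 2 = 0.8333...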
    @component.output_types(score=float, individual_scores=list[float])
    def run(
        self, ground_truth_documents: list[list[Document]], retrieved_documents: list[list[Document]]
    ) -> dict[str, Any]:
        """
        Run the DocumentMAPEvaluator on the given inputs.

        Both lists must have the same length.

        :param ground_truth_documents:
            A list of expected documents for each question.
        :param retrieved_documents:
            A list of retrieved documents for each question.
        :returns:
            A dictionary with the following outputs:
            - `score` - The average of the calculated scores.
            - `individual_scores` - A list of scores from 0.0 to 1.0, one per question, indicating
                how highly the relevant retrieved documents are ranked.
        """
        if len(ground_truth_documents) != len(retrieved_documents):
            msg = "The length of ground_truth_documents and retrieved_documents must be the same."
            raise ValueError(msg)

        individual_scores = []

        for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents, strict=True):
            average_precision = 0.0
            average_precision_numerator = 0.0
            relevant_documents = 0

            # Documents whose comparison value is None cannot be matched and are skipped.
            ground_truth_values = [val for doc in ground_truth if (val := self._get_comparison_value(doc)) is not None]
            for rank, retrieved_document in enumerate(retrieved):
                retrieved_value = self._get_comparison_value(retrieved_document)
                if retrieved_value is None:
                    continue

                if retrieved_value in ground_truth_values:
                    relevant_documents += 1
                    # Precision at this rank: relevant hits so far / documents seen so far.
                    average_precision_numerator += relevant_documents / (rank + 1)
            if relevant_documents > 0:
                average_precision = average_precision_numerator / relevant_documents
            individual_scores.append(average_precision)

        score = sum(individual_scores) / len(ground_truth_documents)
        return {"score": score, "individual_scores": individual_scores}
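

if __name__ == "__main__":
    # Minimal illustrative sketch, not part of the component's API: evaluate by a
    # nested metadata key. The "source"/"url" meta structure here is hypothetical.
    evaluator = DocumentMAPEvaluator(document_comparison_field="meta.source.url")
    result = evaluator.run(
        ground_truth_documents=[
            [Document(content="France", meta={"source": {"url": "https://example.com/france"}})],
        ],
        retrieved_documents=[
            [
                Document(content="Spain", meta={"source": {"url": "https://example.com/spain"}}),
                Document(content="France!", meta={"source": {"url": "https://example.com/france"}}),
            ],
        ],
    )
    print(result)  # {'score': 0.5, 'individual_scores': [0.5]}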