document_map.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack import Document, component, default_to_dict


@component
class DocumentMAPEvaluator:
    """
    A Mean Average Precision (MAP) evaluator for documents.

    Evaluator that calculates the mean average precision of the retrieved documents, a metric
    that measures how highly the relevant retrieved documents are ranked.
    Each question can have multiple ground truth documents and multiple retrieved documents.

    `DocumentMAPEvaluator` doesn't normalize its inputs; use the `DocumentCleaner` component
    to clean and normalize the documents before passing them to this evaluator.

    Usage example:
    ```python
    from haystack import Document
    from haystack.components.evaluators import DocumentMAPEvaluator

    evaluator = DocumentMAPEvaluator()
    result = evaluator.run(
        ground_truth_documents=[
            [Document(content="France")],
            [Document(content="9th century"), Document(content="9th")],
        ],
        retrieved_documents=[
            [Document(content="France")],
            [Document(content="9th century"), Document(content="10th century"), Document(content="9th")],
        ],
    )

    print(result["individual_scores"])
    # [1.0, 0.8333333333333333]
    print(result["score"])
    # 0.9166666666666666
    ```
    """

    def __init__(self, document_comparison_field: str = "content") -> None:
        """
        Create a DocumentMAPEvaluator component.

        :param document_comparison_field:
            The Document field to use for comparison. Possible options:
            - `"content"`: uses `doc.content`
            - `"id"`: uses `doc.id`
            - A `meta.` prefix followed by a key name: uses `doc.meta["<key>"]`
              (e.g. `"meta.file_id"`, `"meta.page_number"`).
              Nested keys are supported (e.g. `"meta.source.url"`).
        """
        self.document_comparison_field = document_comparison_field

    def _get_comparison_value(self, doc: Document) -> Any:
        """
        Extract the comparison value from a document based on the configured field.
        """
        if self.document_comparison_field == "content":
            return doc.content
        if self.document_comparison_field == "id":
            return doc.id
        if self.document_comparison_field.startswith("meta."):
            # Walk the (possibly nested) meta keys; return None if any key is missing.
            parts = self.document_comparison_field[5:].split(".")
            value = doc.meta
            for part in parts:
                if not isinstance(value, dict) or part not in value:
                    return None
                value = value[part]
            return value
        msg = (
            f"Unsupported document_comparison_field: '{self.document_comparison_field}'. "
            "Use 'content', 'id', or 'meta.<key>'."
        )
        raise ValueError(msg)

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(self, document_comparison_field=self.document_comparison_field)

    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
    @component.output_types(score=float, individual_scores=list[float])
    def run(
        self, ground_truth_documents: list[list[Document]], retrieved_documents: list[list[Document]]
    ) -> dict[str, Any]:
        """
        Run the DocumentMAPEvaluator on the given inputs.

        All lists must have the same length.

        :param ground_truth_documents:
            A list of expected documents for each question.
        :param retrieved_documents:
            A list of retrieved documents for each question.
        :returns:
            A dictionary with the following outputs:
            - `score` - The average of calculated scores.
            - `individual_scores` - A list of numbers from 0.0 to 1.0 that represent how highly the relevant
              retrieved documents are ranked.
        """
        if len(ground_truth_documents) != len(retrieved_documents):
            msg = "The length of ground_truth_documents and retrieved_documents must be the same."
            raise ValueError(msg)

        individual_scores = []

        for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents, strict=True):
            # Average precision for one query: precision@k averaged over the ranks k
            # at which a relevant document was retrieved.
            average_precision = 0.0
            average_precision_numerator = 0.0
            relevant_documents = 0

            ground_truth_values = [val for doc in ground_truth if (val := self._get_comparison_value(doc)) is not None]
            for rank, retrieved_document in enumerate(retrieved):
                retrieved_value = self._get_comparison_value(retrieved_document)
                if retrieved_value is None:
                    continue

                if retrieved_value in ground_truth_values:
                    relevant_documents += 1
                    average_precision_numerator += relevant_documents / (rank + 1)
            if relevant_documents > 0:
                average_precision = average_precision_numerator / relevant_documents
            individual_scores.append(average_precision)

        score = sum(individual_scores) / len(ground_truth_documents)
        return {"score": score, "individual_scores": individual_scores}
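

if __name__ == "__main__":
    # Minimal sketch of the evaluator in action, using hypothetical data (not part of
    # the component above). With `document_comparison_field="meta.file_id"`, documents
    # are matched on that metadata key instead of their text content.
    evaluator = DocumentMAPEvaluator(document_comparison_field="meta.file_id")
    result = evaluator.run(
        ground_truth_documents=[
            [
                Document(content="Paris", meta={"file_id": "a"}),
                Document(content="Lyon", meta={"file_id": "b"}),
            ],
        ],
        retrieved_documents=[
            [
                Document(content="Paris is the capital", meta={"file_id": "a"}),  # relevant, rank 1
                Document(content="Berlin", meta={"file_id": "x"}),  # not relevant, rank 2
                Document(content="Lyon is on the Rhône", meta={"file_id": "b"}),  # relevant, rank 3
            ],
        ],
    )

    # Average precision for the single query: relevant hits at ranks 1 and 3 give
    # (1/1 + 2/3) / 2 = 0.8333..., which is also the mean over this one query.
    print(result["individual_scores"])  # [0.8333333333333333]
    print(result["score"])  # 0.8333333333333333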