Cradicle Explorer

/ test / components / evaluators / test_answer_exact_match.py
test_answer_exact_match.py
 1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 2  #
 3  # SPDX-License-Identifier: Apache-2.0
 4  
 5  import pytest
 6  
 7  from haystack.components.evaluators import AnswerExactMatchEvaluator
 8  
 9  
def test_run_with_all_matching():
    """Predictions identical to the ground truth must each score 1, with an overall score of 1.0."""
    expected = {"individual_scores": [1, 1], "score": 1.0}

    outcome = AnswerExactMatchEvaluator().run(
        ground_truth_answers=["Berlin", "Paris"],
        predicted_answers=["Berlin", "Paris"],
    )

    assert outcome == expected
15  
16  
def test_run_with_no_matching():
    """When no prediction equals its ground truth, every individual score and the overall score are 0."""
    expected = {"individual_scores": [0, 0], "score": 0.0}

    outcome = AnswerExactMatchEvaluator().run(
        ground_truth_answers=["Berlin", "Paris"],
        # "Paris" is present but at the wrong position, so it still counts as a miss.
        predicted_answers=["Paris", "London"],
    )

    assert outcome == expected
22  
23  
def test_run_with_partial_matching():
    """One matching pair out of two must yield individual scores [1, 0] and an overall score of 0.5."""
    expected = {"individual_scores": [1, 0], "score": 0.5}

    outcome = AnswerExactMatchEvaluator().run(
        ground_truth_answers=["Berlin", "Paris"],
        predicted_answers=["Berlin", "London"],
    )

    assert outcome == expected
29  
30  
def test_run_with_complex_data():
    """Check per-pair and overall scores on a longer input where most pairs are misaligned.

    Answers are compared position by position. Only the pairs at indices
    0, 1, 10 and 11 hold identical strings, so 4 of the 12 pairs are expected
    to score 1. A prediction that merely contains its ground truth
    ("Denmark, Iceland and Norway" vs. "Norway") is expected to score 0.
    """
    ground_truths = [
        "France",
        "9th century",
        "9th",
        "classical music",
        "classical",
        "11th century",
        "the 11th",
        "Denmark",
        "Iceland",
        "Norway",
        "10th century",
        "10th",
    ]
    predictions = [
        "France",
        "9th century",
        "10th century",
        "9th",
        "classic music",
        "rock music",
        "dubstep",
        "the 11th",
        "11th century",
        "Denmark, Iceland and Norway",
        "10th century",
        "10th",
    ]

    result = AnswerExactMatchEvaluator().run(
        ground_truth_answers=ground_truths,
        predicted_answers=predictions,
    )

    assert result == {
        "individual_scores": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        # 4 matches out of 12 pairs. Compare with a tolerance instead of a
        # hard-coded float literal so the check does not hinge on the exact
        # floating-point representation of one third.
        "score": pytest.approx(4 / 12),
    }
64  
65  
def test_run_with_different_lengths():
    """``run`` must raise ``ValueError`` when the two answer lists differ in length."""
    evaluator = AnswerExactMatchEvaluator()

    mismatched_inputs = (
        # More predictions than ground truths.
        (["Berlin"], ["Berlin", "London"]),
        # More ground truths than predictions.
        (["Berlin", "Paris"], ["Berlin"]),
    )

    for truths, predictions in mismatched_inputs:
        with pytest.raises(ValueError):
            evaluator.run(ground_truth_answers=truths, predicted_answers=predictions)