haystack/test/components/evaluators/test_answer_exact_match.py
2025-05-26 16:22:51 +00:00

74 lines
2.2 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import pytest
from haystack.components.evaluators import AnswerExactMatchEvaluator
def test_run_with_all_matching():
    """Every predicted answer equals its ground truth: all scores are 1 and the overall score is 1.0."""
    truths = ["Berlin", "Paris"]
    predictions = ["Berlin", "Paris"]

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=truths, predicted_answers=predictions)

    expected = {"individual_scores": [1, 1], "score": 1.0}
    assert outcome == expected
def test_run_with_no_matching():
    """No predicted answer equals its ground truth: all scores are 0 and the overall score is 0.0."""
    truths = ["Berlin", "Paris"]
    predictions = ["Paris", "London"]

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=truths, predicted_answers=predictions)

    expected = {"individual_scores": [0, 0], "score": 0.0}
    assert outcome == expected
def test_run_with_partial_matching():
    """Only the first of two predictions matches: scores are [1, 0] and the overall score is 0.5."""
    truths = ["Berlin", "Paris"]
    predictions = ["Berlin", "London"]

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=truths, predicted_answers=predictions)

    expected = {"individual_scores": [1, 0], "score": 0.5}
    assert outcome == expected
def test_run_with_complex_data():
    """
    Run the evaluator on twelve ground-truth/prediction pairs.

    Only positions 0, 1, 10 and 11 hold identical strings, so those are the only
    entries expected to score 1. Answers that appear in both lists but at
    different positions (e.g. "9th", "the 11th") are expected to score 0, as is
    the near-miss "classical" vs. "classic music".
    """
    truths = [
        "France",
        "9th century",
        "9th",
        "classical music",
        "classical",
        "11th century",
        "the 11th",
        "Denmark",
        "Iceland",
        "Norway",
        "10th century",
        "10th",
    ]
    predictions = [
        "France",
        "9th century",
        "10th century",
        "9th",
        "classic music",
        "rock music",
        "dubstep",
        "the 11th",
        "11th century",
        "Denmark, Iceland and Norway",
        "10th century",
        "10th",
    ]

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=truths, predicted_answers=predictions)

    # 4 matches out of 12 pairs.
    expected = {
        "individual_scores": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        "score": 0.3333333333333333,
    }
    assert outcome == expected
def test_run_with_different_lengths():
    """Input lists of unequal length must make ``run`` raise ``ValueError``, whichever list is shorter."""
    evaluator = AnswerExactMatchEvaluator()

    # (ground truths, predictions): first with fewer ground truths, then with fewer predictions.
    mismatched_inputs = (
        (["Berlin"], ["Berlin", "London"]),
        (["Berlin", "Paris"], ["Berlin"]),
    )

    for truths, predictions in mismatched_inputs:
        with pytest.raises(ValueError):
            evaluator.run(ground_truth_answers=truths, predicted_answers=predictions)