# haystack/test/components/evaluators/test_answer_exact_match.py

import pytest

from haystack.components.evaluators import AnswerExactMatchEvaluator


def test_run_with_all_matching():
    """Check the evaluator output when each predicted answer list equals its ground-truth list."""
    ground_truth = [["Berlin"], ["Paris"]]
    predictions = [["Berlin"], ["Paris"]]
    expected = {"individual_scores": [1, 1], "score": 1.0}

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=ground_truth, predicted_answers=predictions)

    # Whole-dict equality also guards against unexpected extra output keys.
    assert outcome == expected


def test_run_with_no_matching():
    """Check the evaluator output when no predicted answer appears in its ground-truth list."""
    ground_truth = [["Berlin"], ["Paris"]]
    predictions = [["Paris"], ["London"]]
    expected = {"individual_scores": [0, 0], "score": 0.0}

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=ground_truth, predicted_answers=predictions)

    # Whole-dict equality also guards against unexpected extra output keys.
    assert outcome == expected


def test_run_with_partial_matching():
    """Check the evaluator output when only the first of two items has a matching prediction."""
    ground_truth = [["Berlin"], ["Paris"]]
    predictions = [["Berlin"], ["London"]]
    expected = {"individual_scores": [1, 0], "score": 0.5}

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=ground_truth, predicted_answers=predictions)

    # Whole-dict equality also guards against unexpected extra output keys.
    assert outcome == expected


def test_run_with_complex_data():
    """Check the evaluator output on six items with several ground-truth and predicted answers each."""
    # One inner list per item; position i here pairs with position i in `predictions`.
    ground_truth = [
        ["France"],
        ["9th century", "9th"],
        ["classical music", "classical"],
        ["11th century", "the 11th"],
        ["Denmark", "Iceland", "Norway"],
        ["10th century", "10th"],
    ]
    predictions = [
        ["France"],
        ["9th century", "10th century", "9th"],
        # Near misses only ("classic music" is not "classical music"): expected score 0.
        ["classic music", "rock music", "dubstep"],
        ["11th", "the 11th", "11th century"],
        # A single combined string rather than the three separate country names: expected score 0.
        ["Denmark, Iceland and Norway"],
        ["10th century", "the first half of the 10th century", "10th", "10th"],
    ]
    # The overall score is consistent with 4 of the 6 items scoring 1.
    expected = {"individual_scores": [1, 1, 0, 1, 0, 1], "score": 0.6666666666666666}

    outcome = AnswerExactMatchEvaluator().run(ground_truth_answers=ground_truth, predicted_answers=predictions)

    assert outcome == expected


def test_run_with_different_lengths():
    """Check that `run` raises ValueError when the two input lists have different lengths."""
    evaluator = AnswerExactMatchEvaluator()

    # (ground_truth_answers, predicted_answers) pairs: first with too few ground truths,
    # then with too few predictions.
    mismatched_inputs = (
        ([["Berlin"]], [["Berlin"], ["London"]]),
        ([["Berlin"], ["Paris"]], [["Berlin"]]),
    )

    for ground_truth, predictions in mismatched_inputs:
        with pytest.raises(ValueError):
            evaluator.run(ground_truth_answers=ground_truth, predicted_answers=predictions)
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00			`import pytest`

			`from haystack.components.evaluators import AnswerExactMatchEvaluator`


			`def test_run_with_all_matching():`
			`evaluator = AnswerExactMatchEvaluator()`
refactor: Remove `questions` inputs from evaluators (#7466) * Remove questions input from AnswerExactMatchEvaluator * Remove questions input from DocumentRecallEvaluator 2024-04-04 14:14:18 +02:00			`result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["Paris"]])`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00
feat: Change outputs of AnswerExactMatchEvaluator (#7390) * Change outputs of AnswerExactMatchEvaluator * Changes scores to return the number of matches per question * Revert "Changes scores to return the number of matches per question" This reverts commit e4358720793d4584b0b961402d4557c50c4c2381. * Change output names 2024-03-26 10:57:59 +01:00			`assert result == {"individual_scores": [1, 1], "score": 1.0}`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00

			`def test_run_with_no_matching():`
			`evaluator = AnswerExactMatchEvaluator()`
refactor: Remove `questions` inputs from evaluators (#7466) * Remove questions input from AnswerExactMatchEvaluator * Remove questions input from DocumentRecallEvaluator 2024-04-04 14:14:18 +02:00			`result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Paris"], ["London"]])`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00
feat: Change outputs of AnswerExactMatchEvaluator (#7390) * Change outputs of AnswerExactMatchEvaluator * Changes scores to return the number of matches per question * Revert "Changes scores to return the number of matches per question" This reverts commit e4358720793d4584b0b961402d4557c50c4c2381. * Change output names 2024-03-26 10:57:59 +01:00			`assert result == {"individual_scores": [0, 0], "score": 0.0}`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00

			`def test_run_with_partial_matching():`
			`evaluator = AnswerExactMatchEvaluator()`
refactor: Remove `questions` inputs from evaluators (#7466) * Remove questions input from AnswerExactMatchEvaluator * Remove questions input from DocumentRecallEvaluator 2024-04-04 14:14:18 +02:00			`result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["London"]])`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00
feat: Change outputs of AnswerExactMatchEvaluator (#7390) * Change outputs of AnswerExactMatchEvaluator * Changes scores to return the number of matches per question * Revert "Changes scores to return the number of matches per question" This reverts commit e4358720793d4584b0b961402d4557c50c4c2381. * Change output names 2024-03-26 10:57:59 +01:00			`assert result == {"individual_scores": [1, 0], "score": 0.5}`


			`def test_run_with_complex_data():`
			`evaluator = AnswerExactMatchEvaluator()`
			`result = evaluator.run(`
			`ground_truth_answers=[`
			`["France"],`
			`["9th century", "9th"],`
			`["classical music", "classical"],`
			`["11th century", "the 11th"],`
			`["Denmark", "Iceland", "Norway"],`
			`["10th century", "10th"],`
			`],`
			`predicted_answers=[`
			`["France"],`
			`["9th century", "10th century", "9th"],`
			`["classic music", "rock music", "dubstep"],`
			`["11th", "the 11th", "11th century"],`
			`["Denmark, Iceland and Norway"],`
			`["10th century", "the first half of the 10th century", "10th", "10th"],`
			`],`
			`)`
			`assert result == {"individual_scores": [1, 1, 0, 1, 0, 1], "score": 0.6666666666666666}`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00

			`def test_run_with_different_lengths():`
			`evaluator = AnswerExactMatchEvaluator()`

			`with pytest.raises(ValueError):`
refactor: Remove `questions` inputs from evaluators (#7466) * Remove questions input from AnswerExactMatchEvaluator * Remove questions input from DocumentRecallEvaluator 2024-04-04 14:14:18 +02:00			`evaluator.run(ground_truth_answers=[["Berlin"]], predicted_answers=[["Berlin"], ["London"]])`
Add `AnswerExactMatchEvaluator` (#7381) * Add AnswerExactMatchEvaluator * Add release notes * Fix linting * Update docstrings * Update docstrings * Remove to_dict and from_dict * Fix linting 2024-03-19 16:58:01 +01:00
			`with pytest.raises(ValueError):`
refactor: Remove `questions` inputs from evaluators (#7466) * Remove questions input from AnswerExactMatchEvaluator * Remove questions input from DocumentRecallEvaluator 2024-04-04 14:14:18 +02:00			`evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"]])`