fix: DocumentRecallEvaluator changing division and adding checks for emptiness of documents (#9380)

* changing division and adding checks for emptiness of documents

* adding release notes

* adding tests

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* addressing PR comments

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* adding tests

* linting

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
David S. Batista 2025-05-14 10:37:47 +01:00 committed by GitHub
parent aeea3b2d39
commit 42b378950f
3 changed files with 55 additions and 2 deletions

haystack/components/evaluators/document_recall.py

@@ -5,9 +5,11 @@
 from enum import Enum
 from typing import Any, Dict, List, Union
 
-from haystack import component, default_to_dict
+from haystack import component, default_to_dict, logging
 from haystack.dataclasses import Document
 
+logger = logging.getLogger(__name__)
+
 
 class RecallMode(Enum):
     """
@@ -97,7 +99,21 @@ class DocumentRecallEvaluator:
         unique_truths = {g.content for g in ground_truth_documents}
         unique_retrievals = {p.content for p in retrieved_documents}
         retrieved_ground_truths = unique_truths.intersection(unique_retrievals)
-        return len(retrieved_ground_truths) / len(ground_truth_documents)
+        if not unique_truths or unique_truths == {""}:
+            logger.warning(
+                "There are no ground truth documents or all of them have an empty string as content. "
+                "Score will be set to 0."
+            )
+            return 0.0
+
+        if not unique_retrievals or unique_retrievals == {""}:
+            logger.warning(
+                "There are no retrieved documents or all of them have an empty string as content. "
+                "Score will be set to 0."
+            )
+            return 0.0
+
+        return len(retrieved_ground_truths) / len(unique_truths)
 
     @component.output_types(score=float, individual_scores=List[float])
     def run(
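
To make the division change concrete, here is a minimal plain-Python sketch of the new multi-hit arithmetic (the list contents are illustrative; variable names mirror the diff above):

```python
# Worked example of the multi-hit recall arithmetic changed in this commit.
ground_truth_contents = ["a", "a", "b"]  # one ground truth content is duplicated
retrieved_contents = ["a"]

unique_truths = set(ground_truth_contents)                   # {"a", "b"}
unique_retrievals = set(retrieved_contents)                  # {"a"}
retrieved_ground_truths = unique_truths & unique_retrievals  # {"a"}

old_score = len(retrieved_ground_truths) / len(ground_truth_contents)  # 1/3: duplicates inflate the denominator
new_score = len(retrieved_ground_truths) / len(unique_truths)          # 1/2: denominator counts unique truths only
print(old_score, new_score)  # 0.3333333333333333 0.5
```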

releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

@@ -0,0 +1,5 @@
+---
+enhancements:
+  - |
+    The `DocumentRecallEvaluator` was updated: in `MULTI_HIT` mode, the recall score is now divided by the number of unique ground truth documents instead of the total number of ground truth documents.
+    We also added emptiness checks: if there are no ground truth documents or no retrieved documents, or all of either have an empty string as content, the evaluator returns 0.0 and logs a warning.
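
A minimal usage sketch of the behavior described above (the import path and `run` parameter names follow the diff and tests in this commit; the exact shape of the printed dict is an assumption based on the declared output types):

```python
from haystack.components.evaluators.document_recall import DocumentRecallEvaluator, RecallMode
from haystack.dataclasses import Document

evaluator = DocumentRecallEvaluator(mode=RecallMode.MULTI_HIT)

# Three ground truth documents, but only two unique contents: {"Paris", "Berlin"}.
result = evaluator.run(
    ground_truth_documents=[[Document(content="Paris"), Document(content="Paris"), Document(content="Berlin")]],
    retrieved_documents=[[Document(content="Paris")]],
)
print(result)  # score 0.5: one of two unique ground truths retrieved (previously 1/3)
```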

test/components/evaluators/test_document_recall.py

@@ -13,6 +13,14 @@ def test_init_with_unknown_mode_string():
         DocumentRecallEvaluator(mode="unknown_mode")
 
 
+def test_init_with_string_mode():
+    evaluator = DocumentRecallEvaluator(mode="single_hit")
+    assert evaluator.mode == RecallMode.SINGLE_HIT
+
+    evaluator = DocumentRecallEvaluator(mode="multi_hit")
+    assert evaluator.mode == RecallMode.MULTI_HIT
+
+
 class TestDocumentRecallEvaluatorSingleHit:
     @pytest.fixture
     def evaluator(self):
@@ -186,3 +194,27 @@ class TestDocumentRecallEvaluatorMultiHit:
         }
         new_evaluator = default_from_dict(DocumentRecallEvaluator, data)
         assert new_evaluator.mode == RecallMode.MULTI_HIT
+
+    def test_empty_ground_truth_documents(self, evaluator):
+        ground_truth_documents = [[]]
+        retrieved_documents = [[Document(content="test")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_retrieved_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="test")]]
+        retrieved_documents = [[]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_string_ground_truth_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="")]]
+        retrieved_documents = [[Document(content="test")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_string_retrieved_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="test")]]
+        retrieved_documents = [[Document(content="")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}