mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-27 15:08:43 +00:00
fix: DocumentRecallEvaluator changing division and adding checks for emptiness of documents (#9380)
* changing division and adding checks for emptiness of documents * adding release notes * adding tests * Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> * attending PR comments * Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml * Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml Co-authored-by: Julian Risch <julian.risch@deepset.ai> * Update haystack/components/evaluators/document_recall.py Co-authored-by: Julian Risch <julian.risch@deepset.ai> * Update haystack/components/evaluators/document_recall.py Co-authored-by: Julian Risch <julian.risch@deepset.ai> * Update haystack/components/evaluators/document_recall.py Co-authored-by: Julian Risch <julian.risch@deepset.ai> * Update haystack/components/evaluators/document_recall.py Co-authored-by: Julian Risch <julian.risch@deepset.ai> * adding tests * linting --------- Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> Co-authored-by: Julian Risch <julian.risch@deepset.ai>
This commit is contained in:
parent
aeea3b2d39
commit
42b378950f
@ -5,9 +5,11 @@
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from haystack import component, default_to_dict
|
||||
from haystack import component, default_to_dict, logging
|
||||
from haystack.dataclasses import Document
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RecallMode(Enum):
|
||||
"""
|
||||
@ -97,7 +99,21 @@ class DocumentRecallEvaluator:
|
||||
unique_retrievals = {p.content for p in retrieved_documents}
|
||||
retrieved_ground_truths = unique_truths.intersection(unique_retrievals)
|
||||
|
||||
return len(retrieved_ground_truths) / len(ground_truth_documents)
|
||||
if not unique_truths or unique_truths == {""}:
|
||||
logger.warning(
|
||||
"There are no ground truth documents or all of them have an empty string as content. "
|
||||
"Score will be set to 0."
|
||||
)
|
||||
return 0.0
|
||||
|
||||
if not unique_retrievals or unique_retrievals == {""}:
|
||||
logger.warning(
|
||||
"There are no retrieved documents or all of them have an empty string as content. "
|
||||
"Score will be set to 0."
|
||||
)
|
||||
return 0.0
|
||||
|
||||
return len(retrieved_ground_truths) / len(unique_truths)
|
||||
|
||||
@component.output_types(score=float, individual_scores=List[float])
|
||||
def run(
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
The `DocumentRecallEvaluator` was updated. Now, when in `MULTI_HIT` mode, the division is over the unique ground truth documents instead of the total number of ground truth documents.
|
||||
We also added checks for emptiness. If there are no retrieved documents or all of them have an empty string as content, we return 0.0 and log a warning. Likewise, if there are no ground truth documents or all of them have an empty string as content, we return 0.0 and log a warning.
|
||||
@ -13,6 +13,14 @@ def test_init_with_unknown_mode_string():
|
||||
DocumentRecallEvaluator(mode="unknown_mode")
|
||||
|
||||
|
||||
def test_init_with_string_mode():
    """A string mode name passed to the constructor resolves to the matching RecallMode member."""
    for mode_name, expected_mode in [
        ("single_hit", RecallMode.SINGLE_HIT),
        ("multi_hit", RecallMode.MULTI_HIT),
    ]:
        assert DocumentRecallEvaluator(mode=mode_name).mode == expected_mode
|
||||
|
||||
|
||||
class TestDocumentRecallEvaluatorSingleHit:
|
||||
@pytest.fixture
|
||||
def evaluator(self):
|
||||
@ -186,3 +194,27 @@ class TestDocumentRecallEvaluatorMultiHit:
|
||||
}
|
||||
new_evaluator = default_from_dict(DocumentRecallEvaluator, data)
|
||||
assert new_evaluator.mode == RecallMode.MULTI_HIT
|
||||
|
||||
def test_empty_ground_truth_documents(self, evaluator):
    """An empty ground-truth list yields a 0.0 score instead of a division error."""
    result = evaluator.run(
        ground_truth_documents=[[]],
        retrieved_documents=[[Document(content="test")]],
    )
    assert result == {"individual_scores": [0.0], "score": 0.0}
|
||||
|
||||
def test_empty_retrieved_documents(self, evaluator):
    """An empty retrieval list yields a 0.0 score instead of a division error."""
    result = evaluator.run(
        ground_truth_documents=[[Document(content="test")]],
        retrieved_documents=[[]],
    )
    assert result == {"individual_scores": [0.0], "score": 0.0}
|
||||
|
||||
def test_empty_string_ground_truth_documents(self, evaluator):
    """Ground-truth documents whose content is only the empty string score 0.0."""
    result = evaluator.run(
        ground_truth_documents=[[Document(content="")]],
        retrieved_documents=[[Document(content="test")]],
    )
    assert result == {"individual_scores": [0.0], "score": 0.0}
|
||||
|
||||
def test_empty_string_retrieved_documents(self, evaluator):
    """Retrieved documents whose content is only the empty string score 0.0."""
    result = evaluator.run(
        ground_truth_documents=[[Document(content="test")]],
        retrieved_documents=[[Document(content="")]],
    )
    assert result == {"individual_scores": [0.0], "score": 0.0}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user