fix: DocumentRecallEvaluator changing division and adding checks for emptiness of documents (#9380)

* changing division and adding checks for emptiness of documents

* adding release notes

* adding tests

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* addressing PR comments

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* Update haystack/components/evaluators/document_recall.py

Co-authored-by: Julian Risch <julian.risch@deepset.ai>

* adding tests

* linting

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
David S. Batista 2025-05-14 10:37:47 +01:00 committed by GitHub
parent aeea3b2d39
commit 42b378950f
3 changed files with 55 additions and 2 deletions

haystack/components/evaluators/document_recall.py

@@ -5,9 +5,11 @@
 from enum import Enum
 from typing import Any, Dict, List, Union
 
-from haystack import component, default_to_dict
+from haystack import component, default_to_dict, logging
 from haystack.dataclasses import Document
 
+logger = logging.getLogger(__name__)
+
 
 class RecallMode(Enum):
     """
@@ -97,7 +99,21 @@ class DocumentRecallEvaluator:
         unique_truths = {g.content for g in ground_truth_documents}
         unique_retrievals = {p.content for p in retrieved_documents}
         retrieved_ground_truths = unique_truths.intersection(unique_retrievals)
-        return len(retrieved_ground_truths) / len(ground_truth_documents)
+        if not unique_truths or unique_truths == {""}:
+            logger.warning(
+                "There are no ground truth documents or all of them have an empty string as content. "
+                "Score will be set to 0."
+            )
+            return 0.0
+
+        if not unique_retrievals or unique_retrievals == {""}:
+            logger.warning(
+                "There are no retrieved documents or all of them have an empty string as content. "
+                "Score will be set to 0."
+            )
+            return 0.0
+
+        return len(retrieved_ground_truths) / len(unique_truths)
 
     @component.output_types(score=float, individual_scores=List[float])
     def run(
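
To make the division change concrete, here is a minimal plain-Python sketch of the new multi-hit arithmetic (the list contents are illustrative; variable names mirror the diff above):

```python
# Worked example of the multi-hit recall arithmetic changed in this commit.
ground_truth_contents = ["a", "a", "b"]  # one ground truth content is duplicated
retrieved_contents = ["a"]

unique_truths = set(ground_truth_contents)                   # {"a", "b"}
unique_retrievals = set(retrieved_contents)                  # {"a"}
retrieved_ground_truths = unique_truths & unique_retrievals  # {"a"}

old_score = len(retrieved_ground_truths) / len(ground_truth_contents)  # 1/3: duplicates inflate the denominator
new_score = len(retrieved_ground_truths) / len(unique_truths)          # 1/2: denominator counts unique truths only
print(old_score, new_score)  # 0.3333333333333333 0.5
```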

releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml

@@ -0,0 +1,5 @@
+---
+enhancements:
+  - |
+    The `DocumentRecallEvaluator` was updated: in `MULTI_HIT` mode, the recall score is now divided by the number of unique ground truth documents instead of the total number of ground truth documents.
+    We also added emptiness checks: if there are no ground truth documents or no retrieved documents, or all of either have an empty string as content, the evaluator returns 0.0 and logs a warning.
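
A minimal usage sketch of the behavior described above (the import path and `run` parameter names follow the diff and tests in this commit; the exact shape of the printed dict is an assumption based on the declared output types):

```python
from haystack.components.evaluators.document_recall import DocumentRecallEvaluator, RecallMode
from haystack.dataclasses import Document

evaluator = DocumentRecallEvaluator(mode=RecallMode.MULTI_HIT)

# Three ground truth documents, but only two unique contents: {"Paris", "Berlin"}.
result = evaluator.run(
    ground_truth_documents=[[Document(content="Paris"), Document(content="Paris"), Document(content="Berlin")]],
    retrieved_documents=[[Document(content="Paris")]],
)
print(result)  # score 0.5: one of two unique ground truths retrieved (previously 1/3)
```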

test/components/evaluators/test_document_recall.py

@@ -13,6 +13,14 @@ def test_init_with_unknown_mode_string():
         DocumentRecallEvaluator(mode="unknown_mode")
 
 
+def test_init_with_string_mode():
+    evaluator = DocumentRecallEvaluator(mode="single_hit")
+    assert evaluator.mode == RecallMode.SINGLE_HIT
+
+    evaluator = DocumentRecallEvaluator(mode="multi_hit")
+    assert evaluator.mode == RecallMode.MULTI_HIT
+
+
 class TestDocumentRecallEvaluatorSingleHit:
     @pytest.fixture
     def evaluator(self):
@@ -186,3 +194,27 @@ class TestDocumentRecallEvaluatorMultiHit:
         }
         new_evaluator = default_from_dict(DocumentRecallEvaluator, data)
         assert new_evaluator.mode == RecallMode.MULTI_HIT
+
+    def test_empty_ground_truth_documents(self, evaluator):
+        ground_truth_documents = [[]]
+        retrieved_documents = [[Document(content="test")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_retrieved_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="test")]]
+        retrieved_documents = [[]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_string_ground_truth_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="")]]
+        retrieved_documents = [[Document(content="test")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}
+
+    def test_empty_string_retrieved_documents(self, evaluator):
+        ground_truth_documents = [[Document(content="test")]]
+        retrieved_documents = [[Document(content="")]]
+        score = evaluator.run(ground_truth_documents, retrieved_documents)
+        assert score == {"individual_scores": [0.0], "score": 0.0}