fix: document joiner division by zero with distribution based rank fusion (#8520)

* Parametrize document joiner tests with empty lists
* Skip loop in _distribution_based_rank_fusion if document list is empty
* Parametrize test_empty_list with join_mode
* Prevent division by zero in _merge and _reciprocal_rank_fusion
* Add release notes

---------

Co-authored-by: Silvano Cerza <silvanocerza@gmail.com>
2026-01-06 03:57:19 +00:00 · 2024-11-14 12:41:28 +01:00 · 2024-11-14 12:41:28 +01:00 · f5683bc8fa
commit f5683bc8fa
parent e5a80722c2
3 changed files with 48 additions and 6 deletions
--- a/haystack/components/joiners/document_joiner.py
+++ b/haystack/components/joiners/document_joiner.py
@ -166,6 +166,10 @@ class DocumentJoiner:
        """
        Merge multiple lists of Documents and calculate a weighted sum of the scores of duplicate Documents.
        """
+        # This check prevents a division by zero when `document_lists` is empty
+        if not document_lists:
+            return []
+
        scores_map: dict = defaultdict(int)
        documents_map = {}
        weights = self.weights if self.weights else [1 / len(document_lists)] * len(document_lists)
@ -187,6 +191,10 @@ class DocumentJoiner:
        The constant k is set to 61 (60 was suggested by the original paper,
        plus 1 as python lists are 0-based and the paper used 1-based ranking).
        """
+        # This check prevents a division by zero when `document_lists` is empty
+        if not document_lists:
+            return []
+
        k = 61

        scores_map: dict = defaultdict(int)
@ -217,6 +225,9 @@ class DocumentJoiner:
        If a Document is in more than one retriever, the one with the highest score is used.
        """
        for documents in document_lists:
+            if len(documents) == 0:
+                continue
+
            scores_list = []

            for doc in documents:
--- a/releasenotes/notes/fix-document-joiner-division-by-zero-b24f95d37b007264.yaml
+++ b/releasenotes/notes/fix-document-joiner-division-by-zero-b24f95d37b007264.yaml
@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fix `DocumentJoiner` failing when run with an empty list of `Document`s
--- a/test/components/joiners/test_document_joiner.py
+++ b/test/components/joiners/test_document_joiner.py
@ -60,18 +60,45 @@ class TestDocumentJoiner:
        assert document_joiner.top_k == 6
        assert not document_joiner.sort_by_score

-    def test_empty_list(self):
-        joiner = DocumentJoiner()
+    @pytest.mark.parametrize(
+        "join_mode",
+        [
+            JoinMode.CONCATENATE,
+            JoinMode.MERGE,
+            JoinMode.RECIPROCAL_RANK_FUSION,
+            JoinMode.DISTRIBUTION_BASED_RANK_FUSION,
+        ],
+    )
+    def test_empty_list(self, join_mode: JoinMode):
+        joiner = DocumentJoiner(join_mode=join_mode)
        result = joiner.run([])
        assert result == {"documents": []}

-    def test_list_of_empty_lists(self):
-        joiner = DocumentJoiner()
+    @pytest.mark.parametrize(
+        "join_mode",
+        [
+            JoinMode.CONCATENATE,
+            JoinMode.MERGE,
+            JoinMode.RECIPROCAL_RANK_FUSION,
+            JoinMode.DISTRIBUTION_BASED_RANK_FUSION,
+        ],
+    )
+    def test_list_of_empty_lists(self, join_mode: JoinMode):
+        joiner = DocumentJoiner(join_mode=join_mode)
        result = joiner.run([[], []])
        assert result == {"documents": []}

-    def test_list_with_one_empty_list(self):
-        joiner = DocumentJoiner()
+    @pytest.mark.parametrize(
+        "join_mode",
+        [
+            JoinMode.CONCATENATE,
+            JoinMode.MERGE,
+            JoinMode.RECIPROCAL_RANK_FUSION,
+            JoinMode.DISTRIBUTION_BASED_RANK_FUSION,
+        ],
+    )
+    def test_list_with_one_empty_list(self, join_mode: JoinMode):
+        joiner = DocumentJoiner(join_mode=join_mode)
        documents = [Document(content="a"), Document(content="b"), Document(content="c")]
        result = joiner.run([[], documents])
        assert result == {"documents": documents}