Fixed ZeroDivisionError in DocumentJoiner (#7972)

* added new strategy DBRF
* fix hook
* fix typos
* added test for DBRF
* fix format
* new release note
* reformatted with black
* Update haystack/components/joiners/document_joiner.py
  Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
* updated comments
* added type-hint and return type
* fix
* revert for lint problems
* fix
* fix
* fix
* fix
* another tentative
* dict out file
* only output
* fix output
* revert
* removed unused imports
* fix typing
* fixed ZeroDivisionError
* added test
* add release note
* removed try - except
* renamed test
* Update test/components/joiners/test_document_joiner.py
  Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
* Update haystack/components/joiners/document_joiner.py
  Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
* fix format error
* removed releasenotes/notes/release-note-9b2bc03a8a398078.yaml
* added comment

---------

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
Co-authored-by: anakin87 <stefanofiorucci@gmail.com>
2026-01-07 04:27:15 +00:00 · 2024-07-04 10:07:26 +02:00 · 2024-07-04 10:07:26 +02:00 · cafcf51cb0
commit cafcf51cb0
parent 03d9057e64
2 changed files with 33 additions and 1 deletions
--- a/haystack/components/joiners/document_joiner.py
+++ b/haystack/components/joiners/document_joiner.py
@ -195,9 +195,11 @@ class DocumentJoiner:
            std_dev = (sum((x - mean_score) ** 2 for x in scores_list) / len(scores_list)) ** 0.5
            min_score = mean_score - 3 * std_dev
            max_score = mean_score + 3 * std_dev
+            delta_score = max_score - min_score

            for doc in documents:
-                doc.score = (doc.score - min_score) / (max_score - min_score)
+                doc.score = (doc.score - min_score) / delta_score if delta_score != 0.0 else 0.0
+                # if all docs have the same score, delta_score is 0 and the docs are uninformative for the query

        output = self._concatenate(document_lists=document_lists)

--- a/test/components/joiners/test_document_joiner.py
+++ b/test/components/joiners/test_document_joiner.py
@ -145,6 +145,36 @@ class TestDocumentJoiner:
        ]
        assert all(doc.id in expected_document_ids for doc in output["documents"])

+    def test_run_with_distribution_based_rank_fusion_join_mode_same_scores(self):
+        joiner = DocumentJoiner(join_mode="distribution_based_rank_fusion")
+        documents_1 = [
+            Document(content="a", score=0.2),
+            Document(content="b", score=0.2),
+            Document(content="c", score=0.2),
+        ]
+        documents_2 = [
+            Document(content="d", score=0.5),
+            Document(content="e", score=0.8),
+            Document(content="f", score=1.1, meta={"key": "value"}),
+            Document(content="g", score=0.3),
+            Document(content="a", score=0.3),
+        ]
+        output = joiner.run([documents_1, documents_2])
+        assert len(output["documents"]) == 7
+        expected_document_ids = [
+            doc.id
+            for doc in [
+                Document(content="a", score=0),
+                Document(content="b", score=0),
+                Document(content="c", score=0),
+                Document(content="d", score=0.44),
+                Document(content="e", score=0.60),
+                Document(content="f", score=0.76, meta={"key": "value"}),
+                Document(content="g", score=0.33),
+            ]
+        ]
+        assert all(doc.id in expected_document_ids for doc in output["documents"])
+
    def test_run_with_top_k_in_run_method(self):
        joiner = DocumentJoiner()
        documents_1 = [Document(content="a"), Document(content="b"), Document(content="c")]