diff --git a/haystack/components/joiners/document_joiner.py b/haystack/components/joiners/document_joiner.py
index 6e1e5a803..bd51613df 100644
--- a/haystack/components/joiners/document_joiner.py
+++ b/haystack/components/joiners/document_joiner.py
@@ -195,9 +195,14 @@ class DocumentJoiner:
             std_dev = (sum((x - mean_score) ** 2 for x in scores_list) / len(scores_list)) ** 0.5
             min_score = mean_score - 3 * std_dev
             max_score = mean_score + 3 * std_dev
+            delta_score = max_score - min_score
+            # rounding in mean_score/std_dev can leave a tiny non-zero delta even when every score is identical,
+            # so the degenerate case is also detected on the raw scores
+            same_scores = delta_score == 0.0 or min(scores_list) == max(scores_list)
 
             for doc in documents:
-                doc.score = (doc.score - min_score) / (max_score - min_score)
+                # docs sharing a single score are uninformative for the query, so they all get 0.0
+                doc.score = 0.0 if same_scores else (doc.score - min_score) / delta_score
 
         output = self._concatenate(document_lists=document_lists)
 
diff --git a/test/components/joiners/test_document_joiner.py b/test/components/joiners/test_document_joiner.py
index 41fe27554..0da082477 100644
--- a/test/components/joiners/test_document_joiner.py
+++ b/test/components/joiners/test_document_joiner.py
@@ -145,6 +145,40 @@ class TestDocumentJoiner:
         ]
         assert all(doc.id in expected_document_ids for doc in output["documents"])
 
+    def test_run_with_distribution_based_rank_fusion_join_mode_same_scores(self):
+        joiner = DocumentJoiner(join_mode="distribution_based_rank_fusion")
+        documents_1 = [
+            Document(content="a", score=0.2),
+            Document(content="b", score=0.2),
+            Document(content="c", score=0.2),
+        ]
+        documents_2 = [
+            Document(content="d", score=0.5),
+            Document(content="e", score=0.8),
+            Document(content="f", score=1.1, meta={"key": "value"}),
+            Document(content="g", score=0.3),
+            Document(content="a", score=0.3),
+        ]
+        output = joiner.run([documents_1, documents_2])
+        assert len(output["documents"]) == 7
+        expected_document_ids = [
+            doc.id
+            for doc in [
+                Document(content="a", score=0),
+                Document(content="b", score=0),
+                Document(content="c", score=0),
+                Document(content="d", score=0.44),
+                Document(content="e", score=0.60),
+                Document(content="f", score=0.76, meta={"key": "value"}),
+                Document(content="g", score=0.33),
+            ]
+        ]
+        assert all(doc.id in expected_document_ids for doc in output["documents"])
+        # "b" and "c" only appear in the constant-score list, so they must be normalized to exactly 0.0
+        scores_by_content = {doc.content: doc.score for doc in output["documents"]}
+        assert scores_by_content["b"] == 0.0
+        assert scores_by_content["c"] == 0.0
+
     def test_run_with_top_k_in_run_method(self):
         joiner = DocumentJoiner()
         documents_1 = [Document(content="a"), Document(content="b"), Document(content="c")]