diff --git a/haystack/components/joiners/document_joiner.py b/haystack/components/joiners/document_joiner.py index 7451f6089..b81af3cde 100644 --- a/haystack/components/joiners/document_joiner.py +++ b/haystack/components/joiners/document_joiner.py @@ -166,6 +166,10 @@ class DocumentJoiner: """ Merge multiple lists of Documents and calculate a weighted sum of the scores of duplicate Documents. """ + # This check prevents a division by zero when no documents are passed + if not document_lists: + return [] + scores_map: dict = defaultdict(int) documents_map = {} weights = self.weights if self.weights else [1 / len(document_lists)] * len(document_lists) @@ -187,6 +191,10 @@ class DocumentJoiner: The constant k is set to 61 (60 was suggested by the original paper, plus 1 as python lists are 0-based and the paper used 1-based ranking). """ + # This check prevents a division by zero when no documents are passed + if not document_lists: + return [] + k = 61 scores_map: dict = defaultdict(int) @@ -217,6 +225,9 @@ class DocumentJoiner: If a Document is in more than one retriever, the one with the highest score is used. 
""" for documents in document_lists: + if len(documents) == 0: + continue + scores_list = [] for doc in documents: diff --git a/releasenotes/notes/fix-document-joiner-division-by-zero-b24f95d37b007264.yaml b/releasenotes/notes/fix-document-joiner-division-by-zero-b24f95d37b007264.yaml new file mode 100644 index 000000000..35d99d2ab --- /dev/null +++ b/releasenotes/notes/fix-document-joiner-division-by-zero-b24f95d37b007264.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Fix `DocumentJoiner` failing when run with an empty list of `Document`s diff --git a/test/components/joiners/test_document_joiner.py b/test/components/joiners/test_document_joiner.py index 56bdfb644..6cc4f5f9e 100644 --- a/test/components/joiners/test_document_joiner.py +++ b/test/components/joiners/test_document_joiner.py @@ -60,18 +60,45 @@ class TestDocumentJoiner: assert document_joiner.top_k == 6 assert not document_joiner.sort_by_score - def test_empty_list(self): - joiner = DocumentJoiner() + @pytest.mark.parametrize( + "join_mode", + [ + JoinMode.CONCATENATE, + JoinMode.MERGE, + JoinMode.RECIPROCAL_RANK_FUSION, + JoinMode.DISTRIBUTION_BASED_RANK_FUSION, + ], + ) + def test_empty_list(self, join_mode: JoinMode): + joiner = DocumentJoiner(join_mode=join_mode) result = joiner.run([]) assert result == {"documents": []} - def test_list_of_empty_lists(self): - joiner = DocumentJoiner() + @pytest.mark.parametrize( + "join_mode", + [ + JoinMode.CONCATENATE, + JoinMode.MERGE, + JoinMode.RECIPROCAL_RANK_FUSION, + JoinMode.DISTRIBUTION_BASED_RANK_FUSION, + ], + ) + def test_list_of_empty_lists(self, join_mode: JoinMode): + joiner = DocumentJoiner(join_mode=join_mode) result = joiner.run([[], []]) assert result == {"documents": []} - def test_list_with_one_empty_list(self): - joiner = DocumentJoiner() + @pytest.mark.parametrize( + "join_mode", + [ + JoinMode.CONCATENATE, + JoinMode.MERGE, + JoinMode.RECIPROCAL_RANK_FUSION, + JoinMode.DISTRIBUTION_BASED_RANK_FUSION, + ], + ) + def 
test_list_with_one_empty_list(self, join_mode: JoinMode): + joiner = DocumentJoiner(join_mode=join_mode) documents = [Document(content="a"), Document(content="b"), Document(content="c")] result = joiner.run([[], documents]) assert result == {"documents": documents}