haystack/test/nodes/test_join_documents.py
Nicola Procopio 32e87d37c1
fixed join_docs.py concatenate (#5970)
* added hybrid search example

Added an example about hybrid search for faq pipeline on covid dataset

* formatted with black formatter

* renamed document

* fixed

* fixed typos

* added test

added test for hybrid search

* fixed whitespaces

* removed test for hybrid search

* fixed pylint

* commented logging

* fixed bug in join_docs.py _concatenate_results

* Update join_docs.py

updated comment

* format with black

* added release note on PR

* updated release notes

* updated test_join_documents

* updated test

* updated test

* Update test_join_documents.py

* formatted with black

* fixed test

* fixed

---------

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
2023-10-16 09:31:52 +02:00

81 lines
3.0 KiB
Python

import pytest
from haystack import Document
from haystack.nodes.other.join_docs import JoinDocuments
@pytest.mark.unit
@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"])
def test_joindocuments(join_mode):
    """Join two single-document inputs and check ordering and `top_k_join=1` for each join mode."""
    first_doc = Document(content="text document 1", content_type="text", score=0.2)
    second_doc = Document(content="text document 2", content_type="text", score=0.7)
    inputs = [{"documents": [first_doc]}, {"documents": [second_doc]}]

    node = JoinDocuments(join_mode=join_mode)

    # No limit: both documents are returned, already in reverse-sorted order
    # (ordering is whatever Document comparison defines).
    output, _ = node.run(inputs)
    joined = output["documents"]
    assert len(joined) == 2
    assert joined == sorted(joined, reverse=True)

    # Limit of one: exactly one document survives; which one depends on the join mode.
    output, _ = node.run(inputs, top_k_join=1)
    joined = output["documents"]
    assert len(joined) == 1
    expected_content = "text document 1" if join_mode == "reciprocal_rank_fusion" else "text document 2"
    assert joined[0].content == expected_content
@pytest.mark.unit
@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"])
@pytest.mark.parametrize("sort_by_score", [True, False])
def test_joindocuments_score_none(join_mode, sort_by_score):
    """Check that JoinDocuments returns the expected number of documents when one input has `score=None`."""
    scored_doc = Document(content="text document 1", content_type="text", score=0.2)
    unscored_doc = Document(content="text document 2", content_type="text", score=None)
    inputs = [{"documents": [scored_doc]}, {"documents": [unscored_doc]}]

    node = JoinDocuments(join_mode=join_mode, sort_by_score=sort_by_score)

    # First run uses the default limit, second run caps the output at one document.
    for run_kwargs, expected_count in (({}, 2), ({"top_k_join": 1}, 1)):
        output, _ = node.run(inputs, **run_kwargs)
        assert len(output["documents"]) == expected_count
@pytest.mark.unit
def test_joindocuments_preserves_root_node():
    """Check that the `root_node` value shared by the inputs is present in the joined output."""
    # https://github.com/deepset-ai/haystack-private/issues/51
    first_input = {
        "documents": [Document(content="text document 1", content_type="text", score=0.2)],
        "root_node": "File",
    }
    second_input = {
        "documents": [Document(content="text document 2", content_type="text", score=None)],
        "root_node": "File",
    }

    output, _ = JoinDocuments().run([first_input, second_input])

    assert output["root_node"] == "File"
@pytest.mark.unit
def test_joindocuments_concatenate_keep_only_highest_ranking_duplicate():
    """In concatenate mode, a document found in several inputs is expected once, with its highest score.

    "text document 2" appears with scores 0.3 and 0.7; the expected output holds it a single
    time with score 0.7, followed by "text document 1".
    """
    doc_1 = Document(content="text document 1", content_type="text", score=0.2)
    doc_2_low = Document(content="text document 2", content_type="text", score=0.3)
    doc_2_high = Document(content="text document 2", content_type="text", score=0.7)
    inputs = [{"documents": [doc_1, doc_2_low]}, {"documents": [doc_2_high]}]

    # Built from fresh Document objects so the comparison does not rely on the input instances.
    expected_documents = [
        Document(content="text document 2", content_type="text", score=0.7),
        Document(content="text document 1", content_type="text", score=0.2),
    ]

    node = JoinDocuments(join_mode="concatenate")
    output, _ = node.run(inputs)
    joined = output["documents"]

    assert len(joined) == 2
    assert joined == expected_documents