mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-07 04:27:15 +00:00
Fixed ZeroDivisionError in JoinDocuments (#7972)
* added new strategy DBRF * fix hook * fix typos * added test for DBRF * fix format * new release note * reformatted with black * Update haystack/components/joiners/document_joiner.py Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com> * updated comments * added type-hint and return type * fix * revert for lint problems * fix * fix * fix * fix * another tentative * dict out file * only output * fix output * revert * removed unused imports * fix typing * fixed ZeroDivisionError * added test * add release note * removed try - except * renamed test * Update test/components/joiners/test_document_joiner.py Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com> * Update haystack/components/joiners/document_joiner.py Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com> * fix format error * removed releasenotes/notes/release-note-9b2bc03a8a398078.yaml * added comment --------- Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com> Co-authored-by: anakin87 <stefanofiorucci@gmail.com>
This commit is contained in:
parent
03d9057e64
commit
cafcf51cb0
@ -195,9 +195,11 @@ class DocumentJoiner:
|
||||
std_dev = (sum((x - mean_score) ** 2 for x in scores_list) / len(scores_list)) ** 0.5
|
||||
min_score = mean_score - 3 * std_dev
|
||||
max_score = mean_score + 3 * std_dev
|
||||
delta_score = max_score - min_score
|
||||
|
||||
for doc in documents:
|
||||
doc.score = (doc.score - min_score) / (max_score - min_score)
|
||||
doc.score = (doc.score - min_score) / delta_score if delta_score != 0.0 else 0.0
|
||||
# if all docs have the same score, delta_score is 0 and the docs are uninformative for the query
|
||||
|
||||
output = self._concatenate(document_lists=document_lists)
|
||||
|
||||
|
||||
@ -145,6 +145,36 @@ class TestDocumentJoiner:
|
||||
]
|
||||
assert all(doc.id in expected_document_ids for doc in output["documents"])
|
||||
|
||||
def test_run_with_distribution_based_rank_fusion_join_mode_same_scores(self):
    """
    Check distribution-based rank fusion when one input list has identical scores.

    The first list holds three documents that all score 0.2, so their scores have no
    spread. Eight documents go in ("a" is present in both lists) and seven are
    expected back.
    """
    uniform_docs = [Document(content=text, score=0.2) for text in ("a", "b", "c")]
    mixed_docs = [
        Document(content="d", score=0.5),
        Document(content="e", score=0.8),
        Document(content="f", score=1.1, meta={"key": "value"}),
        Document(content="g", score=0.3),
        Document(content="a", score=0.3),
    ]

    fusion_joiner = DocumentJoiner(join_mode="distribution_based_rank_fusion")
    joined_docs = fusion_joiner.run([uniform_docs, mixed_docs])["documents"]
    assert len(joined_docs) == 7

    # Only ids are compared below; the scores on these reference documents are
    # not asserted against the output.
    reference_docs = [
        Document(content="a", score=0),
        Document(content="b", score=0),
        Document(content="c", score=0),
        Document(content="d", score=0.44),
        Document(content="e", score=0.60),
        Document(content="f", score=0.76, meta={"key": "value"}),
        Document(content="g", score=0.33),
    ]
    expected_document_ids = [ref.id for ref in reference_docs]
    for joined in joined_docs:
        assert joined.id in expected_document_ids
|
||||
|
||||
def test_run_with_top_k_in_run_method(self):
|
||||
joiner = DocumentJoiner()
|
||||
documents_1 = [Document(content="a"), Document(content="b"), Document(content="c")]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user