mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 20:03:38 +00:00
bug: JoinDocuments nodes produce incorrect results if preceded by another JoinDocuments node (#3170)
* don't send the list of inputs back as an output in the running of a node. * updated documentation * Update pydoc-markdown.py * added test case for pipeline join fix Co-authored-by: JeffRisberg <jrisberg@aol.com>
This commit is contained in:
parent
e2e6887ee8
commit
ad8fbe56ee
@ -265,9 +265,9 @@ class BaseComponent(ABC):
|
||||
if all_debug:
|
||||
output["_debug"] = all_debug
|
||||
|
||||
# add "extra" args that were not used by the node
|
||||
# add "extra" args that were not used by the node, but not the 'inputs' value
|
||||
for k, v in arguments.items():
|
||||
if k not in output.keys():
|
||||
if k not in output.keys() and k != "inputs":
|
||||
output[k] = v
|
||||
|
||||
output["params"] = params
|
||||
|
@ -18,9 +18,11 @@ import pandas as pd
|
||||
from haystack import __version__
|
||||
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
|
||||
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
|
||||
from haystack.document_stores.memory import InMemoryDocumentStore
|
||||
from haystack.nodes.other.join_docs import JoinDocuments
|
||||
from haystack.nodes.base import BaseComponent
|
||||
from haystack.nodes.retriever.sparse import BM25Retriever
|
||||
from haystack.nodes.retriever.sparse import FilterRetriever
|
||||
from haystack.pipelines import (
|
||||
Pipeline,
|
||||
RootNode,
|
||||
@ -1999,3 +2001,66 @@ def test_batch_querying_multiple_queries(document_store_with_docs):
|
||||
assert isinstance(result["answers"][0][0], Answer)
|
||||
assert len(result["answers"]) == 2 # Predictions for 2 collections of documents
|
||||
assert len(result["answers"][0]) == 5 # top-k of 5 for collection of docs
|
||||
|
||||
|
||||
def test_fix_to_pipeline_execution_when_join_follows_join():
|
||||
# wire up 4 retrievers, each with one document
|
||||
document_store_1 = InMemoryDocumentStore()
|
||||
retriever_1 = FilterRetriever(document_store_1, scale_score=True)
|
||||
dicts_1 = [{"content": "Alpha", "score": 0.552}]
|
||||
document_store_1.write_documents(dicts_1)
|
||||
|
||||
document_store_2 = InMemoryDocumentStore()
|
||||
retriever_2 = FilterRetriever(document_store_2, scale_score=True)
|
||||
dicts_2 = [{"content": "Beta", "score": 0.542}]
|
||||
document_store_2.write_documents(dicts_2)
|
||||
|
||||
document_store_3 = InMemoryDocumentStore()
|
||||
retriever_3 = FilterRetriever(document_store_3, scale_score=True)
|
||||
dicts_3 = [{"content": "Gamma", "score": 0.532}]
|
||||
document_store_3.write_documents(dicts_3)
|
||||
|
||||
document_store_4 = InMemoryDocumentStore()
|
||||
retriever_4 = FilterRetriever(document_store_4, scale_score=True)
|
||||
dicts_4 = [{"content": "Delta", "score": 0.512}]
|
||||
document_store_4.write_documents(dicts_4)
|
||||
|
||||
# wire up a pipeline of the retrievers, with 4-way join
|
||||
pipeline = Pipeline()
|
||||
pipeline.add_node(component=retriever_1, name="Retriever1", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_2, name="Retriever2", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_3, name="Retriever3", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_4, name="Retriever4", inputs=["Query"])
|
||||
pipeline.add_node(
|
||||
component=JoinDocuments(weights=[0.25, 0.25, 0.25, 0.25], join_mode="merge"),
|
||||
name="Join",
|
||||
inputs=["Retriever1", "Retriever2", "Retriever3", "Retriever4"],
|
||||
)
|
||||
|
||||
res = pipeline.run(query="Alpha Beta Gamma Delta")
|
||||
documents = res["documents"]
|
||||
assert len(documents) == 4 # all four documents should be found
|
||||
|
||||
# wire up a pipeline of the retrievers, with join following join
|
||||
pipeline = Pipeline()
|
||||
pipeline.add_node(component=retriever_1, name="Retriever1", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_2, name="Retriever2", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_3, name="Retriever3", inputs=["Query"])
|
||||
pipeline.add_node(component=retriever_4, name="Retriever4", inputs=["Query"])
|
||||
pipeline.add_node(
|
||||
component=JoinDocuments(weights=[0.5, 0.5], join_mode="merge"),
|
||||
name="Join12",
|
||||
inputs=["Retriever1", "Retriever2"],
|
||||
)
|
||||
pipeline.add_node(
|
||||
component=JoinDocuments(weights=[0.5, 0.5], join_mode="merge"),
|
||||
name="Join34",
|
||||
inputs=["Retriever3", "Retriever4"],
|
||||
)
|
||||
pipeline.add_node(
|
||||
component=JoinDocuments(weights=[0.5, 0.5], join_mode="merge"), name="JoinFinal", inputs=["Join12", "Join34"]
|
||||
)
|
||||
|
||||
res = pipeline.run(query="Alpha Beta Gamma Delta")
|
||||
documents = res["documents"]
|
||||
assert len(documents) == 4 # all four documents should be found
|
||||
|
Loading…
x
Reference in New Issue
Block a user