From 45136badfe1dedc10a8ea2ab15a126f3fa488b40 Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Thu, 7 Jul 2022 15:10:13 +0200 Subject: [PATCH] Fix _debug info getting lost for previous nodes when using join nodes (#2776) * fix debug output for pipelines with join nodes * add test * Update Documentation & Code Style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- haystack/pipelines/base.py | 6 +++- .../test_pipeline_debug_and_validation.py | 36 ++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 750f268a3..e635cf74b 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -528,7 +528,11 @@ class Pipeline: if queue.get(n): # concatenate inputs if it's a join node existing_input = queue[n] if "inputs" not in existing_input.keys(): - updated_input: dict = {"inputs": [existing_input, node_output], "params": params} + updated_input: dict = { + "inputs": [existing_input, node_output], + "params": params, + "_debug": {**existing_input["_debug"], **node_output["_debug"]}, + } if query: updated_input["query"] = query if file_paths: diff --git a/test/pipelines/test_pipeline_debug_and_validation.py b/test/pipelines/test_pipeline_debug_and_validation.py index fa666a8ce..753fd95c0 100644 --- a/test/pipelines/test_pipeline_debug_and_validation.py +++ b/test/pipelines/test_pipeline_debug_and_validation.py @@ -4,7 +4,7 @@ import json import pytest from haystack.pipelines import Pipeline, RootNode -from haystack.nodes import FARMReader, BM25Retriever +from haystack.nodes import FARMReader, BM25Retriever, JoinDocuments from ..conftest import SAMPLES_PATH, MockRetriever as BaseMockRetriever, MockReader @@ -108,6 +108,40 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path): json.dumps(prediction, default=str) +@pytest.mark.elasticsearch +@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True) +def test_debug_attributes_for_join_nodes(document_store_with_docs, tmp_path): + + es_retriever_1 = BM25Retriever(document_store=document_store_with_docs) + es_retriever_2 = BM25Retriever(document_store=document_store_with_docs) + + pipeline = Pipeline() + pipeline.add_node(component=es_retriever_1, name="ESRetriever1", inputs=["Query"]) + pipeline.add_node(component=es_retriever_2, name="ESRetriever2", inputs=["Query"]) + pipeline.add_node(component=JoinDocuments(), name="JoinDocuments", inputs=["ESRetriever1", "ESRetriever2"]) + + prediction = pipeline.run(query="Who lives in Berlin?", debug=True) + assert "_debug" in prediction.keys() + assert "ESRetriever1" in prediction["_debug"].keys() + assert "ESRetriever2" in prediction["_debug"].keys() + assert "JoinDocuments" in prediction["_debug"].keys() + assert "input" in prediction["_debug"]["ESRetriever1"].keys() + assert "output" in prediction["_debug"]["ESRetriever1"].keys() + assert "input" in prediction["_debug"]["ESRetriever2"].keys() + assert "output" in prediction["_debug"]["ESRetriever2"].keys() + assert "input" in prediction["_debug"]["JoinDocuments"].keys() + assert "output" in prediction["_debug"]["JoinDocuments"].keys() + assert prediction["_debug"]["ESRetriever1"]["input"] + assert prediction["_debug"]["ESRetriever1"]["output"] + assert prediction["_debug"]["ESRetriever2"]["input"] + assert prediction["_debug"]["ESRetriever2"]["output"] + assert prediction["_debug"]["JoinDocuments"]["input"] + assert prediction["_debug"]["JoinDocuments"]["output"] + + # Avoid circular reference: easiest way to detect those is to use json.dumps + json.dumps(prediction, default=str) + + @pytest.mark.elasticsearch @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True) def test_global_debug_attributes_override_node_ones(document_store_with_docs, tmp_path):