From 45136badfe1dedc10a8ea2ab15a126f3fa488b40 Mon Sep 17 00:00:00 2001
From: tstadel <60758086+tstadel@users.noreply.github.com>
Date: Thu, 7 Jul 2022 15:10:13 +0200
Subject: [PATCH] Fix _debug info getting lost for previous nodes when using
 join nodes (#2776)

* fix debug output for pipelines with join nodes

* add test

* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 haystack/pipelines/base.py                    |  6 +++-
 .../test_pipeline_debug_and_validation.py     | 36 ++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py
index 750f268a3..e635cf74b 100644
--- a/haystack/pipelines/base.py
+++ b/haystack/pipelines/base.py
@@ -528,7 +528,11 @@ class Pipeline:
                         if queue.get(n):  # concatenate inputs if it's a join node
                             existing_input = queue[n]
                             if "inputs" not in existing_input.keys():
-                                updated_input: dict = {"inputs": [existing_input, node_output], "params": params}
+                                updated_input: dict = {
+                                    "inputs": [existing_input, node_output],
+                                    "params": params,
+                                    "_debug": {**existing_input["_debug"], **node_output["_debug"]},
+                                }
                                 if query:
                                     updated_input["query"] = query
                                 if file_paths:
diff --git a/test/pipelines/test_pipeline_debug_and_validation.py b/test/pipelines/test_pipeline_debug_and_validation.py
index fa666a8ce..753fd95c0 100644
--- a/test/pipelines/test_pipeline_debug_and_validation.py
+++ b/test/pipelines/test_pipeline_debug_and_validation.py
@@ -4,7 +4,7 @@ import json
 import pytest
 
 from haystack.pipelines import Pipeline, RootNode
-from haystack.nodes import FARMReader, BM25Retriever
+from haystack.nodes import FARMReader, BM25Retriever, JoinDocuments
 
 from ..conftest import SAMPLES_PATH, MockRetriever as BaseMockRetriever, MockReader
 
@@ -108,6 +108,40 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path):
     json.dumps(prediction, default=str)
 
 
+@pytest.mark.elasticsearch
+@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
+def test_debug_attributes_for_join_nodes(document_store_with_docs, tmp_path):
+    # Regression test: "_debug" entries of nodes feeding a join node must survive in the final prediction.
+    es_retriever_1 = BM25Retriever(document_store=document_store_with_docs)
+    es_retriever_2 = BM25Retriever(document_store=document_store_with_docs)
+
+    pipeline = Pipeline()
+    pipeline.add_node(component=es_retriever_1, name="ESRetriever1", inputs=["Query"])
+    pipeline.add_node(component=es_retriever_2, name="ESRetriever2", inputs=["Query"])
+    pipeline.add_node(component=JoinDocuments(), name="JoinDocuments", inputs=["ESRetriever1", "ESRetriever2"])
+
+    prediction = pipeline.run(query="Who lives in Berlin?", debug=True)
+    assert "_debug" in prediction.keys()  # top-level debug section must exist
+    assert "ESRetriever1" in prediction["_debug"].keys()  # first branch feeding the join
+    assert "ESRetriever2" in prediction["_debug"].keys()  # second branch feeding the join
+    assert "JoinDocuments" in prediction["_debug"].keys()  # the join node itself
+    assert "input" in prediction["_debug"]["ESRetriever1"].keys()  # each entry needs both "input" and "output"
+    assert "output" in prediction["_debug"]["ESRetriever1"].keys()
+    assert "input" in prediction["_debug"]["ESRetriever2"].keys()
+    assert "output" in prediction["_debug"]["ESRetriever2"].keys()
+    assert "input" in prediction["_debug"]["JoinDocuments"].keys()
+    assert "output" in prediction["_debug"]["JoinDocuments"].keys()
+    assert prediction["_debug"]["ESRetriever1"]["input"]  # ...and the recorded values must be truthy (non-empty)
+    assert prediction["_debug"]["ESRetriever1"]["output"]
+    assert prediction["_debug"]["ESRetriever2"]["input"]
+    assert prediction["_debug"]["ESRetriever2"]["output"]
+    assert prediction["_debug"]["JoinDocuments"]["input"]
+    assert prediction["_debug"]["JoinDocuments"]["output"]
+
+    # Avoid circular reference: easiest way to detect those is to use json.dumps
+    json.dumps(prediction, default=str)
+
+
 @pytest.mark.elasticsearch
 @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
 def test_global_debug_attributes_override_node_ones(document_store_with_docs, tmp_path):