fix: eval() with add_isolated_node_eval=True breaks if no node supports it (#3347)

* fix isolated eval for pipelines without a node supporting isolated mode

* reformat

* add test
tstadel 2022-10-10 20:48:13 +02:00 committed by GitHub
parent 84aff5e2b3
commit 7fe5003c97
2 changed files with 40 additions and 1 deletion
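
In short, the bug this commit fixes: if no node in the pipeline accepts `add_isolated_node_eval` in its run signature, the pipeline's global-parameter validation treated the flag as an unknown parameter and the eval run failed. Below is a minimal sketch of the call that used to break, assuming a Haystack 1.x retriever-only pipeline; the document, labels, and variable names are placeholders and are not part of this commit:

    from haystack.document_stores import InMemoryDocumentStore
    from haystack.nodes import TfidfRetriever
    from haystack.pipelines import DocumentSearchPipeline
    from haystack.schema import Answer, Document, Label, MultiLabel

    # Placeholder setup: a retriever-only pipeline, i.e. no node that accepts
    # add_isolated_node_eval in its run() signature.
    document_store = InMemoryDocumentStore()
    document_store.write_documents([Document(content="My name is Carla and I live in Berlin")])
    retriever = TfidfRetriever(document_store=document_store)
    pipeline = DocumentSearchPipeline(retriever=retriever)

    # Placeholder gold labels for the eval run.
    labels = [
        MultiLabel(
            labels=[
                Label(
                    query="Who lives in Berlin?",
                    answer=Answer(answer="Carla"),
                    document=Document(content="My name is Carla and I live in Berlin"),
                    is_correct_answer=True,
                    is_correct_document=True,
                    origin="gold-label",
                )
            ]
        )
    ]

    # Before this fix, "add_isolated_node_eval" was rejected by the global-parameter
    # validation (it is neither a node name nor in valid_global_params), so this call
    # failed instead of returning an EvaluationResult.
    eval_result = pipeline.eval(labels=labels, add_isolated_node_eval=True)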


@@ -2094,7 +2094,9 @@ class Pipeline:
                 # Might be a non-targeted param. Verify that too
                 not_a_node = set(params.keys()) - set(self.graph.nodes)
-                valid_global_params = set(["debug"])  # Debug will be picked up by _dispatch_run, see its code
+                # "debug" will be picked up by _dispatch_run, see its code
+                # "add_isolated_node_eval" is set by pipeline.eval / pipeline.eval_batch
+                valid_global_params = set(["debug", "add_isolated_node_eval"])
                 for node_id in self.graph.nodes:
                     run_signature_args = self._get_run_node_signature(node_id)
                     valid_global_params |= set(run_signature_args)
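
For context, the hunk above sits in the pipeline's parameter validation: any key in `params` that does not name a node must be a recognized global parameter, otherwise the run is rejected. A simplified, standalone sketch of that check follows; it is not the exact Haystack code, and the function name is an approximation:

    def validate_global_params(params: dict, node_names: set, valid_global_params: set):
        # Keys that do not target a node must be recognized global parameters,
        # e.g. "debug" or, after this fix, "add_isolated_node_eval".
        not_a_node = set(params.keys()) - node_names
        invalid_keys = [key for key in not_a_node if key not in valid_global_params]
        if invalid_keys:
            raise ValueError(f"No node(s) or global parameter(s) named {', '.join(invalid_keys)} found in pipeline.")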


@@ -1020,6 +1020,43 @@ def test_document_search_calculate_metrics(retriever_with_docs):
     assert metrics["Retriever"]["ndcg"] == 0.5
 
 
+@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
+@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
+def test_document_search_isolated(retriever_with_docs):
+    pipeline = DocumentSearchPipeline(retriever=retriever_with_docs)
+    # eval run must not fail even though no node supports add_isolated_node_eval
+    eval_result: EvaluationResult = pipeline.eval(
+        labels=EVAL_LABELS, params={"Retriever": {"top_k": 5}}, add_isolated_node_eval=True
+    )
+    metrics = eval_result.calculate_metrics(document_scope="document_id")
+
+    assert "Retriever" in eval_result
+    assert len(eval_result) == 1
+
+    retriever_result = eval_result["Retriever"]
+    retriever_berlin = retriever_result[retriever_result["query"] == "Who lives in Berlin?"]
+    retriever_munich = retriever_result[retriever_result["query"] == "Who lives in Munich?"]
+
+    assert (
+        retriever_berlin[retriever_berlin["rank"] == 1]["document_id"].iloc[0]
+        in retriever_berlin[retriever_berlin["rank"] == 1]["gold_document_ids"].iloc[0]
+    )
+    assert (
+        retriever_munich[retriever_munich["rank"] == 1]["document_id"].iloc[0]
+        not in retriever_munich[retriever_munich["rank"] == 1]["gold_document_ids"].iloc[0]
+    )
+    assert metrics["Retriever"]["mrr"] == 0.5
+    assert metrics["Retriever"]["map"] == 0.5
+    assert metrics["Retriever"]["recall_multi_hit"] == 0.5
+    assert metrics["Retriever"]["recall_single_hit"] == 0.5
+    assert metrics["Retriever"]["precision"] == 0.1
+    assert metrics["Retriever"]["ndcg"] == 0.5
+
+    isolated_metrics = eval_result.calculate_metrics(document_scope="document_id", eval_mode="isolated")
+    # empty metrics for nodes that do not support add_isolated_node_eval
+    assert isolated_metrics["Retriever"] == {}
+
+
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
 def test_faq_calculate_metrics(retriever_with_docs):