diff --git a/haystack/nodes/reader/base.py b/haystack/nodes/reader/base.py
index 454515191..0cc4a44f0 100644
--- a/haystack/nodes/reader/base.py
+++ b/haystack/nodes/reader/base.py
@@ -118,9 +118,13 @@ class BaseReader(BaseComponent):
 
         # run evaluation with labels as node inputs
         if add_isolated_node_eval and labels is not None:
-            relevant_documents = [label.document for label in labels.labels]
-            # Filter out empty documents
-            relevant_documents = [d for d in relevant_documents if d.content.strip() != ""]
+            # This dict comprehension deduplicates the Documents of a MultiLabel by their Document ID and
+            # filters out Documents whose content is empty
+            relevant_documents = list(
+                {
+                    label.document.id: label.document for label in labels.labels if label.document.content.strip() != ""
+                }.values()
+            )
             results_label_input = predict(query=query, documents=relevant_documents, top_k=top_k)
 
             # Add corresponding document_name and more meta data, if an answer contains the document_id
@@ -174,10 +178,15 @@ class BaseReader(BaseComponent):
         if add_isolated_node_eval and labels is not None:
             relevant_documents = []
             for labelx in labels:
-                # Filter out empty documents
-                relevant_docs_labelx = [
-                    label.document for label in labelx.labels if label.document.content.strip() != ""
-                ]
+                # This dict comprehension deduplicates the Documents of a MultiLabel by their Document ID
+                # and filters out Documents whose content is empty
+                relevant_docs_labelx = list(
+                    {
+                        label.document.id: label.document
+                        for label in labelx.labels
+                        if label.document.content.strip() != ""
+                    }.values()
+                )
                 relevant_documents.append(relevant_docs_labelx)
             results_label_input = predict_batch(queries=queries, documents=relevant_documents, top_k=top_k)
 
diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py
index bbb83656a..b51933f72 100644
--- a/test/pipelines/test_eval.py
+++ b/test/pipelines/test_eval.py
@@ -1,6 +1,7 @@
 import logging
 import pytest
 import sys
+from copy import deepcopy
 from haystack.document_stores.memory import InMemoryDocumentStore
 from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
 from haystack.nodes.preprocessor import PreProcessor
@@ -1267,9 +1268,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
 @pytest.mark.parametrize("reader", ["farm"], indirect=True)
 def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
+    labels = deepcopy(EVAL_LABELS)
+    # Copy one of the labels and change only its answer to get a label with a different answer but the same Document
+    label_copy = deepcopy(labels[0].labels[0])
+    label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
+    labels[0].labels.append(label_copy)
     pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
     eval_result: EvaluationResult = pipeline.eval(
-        labels=EVAL_LABELS,
+        labels=labels,
         sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
         add_isolated_node_eval=True,
     )
@@ -1292,6 +1298,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
     assert metrics_top_1["Reader"]["f1"] == 1.0
     assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
 
+    # Both labels share one Document; the isolated Reader row count below checks that it got deduplicated
+    assert labels[0].labels[0].document.id == labels[0].labels[1].document.id
+    reader_eval_df = eval_result.node_results["Reader"]
+    isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
+    assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
+
 
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
diff --git a/test/pipelines/test_eval_batch.py b/test/pipelines/test_eval_batch.py
index bcb00e61c..0dac41075 100644
--- a/test/pipelines/test_eval_batch.py
+++ b/test/pipelines/test_eval_batch.py
@@ -1,6 +1,7 @@
 import logging
 import pytest
 import sys
+from copy import deepcopy
 from haystack.document_stores.memory import InMemoryDocumentStore
 from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
 from haystack.nodes.preprocessor import PreProcessor
@@ -607,9 +608,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
 @pytest.mark.parametrize("reader", ["farm"], indirect=True)
 def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
+    labels = deepcopy(EVAL_LABELS)
+    # Copy one of the labels and change only its answer to get a label with a different answer but the same Document
+    label_copy = deepcopy(labels[0].labels[0])
+    label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
+    labels[0].labels.append(label_copy)
     pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
     eval_result: EvaluationResult = pipeline.eval_batch(
-        labels=EVAL_LABELS,
+        labels=labels,
         sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
         add_isolated_node_eval=True,
     )
@@ -632,6 +638,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
     assert metrics_top_1["Reader"]["f1"] == 1.0
     assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
 
+    # Both labels share one Document; the isolated Reader row count below checks that it got deduplicated
+    assert labels[0].labels[0].document.id == labels[0].labels[1].document.id
+    reader_eval_df = eval_result.node_results["Reader"]
+    isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
+    assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
+
 
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)