diff --git a/haystack/components/evaluators/answer_exact_match.py b/haystack/components/evaluators/answer_exact_match.py
index f6292f32c..dcd44408f 100644
--- a/haystack/components/evaluators/answer_exact_match.py
+++ b/haystack/components/evaluators/answer_exact_match.py
@@ -7,9 +7,9 @@ from haystack.core.component import component
 class AnswerExactMatchEvaluator:
     """
     Evaluator that checks if the predicted answers matches any of the ground truth answers exactly.
-    The result is a number from 0.0 to 1.0, it represents the proportion of questions where any predicted answer
-    matched one of the ground truth answers.
-    Each question can have multiple ground truth answers and multiple predicted answers.
+    The result is a number from 0.0 to 1.0 that represents the proportion of inputs where any predicted answer
+    matched one of the ground truth answers.
+    There can be multiple ground truth answers and multiple predicted answers as input.
 
     Usage example:
     ```python
@@ -17,7 +17,6 @@ class AnswerExactMatchEvaluator:
 
     evaluator = AnswerExactMatchEvaluator()
     result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
         ground_truth_answers=[["Berlin"], ["Paris"]],
         predicted_answers=[["Berlin"], ["Lyon"]],
     )
@@ -30,15 +29,11 @@ class AnswerExactMatchEvaluator:
     """
 
     @component.output_types(individual_scores=List[int], score=float)
-    def run(
-        self, questions: List[str], ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]
-    ) -> Dict[str, Any]:
+    def run(self, ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]) -> Dict[str, Any]:
         """
         Run the AnswerExactMatchEvaluator on the given inputs.
-        All lists must have the same length.
+        `ground_truth_answers` and `predicted_answers` must have the same length.
 
-        :param questions:
-            A list of questions.
         :param ground_truth_answers:
             A list of expected answers for each question.
         :param predicted_answers:
@@ -49,8 +44,8 @@ class AnswerExactMatchEvaluator:
                 - `score` - A number from 0.0 to 1.0 that represents the proportion of questions where any predicted
                   answer matched one of the ground truth answers.
         """
-        if not len(questions) == len(ground_truth_answers) == len(predicted_answers):
-            raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.")
+        if not len(ground_truth_answers) == len(predicted_answers):
+            raise ValueError("The length of ground_truth_answers and predicted_answers must be the same.")
 
         matches = []
         for truths, extracted in zip(ground_truth_answers, predicted_answers):
@@ -60,6 +55,6 @@ class AnswerExactMatchEvaluator:
                 matches.append(0)
 
         # The proportion of questions where any predicted answer matched one of the ground truth answers
-        average = sum(matches) / len(questions)
+        average = sum(matches) / len(predicted_answers)
 
         return {"individual_scores": matches, "score": average}
diff --git a/haystack/components/evaluators/document_recall.py b/haystack/components/evaluators/document_recall.py
index 0aaa2bd17..4102aa1ff 100644
--- a/haystack/components/evaluators/document_recall.py
+++ b/haystack/components/evaluators/document_recall.py
@@ -31,16 +31,15 @@ class RecallMode(Enum):
 @component
 class DocumentRecallEvaluator:
     """
-    Evaluator that calculates the Recall score for a list of questions.
+    Evaluator that calculates the Recall score for a list of documents.
     Returns both a list of scores for each question and the average.
-    Each question can have multiple ground truth documents and multiple predicted documents.
+    There can be multiple ground truth documents and multiple predicted documents as input.
 
     Usage example:
     ```python
     from haystack.components.evaluators import DocumentRecallEvaluator
     evaluator = DocumentRecallEvaluator()
     result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
         ground_truth_answers=[["Berlin"], ["Paris"]],
         predicted_answers=[["Paris"], ["London"]],
     )
@@ -80,17 +79,12 @@ class DocumentRecallEvaluator:
 
     @component.output_types(score=float, individual_scores=List[float])
     def run(
-        self,
-        questions: List[str],
-        ground_truth_documents: List[List[Document]],
-        retrieved_documents: List[List[Document]],
+        self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
     ) -> Dict[str, Any]:
         """
         Run the DocumentRecallEvaluator on the given inputs.
-        All lists must have the same length.
+        `ground_truth_documents` and `retrieved_documents` must have the same length.
 
-        :param questions:
-            A list of questions.
         :param ground_truth_documents:
             A list of expected documents for each question.
         :param retrieved_documents:
@@ -100,8 +94,8 @@ class DocumentRecallEvaluator:
                 - `invididual_scores` - A list of numbers from 0.0 to 1.0 that represents the proportion of matching
                   documents retrieved. If the mode is `single_hit`, the individual scores are True or False.
         """
-        if not len(questions) == len(ground_truth_documents) == len(retrieved_documents):
-            msg = "The length of questions, ground_truth_documents, and predicted_documents must be the same."
+        if len(ground_truth_documents) != len(retrieved_documents):
+            msg = "The length of ground_truth_documents and retrieved_documents must be the same."
             raise ValueError(msg)
 
         scores = []
@@ -109,4 +103,4 @@ class DocumentRecallEvaluator:
             score = self.mode_function(ground_truth, retrieved)
             scores.append(score)
 
-        return {"score": sum(scores) / len(questions), "individual_scores": scores}
+        return {"score": sum(scores) / len(retrieved_documents), "individual_scores": scores}
diff --git a/test/components/evaluators/test_answer_exact_match.py b/test/components/evaluators/test_answer_exact_match.py
index 91e4647aa..9c7b395b2 100644
--- a/test/components/evaluators/test_answer_exact_match.py
+++ b/test/components/evaluators/test_answer_exact_match.py
@@ -5,33 +5,21 @@ from haystack.components.evaluators import AnswerExactMatchEvaluator
 
 def test_run_with_all_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["Paris"]],
-    )
+    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["Paris"]])
 
     assert result == {"individual_scores": [1, 1], "score": 1.0}
 
 
 def test_run_with_no_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Paris"], ["London"]],
-    )
+    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Paris"], ["London"]])
 
     assert result == {"individual_scores": [0, 0], "score": 0.0}
 
 
 def test_run_with_partial_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["London"]],
-    )
+    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["London"]])
 
     assert result == {"individual_scores": [1, 0], "score": 0.5}
 
@@ -39,14 +27,6 @@ def test_run_with_partial_matching():
 def test_run_with_complex_data():
     evaluator = AnswerExactMatchEvaluator()
     result = evaluator.run(
-        questions=[
-            "In what country is Normandy located?",
-            "When was the Latin version of the word Norman first recorded?",
-            "What developed in Normandy during the 1100s?",
-            "In what century did important classical music developments occur in Normandy?",
-            "From which countries did the Norse originate?",
-            "What century did the Normans first gain their separate identity?",
-        ],
         ground_truth_answers=[
             ["France"],
             ["9th century", "9th"],
@@ -71,22 +51,7 @@ def test_run_with_different_lengths():
     evaluator = AnswerExactMatchEvaluator()
 
     with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
+        evaluator.run(ground_truth_answers=[["Berlin"]], predicted_answers=[["Berlin"], ["London"]])
 
     with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"]],
-        )
+        evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"]])
diff --git a/test/components/evaluators/test_document_recall.py b/test/components/evaluators/test_document_recall.py
index d73406df0..56e77f02c 100644
--- a/test/components/evaluators/test_document_recall.py
+++ b/test/components/evaluators/test_document_recall.py
@@ -16,7 +16,6 @@ class TestDocumentRecallEvaluatorSingleHit:
 
     def test_run_with_all_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
         )
@@ -25,7 +24,6 @@ class TestDocumentRecallEvaluatorSingleHit:
 
     def test_run_with_no_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Paris")], [Document(content="London")]],
         )
@@ -34,7 +32,6 @@ class TestDocumentRecallEvaluatorSingleHit:
 
     def test_run_with_partial_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
         )
@@ -43,14 +40,6 @@ class TestDocumentRecallEvaluatorSingleHit:
 
     def test_run_with_complex_data(self, evaluator):
         result = evaluator.run(
-            questions=[
-                "In what country is Normandy located?",
-                "When was the Latin version of the word Norman first recorded?",
-                "What developed in Normandy during the 1100s?",
-                "In what century did important classical music developments occur in Normandy?",
-                "From which countries did the Norse originate?",
-                "What century did the Normans first gain their separate identity?",
-            ],
             ground_truth_documents=[
                 [Document(content="France")],
                 [Document(content="9th century"), Document(content="9th")],
@@ -78,21 +67,12 @@ class TestDocumentRecallEvaluatorSingleHit:
     def test_run_with_different_lengths(self, evaluator):
         with pytest.raises(ValueError):
             evaluator.run(
-                questions=["What is the capital of Germany?"],
-                ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
-                retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
-            )
-
-        with pytest.raises(ValueError):
-            evaluator.run(
-                questions=["What is the capital of Germany?", "What is the capital of France?"],
                 ground_truth_documents=[[Document(content="Berlin")]],
                 retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
             )
 
         with pytest.raises(ValueError):
             evaluator.run(
-                questions=["What is the capital of Germany?", "What is the capital of France?"],
                 ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
                 retrieved_documents=[[Document(content="Berlin")]],
             )
@@ -105,7 +85,6 @@ class TestDocumentRecallEvaluatorMultiHit:
 
     def test_run_with_all_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
         )
@@ -114,7 +93,6 @@ class TestDocumentRecallEvaluatorMultiHit:
 
     def test_run_with_no_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Paris")], [Document(content="London")]],
         )
@@ -123,7 +101,6 @@ class TestDocumentRecallEvaluatorMultiHit:
 
     def test_run_with_partial_matching(self, evaluator):
         result = evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
             ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
             retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
         )
@@ -132,14 +109,6 @@ class TestDocumentRecallEvaluatorMultiHit:
 
     def test_run_with_complex_data(self, evaluator):
         result = evaluator.run(
-            questions=[
-                "In what country is Normandy located?",
-                "When was the Latin version of the word Norman first recorded?",
-                "What developed in Normandy during the 1100s?",
-                "In what century did important classical music developments occur in Normandy?",
-                "From which countries did the Norse originate?",
-                "What century did the Normans first gain their separate identity?",
-            ],
             ground_truth_documents=[
                 [Document(content="France")],
                 [Document(content="9th century"), Document(content="9th")],
@@ -172,21 +141,12 @@ class TestDocumentRecallEvaluatorMultiHit:
    def test_run_with_different_lengths(self, evaluator):
        with pytest.raises(ValueError):
            evaluator.run(
-                questions=["What is the capital of Germany?"],
-                ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
-                retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
-            )
-
-        with pytest.raises(ValueError):
-            evaluator.run(
-                questions=["What is the capital of Germany?", "What is the capital of France?"],
                 ground_truth_documents=[[Document(content="Berlin")]],
                 retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
             )
 
         with pytest.raises(ValueError):
             evaluator.run(
-                questions=["What is the capital of Germany?", "What is the capital of France?"],
                 ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
                 retrieved_documents=[[Document(content="Berlin")]],
             )
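A minimal usage sketch of the updated `run()` signatures (no `questions` argument), based on the docstring examples and tests above. The variable names are illustrative, and `RecallMode.SINGLE_HIT` is passed explicitly only to avoid relying on the evaluator's default mode:

```python
from haystack import Document
from haystack.components.evaluators import AnswerExactMatchEvaluator, DocumentRecallEvaluator
from haystack.components.evaluators.document_recall import RecallMode

# Exact match: aligned lists of ground-truth and predicted answers, one entry per input.
exact_match = AnswerExactMatchEvaluator()
em_result = exact_match.run(
    ground_truth_answers=[["Berlin"], ["Paris"]],
    predicted_answers=[["Berlin"], ["Lyon"]],
)
print(em_result["individual_scores"])  # expected: [1, 0]
print(em_result["score"])  # expected: 0.5

# Recall: aligned lists of ground-truth and retrieved documents, one entry per input.
recall = DocumentRecallEvaluator(mode=RecallMode.SINGLE_HIT)
recall_result = recall.run(
    ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
    retrieved_documents=[[Document(content="Berlin")], [Document(content="London")]],
)
print(recall_result["score"])  # expected: 0.5 (one hit out of two inputs)
```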