diff --git a/haystack/components/evaluators/answer_exact_match.py b/haystack/components/evaluators/answer_exact_match.py
index dcd44408f..db5e72259 100644
--- a/haystack/components/evaluators/answer_exact_match.py
+++ b/haystack/components/evaluators/answer_exact_match.py
@@ -6,10 +6,11 @@ from haystack.core.component import component
 @component
 class AnswerExactMatchEvaluator:
     """
-    Evaluator that checks if the predicted answers matches any of the ground truth answers exactly.
-    The result is a number from 0.0 to 1.0, it represents the proportion any predicted answer
-    that matched one of the ground truth answers.
-    There can be multiple ground truth answers and multiple predicted answers as input.
+    Evaluator that checks if predicted answers exactly match ground truth answers.
+
+    Each predicted answer is compared to one ground truth answer.
+    The final score is a number ranging from 0.0 to 1.0.
+    It represents the proportion of predicted answers that match their corresponding ground truth answer.
 
     Usage example:
     ```python
@@ -17,8 +18,8 @@ class AnswerExactMatchEvaluator:
 
     evaluator = AnswerExactMatchEvaluator()
     result = evaluator.run(
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["Lyon"]],
+        ground_truth_answers=["Berlin", "Paris"],
+        predicted_answers=["Berlin", "Lyon"],
     )
 
     print(result["individual_scores"])
@@ -29,15 +30,15 @@ class AnswerExactMatchEvaluator:
     """
 
     @component.output_types(individual_scores=List[int], score=float)
-    def run(self, ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]) -> Dict[str, Any]:
+    def run(self, ground_truth_answers: List[str], predicted_answers: List[str]) -> Dict[str, Any]:
         """
         Run the AnswerExactMatchEvaluator on the given inputs.
         `ground_truth_answers` and `retrieved_answers` must have the same length.
 
         :param ground_truth_answers:
-            A list of expected answers for each question.
+            A list of expected answers.
         :param predicted_answers:
-            A list of predicted answers for each question.
+            A list of predicted answers.
         :returns:
             A dictionary with the following outputs:
             - `individual_scores` - A list of 0s and 1s, where 1 means that the predicted answer matched one of the ground truth.
@@ -48,8 +49,8 @@ class AnswerExactMatchEvaluator:
             raise ValueError("The length of ground_truth_answers and predicted_answers must be the same.")
 
         matches = []
-        for truths, extracted in zip(ground_truth_answers, predicted_answers):
-            if set(truths) & set(extracted):
+        for truth, extracted in zip(ground_truth_answers, predicted_answers):
+            if truth == extracted:
                 matches.append(1)
             else:
                 matches.append(0)
diff --git a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml b/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
index ad380617d..c872542be 100644
--- a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
+++ b/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
@@ -1,6 +1,5 @@
 ---
 features:
   - |
-    Add `AnswerExactMatchEvaluator`, a Component that can be used to calculate the Exact Match metric
-    given a list of questions, a list of expected answers for each question and the list of predicted
-    answers for each question.
+    Add `AnswerExactMatchEvaluator`, a component that can be used to calculate the Exact Match metric
+    comparing a list of expected answers with a list of predicted answers.
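For context, a minimal usage sketch of the reworked evaluator after this change, mirroring the updated docstring example (flat lists of strings in, `individual_scores` and `score` out). This is an illustration, not part of the patch:

```python
from haystack.components.evaluators import AnswerExactMatchEvaluator

evaluator = AnswerExactMatchEvaluator()

# Each predicted answer is compared to the ground truth answer at the same position.
result = evaluator.run(
    ground_truth_answers=["Berlin", "Paris"],
    predicted_answers=["Berlin", "Lyon"],
)

print(result["individual_scores"])  # [1, 0] -> only the first position is an exact match
print(result["score"])              # 0.5    -> proportion of matching positions
```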
diff --git a/test/components/evaluators/test_answer_exact_match.py b/test/components/evaluators/test_answer_exact_match.py
index 9c7b395b2..2aa4fe7ea 100644
--- a/test/components/evaluators/test_answer_exact_match.py
+++ b/test/components/evaluators/test_answer_exact_match.py
@@ -5,21 +5,21 @@ from haystack.components.evaluators import AnswerExactMatchEvaluator
 
 def test_run_with_all_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["Paris"]])
+    result = evaluator.run(ground_truth_answers=["Berlin", "Paris"], predicted_answers=["Berlin", "Paris"])
 
     assert result == {"individual_scores": [1, 1], "score": 1.0}
 
 
 def test_run_with_no_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Paris"], ["London"]])
+    result = evaluator.run(ground_truth_answers=["Berlin", "Paris"], predicted_answers=["Paris", "London"])
 
     assert result == {"individual_scores": [0, 0], "score": 0.0}
 
 
 def test_run_with_partial_matching():
     evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"], ["London"]])
+    result = evaluator.run(ground_truth_answers=["Berlin", "Paris"], predicted_answers=["Berlin", "London"])
 
     assert result == {"individual_scores": [1, 0], "score": 0.5}
 
@@ -28,30 +28,42 @@ def test_run_with_complex_data():
     evaluator = AnswerExactMatchEvaluator()
     result = evaluator.run(
         ground_truth_answers=[
-            ["France"],
-            ["9th century", "9th"],
-            ["classical music", "classical"],
-            ["11th century", "the 11th"],
-            ["Denmark", "Iceland", "Norway"],
-            ["10th century", "10th"],
+            "France",
+            "9th century",
+            "9th",
+            "classical music",
+            "classical",
+            "11th century",
+            "the 11th",
+            "Denmark",
+            "Iceland",
+            "Norway",
+            "10th century",
+            "10th",
         ],
         predicted_answers=[
-            ["France"],
-            ["9th century", "10th century", "9th"],
-            ["classic music", "rock music", "dubstep"],
-            ["11th", "the 11th", "11th century"],
-            ["Denmark, Iceland and Norway"],
-            ["10th century", "the first half of the 10th century", "10th", "10th"],
+            "France",
+            "9th century",
+            "10th century",
+            "9th",
+            "classic music",
+            "rock music",
+            "dubstep",
+            "the 11th",
+            "11th century",
+            "Denmark, Iceland and Norway",
+            "10th century",
+            "10th",
         ],
     )
 
-    assert result == {"individual_scores": [1, 1, 0, 1, 0, 1], "score": 0.6666666666666666}
+    assert result == {"individual_scores": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], "score": 0.3333333333333333}
 
 
 def test_run_with_different_lengths():
     evaluator = AnswerExactMatchEvaluator()
     with pytest.raises(ValueError):
-        evaluator.run(ground_truth_answers=[["Berlin"]], predicted_answers=[["Berlin"], ["London"]])
+        evaluator.run(ground_truth_answers=["Berlin"], predicted_answers=["Berlin", "London"])
 
     with pytest.raises(ValueError):
-        evaluator.run(ground_truth_answers=[["Berlin"], ["Paris"]], predicted_answers=[["Berlin"]])
+        evaluator.run(ground_truth_answers=["Berlin", "Paris"], predicted_answers=["Berlin"])
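The updated tests assume strictly positional, case-sensitive string equality. A self-contained sketch of that comparison logic follows; `positional_exact_match` is a hypothetical stand-in used for illustration, not the component itself:

```python
from typing import Dict, List, Union


def positional_exact_match(
    ground_truth_answers: List[str], predicted_answers: List[str]
) -> Dict[str, Union[List[int], float]]:
    """Score 1 at each position where the prediction equals the ground truth, else 0."""
    if len(ground_truth_answers) != len(predicted_answers):
        raise ValueError("The length of ground_truth_answers and predicted_answers must be the same.")

    individual_scores = [int(truth == predicted) for truth, predicted in zip(ground_truth_answers, predicted_answers)]
    return {"individual_scores": individual_scores, "score": sum(individual_scores) / len(individual_scores)}


# Mirrors test_run_with_partial_matching from the diff above.
assert positional_exact_match(["Berlin", "Paris"], ["Berlin", "London"]) == {
    "individual_scores": [1, 0],
    "score": 0.5,
}
```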