diff --git a/haystack/components/evaluators/__init__.py b/haystack/components/evaluators/__init__.py
deleted file mode 100644
index 9550a5f42..000000000
--- a/haystack/components/evaluators/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .answer_exact_match import AnswerExactMatchEvaluator
-
-__all__ = ["AnswerExactMatchEvaluator"]
diff --git a/haystack/components/evaluators/answer_exact_match.py b/haystack/components/evaluators/answer_exact_match.py
deleted file mode 100644
index eb509e8be..000000000
--- a/haystack/components/evaluators/answer_exact_match.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from typing import Any, Dict, List
-
-from haystack import default_from_dict, default_to_dict
-from haystack.core.component import component
-
-
-@component
-class AnswerExactMatchEvaluator:
-    """
-    Evaluator that checks if the predicted answers matches any of the ground truth answers exactly.
-    The result is a number from 0.0 to 1.0, it represents the proportion of questions where any predicted answer
-    matched one of the ground truth answers.
-    Each question can have multiple ground truth answers and multiple predicted answers.
-    """
-
-    def to_dict(self) -> Dict[str, Any]:
-        return default_to_dict(self)
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "AnswerExactMatchEvaluator":
-        return default_from_dict(cls, data)
-
-    @component.output_types(result=float)
-    def run(
-        self, questions: List[str], ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]
-    ) -> Dict[str, float]:
-        """
-        Run the AnswerExactMatchEvaluator on the given inputs.
-        All lists must have the same length.
-
-        :param questions: A list of questions.
-        :param ground_truth_answers: A list of expected answers for each question.
-        :param predicted_answers: A list of predicted answers for each question.
-        :returns: A dictionary with the following outputs:
-                * `result` - A number from 0.0 to 1.0 that represents the proportion of questions where any predicted
-                  answer matched one of the ground truth answers.
-        """
-        if not len(questions) == len(ground_truth_answers) == len(predicted_answers):
-            raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.")
-
-        matches = 0
-        for truths, extracted in zip(ground_truth_answers, predicted_answers):
-            if set(truths) & set(extracted):
-                matches += 1
-
-        # The proportion of questions where any predicted answer matched one of the ground truth answers
-        result = matches / len(questions)
-
-        return {"result": result}
diff --git a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml b/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
deleted file mode 100644
index ad380617d..000000000
--- a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-features:
-  - |
-    Add `AnswerExactMatchEvaluator`, a Component that can be used to calculate the Exact Match metric
-    given a list of questions, a list of expected answers for each question and the list of predicted
-    answers for each question.
diff --git a/test/components/evaluators/test_answer_exact_match.py b/test/components/evaluators/test_answer_exact_match.py
deleted file mode 100644
index c179c74a2..000000000
--- a/test/components/evaluators/test_answer_exact_match.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import pytest
-
-from haystack.components.evaluators import AnswerExactMatchEvaluator
-
-
-def test_run_with_all_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["Paris"]],
-    )
-
-    assert result["result"] == 1.0
-
-
-def test_run_with_no_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Paris"], ["London"]],
-    )
-
-    assert result["result"] == 0.0
-
-
-def test_run_with_partial_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["London"]],
-    )
-
-    assert result["result"] == 0.5
-
-
-def test_run_with_different_lengths():
-    evaluator = AnswerExactMatchEvaluator()
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"]],
-        )
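
For reference, a minimal usage sketch of the component this diff removes, assembled from the `run()` signature and the tests deleted above; the import path is the pre-removal package layout and is no longer valid once this diff is applied:

    from haystack.components.evaluators import AnswerExactMatchEvaluator  # pre-removal import path

    # The evaluator takes no constructor arguments.
    evaluator = AnswerExactMatchEvaluator()

    # Each question may carry several ground-truth and several predicted answers;
    # all three lists must have the same length.
    result = evaluator.run(
        questions=["What is the capital of Germany?", "What is the capital of France?"],
        ground_truth_answers=[["Berlin"], ["Paris"]],
        predicted_answers=[["Berlin"], ["London"]],
    )

    # `result["result"]` is the fraction of questions where any predicted answer
    # exactly matched a ground-truth answer; here 1 of 2 questions matched.
    assert result["result"] == 0.5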