feat!: Rename ExtractiveReader's confidence_threshold to score_threshold (#6532)

* rename to score_threshold

* Update haystack/components/readers/extractive.py

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>

---------

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
Julian Risch 2023-12-12 15:12:28 +01:00 committed by GitHub
parent 82fe80ce68
commit 25a6eaae05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 22 deletions

View File

@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
class ExtractiveReader:
"""
A component that locates and extracts answers to a given query from Documents. It's used for performing extractive
QA. The Reader assigns a probability score to every possible answer span independently of other answer spans.
QA. The Reader assigns a score to every possible answer span independently of other answer spans.
This fixes a common issue of other implementations which make comparisons across documents harder by normalizing
each document's answers independently.
@ -42,7 +42,7 @@ class ExtractiveReader:
device: Optional[str] = None,
token: Union[bool, str, None] = None,
top_k: int = 20,
confidence_threshold: Optional[float] = None,
score_threshold: Optional[float] = None,
max_seq_length: int = 384,
stride: int = 128,
max_batch_size: Optional[int] = None,
@ -61,9 +61,9 @@ class ExtractiveReader:
If this parameter is set to `True`, then the token generated when running
`transformers-cli login` (stored in ~/.huggingface) is used.
:param top_k: Number of answers to return per query.
It is required even if confidence_threshold is set. Defaults to 20.
It is required even if score_threshold is set. Defaults to 20.
An additional answer with no text is returned if no_answer is set to True (default).
:param confidence_threshold: Returns only answers with the probability score above this threshold.
:param score_threshold: Returns only answers with the probability score above this threshold.
:param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split.
Default: 384
@ -72,8 +72,9 @@ class ExtractiveReader:
:param max_batch_size: Maximum number of samples that are fed through the model at the same time.
:param answers_per_seq: Number of answer candidates to consider per sequence.
This is relevant when a Document was split into multiple sequences because of max_seq_length.
:param no_answer: Whether to return no answer scores.
:param calibration_factor: Factor used for calibrating probability scores.
:param no_answer: Whether to return an additional `no answer` with an empty text and a score representing the
probability that the other top_k answers are incorrect.
:param calibration_factor: Factor used for calibrating probabilities.
:param model_kwargs: Additional keyword arguments passed to `AutoModelForQuestionAnswering.from_pretrained`
when loading the model specified in `model_name_or_path`. For details on what kwargs you can pass,
see the model's documentation.
@ -85,7 +86,7 @@ class ExtractiveReader:
self.token = token
self.max_seq_length = max_seq_length
self.top_k = top_k
self.confidence_threshold = confidence_threshold
self.score_threshold = score_threshold
self.stride = stride
self.max_batch_size = max_batch_size
self.answers_per_seq = answers_per_seq
@ -110,7 +111,7 @@ class ExtractiveReader:
token=self.token if not isinstance(self.token, str) else None,
max_seq_length=self.max_seq_length,
top_k=self.top_k,
confidence_threshold=self.confidence_threshold,
score_threshold=self.score_threshold,
stride=self.stride,
max_batch_size=self.max_batch_size,
answers_per_seq=self.answers_per_seq,
@ -202,8 +203,8 @@ class ExtractiveReader:
encodings: List["Encoding"],
) -> Tuple[List[List[int]], List[List[int]], "torch.Tensor"]:
"""
Turn start and end logits into probability scores for each answer span. Unlike most other
implementations, it doesn't normalize the scores to make them easier to compare across different
Turn start and end logits into probabilities for each answer span. Unlike most other
implementations, it doesn't normalize the scores within each split, which keeps them directly comparable across
splits. Returns the top k answer spans.
"""
mask = sequence_ids == 1 # Only keep tokens from the context (should ignore special tokens)
@ -262,14 +263,14 @@ class ExtractiveReader:
queries: List[str],
answers_per_seq: int,
top_k: Optional[int],
confidence_threshold: Optional[float],
score_threshold: Optional[float],
query_ids: List[int],
document_ids: List[int],
no_answer: bool,
) -> List[List[ExtractedAnswer]]:
"""
Reconstructs the nested structure that existed before flattening. Also computes a no answer probability.
This probability is different from most other implementations because it does not consider the no answer
Reconstructs the nested structure that existed before flattening. Also computes a no answer score.
This score is different from most other implementations because it does not consider the no answer
logit introduced with SQuAD 2. Instead, it just computes the probability that the answer does not exist
in the top k or top p.
"""
@ -307,8 +308,8 @@ class ExtractiveReader:
)
current_answers.append(answer_)
current_answers = sorted(current_answers, key=lambda answer: answer.score, reverse=True)
if confidence_threshold is not None:
current_answers = [answer for answer in current_answers if answer.score >= confidence_threshold]
if score_threshold is not None:
current_answers = [answer for answer in current_answers if answer.score >= score_threshold]
nested_answers.append(current_answers)
return nested_answers
@ -319,7 +320,7 @@ class ExtractiveReader:
query: str,
documents: List[Document],
top_k: Optional[int] = None,
confidence_threshold: Optional[float] = None,
score_threshold: Optional[float] = None,
max_seq_length: Optional[int] = None,
stride: Optional[int] = None,
max_batch_size: Optional[int] = None,
@ -333,9 +334,9 @@ class ExtractiveReader:
:param documents: List of Documents in which you want to search for an answer to the query.
:param top_k: The maximum number of answers to return.
An additional answer is returned if no_answer is set to True (default).
:param confidence_threshold:
:param score_threshold:
:return: List of ExtractedAnswers sorted by (desc.) answer score.
:param confidence_threshold: Returns only answers with the probability score above this threshold.
:param score_threshold: Returns only answers with the score above this threshold.
:param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split.
Default: 384
@ -352,7 +353,7 @@ class ExtractiveReader:
raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
top_k = top_k or self.top_k
confidence_threshold = confidence_threshold or self.confidence_threshold
score_threshold = score_threshold or self.score_threshold
max_seq_length = max_seq_length or self.max_seq_length
stride = stride or self.stride
max_batch_size = max_batch_size or self.max_batch_size
@ -400,7 +401,7 @@ class ExtractiveReader:
queries,
answers_per_seq,
top_k,
confidence_threshold,
score_threshold,
query_ids,
document_ids,
no_answer,

View File

@ -0,0 +1,5 @@
---
enhancements:
- |
Renamed the confidence_threshold parameter of the ExtractiveReader to score_threshold as ExtractedAnswers have a score and this is what the threshold is for.
For consistency, the term confidence is not mentioned anymore in favor of score.

View File

@ -98,7 +98,7 @@ def test_to_dict():
"device": None,
"token": None, # don't serialize valid tokens
"top_k": 20,
"confidence_threshold": None,
"score_threshold": None,
"max_seq_length": 384,
"stride": 128,
"max_batch_size": None,
@ -121,7 +121,7 @@ def test_to_dict_empty_model_kwargs():
"device": None,
"token": None, # don't serialize valid tokens
"top_k": 20,
"confidence_threshold": None,
"score_threshold": None,
"max_seq_length": 384,
"stride": 128,
"max_batch_size": None,