feat!: Rename ExtractiveReader's confidence_threshold to score_threshold (#6532)

* rename to score_threshold

* Update haystack/components/readers/extractive.py

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>

---------

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
Julian Risch 2023-12-12 15:12:28 +01:00 committed by GitHub
parent 82fe80ce68
commit 25a6eaae05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 22 deletions

View File

@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
class ExtractiveReader: class ExtractiveReader:
""" """
A component that locates and extracts answers to a given query from Documents. It's used for performing extractive A component that locates and extracts answers to a given query from Documents. It's used for performing extractive
QA. The Reader assigns a probability score to every possible answer span independently of other answer spans. QA. The Reader assigns a score to every possible answer span independently of other answer spans.
This fixes a common issue of other implementations which make comparisons across documents harder by normalizing This fixes a common issue of other implementations which make comparisons across documents harder by normalizing
each document's answers independently. each document's answers independently.
@ -42,7 +42,7 @@ class ExtractiveReader:
device: Optional[str] = None, device: Optional[str] = None,
token: Union[bool, str, None] = None, token: Union[bool, str, None] = None,
top_k: int = 20, top_k: int = 20,
confidence_threshold: Optional[float] = None, score_threshold: Optional[float] = None,
max_seq_length: int = 384, max_seq_length: int = 384,
stride: int = 128, stride: int = 128,
max_batch_size: Optional[int] = None, max_batch_size: Optional[int] = None,
@ -61,9 +61,9 @@ class ExtractiveReader:
If this parameter is set to `True`, then the token generated when running If this parameter is set to `True`, then the token generated when running
`transformers-cli login` (stored in ~/.huggingface) is used. `transformers-cli login` (stored in ~/.huggingface) is used.
:param top_k: Number of answers to return per query. :param top_k: Number of answers to return per query.
It is required even if confidence_threshold is set. Defaults to 20. It is required even if score_threshold is set. Defaults to 20.
An additional answer with no text is returned if no_answer is set to True (default). An additional answer with no text is returned if no_answer is set to True (default).
:param confidence_threshold: Returns only answers with the probability score above this threshold. :param score_threshold: Returns only answers with the probability score above this threshold.
:param max_seq_length: Maximum number of tokens. :param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split. If a sequence exceeds it, the sequence is split.
Default: 384 Default: 384
@ -72,8 +72,9 @@ class ExtractiveReader:
:param max_batch_size: Maximum number of samples that are fed through the model at the same time. :param max_batch_size: Maximum number of samples that are fed through the model at the same time.
:param answers_per_seq: Number of answer candidates to consider per sequence. :param answers_per_seq: Number of answer candidates to consider per sequence.
This is relevant when a Document was split into multiple sequences because of max_seq_length. This is relevant when a Document was split into multiple sequences because of max_seq_length.
:param no_answer: Whether to return no answer scores. :param no_answer: Whether to return an additional `no answer` with an empty text and a score representing the
:param calibration_factor: Factor used for calibrating probability scores. probability that the other top_k answers are incorrect.
:param calibration_factor: Factor used for calibrating probabilities.
:param model_kwargs: Additional keyword arguments passed to `AutoModelForQuestionAnswering.from_pretrained` :param model_kwargs: Additional keyword arguments passed to `AutoModelForQuestionAnswering.from_pretrained`
when loading the model specified in `model_name_or_path`. For details on what kwargs you can pass, when loading the model specified in `model_name_or_path`. For details on what kwargs you can pass,
see the model's documentation. see the model's documentation.
@ -85,7 +86,7 @@ class ExtractiveReader:
self.token = token self.token = token
self.max_seq_length = max_seq_length self.max_seq_length = max_seq_length
self.top_k = top_k self.top_k = top_k
self.confidence_threshold = confidence_threshold self.score_threshold = score_threshold
self.stride = stride self.stride = stride
self.max_batch_size = max_batch_size self.max_batch_size = max_batch_size
self.answers_per_seq = answers_per_seq self.answers_per_seq = answers_per_seq
@ -110,7 +111,7 @@ class ExtractiveReader:
token=self.token if not isinstance(self.token, str) else None, token=self.token if not isinstance(self.token, str) else None,
max_seq_length=self.max_seq_length, max_seq_length=self.max_seq_length,
top_k=self.top_k, top_k=self.top_k,
confidence_threshold=self.confidence_threshold, score_threshold=self.score_threshold,
stride=self.stride, stride=self.stride,
max_batch_size=self.max_batch_size, max_batch_size=self.max_batch_size,
answers_per_seq=self.answers_per_seq, answers_per_seq=self.answers_per_seq,
@ -202,8 +203,8 @@ class ExtractiveReader:
encodings: List["Encoding"], encodings: List["Encoding"],
) -> Tuple[List[List[int]], List[List[int]], "torch.Tensor"]: ) -> Tuple[List[List[int]], List[List[int]], "torch.Tensor"]:
""" """
Turn start and end logits into probability scores for each answer span. Unlike most other Turn start and end logits into probabilities for each answer span. Unlike most other
implementations, it doesn't normalize the scores to make them easier to compare across different implementations, it doesn't normalize the scores in each split to make them easier to compare across different
splits. Returns the top k answer spans. splits. Returns the top k answer spans.
""" """
mask = sequence_ids == 1 # Only keep tokens from the context (should ignore special tokens) mask = sequence_ids == 1 # Only keep tokens from the context (should ignore special tokens)
@ -262,14 +263,14 @@ class ExtractiveReader:
queries: List[str], queries: List[str],
answers_per_seq: int, answers_per_seq: int,
top_k: Optional[int], top_k: Optional[int],
confidence_threshold: Optional[float], score_threshold: Optional[float],
query_ids: List[int], query_ids: List[int],
document_ids: List[int], document_ids: List[int],
no_answer: bool, no_answer: bool,
) -> List[List[ExtractedAnswer]]: ) -> List[List[ExtractedAnswer]]:
""" """
Reconstructs the nested structure that existed before flattening. Also computes a no answer probability. Reconstructs the nested structure that existed before flattening. Also computes a no answer score.
This probability is different from most other implementations because it does not consider the no answer This score is different from most other implementations because it does not consider the no answer
logit introduced with SQuAD 2. Instead, it just computes the probability that the answer does not exist logit introduced with SQuAD 2. Instead, it just computes the probability that the answer does not exist
in the top k or top p. in the top k or top p.
""" """
@ -307,8 +308,8 @@ class ExtractiveReader:
) )
current_answers.append(answer_) current_answers.append(answer_)
current_answers = sorted(current_answers, key=lambda answer: answer.score, reverse=True) current_answers = sorted(current_answers, key=lambda answer: answer.score, reverse=True)
if confidence_threshold is not None: if score_threshold is not None:
current_answers = [answer for answer in current_answers if answer.score >= confidence_threshold] current_answers = [answer for answer in current_answers if answer.score >= score_threshold]
nested_answers.append(current_answers) nested_answers.append(current_answers)
return nested_answers return nested_answers
@ -319,7 +320,7 @@ class ExtractiveReader:
query: str, query: str,
documents: List[Document], documents: List[Document],
top_k: Optional[int] = None, top_k: Optional[int] = None,
confidence_threshold: Optional[float] = None, score_threshold: Optional[float] = None,
max_seq_length: Optional[int] = None, max_seq_length: Optional[int] = None,
stride: Optional[int] = None, stride: Optional[int] = None,
max_batch_size: Optional[int] = None, max_batch_size: Optional[int] = None,
@ -333,9 +334,9 @@ class ExtractiveReader:
:param documents: List of Documents in which you want to search for an answer to the query. :param documents: List of Documents in which you want to search for an answer to the query.
:param top_k: The maximum number of answers to return. :param top_k: The maximum number of answers to return.
An additional answer is returned if no_answer is set to True (default). An additional answer is returned if no_answer is set to True (default).
:param confidence_threshold: :param score_threshold:
:return: List of ExtractedAnswers sorted by (desc.) answer score. :return: List of ExtractedAnswers sorted by (desc.) answer score.
:param confidence_threshold: Returns only answers with the probability score above this threshold. :param score_threshold: Returns only answers with the score above this threshold.
:param max_seq_length: Maximum number of tokens. :param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split. If a sequence exceeds it, the sequence is split.
Default: 384 Default: 384
@ -352,7 +353,7 @@ class ExtractiveReader:
raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.") raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
top_k = top_k or self.top_k top_k = top_k or self.top_k
confidence_threshold = confidence_threshold or self.confidence_threshold score_threshold = score_threshold or self.score_threshold
max_seq_length = max_seq_length or self.max_seq_length max_seq_length = max_seq_length or self.max_seq_length
stride = stride or self.stride stride = stride or self.stride
max_batch_size = max_batch_size or self.max_batch_size max_batch_size = max_batch_size or self.max_batch_size
@ -400,7 +401,7 @@ class ExtractiveReader:
queries, queries,
answers_per_seq, answers_per_seq,
top_k, top_k,
confidence_threshold, score_threshold,
query_ids, query_ids,
document_ids, document_ids,
no_answer, no_answer,

View File

@ -0,0 +1,5 @@
---
enhancements:
- |
Renamed the confidence_threshold parameter of the ExtractiveReader to score_threshold as ExtractedAnswers have a score and this is what the threshold is for.
For consistency, the term confidence is not mentioned anymore in favor of score.

View File

@ -98,7 +98,7 @@ def test_to_dict():
"device": None, "device": None,
"token": None, # don't serialize valid tokens "token": None, # don't serialize valid tokens
"top_k": 20, "top_k": 20,
"confidence_threshold": None, "score_threshold": None,
"max_seq_length": 384, "max_seq_length": 384,
"stride": 128, "stride": 128,
"max_batch_size": None, "max_batch_size": None,
@ -121,7 +121,7 @@ def test_to_dict_empty_model_kwargs():
"device": None, "device": None,
"token": None, # don't serialize valid tokens "token": None, # don't serialize valid tokens
"top_k": 20, "top_k": 20,
"confidence_threshold": None, "score_threshold": None,
"max_seq_length": 384, "max_seq_length": 384,
"stride": 128, "stride": 128,
"max_batch_size": None, "max_batch_size": None,