feat!: Rename ExtractiveReader's confidence_threshold to score_threshold (#6532)

* rename to score_threshold

* Update haystack/components/readers/extractive.py

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>

---------

Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
Julian Risch 2023-12-12 15:12:28 +01:00 committed by GitHub
parent 82fe80ce68
commit 25a6eaae05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 22 deletions

View File

@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
class ExtractiveReader:
"""
A component that locates and extracts answers to a given query from Documents. It's used for performing extractive
QA. The Reader assigns a probability score to every possible answer span independently of other answer spans.
QA. The Reader assigns a score to every possible answer span independently of other answer spans.
This fixes a common issue of other implementations which make comparisons across documents harder by normalizing
each document's answers independently.
@ -42,7 +42,7 @@ class ExtractiveReader:
device: Optional[str] = None,
token: Union[bool, str, None] = None,
top_k: int = 20,
confidence_threshold: Optional[float] = None,
score_threshold: Optional[float] = None,
max_seq_length: int = 384,
stride: int = 128,
max_batch_size: Optional[int] = None,
@ -61,9 +61,9 @@ class ExtractiveReader:
If this parameter is set to `True`, then the token generated when running
`transformers-cli login` (stored in ~/.huggingface) is used.
:param top_k: Number of answers to return per query.
It is required even if confidence_threshold is set. Defaults to 20.
It is required even if score_threshold is set. Defaults to 20.
An additional answer with no text is returned if no_answer is set to True (default).
:param confidence_threshold: Returns only answers with the probability score above this threshold.
:param score_threshold: Returns only answers with the probability score above this threshold.
:param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split.
Default: 384
@ -72,8 +72,9 @@ class ExtractiveReader:
:param max_batch_size: Maximum number of samples that are fed through the model at the same time.
:param answers_per_seq: Number of answer candidates to consider per sequence.
This is relevant when a Document was split into multiple sequences because of max_seq_length.
:param no_answer: Whether to return no answer scores.
:param calibration_factor: Factor used for calibrating probability scores.
:param no_answer: Whether to return an additional `no answer` with an empty text and a score representing the
probability that the other top_k answers are incorrect.
:param calibration_factor: Factor used for calibrating probabilities.
:param model_kwargs: Additional keyword arguments passed to `AutoModelForQuestionAnswering.from_pretrained`
when loading the model specified in `model_name_or_path`. For details on what kwargs you can pass,
see the model's documentation.
@ -85,7 +86,7 @@ class ExtractiveReader:
self.token = token
self.max_seq_length = max_seq_length
self.top_k = top_k
self.confidence_threshold = confidence_threshold
self.score_threshold = score_threshold
self.stride = stride
self.max_batch_size = max_batch_size
self.answers_per_seq = answers_per_seq
@ -110,7 +111,7 @@ class ExtractiveReader:
token=self.token if not isinstance(self.token, str) else None,
max_seq_length=self.max_seq_length,
top_k=self.top_k,
confidence_threshold=self.confidence_threshold,
score_threshold=self.score_threshold,
stride=self.stride,
max_batch_size=self.max_batch_size,
answers_per_seq=self.answers_per_seq,
@ -202,8 +203,8 @@ class ExtractiveReader:
encodings: List["Encoding"],
) -> Tuple[List[List[int]], List[List[int]], "torch.Tensor"]:
"""
Turn start and end logits into probability scores for each answer span. Unlike most other
implementations, it doesn't normalize the scores to make them easier to compare across different
Turn start and end logits into probabilities for each answer span. Unlike most other
implementations, it doesn't normalize the scores within each split, which keeps them directly comparable across
splits. Returns the top k answer spans.
"""
mask = sequence_ids == 1 # Only keep tokens from the context (should ignore special tokens)
@ -262,14 +263,14 @@ class ExtractiveReader:
queries: List[str],
answers_per_seq: int,
top_k: Optional[int],
confidence_threshold: Optional[float],
score_threshold: Optional[float],
query_ids: List[int],
document_ids: List[int],
no_answer: bool,
) -> List[List[ExtractedAnswer]]:
"""
Reconstructs the nested structure that existed before flattening. Also computes a no answer probability.
This probability is different from most other implementations because it does not consider the no answer
Reconstructs the nested structure that existed before flattening. Also computes a no answer score.
This score is different from most other implementations because it does not consider the no answer
logit introduced with SQuAD 2. Instead, it just computes the probability that the answer does not exist
in the top k or top p.
"""
@ -307,8 +308,8 @@ class ExtractiveReader:
)
current_answers.append(answer_)
current_answers = sorted(current_answers, key=lambda answer: answer.score, reverse=True)
if confidence_threshold is not None:
current_answers = [answer for answer in current_answers if answer.score >= confidence_threshold]
if score_threshold is not None:
current_answers = [answer for answer in current_answers if answer.score >= score_threshold]
nested_answers.append(current_answers)
return nested_answers
@ -319,7 +320,7 @@ class ExtractiveReader:
query: str,
documents: List[Document],
top_k: Optional[int] = None,
confidence_threshold: Optional[float] = None,
score_threshold: Optional[float] = None,
max_seq_length: Optional[int] = None,
stride: Optional[int] = None,
max_batch_size: Optional[int] = None,
@ -333,9 +334,9 @@ class ExtractiveReader:
:param documents: List of Documents in which you want to search for an answer to the query.
:param top_k: The maximum number of answers to return.
An additional answer is returned if no_answer is set to True (default).
:param confidence_threshold:
:param score_threshold:
:return: List of ExtractedAnswers sorted by (desc.) answer score.
:param confidence_threshold: Returns only answers with the probability score above this threshold.
:param score_threshold: Returns only answers with the score above this threshold.
:param max_seq_length: Maximum number of tokens.
If a sequence exceeds it, the sequence is split.
Default: 384
@ -352,7 +353,7 @@ class ExtractiveReader:
raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
top_k = top_k or self.top_k
confidence_threshold = confidence_threshold or self.confidence_threshold
score_threshold = score_threshold or self.score_threshold
max_seq_length = max_seq_length or self.max_seq_length
stride = stride or self.stride
max_batch_size = max_batch_size or self.max_batch_size
@ -400,7 +401,7 @@ class ExtractiveReader:
queries,
answers_per_seq,
top_k,
confidence_threshold,
score_threshold,
query_ids,
document_ids,
no_answer,

View File

@ -0,0 +1,5 @@
---
enhancements:
- |
Renamed the confidence_threshold parameter of the ExtractiveReader to score_threshold as ExtractedAnswers have a score and this is what the threshold is for.
For consistency, the term confidence is not mentioned anymore in favor of score.

View File

@ -98,7 +98,7 @@ def test_to_dict():
"device": None,
"token": None, # don't serialize valid tokens
"top_k": 20,
"confidence_threshold": None,
"score_threshold": None,
"max_seq_length": 384,
"stride": 128,
"max_batch_size": None,
@ -121,7 +121,7 @@ def test_to_dict_empty_model_kwargs():
"device": None,
"token": None, # don't serialize valid tokens
"top_k": 20,
"confidence_threshold": None,
"score_threshold": None,
"max_seq_length": 384,
"stride": 128,
"max_batch_size": None,