diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py index df534096f..43c31173c 100644 --- a/haystack/reader/farm.py +++ b/haystack/reader/farm.py @@ -45,19 +45,18 @@ class FARMReader: :param batch_size: Number of samples the model receives in one batch for inference :param use_gpu: Whether to use GPU (if available) :param n_candidates_per_passage: How many candidate answers are extracted per text sequence that the model can process at once (depends on `max_seq_len`). - Note: This is not the number of "final answers" you will receive - (see `top_k` in FARMReader.predict() or Finder.get_answers() for that) - # TODO adjust farm. n_cand = 2 returns no answer + highest positive answer - # should return no answer + 2 best positive answers - # drawback: answers from a single paragraph might be very similar in text and score - # we need to have more varied answers (by excluding overlapping answers?) + Note: - This is not the number of "final answers" you will receive + (see `top_k` in FARMReader.predict() or Finder.get_answers() for that) + - FARM includes no_answer in the sorted list of predictions + + """ self.inferencer = Inferencer.load(model_name_or_path, batch_size=batch_size, gpu=use_gpu, task_type="question_answering") self.inferencer.model.prediction_heads[0].context_window_size = context_window_size - self.inferencer.model.prediction_heads[0].no_ans_threshold = no_ans_boost # TODO adjust naming and concept in FARM - self.no_ans_boost = no_ans_boost + self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost + # TODO adjust terminology: a haystack passage is a FARM document (which gets devided into FARM passages depending on document length and max_seq_len) self.inferencer.model.prediction_heads[0].n_best = n_candidates_per_passage def train(self, data_dir, train_filename, dev_filename=None, test_file_name=None, @@ -188,7 +187,7 @@ class FARMReader: } input_dicts.append(cur) - # get answers from QA model (Default: top 5 per input paragraph) + # get answers from QA model predictions = self.inferencer.inference_from_dicts( dicts=input_dicts, rest_api_schema=True, max_processes=max_processes ) @@ -200,9 +199,10 @@ class FARMReader: no_ans_gaps = [] best_score_answer = 0 for pred in predictions: + positive_found = False for a in pred["predictions"][0]["answers"]: # skip "no answers" here - if a["answer"]: + if(not positive_found and a["answer"]): cur = {"answer": a["answer"], "score": a["score"], "probability": float(expit(np.asarray([a["score"]]) / 8)), #just a pseudo prob for now @@ -214,30 +214,27 @@ class FARMReader: no_ans_gaps.append(pred["predictions"][0]["no_ans_gap"]) if a["score"] > best_score_answer: best_score_answer = a["score"] + positive_found = True - # adjust no_ans_gaps + + # "no answer" scores and positive answers scores are difficult to compare, because + # + a positive answer score is related to one specific document + # - a "no answer" score is related to all input documents + # Thus we compute the "no answer" score relative to the best possible answer and adjust it by + # the most significant difference between scores. + # Most significant difference: a model switching from predicting an answer to "no answer" (or vice versa). + # No_ans_gap coming from FARM mean how much no_ans_boost should change to switch predictions no_ans_gaps = np.array(no_ans_gaps) - no_ans_gaps_adjusted = no_ans_gaps + self.no_ans_boost + max_no_ans_gap = np.max(no_ans_gaps) + if(np.sum(no_ans_gaps < 0) == len(no_ans_gaps)): # all passages "no answer" as top score + no_ans_score = best_score_answer - max_no_ans_gap # max_no_ans_gap is negative, so it increases best pos score + else: # case: at least one passage predicts an answer (positive no_ans_gap) + no_ans_score = best_score_answer - max_no_ans_gap - # We want to heuristically rank how likely or unlikely the "no answer" option is. - - # case: all documents return no answer, then all no_ans_gaps are positive - if np.sum(no_ans_gaps_adjusted < 0) == 0: - # to rank we add the smallest no_ans_gap (a document where an answer would be nearly as likely as the no anser) - # to the highest answer score we found - no_ans_score = best_score_answer + min(no_ans_gaps_adjusted) - # case: documents where answers are preferred over no answer, the no_ans_gap is negative - else: - # the lowest (highest negative) no_ans_gap would be needed as positive no_ans_boost for the - # model to return "no answer" on all documents - # we subtract this value from the best answer score to rank our "no answer" option - # magically this is the same equation as used for the case above : ) - no_ans_score = best_score_answer + min(no_ans_gaps_adjusted) - - cur = {"answer": "[computer says no answer is likely]", + cur = {"answer": None, "score": no_ans_score, "probability": float(expit(np.asarray(no_ans_score) / 8)), # just a pseudo prob for now - "context": "", + "context": None, "offset_start": 0, "offset_end": 0, "document_id": None} @@ -249,7 +246,7 @@ class FARMReader: ) answers = answers[:top_k] result = {"question": question, - "adjust_no_ans_boost": -min(no_ans_gaps_adjusted), + "no_ans_gap": max_no_ans_gap, "answers": answers} return result \ No newline at end of file diff --git a/tutorials/Tutorial1_Basic_QA_Pipeline.py b/tutorials/Tutorial1_Basic_QA_Pipeline.py index 1b117e7c7..6efdfc900 100755 --- a/tutorials/Tutorial1_Basic_QA_Pipeline.py +++ b/tutorials/Tutorial1_Basic_QA_Pipeline.py @@ -51,7 +51,10 @@ finder = Finder(reader, retriever) # The higher top_k_retriever, the better (but also the slower) your answers. prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5) -#prediction = finder.get_answers(question="Who is the daughter of Arya Stark?", top_k_reader=5) # impossible question test +# to test impossible questions we need a large QA model, e.g. deepset/bert-large-uncased-whole-word-masking-squad2 +#prediction = finder.get_answers(question="Who is the first daughter of Arya Stark?", top_k_retriever=10, top_k_reader=5) + + #prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5) #prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)