From cca8676f905085fbb9216811f6363ab42bfcf7ab Mon Sep 17 00:00:00 2001 From: brandenchan Date: Wed, 26 Aug 2020 12:01:59 +0200 Subject: [PATCH 1/3] More robust eval --- haystack/indexing/utils.py | 22 +++++++++++++++----- haystack/reader/farm.py | 42 +++++++++++++++++++++++--------------- test/test_eval.py | 2 +- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/haystack/indexing/utils.py b/haystack/indexing/utils.py index 292d4f37f..e157cf952 100644 --- a/haystack/indexing/utils.py +++ b/haystack/indexing/utils.py @@ -45,19 +45,31 @@ def eval_data_from_file(filename: str) -> Tuple[List[Document], List[Label]]: # Get Labels for qa in paragraph["qas"]: - for answer in qa["answers"]: + if len(qa["answers"]) > 0: + for answer in qa["answers"]: + label = Label( + question=qa["question"], + answer=answer["text"], + is_correct_answer=True, + is_correct_document=True, + document_id=cur_doc.id, + offset_start_in_doc=answer["answer_start"], + no_answer=qa["is_impossible"], + origin="gold_label", + ) + labels.append(label) + else: label = Label( question=qa["question"], - answer=answer["text"], + answer="", is_correct_answer=True, is_correct_document=True, document_id=cur_doc.id, - offset_start_in_doc=answer["answer_start"], + offset_start_in_doc=0, no_answer=qa["is_impossible"], origin="gold_label", - ) + ) labels.append(label) - return docs, labels diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py index db228e70f..e003f7ccb 100644 --- a/haystack/reader/farm.py +++ b/haystack/reader/farm.py @@ -394,6 +394,11 @@ class FARMReader(BaseReader): :param doc_index: Index/Table name where documents that are used for evaluation are stored """ + if self.top_k_per_candidate != 4: + logger.warning(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" + f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n" + f"This deviates from FARM's default where QuestionAnsweringPredictionHead.n_best = 5") + # extract all questions for evaluation filters = {"origin": [label_origin]} @@ -409,7 +414,8 @@ class FARMReader(BaseReader): # Create squad style dicts d: Dict[str, Any] = {} - for doc_id in aggregated_per_doc.keys(): + all_doc_ids = [x.id for x in document_store.get_all_documents(doc_index)] + for doc_id in all_doc_ids: doc = document_store.get_document_by_id(doc_id, index=doc_index) if not doc: logger.error(f"Document with the ID '{doc_id}' is not present in the document store.") @@ -419,21 +425,25 @@ class FARMReader(BaseReader): } # get all questions / answers aggregated_per_question: Dict[str, Any] = defaultdict(list) - for label in aggregated_per_doc[doc_id]: - # add to existing answers - if label.question in aggregated_per_question.keys(): - aggregated_per_question[label.question]["answers"].append({ - "text": label.answer, - "answer_start": label.offset_start_in_doc}) - # create new one - else: - aggregated_per_question[label.question] = { - "id": str(hash(str(doc_id)+label.question)), - "question": label.question, - "answers": [{ - "text": label.answer, - "answer_start": label.offset_start_in_doc}] - } + if doc_id in aggregated_per_doc: + for label in aggregated_per_doc[doc_id]: + # add to existing answers + if label.question in aggregated_per_question.keys(): + # Hack to fix problem where duplicate questions are merged by doc_store processing creating a QA example with 8 annotations > 6 annotation max + if len(aggregated_per_question[label.question]["answers"]) >= 6: + continue + aggregated_per_question[label.question]["answers"].append({ + "text": label.answer, + "answer_start": label.offset_start_in_doc}) + # create new one + else: + aggregated_per_question[label.question] = { + "id": str(hash(str(doc_id)+label.question)), + "question": label.question, + "answers": [{ + "text": label.answer, + "answer_start": label.offset_start_in_doc}] + } # Get rid of the question key again (after we aggregated we don't need it anymore) d[str(doc_id)]["qas"] = [v for v in aggregated_per_question.values()] diff --git a/test/test_eval.py b/test/test_eval.py index 67e90d247..836dc6f93 100644 --- a/test/test_eval.py +++ b/test/test_eval.py @@ -11,7 +11,7 @@ def test_add_eval_data(document_store): document_store.add_eval_data(filename="samples/squad/small.json", doc_index="test_eval_document", label_index="test_feedback") assert document_store.get_document_count(index="test_eval_document") == 87 - assert document_store.get_label_count(index="test_feedback") == 881 + assert document_store.get_label_count(index="test_feedback") == 1214 # test documents docs = document_store.get_all_documents(index="test_eval_document") From b44b1ac6ec4d60f62d565918a3de4bc36aefef41 Mon Sep 17 00:00:00 2001 From: brandenchan Date: Wed, 26 Aug 2020 12:03:56 +0200 Subject: [PATCH 2/3] Set top_k_per_candidate --- tutorials/Tutorial5_Evaluation.ipynb | 4 ++-- tutorials/Tutorial5_Evaluation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb index 168109518..db06dca8d 100644 --- a/tutorials/Tutorial5_Evaluation.ipynb +++ b/tutorials/Tutorial5_Evaluation.ipynb @@ -297,7 +297,7 @@ "# Initialize Reader\n", "from haystack.reader.farm import FARMReader\n", "\n", - "reader = FARMReader(\"deepset/roberta-base-squad2\")" + "reader = FARMReader(\"deepset/roberta-base-squad2\", top_k_per_candidate=4)" ] }, { @@ -1957,4 +1957,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py index 7c64b4ce5..78b78369d 100644 --- a/tutorials/Tutorial5_Evaluation.py +++ b/tutorials/Tutorial5_Evaluation.py @@ -75,7 +75,7 @@ retriever = ElasticsearchRetriever(document_store=document_store) # Initialize Reader -reader = FARMReader("deepset/roberta-base-squad2") +reader = FARMReader("deepset/roberta-base-squad2", top_k_per_candidate=4) # Initialize Finder which sticks together Reader and Retriever finder = Finder(reader, retriever) From f108939fc33408df50fbd1f834de5bac0ecad795 Mon Sep 17 00:00:00 2001 From: brandenchan Date: Wed, 26 Aug 2020 13:27:30 +0200 Subject: [PATCH 3/3] Change warning to info --- haystack/reader/farm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py index e003f7ccb..23c242919 100644 --- a/haystack/reader/farm.py +++ b/haystack/reader/farm.py @@ -395,9 +395,9 @@ class FARMReader(BaseReader): """ if self.top_k_per_candidate != 4: - logger.warning(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" - f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n" - f"This deviates from FARM's default where QuestionAnsweringPredictionHead.n_best = 5") + logger.info(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" + f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n" + f"This deviates from FARM's default where QuestionAnsweringPredictionHead.n_best = 5") # extract all questions for evaluation filters = {"origin": [label_origin]}