diff --git a/README.rst b/README.rst
index 9cc77688f..3d1167b25 100644
--- a/README.rst
+++ b/README.rst
@@ -55,10 +55,13 @@ Components
 Resources
 =========
 
-- Tutorial 1 - Basic QA Pipeline: `Jupyter notebook `__ or `Colab `__
-- Tutorial 2 - Fine-tuning a model on own data: `Jupyter notebook `__ or `Colab `__
-- Tutorial 3 - Basic QA Pipeline without Elasticsearch: `Jupyter notebook `__ or `Colab `__
+
+- Tutorial 1 - Basic QA Pipeline: `Jupyter notebook `_ or `Colab `_
+- Tutorial 2 - Fine-tuning a model on own data: `Jupyter notebook `_ or `Colab `_
+- Tutorial 3 - Basic QA Pipeline without Elasticsearch: `Jupyter notebook `_ or `Colab `_
 - Tutorial 4 - FAQ-style QA: `Jupyter notebook `__ or `Colab `__
+- Tutorial 5 - Evaluation of the whole QA-Pipeline: `Jupyter notebook `_ or `Colab `_
+
 
 Quick Start
 ===========
diff --git a/haystack/database/elasticsearch.py b/haystack/database/elasticsearch.py
index 43d6c5141..42e2a6741 100644
--- a/haystack/database/elasticsearch.py
+++ b/haystack/database/elasticsearch.py
@@ -108,8 +108,12 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
         filters: dict = None,
         top_k: int = 10,
         custom_query: str = None,
+        index: str = None,
     ) -> [Document]:
+        if index is None:
+            index = self.index
+
         if custom_query:  # substitute placeholder for question and filters for the custom_query template string
             template = Template(custom_query)
@@ -145,7 +149,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
             body["_source"] = {"excludes": self.excluded_meta_data}
 
         logger.debug(f"Retriever query: {body}")
-        result = self.client.search(index=self.index, body=body)["hits"]["hits"]
+        result = self.client.search(index=index, body=body)["hits"]["hits"]
 
         documents = [self._convert_es_hit_to_document(hit) for hit in result]
         return documents
@@ -199,3 +203,76 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
                 query_score=hit["_score"] + score_adjustment if hit["_score"] else None,
             )
         return document
+
+    def add_eval_data(self, filename: str, doc_index: str = "eval_document", label_index: str = "feedback"):
+        """
+        Adds a SQuAD-formatted file to the DocumentStore so that evaluation can be performed on it.
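+
+        Example (illustrative usage; the path and connection settings mirror Tutorial 5)::
+
+            document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", create_index=False)
+            document_store.add_eval_data("../data/natural_questions/dev_subset.json")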
+
+        :param filename: Name of the file containing evaluation data
+        :type filename: str
+        :param doc_index: Elasticsearch index where evaluation documents should be stored
+        :type doc_index: str
+        :param label_index: Elasticsearch index where labeled questions should be stored
+        :type label_index: str
+        """
+
+        eval_docs_to_index = []
+        questions_to_index = []
+
+        with open(filename, "r") as file:
+            data = json.load(file)
+            for document in data["data"]:
+                for paragraph in document["paragraphs"]:
+                    doc_to_index = {}
+                    id = hash(paragraph["context"])
+                    for fieldname, value in paragraph.items():
+                        # write docs to doc_index
+                        if fieldname == "context":
+                            doc_to_index[self.text_field] = value
+                            doc_to_index["doc_id"] = str(id)
+                            doc_to_index["_op_type"] = "create"
+                            doc_to_index["_index"] = doc_index
+                        # write questions to label_index
+                        elif fieldname == "qas":
+                            for qa in value:
+                                question_to_index = {
+                                    "question": qa["question"],
+                                    "answers": qa["answers"],
+                                    "doc_id": str(id),
+                                    "origin": "gold_label",
+                                    "index_name": doc_index,
+                                    "_op_type": "create",
+                                    "_index": label_index
+                                }
+                                questions_to_index.append(question_to_index)
+                        # additional fields for docs
+                        else:
+                            doc_to_index[fieldname] = value
+
+                    for key, value in document.items():
+                        if key == "title":
+                            doc_to_index[self.name_field] = value
+                        elif key != "paragraphs":
+                            doc_to_index[key] = value
+
+                    eval_docs_to_index.append(doc_to_index)
+
+        bulk(self.client, eval_docs_to_index)
+        bulk(self.client, questions_to_index)
+
+    def get_all_documents_in_index(self, index, filters=None):
+        body = {
+            "query": {
+                "bool": {
+                    "must": {
+                        "match_all": {}
+                    }
+                }
+            }
+        }
+
+        if filters:
+            body["query"]["bool"]["filter"] = {"term": filters}
+        result = scan(self.client, query=body, index=index)
+
+        return result
\ No newline at end of file
diff --git a/haystack/finder.py b/haystack/finder.py
index 9cf047c87..b2a71dc97 100644
--- a/haystack/finder.py
+++ b/haystack/finder.py
@@ -2,6 +2,8 @@ import logging
 
 import numpy as np
 from scipy.special import expit
+import time
+from statistics import mean
 
 logger = logging.getLogger(__name__)
@@ -96,3 +98,239 @@ class Finder:
             results["answers"].append(cur_answer)
 
         return results
+
+    def eval(self, label_index: str = "feedback", doc_index: str = "eval_document", label_origin: str = "gold_label",
+             top_k_retriever: int = 10, top_k_reader: int = 10):
+        """
+        Evaluation of the whole pipeline by first evaluating the Retriever and then evaluating the Reader on the
+        results of the Retriever.
+
+        Returns a dict containing the following metrics:
+            - "retriever_recall": Proportion of questions for which the correct document is among the retrieved documents
+            - "retriever_map": Mean of average precision for each question. Rewards retrievers that give relevant
+              documents a higher rank.
+ - "reader_top1_accuracy": Proportion of highest ranked predicted answers that overlap with corresponding correct answer + - "reader_top1_accuracy_has_answer": Proportion of highest ranked predicted answers that overlap + with corresponding correct answer for answerable questions + - "reader_top_k_accuracy": Proportion of predicted answers that overlap with corresponding correct answer + - "reader_topk_accuracy_has_answer": Proportion of predicted answers that overlap with corresponding correct answer + for answerable questions + - "reader_top1_em": Proportion of exact matches of highest ranked predicted answers with their corresponding + correct answers + - "reader_top1_em_has_answer": Proportion of exact matches of highest ranked predicted answers with their corresponding + correct answers for answerable questions + - "reader_topk_em": Proportion of exact matches of predicted answers with their corresponding correct answers + - "reader_topk_em_has_answer": Proportion of exact matches of predicted answers with their corresponding + correct answers for answerable questions + - "reader_top1_f1": Average overlap between highest ranked predicted answers and their corresponding correct answers + - "reader_top1_f1_has_answer": Average overlap between highest ranked predicted answers and their corresponding + correct answers for answerable questions + - "reader_topk_f1": Average overlap between predicted answers and their corresponding correct answers + - "reader_topk_f1_has_answer": Average overlap between predicted answers and their corresponding correct answers + for answerable questions + - "reader_top1_no_answer_accuracy": Proportion of correct predicting unanswerable question at highest ranked prediction + - "reader_topk_no_answer_accuracy": Proportion of correct predicting unanswerable question among all predictions + - "total_retrieve_time": Time retriever needed to retrieve documents for all questions + - "avg_retrieve_time": Average time needed to retrieve documents for one question + - "total_reader_time": Time reader needed to extract answer out of retrieved documents for all questions + where the correct document is among the retrieved ones + - "avg_reader_time": Average time needed to extract answer out of retrieved documents for one question + - "total_finder_time": Total time for whole pipeline + + :param label_index: Elasticsearch index where labeled questions are stored + :type label_index: str + :param doc_index: Elasticsearch index where documents that are used for evaluation are stored + :type doc_index: str + :param top_k_retriever: How many documents per question to return and pass to reader + :type top_k_retriever: int + :param top_k_reader: How many answers to return per question + :type top_k_reader: int + """ + finder_start_time = time.time() + # extract all questions for evaluation + filter = {"origin": label_origin} + questions = self.retriever.document_store.get_all_documents_in_index(index=label_index, filters=filter) + + correct_retrievals = 0 + summed_avg_precision_retriever = 0 + retrieve_times = [] + + correct_readings_top1 = 0 + correct_readings_topk = 0 + correct_readings_top1_has_answer = 0 + correct_readings_topk_has_answer = 0 + exact_matches_top1 = 0 + exact_matches_topk = 0 + exact_matches_top1_has_answer = 0 + exact_matches_topk_has_answer = 0 + summed_f1_top1 = 0 + summed_f1_topk = 0 + summed_f1_top1_has_answer = 0 + summed_f1_topk_has_answer = 0 + correct_no_answers_top1 = 0 + correct_no_answers_topk = 0 + read_times = [] + + # retrieve documents + 
+        questions_with_docs = []
+        retriever_start_time = time.time()
+        for q_idx, question in enumerate(questions):
+            question_string = question["_source"]["question"]
+            single_retrieve_start = time.time()
+            retrieved_docs = self.retriever.retrieve(question_string, top_k=top_k_retriever, index=doc_index)
+            retrieve_times.append(time.time() - single_retrieve_start)
+            for doc_idx, doc in enumerate(retrieved_docs):
+                # check if correct doc among retrieved docs
+                if doc.meta["doc_id"] == question["_source"]["doc_id"]:
+                    correct_retrievals += 1
+                    summed_avg_precision_retriever += 1 / (doc_idx + 1)
+                    questions_with_docs.append({
+                        "question": question,
+                        "docs": retrieved_docs,
+                        "correct_es_doc_id": doc.id})
+                    break
+        retriever_total_time = time.time() - retriever_start_time
+        number_of_questions = q_idx + 1
+
+        number_of_no_answer = 0
+        previous_return_no_answers = self.reader.return_no_answers
+        self.reader.return_no_answers = True
+        # extract answers
+        reader_start_time = time.time()
+        for q_idx, question in enumerate(questions_with_docs):
+            if (q_idx + 1) % 100 == 0:
+                print(f"Processed {q_idx+1} questions.")
+            question_string = question["question"]["_source"]["question"]
+            docs = question["docs"]
+            single_reader_start = time.time()
+            predicted_answers = self.reader.predict(question_string, docs, top_k_reader)
+            read_times.append(time.time() - single_reader_start)
+            # check if question is answerable
+            if question["question"]["_source"]["answers"]:
+                for answer_idx, answer in enumerate(predicted_answers["answers"]):
+                    found_answer = False
+                    found_em = False
+                    best_f1 = 0
+                    # check if correct document
+                    if answer["document_id"] == question["correct_es_doc_id"]:
+                        gold_spans = [(gold_answer["answer_start"], gold_answer["answer_start"] + len(gold_answer["text"]) + 1)
+                                      for gold_answer in question["question"]["_source"]["answers"]]
+                        predicted_span = (answer["offset_start_in_doc"], answer["offset_end_in_doc"])
+
+                        for gold_span in gold_spans:
+                            # check if overlap between gold answer and predicted answer
+                            if not found_answer:
+                                if (gold_span[0] <= predicted_span[1]) and (predicted_span[0] <= gold_span[1]):
+                                    # top-1 answer
+                                    if answer_idx == 0:
+                                        correct_readings_top1 += 1
+                                        correct_readings_top1_has_answer += 1
+                                    # top-k answers
+                                    correct_readings_topk += 1
+                                    correct_readings_topk_has_answer += 1
+                                    found_answer = True
+                            # check for exact match
+                            if not found_em:
+                                if (gold_span[0] == predicted_span[0]) and (gold_span[1] == predicted_span[1]):
+                                    # top-1 answer
+                                    if answer_idx == 0:
+                                        exact_matches_top1 += 1
+                                        exact_matches_top1_has_answer += 1
+                                    # top-k answers
+                                    exact_matches_topk += 1
+                                    exact_matches_topk_has_answer += 1
+                                    found_em = True
+                            # calculate f1
+                            pred_indices = list(range(predicted_span[0], predicted_span[1] + 1))
+                            gold_indices = list(range(gold_span[0], gold_span[1] + 1))
+                            n_overlap = len([x for x in pred_indices if x in gold_indices])
+                            if pred_indices and gold_indices and n_overlap:
+                                precision = n_overlap / len(pred_indices)
+                                recall = n_overlap / len(gold_indices)
+                                current_f1 = (2 * precision * recall) / (precision + recall)
+                                # top-1 answer
+                                if answer_idx == 0:
+                                    summed_f1_top1 += current_f1
+                                    summed_f1_top1_has_answer += current_f1
+                                if current_f1 > best_f1:
+                                    best_f1 = current_f1
+                        # top-k answers: use best f1-score
+                        summed_f1_topk += best_f1
+                        summed_f1_topk_has_answer += best_f1
+
+                    if found_answer and found_em:
+                        break
+            # question not answerable
+            else:
+                number_of_no_answer += 1
+                # As the question is not answerable, it is not clear how to score the reader metrics for this question.
+                # For now, we grade the predictions based on the rank at which 'no answer' is returned.
+                for answer_idx, answer in enumerate(predicted_answers["answers"]):
+                    # check if 'no answer'
+                    if answer["answer"] is None:
+                        if answer_idx == 0:
+                            correct_no_answers_top1 += 1
+                            correct_readings_top1 += 1
+                            exact_matches_top1 += 1
+                            summed_f1_top1 += 1
+                        correct_no_answers_topk += 1
+                        correct_readings_topk += 1
+                        exact_matches_topk += 1
+                        summed_f1_topk += 1
+                        break
+        number_of_has_answer = correct_retrievals - number_of_no_answer
+
+        reader_total_time = time.time() - reader_start_time
+        finder_total_time = time.time() - finder_start_time
+
+        retriever_recall = correct_retrievals / number_of_questions
+        retriever_map = summed_avg_precision_retriever / number_of_questions
+
+        reader_top1_accuracy = correct_readings_top1 / correct_retrievals
+        reader_top1_accuracy_has_answer = correct_readings_top1_has_answer / number_of_has_answer
+        reader_top_k_accuracy = correct_readings_topk / correct_retrievals
+        reader_topk_accuracy_has_answer = correct_readings_topk_has_answer / number_of_has_answer
+        reader_top1_em = exact_matches_top1 / correct_retrievals
+        reader_top1_em_has_answer = exact_matches_top1_has_answer / number_of_has_answer
+        reader_topk_em = exact_matches_topk / correct_retrievals
+        reader_topk_em_has_answer = exact_matches_topk_has_answer / number_of_has_answer
+        reader_top1_f1 = summed_f1_top1 / correct_retrievals
+        reader_top1_f1_has_answer = summed_f1_top1_has_answer / number_of_has_answer
+        reader_topk_f1 = summed_f1_topk / correct_retrievals
+        reader_topk_f1_has_answer = summed_f1_topk_has_answer / number_of_has_answer
+        # guard against division by zero when the eval set contains no unanswerable questions
+        reader_top1_no_answer_accuracy = correct_no_answers_top1 / number_of_no_answer if number_of_no_answer else None
+        reader_topk_no_answer_accuracy = correct_no_answers_topk / number_of_no_answer if number_of_no_answer else None
+
+        self.reader.return_no_answers = previous_return_no_answers
+
+        logger.info((f"{correct_readings_topk} out of {number_of_questions} questions were correctly answered "
+                     f"({(correct_readings_topk/number_of_questions):.2%})."))
+        logger.info(f"{number_of_questions-correct_retrievals} questions could not be answered due to the retriever.")
+        logger.info(f"{correct_retrievals-correct_readings_topk} questions could not be answered due to the reader.")
+
+        results = {
+            "retriever_recall": retriever_recall,
+            "retriever_map": retriever_map,
+            "reader_top1_accuracy": reader_top1_accuracy,
+            "reader_top1_accuracy_has_answer": reader_top1_accuracy_has_answer,
+            "reader_top_k_accuracy": reader_top_k_accuracy,
+            "reader_topk_accuracy_has_answer": reader_topk_accuracy_has_answer,
+            "reader_top1_em": reader_top1_em,
+            "reader_top1_em_has_answer": reader_top1_em_has_answer,
+            "reader_topk_em": reader_topk_em,
+            "reader_topk_em_has_answer": reader_topk_em_has_answer,
+            "reader_top1_f1": reader_top1_f1,
+            "reader_top1_f1_has_answer": reader_top1_f1_has_answer,
+            "reader_topk_f1": reader_topk_f1,
+            "reader_topk_f1_has_answer": reader_topk_f1_has_answer,
+            "reader_top1_no_answer_accuracy": reader_top1_no_answer_accuracy,
+            "reader_topk_no_answer_accuracy": reader_topk_no_answer_accuracy,
+            "total_retrieve_time": retriever_total_time,
+            "avg_retrieve_time": mean(retrieve_times),
+            "total_reader_time": reader_total_time,
+            "avg_reader_time": mean(read_times),
+            "total_finder_time": finder_total_time
+        }
+
+        return results
\ No newline at end of file
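A minimal sketch of how the pieces above are meant to be driven end to end (mirroring Tutorial 5 at the bottom of this diff; the host settings, model name, and data path are the tutorial's, not requirements):

```python
from haystack.database.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.elasticsearch import ElasticsearchRetriever
from haystack.reader.farm import FARMReader
from haystack.finder import Finder

# index evaluation docs and labels into the default "eval_document"/"feedback" indices
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", create_index=False)
document_store.add_eval_data("../data/natural_questions/dev_subset.json")

retriever = ElasticsearchRetriever(document_store=document_store)
reader = FARMReader("deepset/roberta-base-squad2")
finder = Finder(reader, retriever)

# evaluates the retriever first, then the reader on the retriever's results
eval_results = finder.eval(top_k_retriever=10, top_k_reader=10)
print(eval_results["retriever_recall"], eval_results["reader_topk_f1"])
```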
diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py
index eb7fc0330..75eeb0f1a 100644
--- a/haystack/reader/farm.py
+++ b/haystack/reader/farm.py
@@ -4,13 +4,16 @@ from pathlib import Path
 import numpy as np
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import SquadProcessor
+from farm.data_handler.dataloader import NamedDataLoader
 from farm.infer import Inferencer
 from farm.modeling.optimization import initialize_optimizer
 from farm.train import Trainer
+from farm.eval import Evaluator
 from farm.utils import set_all_seeds, initialize_device_settings
 from scipy.special import expit
 
 from haystack.database.base import Document
+from haystack.database.elasticsearch import ElasticsearchDocumentStore
 
 logger = logging.getLogger(__name__)
@@ -272,6 +275,116 @@ class FARMReader:
 
         return result
 
+    def eval_on_file(self, data_dir: str, test_filename: str, device: str):
+        """
+        Performs evaluation on a SQuAD-formatted file.
+
+        Returns a dict containing the following metrics:
+            - "EM": exact match score
+            - "f1": F1-score
+            - "top_n_recall": Proportion of predicted answers that overlap with the correct answer
+
+        :param data_dir: The directory in which the test set can be found
+        :type data_dir: Path or str
+        :param test_filename: The name of the file containing the test data in SQuAD format.
+        :type test_filename: str
+        :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda".
+        :type device: str
+        """
+        eval_processor = SquadProcessor(
+            tokenizer=self.inferencer.processor.tokenizer,
+            max_seq_len=self.inferencer.processor.max_seq_len,
+            label_list=self.inferencer.processor.tasks["question_answering"]["label_list"],
+            metric=self.inferencer.processor.tasks["question_answering"]["metric"],
+            train_filename=None,
+            dev_filename=None,
+            dev_split=0,
+            test_filename=test_filename,
+            data_dir=Path(data_dir),
+        )
+
+        data_silo = DataSilo(processor=eval_processor, batch_size=self.inferencer.batch_size, distributed=False)
+        data_loader = data_silo.get_data_loader("test")
+
+        evaluator = Evaluator(data_loader=data_loader, tasks=eval_processor.tasks, device=device)
+
+        eval_results = evaluator.eval(self.inferencer.model)
+        results = {
+            "EM": eval_results[0]["EM"],
+            "f1": eval_results[0]["f1"],
+            "top_n_recall": eval_results[0]["top_n_recall"]
+        }
+        return results
+
+    def eval(self, document_store: ElasticsearchDocumentStore, device: str, label_index: str = "feedback",
+             doc_index: str = "eval_document", label_origin: str = "gold_label"):
+        """
+        Performs evaluation on evaluation documents stored in an Elasticsearch DocumentStore.
+
+        Returns a dict containing the following metrics:
+            - "EM": Proportion of exact matches of predicted answers with their corresponding correct answers
+            - "f1": Average overlap between predicted answers and their corresponding correct answers
+            - "top_n_recall": Proportion of predicted answers that overlap with the correct answer
+
+        :param document_store: The ElasticsearchDocumentStore containing the evaluation documents
+        :type document_store: ElasticsearchDocumentStore
+        :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda".
+        :type device: str
+        :param label_index: Elasticsearch index where labeled questions are stored
+        :type label_index: str
+        :param doc_index: Elasticsearch index where documents that are used for evaluation are stored
+        :type doc_index: str
+        :param label_origin: Field value by which labeled questions are filtered, i.e. the origin of the labels
+        :type label_origin: str
+        """
+
+        # extract all questions for evaluation
+        filter = {"origin": label_origin}
+        questions = document_store.get_all_documents_in_index(index=label_index, filters=filter)
+
+        # mapping from doc_id to questions
+        doc_questions_dict = {}
+        question_id = 0
+        for question in questions:
+            doc_id = question["_source"]["doc_id"]
+            doc_questions_dict.setdefault(doc_id, []).append({
+                "id": question_id,
+                "question": question["_source"]["question"],
+                "answers": question["_source"]["answers"],
+                "is_impossible": not question["_source"]["answers"]
+            })
+            question_id += 1
+
+        # extract eval documents and convert data back to SQuAD-like format
+        documents = document_store.get_all_documents_in_index(index=doc_index)
+        dicts = []
+        for document in documents:
+            doc_id = document["_source"]["doc_id"]
+            text = document["_source"]["text"]
+            questions = doc_questions_dict[doc_id]
+            dicts.append({"qas": questions, "context": text})
+
+        # Create DataLoader that can be passed to the Evaluator
+        indices = range(len(dicts))
+        dataset, tensor_names = self.inferencer.processor.dataset_from_dicts(dicts, indices=indices)
+        data_loader = NamedDataLoader(dataset=dataset, batch_size=self.inferencer.batch_size, tensor_names=tensor_names)
+
+        evaluator = Evaluator(data_loader=data_loader, tasks=self.inferencer.processor.tasks, device=device)
+
+        eval_results = evaluator.eval(self.inferencer.model)
+        results = {
+            "EM": eval_results[0]["EM"],
+            "f1": eval_results[0]["f1"],
+            "top_n_recall": eval_results[0]["top_n_recall"]
+        }
+        return results
+
     @staticmethod
     def _calc_no_answer(no_ans_gaps,best_score_answer):
         # "no answer" scores and positive answers scores are difficult to compare, because
diff --git a/haystack/retriever/elasticsearch.py b/haystack/retriever/elasticsearch.py
index 7d891baa4..643f0a550 100644
--- a/haystack/retriever/elasticsearch.py
+++ b/haystack/retriever/elasticsearch.py
@@ -41,12 +41,58 @@ class ElasticsearchRetriever(BaseRetriever):
         self.document_store = document_store
         self.custom_query = custom_query
 
-    def retrieve(self, query: str, filters: dict = None, top_k: int = 10) -> [Document]:
-        documents = self.document_store.query(query, filters, top_k, self.custom_query)
+    def retrieve(self, query: str, filters: dict = None, top_k: int = 10, index: str = None) -> [Document]:
+        if index is None:
+            index = self.document_store.index
+
+        documents = self.document_store.query(query, filters, top_k, self.custom_query, index)
         logger.info(f"Got {len(documents)} candidates from retriever")
 
         return documents
 
+    def eval(self, label_index: str = "feedback", doc_index: str = "eval_document", label_origin: str = "gold_label",
+             top_k: int = 10) -> dict:
+        """
+        Performs evaluation on the Retriever.
+        The Retriever is evaluated based on whether it finds the correct document for a given question string, and on
+        the position at which the correct document appears in the ranking.
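+
+        Example (illustrative usage; assumes eval data was indexed via ``add_eval_data``, as in Tutorial 5)::
+
+            retriever_eval_results = retriever.eval(top_k=10)
+            print(retriever_eval_results["recall"], retriever_eval_results["map"])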
+
+        Returns a dict containing the following metrics:
+            - "recall": Proportion of questions for which the correct document is among the retrieved documents
+            - "map": Mean of average precision for each question. Rewards retrievers that give relevant
+              documents a higher rank.
+
+        :param label_index: Index/Table in DocumentStore where labeled questions are stored
+        :param doc_index: Index/Table in DocumentStore where documents that are used for evaluation are stored
+        :param label_origin: Field value by which labeled questions are filtered, i.e. the origin of the labels
+        :param top_k: How many documents to return per question
+        """
+
+        # extract all questions for evaluation
+        filter = {"origin": label_origin}
+        questions = self.document_store.get_all_documents_in_index(index=label_index, filters=filter)
+
+        # calculate recall and mean-average-precision
+        correct_retrievals = 0
+        summed_avg_precision = 0
+        for q_idx, question in enumerate(questions):
+            question_string = question["_source"]["question"]
+            retrieved_docs = self.retrieve(question_string, top_k=top_k, index=doc_index)
+            # check if correct doc in retrieved docs
+            for doc_idx, doc in enumerate(retrieved_docs):
+                if doc.meta["doc_id"] == question["_source"]["doc_id"]:
+                    correct_retrievals += 1
+                    summed_avg_precision += 1 / (doc_idx + 1)
+                    break
+
+        number_of_questions = q_idx + 1
+        recall = correct_retrievals / number_of_questions
+        mean_avg_precision = summed_avg_precision / number_of_questions
+
+        logger.info((f"For {correct_retrievals} out of {number_of_questions} questions ({recall:.2%}), the answer was in"
+                     f" the top-{top_k} candidate passages selected by the retriever."))
+
+        return {"recall": recall, "map": mean_avg_precision}
+
 
 class EmbeddingRetriever(BaseRetriever):
     def __init__(
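The two reader evaluation entry points added above can also be called on their own, as Tutorial 5 below does (the `document_store`, `reader`, and `device` objects and the data path are the tutorial's, shown here only for illustration):

```python
# evaluate against eval data already indexed in Elasticsearch
reader_eval_results = reader.eval(document_store=document_store, device=device)

# or directly on a SQuAD-formatted file, without a document store
# reader_eval_results = reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device)

print(reader_eval_results["EM"], reader_eval_results["f1"], reader_eval_results["top_n_recall"])
```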
diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb
new file mode 100644
index 000000000..e746315a2
--- /dev/null
+++ b/tutorials/Tutorial5_Evaluation.ipynb
@@ -0,0 +1,1238 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluation\n",
+    "To be able to make a statement about the performance of a question-answering system, it is important to evaluate it. Furthermore, evaluation allows us to determine which parts of the system can be improved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Start an Elasticsearch server\n",
+    "You can start Elasticsearch on your local machine using Docker. If Docker is not readily available in your environment (e.g., in Colab notebooks), then you can manually download and execute Elasticsearch from source."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a844e3ec4f41b5d2b24fe3d562e8302896baea1d0a761295998434c2de490714\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Recommended: Start Elasticsearch using Docker\n",
+    "! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# In Colab / No Docker environments: Start Elasticsearch from source\n",
+    "#! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
+    "#! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
+    "#! chown -R daemon:daemon elasticsearch-7.6.2\n",
+    "\n",
+    "#import os\n",
+    "#from subprocess import Popen, PIPE, STDOUT\n",
+    "#es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
+    "#                  stdout=PIPE, stderr=STDOUT,\n",
+    "#                  preexec_fn=lambda: os.setuid(1)  # as daemon\n",
+    "#                  )\n",
+    "# wait until ES has started\n",
+    "#! sleep 30"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "05/19/2020 09:03:25 - INFO - farm.utils - device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n"
+     ]
+    }
+   ],
+   "source": [
+    "from farm.utils import initialize_device_settings\n",
+    "\n",
+    "device, n_gpu = initialize_device_settings(use_cuda=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "\n",
+    "from haystack.indexing.io import fetch_archive_from_http\n",
+    "\n",
+    "# Download evaluation data, which is a subset of the Natural Questions development set containing 50 documents\n",
+    "doc_dir = \"../data/natural_questions\"\n",
+    "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
+    "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Connect to Elasticsearch\n",
+    "from haystack.database.elasticsearch import ElasticsearchDocumentStore\n",
+    "\n",
+    "document_store = ElasticsearchDocumentStore(host=\"localhost\", username=\"\", password=\"\", create_index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "05/19/2020 09:03:37 - INFO - elasticsearch - POST http://localhost:9200/_bulk [status:200 request:0.796s]\n",
+      "05/19/2020 09:03:38 - INFO - elasticsearch - POST http://localhost:9200/_bulk [status:200 request:0.222s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Add evaluation data to Elasticsearch database\n",
+    "document_store.add_eval_data(\"../data/natural_questions/dev_subset.json\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize components of QA-System"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize Retriever\n",
+    "from haystack.retriever.elasticsearch import ElasticsearchRetriever\n",
+    "\n",
+    "retriever = ElasticsearchRetriever(document_store=document_store)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "05/19/2020 09:03:46 - INFO - farm.utils - device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n",
+      "05/19/2020 09:03:46 - INFO - farm.infer - Could not find `deepset/roberta-base-squad2` locally. Try to download from model hub ...\n",
+      "05/19/2020 09:03:50 - WARNING - farm.modeling.language_model - Could not automatically detect from language model name what language it is. \n",
+      "\t We guess it's an *ENGLISH* model ... \n",
+      "\t If not: Init the language model by supplying the 'language' param.\n",
+      "05/19/2020 09:03:56 - WARNING - farm.modeling.prediction_head - Some unused parameters are passed to the QuestionAnsweringHead. Might not be a problem. 
Params: {\"loss_ignore_index\": -1}\n", + "05/19/2020 09:04:02 - INFO - farm.utils - device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n", + "05/19/2020 09:04:02 - INFO - farm.infer - Got ya 7 parallel workers to do inference ...\n", + "05/19/2020 09:04:02 - INFO - farm.infer - 0 0 0 0 0 0 0 \n", + "05/19/2020 09:04:02 - INFO - farm.infer - /w\\ /w\\ /w\\ /w\\ /w\\ /w\\ /w\\\n", + "05/19/2020 09:04:02 - INFO - farm.infer - /'\\ / \\ /'\\ /'\\ / \\ / \\ /'\\\n", + "05/19/2020 09:04:02 - INFO - farm.infer - \n" + ] + } + ], + "source": [ + "# Initialize Reader\n", + "from haystack.reader.farm import FARMReader\n", + "\n", + "reader = FARMReader(\"deepset/roberta-base-squad2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize Finder which sticks together Reader and Retriever\n", + "from haystack.finder import Finder\n", + "\n", + "finder = Finder(reader, retriever)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation of Retriever" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/feedback/_search?scroll=5m&size=1000 [status:200 request:0.090s]\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.051s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.013s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.012s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.012s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.013s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.010s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.010s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.009s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET 
http://localhost:9200/eval_document/_search [status:200 request:0.011s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.010s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.010s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.009s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.009s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:11 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.010s]\n", + "05/19/2020 09:04:11 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.009s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 
request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 
request:0.008s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:12 - INFO - 
haystack.retriever.elasticsearch - Got 10 candidates from retriever\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n",
+      "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n",
+      "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n",
+      "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n",
+      "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - GET http://localhost:9200/_search/scroll [status:200 request:0.011s]\n",
+      "05/19/2020 09:04:12 - INFO - elasticsearch - DELETE http://localhost:9200/_search/scroll [status:200 request:0.005s]\n",
+      "05/19/2020 09:04:12 - INFO - haystack.retriever.elasticsearch - For 59 out of 59 questions (100.00%), the answer was in the top-10 candidate passages selected by the retriever.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Retriever Recall: 1.0\n",
+      "Retriever Mean Avg Precision: 0.984934086629002\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Evaluate Retriever on its own\n",
+    "retriever_eval_results = retriever.eval()\n",
+    "\n",
+    "## Retriever Recall is the proportion of questions for which the correct document containing the answer is\n",
+    "## among the retrieved documents\n",
+    "print(\"Retriever Recall:\", retriever_eval_results[\"recall\"])\n",
+    "## Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank\n",
+    "print(\"Retriever Mean Avg Precision:\", retriever_eval_results[\"map\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluation of Reader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "05/19/2020 09:04:22 - INFO - elasticsearch - GET http://localhost:9200/feedback/_search?scroll=5m&size=1000 [status:200 request:0.007s]\n",
+      "05/19/2020 09:04:22 - INFO - elasticsearch - GET http://localhost:9200/_search/scroll [status:200 request:0.003s]\n",
+      "05/19/2020 09:04:22 - INFO - elasticsearch - DELETE http://localhost:9200/_search/scroll [status:200 request:0.001s]\n",
+      "05/19/2020 09:04:22 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search?scroll=5m&size=1000 [status:200 request:0.014s]\n",
+      "05/19/2020 09:04:22 - INFO - elasticsearch - GET http://localhost:9200/_search/scroll [status:200 request:0.002s]\n",
+      "05/19/2020 09:04:22 - INFO - elasticsearch - DELETE http://localhost:9200/_search/scroll [status:200 request:0.002s]\n",
+      "Evaluating: 100%|██████████| 64/64 [00:14<00:00,  4.28it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reader Top-N-Recall: 0.5084745762711864\n",
+      "Reader Exact Match: 0.23728813559322035\n",
+      "Reader F1-Score: 0.23728813559322035\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Evaluate 
Reader on its own\n", + "reader_eval_results = reader.eval(document_store=document_store, device=device)\n", + "\n", + "# Evaluation of Reader can also be done directly on a SQuAD-formatted file \n", + "# without passing the data to Elasticsearch\n", + "#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n", + "\n", + "## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n", + "print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n", + "## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer\n", + "print(\"Reader Exact Match:\", reader_eval_results[\"EM\"])\n", + "## Reader F1-Score is the average overlap between the predicted answers and the correct answers\n", + "print(\"Reader F1-Score:\", reader_eval_results[\"f1\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation of Finder" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/feedback/_search?scroll=5m&size=1000 [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + 
"05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 
candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:57 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:57 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.008s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.007s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.004s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 
candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.004s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - 
elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.005s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/eval_document/_search [status:200 request:0.006s]\n", + "05/19/2020 09:04:58 - INFO - haystack.retriever.elasticsearch - Got 10 candidates from retriever\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - GET http://localhost:9200/_search/scroll [status:200 request:0.003s]\n", + "05/19/2020 09:04:58 - INFO - elasticsearch - DELETE http://localhost:9200/_search/scroll [status:200 request:0.001s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.40 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.48 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.19 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.51 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.33 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.52 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.90 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.82 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.47 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 
3/3 [00:00<00:00, 3.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.29 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.22 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.56 Batches/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 19.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:01<00:00, 1.50 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.39 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.13 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.67 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.88 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.82 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.18 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 3.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.08 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.88 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 
[00:00<00:00, 5.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 8.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.20 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.31 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.14 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.29 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.96 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.51 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.22 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.52 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.20 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.45 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.96 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.84 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.63 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 18.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 11.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 
3/3 [00:00<00:00, 4.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.90 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.63 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.33 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.18 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.08 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.49 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.21 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.31 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.27 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.36 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.96 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.34 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.82 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.25 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.49 Batches/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 11.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.27 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.87 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.84 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.36 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 
2/2 [00:00<00:00, 2.39 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.39 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.61 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:01<00:00, 1.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.27 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.63 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.35 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.29 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.52 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.49 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.96 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 62.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 11.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.22 Batches/s]\n", + "Inferencing Samples: 
100%|██████████| 1/1 [00:00<00:00, 24.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.13 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.90 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.63 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.45 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.61 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.13 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.90 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.45 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.33 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.96 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.70 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.39 Batches/s]\n", + "Inferencing 
Samples: 100%|██████████| 1/1 [00:00<00:00, 14.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.48 Batches/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.67 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 10.14 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.76 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 29.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.61 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.24 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 27.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 10.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.08 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.32 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.50 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 11.30 Batches/s]\n", + 
"Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.33 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 26.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.44 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.21 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.14 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.32 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 23.44 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.34 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.48 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.65 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 58.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.18 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.82 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.51 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.67 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.22 
Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.67 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.63 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.18 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.61 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.13 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.41 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 26.84 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.73 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.87 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.00 Batches/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.15 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.08 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.15 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.22 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.05 
Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.24 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.80 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.21 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 10.65 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.39 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.27 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.76 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.82 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 10.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.06 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.95 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.17 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.49 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.74 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 17.25 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 26.22 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.35 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 
[00:00<00:00, 3.02 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.76 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.24 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.76 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 19.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.19 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.08 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.71 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.54 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.76 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.36 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.01 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 6.69 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.98 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.48 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.51 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.24 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.85 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.49 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.20 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:02<00:00, 1.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.15 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 
3/3 [00:01<00:00, 1.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:01<00:00, 1.48 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 1.64 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:01<00:00, 1.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.21 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.25 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.23 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.93 Batches/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.50 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.18 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.62 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.49 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.52 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.59 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 17.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.15 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.11 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.89 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.04 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.90 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.88 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.65 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.39 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.30 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.87 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.75 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.42 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.12 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 
1/1 [00:00<00:00, 3.70 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.50 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 7.30 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.21 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.45 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.94 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.15 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 3.46 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 2.29 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.68 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:01<00:00, 2.58 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.16 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.97 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 7.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.71 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.35 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.86 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 4.26 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.53 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.51 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.37 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 24.83 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 3.79 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.57 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.05 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.77 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.43 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.48 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.87 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.40 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.10 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.66 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00, 5.13 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.79 Batches/s]\n", + "Inferencing Samples: 
100%|██████████| 2/2 [00:00<00:00, 7.65 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 6.07 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 8.36 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.24 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.56 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.92 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 1.84 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 5.00 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 6.91 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.99 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.60 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.55 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.67 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 9.03 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 4.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.34 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 11.72 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 16.78 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 3/3 [00:00<00:00, 3.81 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 7/7 [00:01<00:00, 4.28 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 4.20 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 5.09 Batches/s]\n", + "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 2.27 Batches/s]\n", + "05/19/2020 09:09:54 - INFO - haystack.finder - 57 out of 59 questions were correctly answered (96.61%).\n", + "05/19/2020 09:09:54 - INFO - haystack.finder - 0 questions could not be answered due to the retriever.\n", + "05/19/2020 09:09:54 - INFO - haystack.finder - 2 questions could not be answered due to the reader.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Retriever Recall in Finder: 1.0\n", + "Retriever Mean Avg Precision in Finder: 0.984934086629002\n", + "Reader Recall in Finder: 0.9661016949152542\n", + "Reader Mean Avg Precision in Finder: 0.44187516814635447\n", + "Reader Exact Match in Finder: 0.9661016949152542\n", + "Reader F1-Score in Finder: 0.9661016949152542\n" + ] + } + ], + "source": [ + "# Evaluate combination of Reader and Retriever through Finder\n", + "finder_eval_results = finder.eval()\n", + "\n", + "print(\"Retriever Recall in Finder:\", finder_eval_results[\"retriever_recall\"])\n", + "print(\"Retriever Mean Avg Precision in Finder:\", finder_eval_results[\"retriever_map\"])\n", + "\n", + "# Reader is only evaluated with those questions, where the correct document is among the retrieved ones\n", + "print(\"Reader Recall in Finder:\", finder_eval_results[\"reader_recall\"])\n", + "print(\"Reader Mean Avg Precision in Finder:\", finder_eval_results[\"reader_map\"])\n", + "print(\"Reader Exact Match in Finder:\", finder_eval_results[\"reader_em\"])\n", + "print(\"Reader F1-Score in Finder:\", finder_eval_results[\"reader_f1\"])" + ] 
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "name": "haystack",
+ "language": "python",
+ "display_name": "haystack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py
new file mode 100644
index 000000000..605480e26
--- /dev/null
+++ b/tutorials/Tutorial5_Evaluation.py
@@ -0,0 +1,81 @@
+from haystack.database.elasticsearch import ElasticsearchDocumentStore
+from haystack.indexing.io import fetch_archive_from_http
+from haystack.retriever.elasticsearch import ElasticsearchRetriever
+from haystack.reader.farm import FARMReader
+from haystack.finder import Finder
+from farm.utils import initialize_device_settings
+
+import logging
+import subprocess
+import time
+
+LAUNCH_ELASTICSEARCH = False
+device, n_gpu = initialize_device_settings(use_cuda=True)
+
+# Start an Elasticsearch server
+# You can start Elasticsearch on your local machine using Docker. If Docker is not readily available in
+# your environment (e.g., in Colab notebooks), you can manually download and execute Elasticsearch from source.
+if LAUNCH_ELASTICSEARCH:
+    logging.info("Starting Elasticsearch ...")
+    status = subprocess.run(
+        ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True
+    )
+    if status.returncode:
+        raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance "
+                        "then set LAUNCH_ELASTICSEARCH in the script to False.")
+    time.sleep(30)
+
+# Download evaluation data, a subset of the Natural Questions development set containing 50 documents
+doc_dir = "../data/nq"
+s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
+fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
+
+# Connect to Elasticsearch
+document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
+# Add evaluation data to the Elasticsearch database
+document_store.add_eval_data("../data/nq/nq_dev_subset.json")
+
+# Initialize Retriever
+retriever = ElasticsearchRetriever(document_store=document_store)
+
+# Initialize Reader
+reader = FARMReader("deepset/roberta-base-squad2")
+
+# Initialize Finder, which sticks together Reader and Retriever
+finder = Finder(reader, retriever)
+
+# Evaluate Retriever on its own
+retriever_eval_results = retriever.eval()
+## Retriever Recall is the proportion of questions for which the correct document containing the answer is
+## among the retrieved documents
+print("Retriever Recall:", retriever_eval_results["recall"])
+## Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
+print("Retriever Mean Avg Precision:", retriever_eval_results["mean avg precision"])
+
+# Evaluate Reader on its own
+reader_start = time.time()
+reader_eval_results = reader.eval(document_store=document_store, device=device)
+reader_total = time.time() - reader_start
+# Evaluation of the Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch:
reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device) + +## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer +print("Reader Top-N-Recall:", reader_eval_results["top_n_recall"]) +## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer +print("Reader Exact Match:", reader_eval_results["EM"]) +## Reader F1-Score is the average overlap between the predicted answers and the correct answers +print("Reader F1-Score:", reader_eval_results["f1"]) + + +# Evaluate combination of Reader and Retriever through Finder +finder_eval_results = finder.eval() +print("Retriever Recall in Finder:", finder_eval_results["retriever_recall"]) +print("Retriever Mean Avg Precision in Finder:", finder_eval_results["retriever_map"]) +# Reader is only evaluated with those questions, where the correct document is among the retrieved ones +print("Reader Recall in Finder:", finder_eval_results["reader_recall"]) +print("Reader Mean Avg Precision in Finder:", finder_eval_results["reader_map"]) +print("Reader Exact Match in Finder:", finder_eval_results["reader_em"]) +print("Reader F1-Score in Finder:", finder_eval_results["reader_f1"]) + +print(f"Finder time: {finder_eval_results['total_finder_time']}s")
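
For reference, here is a minimal, self-contained sketch of what the retriever metrics above measure. It is illustrative only, not Haystack's internal implementation, and the helper names (average_precision, retriever_metrics) are made up for this example. Since each question in this SQuAD-style setup has exactly one relevant document, average precision reduces to the reciprocal rank of the correct document, so MAP coincides with mean reciprocal rank.

from statistics import mean
from typing import Dict, List

def average_precision(ranked_doc_ids: List[str], relevant_doc: str) -> float:
    # With a single relevant document, AP is 1/rank of that document (0.0 if it was not retrieved)
    for rank, doc_id in enumerate(ranked_doc_ids, start=1):
        if doc_id == relevant_doc:
            return 1.0 / rank
    return 0.0

def retriever_metrics(rankings: List[List[str]], relevant_docs: List[str]) -> Dict[str, float]:
    aps = [average_precision(ranking, gold) for ranking, gold in zip(rankings, relevant_docs)]
    # Recall: fraction of questions whose correct document was retrieved at all
    recall = mean(1.0 if ap > 0 else 0.0 for ap in aps)
    return {"recall": recall, "map": mean(aps)}

# Two questions with top-3 retrieved document ids; the gold document of question 2 was missed
print(retriever_metrics([["d1", "d7", "d3"], ["d9", "d2", "d4"]], ["d7", "d5"]))
# {'recall': 0.5, 'map': 0.25}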
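The reader metrics compare predicted answer strings against gold answers. Below is a rough sketch of the standard SQuAD-style exact match and token-overlap F1; Haystack/FARM apply further answer normalization (articles, punctuation) that this sketch skips, so treat it as an approximation of how the reported numbers arise, not the exact computation.

from collections import Counter

def exact_match(prediction: str, gold: str) -> float:
    # 1.0 if the normalized strings are identical, else 0.0
    return float(prediction.strip().lower() == gold.strip().lower())

def f1_score(prediction: str, gold: str) -> float:
    # Token-overlap F1 between the predicted and the gold answer
    pred_tokens = prediction.lower().split()
    gold_tokens = gold.lower().split()
    common = Counter(pred_tokens) & Counter(gold_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)

print(exact_match("Albert Einstein", "albert einstein"))             # 1.0
print(f1_score("the physicist Albert Einstein", "Albert Einstein"))  # ~0.667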