From c55477e0ce9e5c50369578891e15c997efa303db Mon Sep 17 00:00:00 2001
From: Branden Chan
Date: Wed, 15 Jul 2020 16:14:52 +0200
Subject: [PATCH 1/2] update eval dataset

---
 tutorials/Tutorial5_Evaluation.ipynb | 6 +++---
 tutorials/Tutorial5_Evaluation.py    | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb
index 830032ec2..758fbc3a5 100644
--- a/tutorials/Tutorial5_Evaluation.ipynb
+++ b/tutorials/Tutorial5_Evaluation.ipynb
@@ -1639,7 +1639,7 @@
 "\n",
 "# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents\n",
 "doc_dir = \"../data/nq\"\n",
-"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
+"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip\"\n",
 "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
 ],
 "execution_count": 7,
 "outputs": [
@@ -1701,7 +1701,7 @@
 },
 "source": [
 "# Add evaluation data to Elasticsearch database\n",
-"document_store.add_eval_data(\"../data/nq/nq_dev_subset.json\")"
+"document_store.add_eval_data(\"../data/nq/nq_dev_subset_v2.json\")"
 ],
 "execution_count": 9,
 "outputs": [
@@ -2283,7 +2283,7 @@
 "\n",
 "# Evaluation of Reader can also be done directly on a SQuAD-formatted file\n",
 "# without passing the data to Elasticsearch\n",
-"#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n",
+"#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset_v2.json\", device=device)\n",
 "\n",
 "## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n",
 "print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n",
diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py
index 33c5b2006..0200f35c1 100644
--- a/tutorials/Tutorial5_Evaluation.py
+++ b/tutorials/Tutorial5_Evaluation.py
@@ -39,17 +39,17 @@ if LAUNCH_ELASTICSEARCH:
 
 # Download evaluation data, which is a subset of Natural Questions development set containing 50 documents
 doc_dir = "../data/nq"
-s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
+s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip"
 fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
 
 # Connect to Elasticsearch
 document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
 # Add evaluation data to Elasticsearch database
 if LAUNCH_ELASTICSEARCH:
-    document_store.add_eval_data("../data/nq/nq_dev_subset.json")
+    document_store.add_eval_data("../data/nq/nq_dev_subset_v2.json")
 else:
     logger.warning("Since we already have a running ES instance we should not index the same documents again."
- "If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset.json')' manually ") + "If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset_v2.json')' manually ") # Initialize Retriever retriever = ElasticsearchRetriever(document_store=document_store) From 64721d3196ff8f04ebd956e5a29e368043577195 Mon Sep 17 00:00:00 2001 From: Branden Chan Date: Wed, 15 Jul 2020 16:24:10 +0200 Subject: [PATCH 2/2] One more update --- tutorials/Tutorial5_Evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py index 0200f35c1..da9ccb590 100644 --- a/tutorials/Tutorial5_Evaluation.py +++ b/tutorials/Tutorial5_Evaluation.py @@ -74,7 +74,7 @@ if eval_retriever_only: if eval_reader_only: reader_eval_results = reader.eval(document_store=document_store, device=device) # Evaluation of Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch - #reader_eval_results = reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device) + #reader_eval_results = reader.eval_on_file("../data/nq", "nq_dev_subset_v2.json", device=device) ## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])