diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb
index 830032ec2..758fbc3a5 100644
--- a/tutorials/Tutorial5_Evaluation.ipynb
+++ b/tutorials/Tutorial5_Evaluation.ipynb
@@ -1639,7 +1639,7 @@
         "\n",
         "# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents\n",
         "doc_dir = \"../data/nq\"\n",
-        "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
+        "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip\"\n",
         "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
       ],
       "execution_count": 7,
@@ -1701,7 +1701,7 @@
       },
       "source": [
         "# Add evaluation data to Elasticsearch database\n",
-        "document_store.add_eval_data(\"../data/nq/nq_dev_subset.json\")"
+        "document_store.add_eval_data(\"../data/nq/nq_dev_subset_v2.json\")"
       ],
       "execution_count": 9,
       "outputs": [
@@ -2283,7 +2283,7 @@
         "\n",
         "# Evaluation of Reader can also be done directly on a SQuAD-formatted file\n",
         "# without passing the data to Elasticsearch\n",
-        "#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n",
+        "#reader_eval_results = reader.eval_on_file(\"../data/nq\", \"nq_dev_subset_v2.json\", device=device)\n",
         "\n",
         "## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n",
         "print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n",
diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py
index 33c5b2006..da9ccb590 100644
--- a/tutorials/Tutorial5_Evaluation.py
+++ b/tutorials/Tutorial5_Evaluation.py
@@ -39,17 +39,17 @@ if LAUNCH_ELASTICSEARCH:
 
 # Download evaluation data, which is a subset of Natural Questions development set containing 50 documents
 doc_dir = "../data/nq"
-s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
+s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip"
 fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
 
 # Connect to Elasticsearch
 document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
 # Add evaluation data to Elasticsearch database
 if LAUNCH_ELASTICSEARCH:
-    document_store.add_eval_data("../data/nq/nq_dev_subset.json")
+    document_store.add_eval_data("../data/nq/nq_dev_subset_v2.json")
 else:
     logger.warning("Since we already have a running ES instance we should not index the same documents again."
- "If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset.json')' manually ") + "If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset_v2.json')' manually ") # Initialize Retriever retriever = ElasticsearchRetriever(document_store=document_store) @@ -74,7 +74,7 @@ if eval_retriever_only: if eval_reader_only: reader_eval_results = reader.eval(document_store=document_store, device=device) # Evaluation of Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch - #reader_eval_results = reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device) + #reader_eval_results = reader.eval_on_file("../data/nq", "nq_dev_subset_v2.json", device=device) ## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])