update eval dataset

This commit is contained in:
Branden Chan 2020-07-15 16:14:52 +02:00
parent 912e98cd40
commit c55477e0ce
2 changed files with 6 additions and 6 deletions

View File

@ -1639,7 +1639,7 @@
"\n",
"# Download evaluation data, which is a subset of the Natural Questions development set containing 50 documents\n",
"doc_dir = \"../data/nq\"\n",
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip\"\n",
"fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
],
"execution_count": 7,
@ -1701,7 +1701,7 @@
},
"source": [
"# Add evaluation data to Elasticsearch database\n",
"document_store.add_eval_data(\"../data/nq/nq_dev_subset.json\")"
"document_store.add_eval_data(\"../data/nq/nq_dev_subset_v2.json\")"
],
"execution_count": 9,
"outputs": [
@ -2283,7 +2283,7 @@
"\n",
"# Evaluation of Reader can also be done directly on a SQuAD-formatted file\n",
"# without passing the data to Elasticsearch\n",
"#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n",
"#reader_eval_results = reader.eval_on_file(\"../data/nq\", \"nq_dev_subset_v2.json\", device=device)\n",
"\n",
"## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n",
"print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n",

View File

@ -39,17 +39,17 @@ if LAUNCH_ELASTICSEARCH:
# Download evaluation data, which is a subset of the Natural Questions development set containing 50 documents
doc_dir = "../data/nq"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
# Connect to Elasticsearch
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
# Add evaluation data to Elasticsearch database
if LAUNCH_ELASTICSEARCH:
document_store.add_eval_data("../data/nq/nq_dev_subset.json")
document_store.add_eval_data("../data/nq/nq_dev_subset_v2.json")
else:
logger.warning("Since we already have a running ES instance we should not index the same documents again."
"If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset.json')' manually ")
"If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset_v2.json')' manually ")
# Initialize Retriever
retriever = ElasticsearchRetriever(document_store=document_store)