Choose correct similarity fns during benchmark runs & re-run benchmarks (#773)

* Adapt to new dataset_from_dicts return signature

* rename fn

* Align similarity fn in benchmark doc store

* Better choice of similarity fn

* Increase postgres wait time

* Add more expected returned variables

* update benchmark results

* Fix typo

* update all benchmark runs

* multiply stats by 100

* Specify similarity fns for website

Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
This commit is contained in:
Branden Chan 2021-02-03 11:45:18 +01:00 committed by GitHub
parent 8a5dc8f826
commit f3a3b73d9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 247 additions and 256 deletions

View File

@ -11,33 +11,33 @@
],
"data": [
{
"F1": 82.62983412843887,
"Speed": 98.86638639776464,
"F1": 82.58860575299658,
"Speed": 125.81040525892848,
"Model": "RoBERTa"
},
{
"F1": 78.90026641413856,
"Speed": 181.96379531485616,
"F1": 78.87858491007042,
"Speed": 260.6443097981493,
"Model": "MiniLM"
},
{
"F1": 74.32668866064459,
"Speed": 106.04748306200683,
"F1": 74.31182400443286,
"Speed": 121.08066567525722,
"Model": "BERT base"
},
{
"F1": 83.29492827667042,
"Speed": 40.408497243719076,
"F1": 83.26306774734308,
"Speed": 42.21949937744112,
"Model": "BERT large"
},
{
"F1": 84.62174414643722,
"Speed": 40.483264542292716,
"F1": 84.50422699207468,
"Speed": 42.07400844838985,
"Model": "XLM-RoBERTa"
},
{
"F1": 42.342513261953935,
"Speed": 160.41712955027901,
"F1": 42.31925844723574,
"Speed": 222.91207128366702,
"Model": "DistilBERT"
}
]

View File

@ -20,82 +20,82 @@
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"map": 0.929
"map": 92.95105322830888
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"map": 0.881
"map": 89.87097014904354
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"map": 0.821
"map": 86.54564090434241
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"map": 0.730
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"map": 0.929
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"map": 0.898
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"map": 0.863
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 0.805
"map": 80.86137228234089
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"map": 0.748
"map": 74.20444712972909
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"map": 0.6609999999999999
"map": 66.20627317806674
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"map": 0.56
"map": 56.25959153101251
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"map": 0.452
"map": 45.59452709000341
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"map": 92.95105322830888
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"map": 89.87097014904354
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"map": 86.54606328368972
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 80.8613722823409
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 1000,
"map": 0.929
"map": 92.95105322830888
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 10000,
"map": 0.896
"map": 89.69941373746582
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"map": 0.849
"map": 85.07984377595874
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 500000,
"map": 0.766
"map": 76.91475821598232
}
]
}

View File

@ -2,7 +2,7 @@
"chart_type": "BarChart",
"title": "Retriever Performance",
"subtitle": "Time and Accuracy Benchmarks",
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embeddings are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. We use a cosine similarity function with BM25 retrievers, and dot product with DPR. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
"bars": "horizontal",
"columns": [
"Model",
@ -24,30 +24,30 @@
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"index_speed": 73.93635160290218,
"query_speed": 6.23,
"map": 82
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"index_speed": 104.77116699738369,
"query_speed": 4.89,
"map": 86.3
"index_speed": 69.75508852811794,
"query_speed": 4.5992769354707805,
"map": 86.54564090434241
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"index_speed": 484.32931514144724,
"query_speed": 162.59,
"map": 56
"index_speed": 482.9993330442806,
"query_speed": 162.42378943468643,
"map": 56.25959153101251
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"index_speed": 95.52108545730724,
"query_speed": 6.511162294559942,
"map": 86.54606328368972
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"index_speed": 91.41086878008392,
"query_speed": 12.85,
"map": 84.9
"index_speed": 84.11829911061136,
"query_speed": 33.65729082116796,
"map": 85.07984377595874
}
]
}

View File

@ -20,82 +20,82 @@
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"query_speed": 40.802
"query_speed": 30.68451185154913
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"query_speed": 24.8
"query_speed": 19.568754413737462
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"query_speed": 6.23
"query_speed": 4.5992769354707805
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"query_speed": 1.45
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"query_speed": 40.048
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 22.47
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 4.90
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"query_speed": 1.08
"query_speed": 1.0558140319761546
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"query_speed": 232.97799999999998
"query_speed": 262.9405144288997
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"query_speed": 248.97
"query_speed": 183.6070813438718
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"query_speed": 162.59
"query_speed": 162.42378943468643
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"query_speed": 91.39
"query_speed": 82.43179203331141
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"query_speed": 35.40380445859966
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 25.78749025294445
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 6.511162294559942
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"query_speed": 1.5161593755666505
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 1000,
"query_speed": 37.884
"query_speed": 39.16414272911727
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 10000,
"query_speed": 31.34
"query_speed": 33.6432023480111
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"query_speed": 12.85
"query_speed": 33.65729082116796
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 500000,
"query_speed": 3.32
"query_speed": 34.27671486454735
}
]
}

View File

@ -11,33 +11,33 @@
],
"data": [
{
"F1": 82.62983412843887,
"Speed": 98.86638639776464,
"F1": 82.58860575299658,
"Speed": 128.25544874114064,
"Model": "RoBERTa"
},
{
"F1": 78.90026641413856,
"Speed": 181.96379531485616,
"F1": 78.87858491007042,
"Speed": 269.33155450679567,
"Model": "MiniLM"
},
{
"F1": 74.32668866064459,
"Speed": 106.04748306200683,
"F1": 74.31182400443286,
"Speed": 123.82266420208393,
"Model": "BERT base"
},
{
"F1": 83.29492827667042,
"Speed": 40.408497243719076,
"F1": 83.26306774734308,
"Speed": 43.188105620245494,
"Model": "BERT large"
},
{
"F1": 84.62174414643722,
"Speed": 40.483264542292716,
"F1": 84.50422699207468,
"Speed": 42.956527893643,
"Model": "XLM-RoBERTa"
},
{
"F1": 42.342513261953935,
"Speed": 160.41712955027901,
"F1": 42.31925844723574,
"Speed": 226.281948654048,
"Model": "DistilBERT"
}
]

View File

@ -17,85 +17,65 @@
}
],
"data": [
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"map": 0.929
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"map": 0.881
"map": 88.26183154948457
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"map": 0.821
"map": 82.47044752499787
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"map": 0.730
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"map": 0.929
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"map": 0.898
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"map": 0.863
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 0.805
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"map": 0.748
"map": 73.4952735751035
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"map": 0.6609999999999999
"map": 66.33019927857616
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"map": 0.56
"map": 56.25959153101251
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"map": 0.452
"map": 45.60339705629754
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 1000,
"map": 0.929
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"map": 89.87097014904354
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"map": 86.54606328368972
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 80.8613722823409
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 10000,
"map": 0.896
"map": 89.69941373746582
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"map": 0.849
"map": 85.07984377595874
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 500000,
"map": 0.766
"map": 76.91475821598232
}
]
}

View File

@ -24,30 +24,30 @@
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"index_speed": 73.93635160290218,
"query_speed": 6.23,
"map": 82
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"index_speed": 104.77116699738369,
"query_speed": 4.89,
"map": 86.3
"index_speed": 70.7842323095542,
"query_speed": 6.108417678791537,
"map": 82.47044752499787
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"index_speed": 484.32931514144724,
"query_speed": 162.59,
"map": 56
"index_speed": 486.8274411916137,
"query_speed": 162.40717155994315,
"map": 56.25959153101251
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"index_speed": 98.76191471019415,
"query_speed": 6.614453113633132,
"map": 86.54606328368972
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"index_speed": 91.41086878008392,
"query_speed": 12.85,
"map": 84.9
"index_speed": 86.8695584169603,
"query_speed": 38.24323466239034,
"map": 85.07984377595874
}
]
}

View File

@ -17,85 +17,65 @@
}
],
"data": [
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"query_speed": 40.802
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"query_speed": 24.8
"query_speed": 23.3662850838307
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"query_speed": 6.23
"query_speed": 6.108417678791537
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"query_speed": 1.45
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"query_speed": 40.048
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 22.47
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 4.90
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"query_speed": 1.08
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"query_speed": 232.97799999999998
"query_speed": 1.4393100251286972
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"query_speed": 248.97
"query_speed": 244.5160214986071
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"query_speed": 162.59
"query_speed": 162.40717155994315
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"query_speed": 91.39
"query_speed": 88.52692529827672
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 1000,
"query_speed": 37.884
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 27.22085301792775
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 6.614453113633132
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"query_speed": 1.5222363376940002
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 10000,
"query_speed": 31.34
"query_speed": 39.903073511580295
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 100000,
"query_speed": 12.85
"query_speed": 38.24323466239034
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 500000,
"query_speed": 3.32
"query_speed": 37.13917579922844
}
]
}

View File

@ -524,9 +524,9 @@ class FARMReader(BaseReader):
toc = perf_counter()
reader_time = toc - tic
results = {
"EM": eval_results[0]["EM"],
"f1": eval_results[0]["f1"],
"top_n_accuracy": eval_results[0]["top_n_accuracy"],
"EM": eval_results[0]["EM"] * 100,
"f1": eval_results[0]["f1"] * 100,
"top_n_accuracy": eval_results[0]["top_n_accuracy"] * 100,
"top_n": self.inferencer.model.prediction_heads[0].n_best,
"reader_time": reader_time,
"seconds_per_query": reader_time / n_queries

View File

@ -187,8 +187,7 @@ class DensePassageRetriever(BaseRetriever):
:return: dictionary of embeddings for "passages" and "query"
"""
dataset, tensor_names, problematic_ids, baskets = self.processor.dataset_from_dicts(
dataset, tensor_names, _, baskets = self.processor.dataset_from_dicts(
dicts, indices=[i for i in range(len(dicts))], return_baskets=True
)

View File

@ -20,6 +20,7 @@
]
],
"n_docs_options": [
1000,
10000,
100000,
500000

View File

@ -1,5 +1,5 @@
from utils import get_document_store, index_to_doc_store, get_reader
from haystack.preprocessor.utils import eval_data_from_file
from haystack.preprocessor.utils import eval_data_from_json
from farm.data_handler.utils import _download_extract_downstream_data
from pathlib import Path
@ -40,7 +40,7 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
doc_store = get_document_store("elasticsearch")
# download squad data
_download_extract_downstream_data(input_file=data_dir/filename)
docs, labels = eval_data_from_file(data_dir/filename, max_docs=None)
docs, labels = eval_data_from_json(data_dir/filename, max_docs=None)
index_to_doc_store(doc_store, docs, None, labels)
for reader_name in reader_models:

View File

@ -1,7 +1,7 @@
,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error
0,0.7836676217765043,0.8262983412843887,0.9742963087813922,5,124.91606550999859,0.01052722615118815,98.86638639776463,deepset/roberta-base-squad2,
1,0.7439743805831789,0.7890026641413856,0.9720209000505646,5,67.87064415001078,0.005719757639475036,181.96379531485616,deepset/minilm-uncased-squad2,
2,0.6947581324793528,0.7432668866064459,0.9557559413450194,5,116.45726653200109,0.009814365964267747,106.04748306200683,deepset/bert-base-cased-squad2,
3,0.7900724759817968,0.8329492827667042,0.976908815101972,5,305.62878707199707,0.02575668187021718,40.40849724371908,deepset/bert-large-uncased-whole-word-masking-squad2,
4,0.803472105174448,0.846217441464372,0.9742120343839542,5,305.06433064700104,0.025709112645120602,40.48326454229272,deepset/xlm-roberta-large-squad2,
5,0.3730827574582842,0.42342513261953935,0.9539019046013821,5,76.98679084099422,0.006488015408814615,160.417129550279,distilbert-base-uncased-distilled-squad,
0,0.7839204449688185,0.8258860575299658,0.9742120343839542,5,98.16358173700064,0.008272676701247315,125.81040525892847,deepset/roberta-base-squad2,
1,0.7438058317883027,0.7887858491007042,0.9719366256531266,5,47.38258053499885,0.003993138423647299,260.6443097981493,deepset/minilm-uncased-squad2,
2,0.6947581324793528,0.7431182400443286,0.9557559413450194,5,101.99811779300217,0.008595829916821352,121.08066567525722,deepset/bert-base-cased-squad2,
3,0.7897353783920446,0.8326306774734308,0.976908815101972,5,292.51886408200517,0.024651851009776266,42.21949937744112,deepset/bert-large-uncased-whole-word-masking-squad2,
4,0.8021237148154391,0.8450422699207468,0.974043485589078,5,293.53038741600176,0.024737096529243364,42.07400844838984,deepset/xlm-roberta-large-squad2,
5,0.3729984830608461,0.4231925844723574,0.9539019046013821,5,55.403011280999635,0.004669055391960192,222.91207128366705,distilbert-base-uncased-distilled-squad,

1 EM f1 top_n_accuracy top_n reader_time seconds_per_query passages_per_second reader error
2 0 0.7836676217765043 0.7839204449688185 0.8262983412843887 0.8258860575299658 0.9742963087813922 0.9742120343839542 5 124.91606550999859 98.16358173700064 0.01052722615118815 0.008272676701247315 98.86638639776463 125.81040525892847 deepset/roberta-base-squad2
3 1 0.7439743805831789 0.7438058317883027 0.7890026641413856 0.7887858491007042 0.9720209000505646 0.9719366256531266 5 67.87064415001078 47.38258053499885 0.005719757639475036 0.003993138423647299 181.96379531485616 260.6443097981493 deepset/minilm-uncased-squad2
4 2 0.6947581324793528 0.7432668866064459 0.7431182400443286 0.9557559413450194 5 116.45726653200109 101.99811779300217 0.009814365964267747 0.008595829916821352 106.04748306200683 121.08066567525722 deepset/bert-base-cased-squad2
5 3 0.7900724759817968 0.7897353783920446 0.8329492827667042 0.8326306774734308 0.976908815101972 5 305.62878707199707 292.51886408200517 0.02575668187021718 0.024651851009776266 40.40849724371908 42.21949937744112 deepset/bert-large-uncased-whole-word-masking-squad2
6 4 0.803472105174448 0.8021237148154391 0.846217441464372 0.8450422699207468 0.9742120343839542 0.974043485589078 5 305.06433064700104 293.53038741600176 0.025709112645120602 0.024737096529243364 40.48326454229272 42.07400844838984 deepset/xlm-roberta-large-squad2
7 5 0.3730827574582842 0.3729984830608461 0.42342513261953935 0.4231925844723574 0.9539019046013821 5 76.98679084099422 55.403011280999635 0.006488015408814615 0.004669055391960192 160.417129550279 222.91207128366705 distilbert-base-uncased-distilled-squad

View File

@ -2,7 +2,7 @@ import pandas as pd
from pathlib import Path
from time import perf_counter
from utils import get_document_store, get_retriever, index_to_doc_store, load_config
from haystack.preprocessor.utils import eval_data_from_file
from haystack.preprocessor.utils import eval_data_from_json
from haystack.document_store.faiss import FAISSDocumentStore
from haystack import Document
@ -134,7 +134,11 @@ def benchmark_querying(n_docs_options,
for retriever_name, doc_store_name in retriever_doc_stores:
try:
logger.info(f"##### Start querying run: {retriever_name}, {doc_store_name}, {n_docs} docs ##### ")
doc_store = get_document_store(doc_store_name)
if retriever_name == "elastic":
similarity = "cosine"
else:
similarity = "dot_product"
doc_store = get_document_store(doc_store_name, similarity=similarity)
retriever = get_retriever(retriever_name, doc_store)
add_precomputed = retriever_name in ["dpr"]
# For DPR, precomputed embeddings are loaded from file
@ -160,8 +164,8 @@ def benchmark_querying(n_docs_options,
"retrieve_time": raw_results["retrieve_time"],
"queries_per_second": raw_results["n_questions"] / raw_results["retrieve_time"],
"seconds_per_query": raw_results["retrieve_time"]/ raw_results["n_questions"],
"recall": raw_results["recall"],
"map": raw_results["map"],
"recall": raw_results["recall"] * 100,
"map": raw_results["map"] * 100,
"top_k": raw_results["top_k"],
"date_time": datetime.datetime.now(),
"error": None
@ -265,7 +269,7 @@ def prepare_data(data_dir, filename_gold, filename_negative, data_s3_url, embed
download_from_s3(data_s3_url + str(embeddings_dir) + embedding_filename, cache_dir=data_dir)
logging.getLogger("farm").setLevel(logging.WARN)
gold_docs, labels = eval_data_from_file(data_dir + filename_gold)
gold_docs, labels = eval_data_from_json(data_dir + filename_gold)
# Reduce number of docs
gold_docs = gold_docs[:n_docs]

View File

@ -1,13 +1,17 @@
,retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,error
1,dpr,elasticsearch,10000,135.8048727600035,73.63506033890373,2020-12-02 06:51:48.587178,
5,dpr,elasticsearch,100000,1352.514667440999,73.93635160290218,2020-12-02 07:23:04.264694,
9,dpr,elasticsearch,500000,6781.024389943996,73.7351720400064,2020-12-02 10:10:42.147031,
0,elastic,elasticsearch,10000,20.694342684997537,483.223852635317,2020-12-02 06:49:00.317977,
4,elastic,elasticsearch,100000,206.47108666299755,484.32931514144724,2020-12-02 06:59:54.055199,
8,elastic,elasticsearch,500000,1032.1480222880054,484.4266415311529,2020-12-02 08:16:15.828533,
2,dpr,faiss_flat,10000,95.10171413100034,105.15057579535569,2020-12-02 06:53:59.472952,
6,dpr,faiss_flat,100000,954.4610684969957,104.77116699738367,2020-12-02 07:39:56.194345,
10,dpr,faiss_flat,500000,4865.149988802004,102.77175444761984,2020-12-02 11:34:34.726687,
3,dpr,faiss_hnsw,10000,103.25490099400486,96.84770314757859,2020-12-02 06:56:14.230579,
7,dpr,faiss_hnsw,100000,1093.9618158599915,91.41086878008392,2020-12-02 07:58:43.508489,
11,dpr,faiss_hnsw,500000,5784.850161597002,86.43266221816312,2020-12-02 13:11:43.328380,
1,dpr,elasticsearch,1000,15.336494209999728,65.20394989279743,2021-01-29 11:18:25.436371,
5,dpr,elasticsearch,10000,144.0823780490009,69.40474008972218,2021-01-29 11:23:19.896920,
9,dpr,elasticsearch,100000,1433.587170629,69.75508852811794,2021-01-29 11:56:22.502185,
13,dpr,elasticsearch,500000,7196.396471723998,69.47921810097519,2021-01-29 14:54:08.769187,
0,elastic,elasticsearch,1000,2.1182381880007597,472.0904408506686,2021-01-29 11:17:14.160560,
4,elastic,elasticsearch,10000,20.23965223199957,494.0796356268248,2021-01-29 11:20:27.378846,
8,elastic,elasticsearch,100000,207.03962336699988,482.9993330442806,2021-01-29 11:31:50.829072,
12,elastic,elasticsearch,500000,1029.1638562459993,485.8312862091863,2021-01-29 12:52:45.994426,
2,dpr,faiss_flat,1000,9.899907313998483,101.01104669798258,2021-01-29 11:19:11.304749,
6,dpr,faiss_flat,10000,104.1660261320012,96.00059032037764,2021-01-29 11:25:43.069491,
10,dpr,faiss_flat,100000,1046.8892760299987,95.52108545730724,2021-01-29 12:14:51.105055,
14,dpr,faiss_flat,500000,5243.775349973999,95.35114810028603,2021-01-29 16:24:19.855339,
3,dpr,faiss_hnsw,1000,10.329135104999295,96.81352696374361,2021-01-29 11:19:55.337391,
7,dpr,faiss_hnsw,10000,112.53792207699917,88.85893586304122,2021-01-29 11:28:10.284866,
11,dpr,faiss_hnsw,100000,1188.8019736170008,84.11829911061136,2021-01-29 12:35:16.166263,
15,dpr,faiss_hnsw,500000,6125.295488232001,81.62871504903015,2021-01-29 18:07:08.100722,

1 retriever doc_store n_docs indexing_time docs_per_second date_time error
2 1 dpr elasticsearch 10000 1000 135.8048727600035 15.336494209999728 73.63506033890373 65.20394989279743 2020-12-02 06:51:48.587178 2021-01-29 11:18:25.436371
3 5 dpr elasticsearch 100000 10000 1352.514667440999 144.0823780490009 73.93635160290218 69.40474008972218 2020-12-02 07:23:04.264694 2021-01-29 11:23:19.896920
4 9 dpr elasticsearch 500000 100000 6781.024389943996 1433.587170629 73.7351720400064 69.75508852811794 2020-12-02 10:10:42.147031 2021-01-29 11:56:22.502185
5 0 13 elastic dpr elasticsearch 10000 500000 20.694342684997537 7196.396471723998 483.223852635317 69.47921810097519 2020-12-02 06:49:00.317977 2021-01-29 14:54:08.769187
6 4 0 elastic elasticsearch 100000 1000 206.47108666299755 2.1182381880007597 484.32931514144724 472.0904408506686 2020-12-02 06:59:54.055199 2021-01-29 11:17:14.160560
7 8 4 elastic elasticsearch 500000 10000 1032.1480222880054 20.23965223199957 484.4266415311529 494.0796356268248 2020-12-02 08:16:15.828533 2021-01-29 11:20:27.378846
8 2 8 dpr elastic faiss_flat elasticsearch 10000 100000 95.10171413100034 207.03962336699988 105.15057579535569 482.9993330442806 2020-12-02 06:53:59.472952 2021-01-29 11:31:50.829072
9 6 12 dpr elastic faiss_flat elasticsearch 100000 500000 954.4610684969957 1029.1638562459993 104.77116699738367 485.8312862091863 2020-12-02 07:39:56.194345 2021-01-29 12:52:45.994426
10 10 2 dpr faiss_flat 500000 1000 4865.149988802004 9.899907313998483 102.77175444761984 101.01104669798258 2020-12-02 11:34:34.726687 2021-01-29 11:19:11.304749
11 3 6 dpr faiss_hnsw faiss_flat 10000 103.25490099400486 104.1660261320012 96.84770314757859 96.00059032037764 2020-12-02 06:56:14.230579 2021-01-29 11:25:43.069491
12 7 10 dpr faiss_hnsw faiss_flat 100000 1093.9618158599915 1046.8892760299987 91.41086878008392 95.52108545730724 2020-12-02 07:58:43.508489 2021-01-29 12:14:51.105055
13 11 14 dpr faiss_hnsw faiss_flat 500000 5784.850161597002 5243.775349973999 86.43266221816312 95.35114810028603 2020-12-02 13:11:43.328380 2021-01-29 16:24:19.855339
14 3 dpr faiss_hnsw 1000 10.329135104999295 96.81352696374361 2021-01-29 11:19:55.337391
15 7 dpr faiss_hnsw 10000 112.53792207699917 88.85893586304122 2021-01-29 11:28:10.284866
16 11 dpr faiss_hnsw 100000 1188.8019736170008 84.11829911061136 2021-01-29 12:35:16.166263
17 15 dpr faiss_hnsw 500000 6125.295488232001 81.62871504903015 2021-01-29 18:07:08.100722

View File

@ -1,13 +1,17 @@
,retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error
1,dpr,elasticsearch,10000,5791,233.54168710688828,24.796429587106445,0.040328386652890395,0.9690899671904679,0.8808447974826822,10,2020-12-02 13:18:27.808539,
5,dpr,elasticsearch,100000,5791,928.9148432369257,6.234155953220104,0.1604066384453334,0.9397340701087895,0.8212235461156204,10,2020-12-02 13:53:44.689757,
9,dpr,elasticsearch,500000,5791,3992.798643678747,1.45036114184423,0.6894834473629333,0.8919012260404076,0.7302081363253893,10,2020-12-02 17:35:25.795083,
0,elastic,elasticsearch,10000,5791,23.260322959773475,248.9647289083211,0.00401663321702184,0.8103954412018649,0.6609973604361457,10,2020-12-02 13:13:03.957613,
4,elastic,elasticsearch,100000,5791,35.61682877641579,162.59167924109505,0.006150376234918976,0.7168019340355725,0.559593430418849,10,2020-12-02 13:33:30.417021,
8,elastic,elasticsearch,500000,5791,63.36918604133825,91.38510941614904,0.010942701785760362,0.6238991538594371,0.45245893326535686,10,2020-12-02 16:08:13.070376,
2,dpr,faiss_flat,10000,5791,257.67369354520633,22.474160712040344,0.044495543696288435,0.9746157831117251,0.8978985590667505,10,2020-12-02 13:23:51.002905,
6,dpr,faiss_flat,100000,5791,1182.7107160334417,4.896379073508164,0.2042325532780939,0.9575202901053359,0.8630120493486063,10,2020-12-02 14:18:14.837806,
3,dpr,faiss_hnsw,10000,5791,184.7552210999711,31.34417509568776,0.03190385444655001,0.972198238646175,0.8961883245210815,10,2020-12-02 13:28:33.415220,
7,dpr,faiss_hnsw,100000,5791,450.7693457186833,12.84692505158515,0.0778396383558424,0.9399067518563288,0.8486882354392283,10,2020-12-02 15:10:44.114148,
8,dpr,faiss_flat,500000,5791,5365.806154628852,1.0792413727067556,0.9265767837383616,0.9295458470039717,0.8045832613826054,10,2020-12-02 23:14:44.503864,
9,dpr,faiss_hnsw,500000,5791,1745.922715222303,3.3168707580865915,0.30148898553312087,0.8820583664306683,0.765677378416975,10,2020-12-03 00:18:53.376265,
1,dpr,elasticsearch,1000,1064,34.67547423102587,30.684511851549132,0.0325897314201371,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:27:43.048502,
5,dpr,elasticsearch,10000,5637,288.06125728895495,19.568754413737462,0.05110187285594375,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:37:21.149887,
9,dpr,elasticsearch,100000,5637,1225.6274364620313,4.5992769354707805,0.2174254810115365,0.9579563597658329,0.8654564090434241,10,2021-02-01 12:15:52.757320,
13,dpr,elasticsearch,500000,5637,5339.008413678017,1.0558140319761546,0.947136493467805,0.9308142629058009,0.8086137228234089,10,2021-02-01 14:52:23.056230,
0,elastic,elasticsearch,1000,1064,4.046542626992959,262.9405144288997,0.0038031415667227064,0.8909774436090225,0.742044471297291,10,2021-02-01 11:26:04.346134,
4,elastic,elasticsearch,10000,5637,30.701430243001596,183.6070813438718,0.005446413028738974,0.8110697179350719,0.6620627317806674,10,2021-02-01 11:31:20.470092,
8,elastic,elasticsearch,100000,5637,34.705507238930295,162.42378943468643,0.006156733588598598,0.7193542664537875,0.5625959153101251,10,2021-02-01 11:50:36.048887,
12,elastic,elasticsearch,500000,5637,68.3838099470413,82.4317920333114,0.01213124178588634,0.6274614156466205,0.45594527090003406,10,2021-02-01 13:02:16.905187,
2,dpr,faiss_flat,1000,1064,30.053267333012627,35.40380445859966,0.028245552004711117,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:28:55.544474,
6,dpr,faiss_flat,10000,5637,218.59436279792135,25.78749025294445,0.038778492602079356,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:42:07.545869,
10,dpr,faiss_flat,100000,5637,865.7440476809643,6.511162294559943,0.15358241044544338,0.9579563597658329,0.8654606328368972,10,2021-02-01 12:34:29.493598,
14,dpr,faiss_flat,500000,5637,3717.9468668280497,1.5161593755666505,0.6595612678424783,0.9308142629058009,0.808613722823409,10,2021-02-01 16:12:52.804436,
3,dpr,faiss_hnsw,1000,1064,27.167708159968242,39.164142729117266,0.02553356030072203,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:30:02.684535,
7,dpr,faiss_hnsw,10000,5637,167.55242089293097,33.6432023480111,0.02972368651639719,0.972503104488203,0.8969941373746582,10,2021-02-01 11:46:07.130588,
11,dpr,faiss_hnsw,100000,5637,167.48228578322596,33.65729082116796,0.029711244595214823,0.9402164271775767,0.8507984377595874,10,2021-02-01 12:43:21.697968,
15,dpr,faiss_hnsw,500000,5637,164.45566683610014,34.27671486454735,0.029174324434291313,0.8825616462657442,0.7691475821598232,10,2021-02-01 16:47:01.710072,

1 retriever doc_store n_docs n_queries retrieve_time queries_per_second seconds_per_query recall map top_k date_time error
2 1 dpr elasticsearch 10000 1000 5791 1064 233.54168710688828 34.67547423102587 24.796429587106445 30.684511851549132 0.040328386652890395 0.0325897314201371 0.9690899671904679 0.9915413533834586 0.8808447974826822 0.9295105322830889 10 2020-12-02 13:18:27.808539 2021-02-01 11:27:43.048502
3 5 dpr elasticsearch 100000 10000 5791 5637 928.9148432369257 288.06125728895495 6.234155953220104 19.568754413737462 0.1604066384453334 0.05110187285594375 0.9397340701087895 0.9749866950505588 0.8212235461156204 0.8987097014904354 10 2020-12-02 13:53:44.689757 2021-02-01 11:37:21.149887
4 9 dpr elasticsearch 500000 100000 5791 5637 3992.798643678747 1225.6274364620313 1.45036114184423 4.5992769354707805 0.6894834473629333 0.2174254810115365 0.8919012260404076 0.9579563597658329 0.7302081363253893 0.8654564090434241 10 2020-12-02 17:35:25.795083 2021-02-01 12:15:52.757320
5 0 13 elastic dpr elasticsearch 10000 500000 5791 5637 23.260322959773475 5339.008413678017 248.9647289083211 1.0558140319761546 0.00401663321702184 0.947136493467805 0.8103954412018649 0.9308142629058009 0.6609973604361457 0.8086137228234089 10 2020-12-02 13:13:03.957613 2021-02-01 14:52:23.056230
6 4 0 elastic elasticsearch 100000 1000 5791 1064 35.61682877641579 4.046542626992959 162.59167924109505 262.9405144288997 0.006150376234918976 0.0038031415667227064 0.7168019340355725 0.8909774436090225 0.559593430418849 0.742044471297291 10 2020-12-02 13:33:30.417021 2021-02-01 11:26:04.346134
7 8 4 elastic elasticsearch 500000 10000 5791 5637 63.36918604133825 30.701430243001596 91.38510941614904 183.6070813438718 0.010942701785760362 0.005446413028738974 0.6238991538594371 0.8110697179350719 0.45245893326535686 0.6620627317806674 10 2020-12-02 16:08:13.070376 2021-02-01 11:31:20.470092
8 2 8 dpr elastic faiss_flat elasticsearch 10000 100000 5791 5637 257.67369354520633 34.705507238930295 22.474160712040344 162.42378943468643 0.044495543696288435 0.006156733588598598 0.9746157831117251 0.7193542664537875 0.8978985590667505 0.5625959153101251 10 2020-12-02 13:23:51.002905 2021-02-01 11:50:36.048887
9 6 12 dpr elastic faiss_flat elasticsearch 100000 500000 5791 5637 1182.7107160334417 68.3838099470413 4.896379073508164 82.4317920333114 0.2042325532780939 0.01213124178588634 0.9575202901053359 0.6274614156466205 0.8630120493486063 0.45594527090003406 10 2020-12-02 14:18:14.837806 2021-02-01 13:02:16.905187
10 3 2 dpr faiss_hnsw faiss_flat 10000 1000 5791 1064 184.7552210999711 30.053267333012627 31.34417509568776 35.40380445859966 0.03190385444655001 0.028245552004711117 0.972198238646175 0.9915413533834586 0.8961883245210815 0.9295105322830889 10 2020-12-02 13:28:33.415220 2021-02-01 11:28:55.544474
11 7 6 dpr faiss_hnsw faiss_flat 100000 10000 5791 5637 450.7693457186833 218.59436279792135 12.84692505158515 25.78749025294445 0.0778396383558424 0.038778492602079356 0.9399067518563288 0.9749866950505588 0.8486882354392283 0.8987097014904354 10 2020-12-02 15:10:44.114148 2021-02-01 11:42:07.545869
12 8 10 dpr faiss_flat 500000 100000 5791 5637 5365.806154628852 865.7440476809643 1.0792413727067556 6.511162294559943 0.9265767837383616 0.15358241044544338 0.9295458470039717 0.9579563597658329 0.8045832613826054 0.8654606328368972 10 2020-12-02 23:14:44.503864 2021-02-01 12:34:29.493598
13 9 14 dpr faiss_hnsw faiss_flat 500000 5791 5637 1745.922715222303 3717.9468668280497 3.3168707580865915 1.5161593755666505 0.30148898553312087 0.6595612678424783 0.8820583664306683 0.9308142629058009 0.765677378416975 0.808613722823409 10 2020-12-03 00:18:53.376265 2021-02-01 16:12:52.804436
14 3 dpr faiss_hnsw 1000 1064 27.167708159968242 39.164142729117266 0.02553356030072203 0.9915413533834586 0.9295105322830889 10 2021-02-01 11:30:02.684535
15 7 dpr faiss_hnsw 10000 5637 167.55242089293097 33.6432023480111 0.02972368651639719 0.972503104488203 0.8969941373746582 10 2021-02-01 11:46:07.130588
16 11 dpr faiss_hnsw 100000 5637 167.48228578322596 33.65729082116796 0.029711244595214823 0.9402164271775767 0.8507984377595874 10 2021-02-01 12:43:21.697968
17 15 dpr faiss_hnsw 500000 5637 164.45566683610014 34.27671486454735 0.029174324434291313 0.8825616462657442 0.7691475821598232 10 2021-02-01 16:47:01.710072

View File

@ -0,0 +1,18 @@
| | retriever | doc_store | n_docs | n_queries | retrieve_time | queries_per_second | seconds_per_query | recall | map | top_k | date_time | error |
|---:|:------------|:--------------|---------:|------------:|----------------:|---------------------:|--------------------:|---------:|---------:|--------:|:---------------------------|:--------|
| 1 | dpr | elasticsearch | 1000 | 1064 | 34.6755 | 30.6845 | 0.0325897 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:27:43.048502 | |
| 5 | dpr | elasticsearch | 10000 | 5637 | 288.061 | 19.5688 | 0.0511019 | 0.974987 | 0.89871 | 10 | 2021-02-01 11:37:21.149887 | |
| 9 | dpr | elasticsearch | 100000 | 5637 | 1225.63 | 4.59928 | 0.217425 | 0.957956 | 0.865456 | 10 | 2021-02-01 12:15:52.757320 | |
| 13 | dpr | elasticsearch | 500000 | 5637 | 5339.01 | 1.05581 | 0.947136 | 0.930814 | 0.808614 | 10 | 2021-02-01 14:52:23.056230 | |
| 0 | elastic | elasticsearch | 1000 | 1064 | 4.04654 | 262.941 | 0.00380314 | 0.890977 | 0.742044 | 10 | 2021-02-01 11:26:04.346134 | |
| 4 | elastic | elasticsearch | 10000 | 5637 | 30.7014 | 183.607 | 0.00544641 | 0.81107 | 0.662063 | 10 | 2021-02-01 11:31:20.470092 | |
| 8 | elastic | elasticsearch | 100000 | 5637 | 34.7055 | 162.424 | 0.00615673 | 0.719354 | 0.562596 | 10 | 2021-02-01 11:50:36.048887 | |
| 12 | elastic | elasticsearch | 500000 | 5637 | 68.3838 | 82.4318 | 0.0121312 | 0.627461 | 0.455945 | 10 | 2021-02-01 13:02:16.905187 | |
| 2 | dpr | faiss_flat | 1000 | 1064 | 30.0533 | 35.4038 | 0.0282456 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:28:55.544474 | |
| 6 | dpr | faiss_flat | 10000 | 5637 | 218.594 | 25.7875 | 0.0387785 | 0.974987 | 0.89871 | 10 | 2021-02-01 11:42:07.545869 | |
| 10 | dpr | faiss_flat | 100000 | 5637 | 865.744 | 6.51116 | 0.153582 | 0.957956 | 0.865461 | 10 | 2021-02-01 12:34:29.493598 | |
| 14 | dpr | faiss_flat | 500000 | 5637 | 3717.95 | 1.51616 | 0.659561 | 0.930814 | 0.808614 | 10 | 2021-02-01 16:12:52.804436 | |
| 3 | dpr | faiss_hnsw | 1000 | 1064 | 27.1677 | 39.1641 | 0.0255336 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:30:02.684535 | |
| 7 | dpr | faiss_hnsw | 10000 | 5637 | 167.552 | 33.6432 | 0.0297237 | 0.972503 | 0.896994 | 10 | 2021-02-01 11:46:07.130588 | |
| 11 | dpr | faiss_hnsw | 100000 | 5637 | 167.482 | 33.6573 | 0.0297112 | 0.940216 | 0.850798 | 10 | 2021-02-01 12:43:21.697968 | |
| 15 | dpr | faiss_hnsw | 500000 | 5637 | 164.456 | 34.2767 | 0.0291743 | 0.882562 | 0.769148 | 10 | 2021-02-01 16:47:01.710072 | |

View File

@ -16,7 +16,7 @@ RETRIEVER_TEMPLATE = {
"chart_type": "BarChart",
"title": "Retriever Performance",
"subtitle": "Time and Accuracy Benchmarks",
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embeddings are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. We use a cosine similarity function with BM25 retrievers, and dot product with DPR. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
"bars": "horizontal",
"columns": [
"Model",

View File

@ -22,7 +22,7 @@ reader_types = ["farm"]
doc_index = "eval_document"
label_index = "label"
def get_document_store(document_store_type, es_similarity='cosine'):
def get_document_store(document_store_type, similarity='dot_product'):
""" TODO This method is taken from test/conftest.py but maybe should be within Haystack.
Perhaps a class method of DocStore that just takes string for type of DocStore"""
if document_store_type == "sql":
@ -35,7 +35,7 @@ def get_document_store(document_store_type, es_similarity='cosine'):
# make sure we start from a fresh index
client = Elasticsearch()
client.indices.delete(index='haystack_test*', ignore=[404])
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=es_similarity, timeout=3000)
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=similarity, timeout=3000)
elif document_store_type in("faiss_flat", "faiss_hnsw"):
if document_store_type == "faiss_flat":
index_type = "Flat"
@ -48,12 +48,13 @@ def get_document_store(document_store_type, es_similarity='cosine'):
status = subprocess.run(
['docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres'],
shell=True)
time.sleep(3)
time.sleep(6)
status = subprocess.run(
['docker exec -it haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
time.sleep(1)
document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
faiss_index_factory_str=index_type)
faiss_index_factory_str=index_type,
similarity=similarity)
else:
raise Exception(f"No document store fixture for '{document_store_type}'")

View File

@ -62,10 +62,10 @@ def test_eval_reader(reader, document_store: BaseDocumentStore):
doc_index="haystack_test_eval_document",
device="cpu",
)
assert reader_eval_results["f1"] > 0.65
assert reader_eval_results["f1"] < 0.67
assert reader_eval_results["EM"] == 0.5
assert reader_eval_results["top_n_accuracy"] == 1.0
assert reader_eval_results["f1"] > 66.65
assert reader_eval_results["f1"] < 66.67
assert reader_eval_results["EM"] == 50
assert reader_eval_results["top_n_accuracy"] == 100.0
@pytest.mark.elasticsearch