mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-27 15:08:43 +00:00
Choose correct similarity fns during benchmark runs & re-run benchmarks (#773)
* Adapt to new dataset_from_dicts return signature * rename fn * Align similarity fn in benchmark doc store * Better choice of similarity fn * Increase postgres wait time * Add more expected returned variables * update benchmark results * Fix typo * update all benchmark runs * multiply stats by 100 * Specify similarity fns for website Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
This commit is contained in:
parent
8a5dc8f826
commit
f3a3b73d9b
@ -11,33 +11,33 @@
|
||||
],
|
||||
"data": [
|
||||
{
|
||||
"F1": 82.62983412843887,
|
||||
"Speed": 98.86638639776464,
|
||||
"F1": 82.58860575299658,
|
||||
"Speed": 125.81040525892848,
|
||||
"Model": "RoBERTa"
|
||||
},
|
||||
{
|
||||
"F1": 78.90026641413856,
|
||||
"Speed": 181.96379531485616,
|
||||
"F1": 78.87858491007042,
|
||||
"Speed": 260.6443097981493,
|
||||
"Model": "MiniLM"
|
||||
},
|
||||
{
|
||||
"F1": 74.32668866064459,
|
||||
"Speed": 106.04748306200683,
|
||||
"F1": 74.31182400443286,
|
||||
"Speed": 121.08066567525722,
|
||||
"Model": "BERT base"
|
||||
},
|
||||
{
|
||||
"F1": 83.29492827667042,
|
||||
"Speed": 40.408497243719076,
|
||||
"F1": 83.26306774734308,
|
||||
"Speed": 42.21949937744112,
|
||||
"Model": "BERT large"
|
||||
},
|
||||
{
|
||||
"F1": 84.62174414643722,
|
||||
"Speed": 40.483264542292716,
|
||||
"F1": 84.50422699207468,
|
||||
"Speed": 42.07400844838985,
|
||||
"Model": "XLM-RoBERTa"
|
||||
},
|
||||
{
|
||||
"F1": 42.342513261953935,
|
||||
"Speed": 160.41712955027901,
|
||||
"F1": 42.31925844723574,
|
||||
"Speed": 222.91207128366702,
|
||||
"Model": "DistilBERT"
|
||||
}
|
||||
]
|
||||
|
||||
@ -20,82 +20,82 @@
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
"map": 92.95105322830888
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 0.881
|
||||
"map": 89.87097014904354
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 0.821
|
||||
"map": 86.54564090434241
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 0.730
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"map": 0.898
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 0.863
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 0.805
|
||||
"map": 80.86137228234089
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"map": 0.748
|
||||
"map": 74.20444712972909
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 0.6609999999999999
|
||||
"map": 66.20627317806674
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 0.56
|
||||
"map": 56.25959153101251
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 0.452
|
||||
"map": 45.59452709000341
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830888
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"map": 89.87097014904354
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54606328368972
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 80.8613722823409
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
"map": 92.95105322830888
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 10000,
|
||||
"map": 0.896
|
||||
"map": 89.69941373746582
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"map": 0.849
|
||||
"map": 85.07984377595874
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 500000,
|
||||
"map": 0.766
|
||||
"map": 76.91475821598232
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"chart_type": "BarChart",
|
||||
"title": "Retriever Performance",
|
||||
"subtitle": "Time and Accuracy Benchmarks",
|
||||
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
|
||||
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>).\n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embeddings are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. We use a cosine similarity function with BM25 retrievers, and dot product with DPR. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
|
||||
"bars": "horizontal",
|
||||
"columns": [
|
||||
"Model",
|
||||
@ -24,30 +24,30 @@
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 73.93635160290218,
|
||||
"query_speed": 6.23,
|
||||
"map": 82
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 104.77116699738369,
|
||||
"query_speed": 4.89,
|
||||
"map": 86.3
|
||||
"index_speed": 69.75508852811794,
|
||||
"query_speed": 4.5992769354707805,
|
||||
"map": 86.54564090434241
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 484.32931514144724,
|
||||
"query_speed": 162.59,
|
||||
"map": 56
|
||||
"index_speed": 482.9993330442806,
|
||||
"query_speed": 162.42378943468643,
|
||||
"map": 56.25959153101251
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 95.52108545730724,
|
||||
"query_speed": 6.511162294559942,
|
||||
"map": 86.54606328368972
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 91.41086878008392,
|
||||
"query_speed": 12.85,
|
||||
"map": 84.9
|
||||
"index_speed": 84.11829911061136,
|
||||
"query_speed": 33.65729082116796,
|
||||
"map": 85.07984377595874
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -20,82 +20,82 @@
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.802
|
||||
"query_speed": 30.68451185154913
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 24.8
|
||||
"query_speed": 19.568754413737462
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.23
|
||||
"query_speed": 4.5992769354707805
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.45
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.048
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 22.47
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 4.90
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.08
|
||||
"query_speed": 1.0558140319761546
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 232.97799999999998
|
||||
"query_speed": 262.9405144288997
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 248.97
|
||||
"query_speed": 183.6070813438718
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 162.59
|
||||
"query_speed": 162.42378943468643
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 91.39
|
||||
"query_speed": 82.43179203331141
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 35.40380445859966
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 25.78749025294445
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.511162294559942
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.5161593755666505
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 37.884
|
||||
"query_speed": 39.16414272911727
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 31.34
|
||||
"query_speed": 33.6432023480111
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 12.85
|
||||
"query_speed": 33.65729082116796
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 3.32
|
||||
"query_speed": 34.27671486454735
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -11,33 +11,33 @@
|
||||
],
|
||||
"data": [
|
||||
{
|
||||
"F1": 82.62983412843887,
|
||||
"Speed": 98.86638639776464,
|
||||
"F1": 82.58860575299658,
|
||||
"Speed": 128.25544874114064,
|
||||
"Model": "RoBERTa"
|
||||
},
|
||||
{
|
||||
"F1": 78.90026641413856,
|
||||
"Speed": 181.96379531485616,
|
||||
"F1": 78.87858491007042,
|
||||
"Speed": 269.33155450679567,
|
||||
"Model": "MiniLM"
|
||||
},
|
||||
{
|
||||
"F1": 74.32668866064459,
|
||||
"Speed": 106.04748306200683,
|
||||
"F1": 74.31182400443286,
|
||||
"Speed": 123.82266420208393,
|
||||
"Model": "BERT base"
|
||||
},
|
||||
{
|
||||
"F1": 83.29492827667042,
|
||||
"Speed": 40.408497243719076,
|
||||
"F1": 83.26306774734308,
|
||||
"Speed": 43.188105620245494,
|
||||
"Model": "BERT large"
|
||||
},
|
||||
{
|
||||
"F1": 84.62174414643722,
|
||||
"Speed": 40.483264542292716,
|
||||
"F1": 84.50422699207468,
|
||||
"Speed": 42.956527893643,
|
||||
"Model": "XLM-RoBERTa"
|
||||
},
|
||||
{
|
||||
"F1": 42.342513261953935,
|
||||
"Speed": 160.41712955027901,
|
||||
"F1": 42.31925844723574,
|
||||
"Speed": 226.281948654048,
|
||||
"Model": "DistilBERT"
|
||||
}
|
||||
]
|
||||
|
||||
@ -17,85 +17,65 @@
|
||||
}
|
||||
],
|
||||
"data": [
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 0.881
|
||||
"map": 88.26183154948457
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 0.821
|
||||
"map": 82.47044752499787
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 0.730
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"map": 0.898
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 0.863
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 0.805
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"map": 0.748
|
||||
"map": 73.4952735751035
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 0.6609999999999999
|
||||
"map": 66.33019927857616
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 0.56
|
||||
"map": 56.25959153101251
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 0.452
|
||||
"map": 45.60339705629754
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 1000,
|
||||
"map": 0.929
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"map": 89.87097014904354
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54606328368972
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 80.8613722823409
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 10000,
|
||||
"map": 0.896
|
||||
"map": 89.69941373746582
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"map": 0.849
|
||||
"map": 85.07984377595874
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 500000,
|
||||
"map": 0.766
|
||||
"map": 76.91475821598232
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -24,30 +24,30 @@
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 73.93635160290218,
|
||||
"query_speed": 6.23,
|
||||
"map": 82
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 104.77116699738369,
|
||||
"query_speed": 4.89,
|
||||
"map": 86.3
|
||||
"index_speed": 70.7842323095542,
|
||||
"query_speed": 6.108417678791537,
|
||||
"map": 82.47044752499787
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 484.32931514144724,
|
||||
"query_speed": 162.59,
|
||||
"map": 56
|
||||
"index_speed": 486.8274411916137,
|
||||
"query_speed": 162.40717155994315,
|
||||
"map": 56.25959153101251
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 98.76191471019415,
|
||||
"query_speed": 6.614453113633132,
|
||||
"map": 86.54606328368972
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 91.41086878008392,
|
||||
"query_speed": 12.85,
|
||||
"map": 84.9
|
||||
"index_speed": 86.8695584169603,
|
||||
"query_speed": 38.24323466239034,
|
||||
"map": 85.07984377595874
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -17,85 +17,65 @@
|
||||
}
|
||||
],
|
||||
"data": [
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.802
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 24.8
|
||||
"query_speed": 23.3662850838307
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.23
|
||||
"query_speed": 6.108417678791537
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.45
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.048
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 22.47
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 4.90
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.08
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 232.97799999999998
|
||||
"query_speed": 1.4393100251286972
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 248.97
|
||||
"query_speed": 244.5160214986071
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 162.59
|
||||
"query_speed": 162.40717155994315
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 91.39
|
||||
"query_speed": 88.52692529827672
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 37.884
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 27.22085301792775
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.614453113633132
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.5222363376940002
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 31.34
|
||||
"query_speed": 39.903073511580295
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 12.85
|
||||
"query_speed": 38.24323466239034
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 3.32
|
||||
"query_speed": 37.13917579922844
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -524,9 +524,9 @@ class FARMReader(BaseReader):
|
||||
toc = perf_counter()
|
||||
reader_time = toc - tic
|
||||
results = {
|
||||
"EM": eval_results[0]["EM"],
|
||||
"f1": eval_results[0]["f1"],
|
||||
"top_n_accuracy": eval_results[0]["top_n_accuracy"],
|
||||
"EM": eval_results[0]["EM"] * 100,
|
||||
"f1": eval_results[0]["f1"] * 100,
|
||||
"top_n_accuracy": eval_results[0]["top_n_accuracy"] * 100,
|
||||
"top_n": self.inferencer.model.prediction_heads[0].n_best,
|
||||
"reader_time": reader_time,
|
||||
"seconds_per_query": reader_time / n_queries
|
||||
|
||||
@ -187,8 +187,7 @@ class DensePassageRetriever(BaseRetriever):
|
||||
:return: dictionary of embeddings for "passages" and "query"
|
||||
"""
|
||||
|
||||
|
||||
dataset, tensor_names, problematic_ids, baskets = self.processor.dataset_from_dicts(
|
||||
dataset, tensor_names, _, baskets = self.processor.dataset_from_dicts(
|
||||
dicts, indices=[i for i in range(len(dicts))], return_baskets=True
|
||||
)
|
||||
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
]
|
||||
],
|
||||
"n_docs_options": [
|
||||
1000,
|
||||
10000,
|
||||
100000,
|
||||
500000
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
from utils import get_document_store, index_to_doc_store, get_reader
|
||||
from haystack.preprocessor.utils import eval_data_from_file
|
||||
from haystack.preprocessor.utils import eval_data_from_json
|
||||
from farm.data_handler.utils import _download_extract_downstream_data
|
||||
|
||||
from pathlib import Path
|
||||
@ -40,7 +40,7 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
|
||||
doc_store = get_document_store("elasticsearch")
|
||||
# download squad data
|
||||
_download_extract_downstream_data(input_file=data_dir/filename)
|
||||
docs, labels = eval_data_from_file(data_dir/filename, max_docs=None)
|
||||
docs, labels = eval_data_from_json(data_dir/filename, max_docs=None)
|
||||
|
||||
index_to_doc_store(doc_store, docs, None, labels)
|
||||
for reader_name in reader_models:
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error
|
||||
0,0.7836676217765043,0.8262983412843887,0.9742963087813922,5,124.91606550999859,0.01052722615118815,98.86638639776463,deepset/roberta-base-squad2,
|
||||
1,0.7439743805831789,0.7890026641413856,0.9720209000505646,5,67.87064415001078,0.005719757639475036,181.96379531485616,deepset/minilm-uncased-squad2,
|
||||
2,0.6947581324793528,0.7432668866064459,0.9557559413450194,5,116.45726653200109,0.009814365964267747,106.04748306200683,deepset/bert-base-cased-squad2,
|
||||
3,0.7900724759817968,0.8329492827667042,0.976908815101972,5,305.62878707199707,0.02575668187021718,40.40849724371908,deepset/bert-large-uncased-whole-word-masking-squad2,
|
||||
4,0.803472105174448,0.846217441464372,0.9742120343839542,5,305.06433064700104,0.025709112645120602,40.48326454229272,deepset/xlm-roberta-large-squad2,
|
||||
5,0.3730827574582842,0.42342513261953935,0.9539019046013821,5,76.98679084099422,0.006488015408814615,160.417129550279,distilbert-base-uncased-distilled-squad,
|
||||
0,0.7839204449688185,0.8258860575299658,0.9742120343839542,5,98.16358173700064,0.008272676701247315,125.81040525892847,deepset/roberta-base-squad2,
|
||||
1,0.7438058317883027,0.7887858491007042,0.9719366256531266,5,47.38258053499885,0.003993138423647299,260.6443097981493,deepset/minilm-uncased-squad2,
|
||||
2,0.6947581324793528,0.7431182400443286,0.9557559413450194,5,101.99811779300217,0.008595829916821352,121.08066567525722,deepset/bert-base-cased-squad2,
|
||||
3,0.7897353783920446,0.8326306774734308,0.976908815101972,5,292.51886408200517,0.024651851009776266,42.21949937744112,deepset/bert-large-uncased-whole-word-masking-squad2,
|
||||
4,0.8021237148154391,0.8450422699207468,0.974043485589078,5,293.53038741600176,0.024737096529243364,42.07400844838984,deepset/xlm-roberta-large-squad2,
|
||||
5,0.3729984830608461,0.4231925844723574,0.9539019046013821,5,55.403011280999635,0.004669055391960192,222.91207128366705,distilbert-base-uncased-distilled-squad,
|
||||
|
||||
|
@ -2,7 +2,7 @@ import pandas as pd
|
||||
from pathlib import Path
|
||||
from time import perf_counter
|
||||
from utils import get_document_store, get_retriever, index_to_doc_store, load_config
|
||||
from haystack.preprocessor.utils import eval_data_from_file
|
||||
from haystack.preprocessor.utils import eval_data_from_json
|
||||
from haystack.document_store.faiss import FAISSDocumentStore
|
||||
|
||||
from haystack import Document
|
||||
@ -134,7 +134,11 @@ def benchmark_querying(n_docs_options,
|
||||
for retriever_name, doc_store_name in retriever_doc_stores:
|
||||
try:
|
||||
logger.info(f"##### Start querying run: {retriever_name}, {doc_store_name}, {n_docs} docs ##### ")
|
||||
doc_store = get_document_store(doc_store_name)
|
||||
if retriever_name == "elastic":
|
||||
similarity = "cosine"
|
||||
else:
|
||||
similarity = "dot_product"
|
||||
doc_store = get_document_store(doc_store_name, similarity=similarity)
|
||||
retriever = get_retriever(retriever_name, doc_store)
|
||||
add_precomputed = retriever_name in ["dpr"]
|
||||
# For DPR, precomputed embeddings are loaded from file
|
||||
@ -160,8 +164,8 @@ def benchmark_querying(n_docs_options,
|
||||
"retrieve_time": raw_results["retrieve_time"],
|
||||
"queries_per_second": raw_results["n_questions"] / raw_results["retrieve_time"],
|
||||
"seconds_per_query": raw_results["retrieve_time"]/ raw_results["n_questions"],
|
||||
"recall": raw_results["recall"],
|
||||
"map": raw_results["map"],
|
||||
"recall": raw_results["recall"] * 100,
|
||||
"map": raw_results["map"] * 100,
|
||||
"top_k": raw_results["top_k"],
|
||||
"date_time": datetime.datetime.now(),
|
||||
"error": None
|
||||
@ -265,7 +269,7 @@ def prepare_data(data_dir, filename_gold, filename_negative, data_s3_url, embed
|
||||
download_from_s3(data_s3_url + str(embeddings_dir) + embedding_filename, cache_dir=data_dir)
|
||||
logging.getLogger("farm").setLevel(logging.WARN)
|
||||
|
||||
gold_docs, labels = eval_data_from_file(data_dir + filename_gold)
|
||||
gold_docs, labels = eval_data_from_json(data_dir + filename_gold)
|
||||
|
||||
# Reduce number of docs
|
||||
gold_docs = gold_docs[:n_docs]
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
,retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,error
|
||||
1,dpr,elasticsearch,10000,135.8048727600035,73.63506033890373,2020-12-02 06:51:48.587178,
|
||||
5,dpr,elasticsearch,100000,1352.514667440999,73.93635160290218,2020-12-02 07:23:04.264694,
|
||||
9,dpr,elasticsearch,500000,6781.024389943996,73.7351720400064,2020-12-02 10:10:42.147031,
|
||||
0,elastic,elasticsearch,10000,20.694342684997537,483.223852635317,2020-12-02 06:49:00.317977,
|
||||
4,elastic,elasticsearch,100000,206.47108666299755,484.32931514144724,2020-12-02 06:59:54.055199,
|
||||
8,elastic,elasticsearch,500000,1032.1480222880054,484.4266415311529,2020-12-02 08:16:15.828533,
|
||||
2,dpr,faiss_flat,10000,95.10171413100034,105.15057579535569,2020-12-02 06:53:59.472952,
|
||||
6,dpr,faiss_flat,100000,954.4610684969957,104.77116699738367,2020-12-02 07:39:56.194345,
|
||||
10,dpr,faiss_flat,500000,4865.149988802004,102.77175444761984,2020-12-02 11:34:34.726687,
|
||||
3,dpr,faiss_hnsw,10000,103.25490099400486,96.84770314757859,2020-12-02 06:56:14.230579,
|
||||
7,dpr,faiss_hnsw,100000,1093.9618158599915,91.41086878008392,2020-12-02 07:58:43.508489,
|
||||
11,dpr,faiss_hnsw,500000,5784.850161597002,86.43266221816312,2020-12-02 13:11:43.328380,
|
||||
1,dpr,elasticsearch,1000,15.336494209999728,65.20394989279743,2021-01-29 11:18:25.436371,
|
||||
5,dpr,elasticsearch,10000,144.0823780490009,69.40474008972218,2021-01-29 11:23:19.896920,
|
||||
9,dpr,elasticsearch,100000,1433.587170629,69.75508852811794,2021-01-29 11:56:22.502185,
|
||||
13,dpr,elasticsearch,500000,7196.396471723998,69.47921810097519,2021-01-29 14:54:08.769187,
|
||||
0,elastic,elasticsearch,1000,2.1182381880007597,472.0904408506686,2021-01-29 11:17:14.160560,
|
||||
4,elastic,elasticsearch,10000,20.23965223199957,494.0796356268248,2021-01-29 11:20:27.378846,
|
||||
8,elastic,elasticsearch,100000,207.03962336699988,482.9993330442806,2021-01-29 11:31:50.829072,
|
||||
12,elastic,elasticsearch,500000,1029.1638562459993,485.8312862091863,2021-01-29 12:52:45.994426,
|
||||
2,dpr,faiss_flat,1000,9.899907313998483,101.01104669798258,2021-01-29 11:19:11.304749,
|
||||
6,dpr,faiss_flat,10000,104.1660261320012,96.00059032037764,2021-01-29 11:25:43.069491,
|
||||
10,dpr,faiss_flat,100000,1046.8892760299987,95.52108545730724,2021-01-29 12:14:51.105055,
|
||||
14,dpr,faiss_flat,500000,5243.775349973999,95.35114810028603,2021-01-29 16:24:19.855339,
|
||||
3,dpr,faiss_hnsw,1000,10.329135104999295,96.81352696374361,2021-01-29 11:19:55.337391,
|
||||
7,dpr,faiss_hnsw,10000,112.53792207699917,88.85893586304122,2021-01-29 11:28:10.284866,
|
||||
11,dpr,faiss_hnsw,100000,1188.8019736170008,84.11829911061136,2021-01-29 12:35:16.166263,
|
||||
15,dpr,faiss_hnsw,500000,6125.295488232001,81.62871504903015,2021-01-29 18:07:08.100722,
|
||||
|
||||
|
@ -1,13 +1,17 @@
|
||||
,retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error
|
||||
1,dpr,elasticsearch,10000,5791,233.54168710688828,24.796429587106445,0.040328386652890395,0.9690899671904679,0.8808447974826822,10,2020-12-02 13:18:27.808539,
|
||||
5,dpr,elasticsearch,100000,5791,928.9148432369257,6.234155953220104,0.1604066384453334,0.9397340701087895,0.8212235461156204,10,2020-12-02 13:53:44.689757,
|
||||
9,dpr,elasticsearch,500000,5791,3992.798643678747,1.45036114184423,0.6894834473629333,0.8919012260404076,0.7302081363253893,10,2020-12-02 17:35:25.795083,
|
||||
0,elastic,elasticsearch,10000,5791,23.260322959773475,248.9647289083211,0.00401663321702184,0.8103954412018649,0.6609973604361457,10,2020-12-02 13:13:03.957613,
|
||||
4,elastic,elasticsearch,100000,5791,35.61682877641579,162.59167924109505,0.006150376234918976,0.7168019340355725,0.559593430418849,10,2020-12-02 13:33:30.417021,
|
||||
8,elastic,elasticsearch,500000,5791,63.36918604133825,91.38510941614904,0.010942701785760362,0.6238991538594371,0.45245893326535686,10,2020-12-02 16:08:13.070376,
|
||||
2,dpr,faiss_flat,10000,5791,257.67369354520633,22.474160712040344,0.044495543696288435,0.9746157831117251,0.8978985590667505,10,2020-12-02 13:23:51.002905,
|
||||
6,dpr,faiss_flat,100000,5791,1182.7107160334417,4.896379073508164,0.2042325532780939,0.9575202901053359,0.8630120493486063,10,2020-12-02 14:18:14.837806,
|
||||
3,dpr,faiss_hnsw,10000,5791,184.7552210999711,31.34417509568776,0.03190385444655001,0.972198238646175,0.8961883245210815,10,2020-12-02 13:28:33.415220,
|
||||
7,dpr,faiss_hnsw,100000,5791,450.7693457186833,12.84692505158515,0.0778396383558424,0.9399067518563288,0.8486882354392283,10,2020-12-02 15:10:44.114148,
|
||||
8,dpr,faiss_flat,500000,5791,5365.806154628852,1.0792413727067556,0.9265767837383616,0.9295458470039717,0.8045832613826054,10,2020-12-02 23:14:44.503864,
|
||||
9,dpr,faiss_hnsw,500000,5791,1745.922715222303,3.3168707580865915,0.30148898553312087,0.8820583664306683,0.765677378416975,10,2020-12-03 00:18:53.376265,
|
||||
1,dpr,elasticsearch,1000,1064,34.67547423102587,30.684511851549132,0.0325897314201371,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:27:43.048502,
|
||||
5,dpr,elasticsearch,10000,5637,288.06125728895495,19.568754413737462,0.05110187285594375,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:37:21.149887,
|
||||
9,dpr,elasticsearch,100000,5637,1225.6274364620313,4.5992769354707805,0.2174254810115365,0.9579563597658329,0.8654564090434241,10,2021-02-01 12:15:52.757320,
|
||||
13,dpr,elasticsearch,500000,5637,5339.008413678017,1.0558140319761546,0.947136493467805,0.9308142629058009,0.8086137228234089,10,2021-02-01 14:52:23.056230,
|
||||
0,elastic,elasticsearch,1000,1064,4.046542626992959,262.9405144288997,0.0038031415667227064,0.8909774436090225,0.742044471297291,10,2021-02-01 11:26:04.346134,
|
||||
4,elastic,elasticsearch,10000,5637,30.701430243001596,183.6070813438718,0.005446413028738974,0.8110697179350719,0.6620627317806674,10,2021-02-01 11:31:20.470092,
|
||||
8,elastic,elasticsearch,100000,5637,34.705507238930295,162.42378943468643,0.006156733588598598,0.7193542664537875,0.5625959153101251,10,2021-02-01 11:50:36.048887,
|
||||
12,elastic,elasticsearch,500000,5637,68.3838099470413,82.4317920333114,0.01213124178588634,0.6274614156466205,0.45594527090003406,10,2021-02-01 13:02:16.905187,
|
||||
2,dpr,faiss_flat,1000,1064,30.053267333012627,35.40380445859966,0.028245552004711117,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:28:55.544474,
|
||||
6,dpr,faiss_flat,10000,5637,218.59436279792135,25.78749025294445,0.038778492602079356,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:42:07.545869,
|
||||
10,dpr,faiss_flat,100000,5637,865.7440476809643,6.511162294559943,0.15358241044544338,0.9579563597658329,0.8654606328368972,10,2021-02-01 12:34:29.493598,
|
||||
14,dpr,faiss_flat,500000,5637,3717.9468668280497,1.5161593755666505,0.6595612678424783,0.9308142629058009,0.808613722823409,10,2021-02-01 16:12:52.804436,
|
||||
3,dpr,faiss_hnsw,1000,1064,27.167708159968242,39.164142729117266,0.02553356030072203,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:30:02.684535,
|
||||
7,dpr,faiss_hnsw,10000,5637,167.55242089293097,33.6432023480111,0.02972368651639719,0.972503104488203,0.8969941373746582,10,2021-02-01 11:46:07.130588,
|
||||
11,dpr,faiss_hnsw,100000,5637,167.48228578322596,33.65729082116796,0.029711244595214823,0.9402164271775767,0.8507984377595874,10,2021-02-01 12:43:21.697968,
|
||||
15,dpr,faiss_hnsw,500000,5637,164.45566683610014,34.27671486454735,0.029174324434291313,0.8825616462657442,0.7691475821598232,10,2021-02-01 16:47:01.710072,
|
||||
|
||||
|
18
test/benchmarks/retriever_query_results.md
Normal file
18
test/benchmarks/retriever_query_results.md
Normal file
@ -0,0 +1,18 @@
|
||||
| | retriever | doc_store | n_docs | n_queries | retrieve_time | queries_per_second | seconds_per_query | recall | map | top_k | date_time | error |
|
||||
|---:|:------------|:--------------|---------:|------------:|----------------:|---------------------:|--------------------:|---------:|---------:|--------:|:---------------------------|:--------|
|
||||
| 1 | dpr | elasticsearch | 1000 | 1064 | 34.6755 | 30.6845 | 0.0325897 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:27:43.048502 | |
|
||||
| 5 | dpr | elasticsearch | 10000 | 5637 | 288.061 | 19.5688 | 0.0511019 | 0.974987 | 0.89871 | 10 | 2021-02-01 11:37:21.149887 | |
|
||||
| 9 | dpr | elasticsearch | 100000 | 5637 | 1225.63 | 4.59928 | 0.217425 | 0.957956 | 0.865456 | 10 | 2021-02-01 12:15:52.757320 | |
|
||||
| 13 | dpr | elasticsearch | 500000 | 5637 | 5339.01 | 1.05581 | 0.947136 | 0.930814 | 0.808614 | 10 | 2021-02-01 14:52:23.056230 | |
|
||||
| 0 | elastic | elasticsearch | 1000 | 1064 | 4.04654 | 262.941 | 0.00380314 | 0.890977 | 0.742044 | 10 | 2021-02-01 11:26:04.346134 | |
|
||||
| 4 | elastic | elasticsearch | 10000 | 5637 | 30.7014 | 183.607 | 0.00544641 | 0.81107 | 0.662063 | 10 | 2021-02-01 11:31:20.470092 | |
|
||||
| 8 | elastic | elasticsearch | 100000 | 5637 | 34.7055 | 162.424 | 0.00615673 | 0.719354 | 0.562596 | 10 | 2021-02-01 11:50:36.048887 | |
|
||||
| 12 | elastic | elasticsearch | 500000 | 5637 | 68.3838 | 82.4318 | 0.0121312 | 0.627461 | 0.455945 | 10 | 2021-02-01 13:02:16.905187 | |
|
||||
| 2 | dpr | faiss_flat | 1000 | 1064 | 30.0533 | 35.4038 | 0.0282456 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:28:55.544474 | |
|
||||
| 6 | dpr | faiss_flat | 10000 | 5637 | 218.594 | 25.7875 | 0.0387785 | 0.974987 | 0.89871 | 10 | 2021-02-01 11:42:07.545869 | |
|
||||
| 10 | dpr | faiss_flat | 100000 | 5637 | 865.744 | 6.51116 | 0.153582 | 0.957956 | 0.865461 | 10 | 2021-02-01 12:34:29.493598 | |
|
||||
| 14 | dpr | faiss_flat | 500000 | 5637 | 3717.95 | 1.51616 | 0.659561 | 0.930814 | 0.808614 | 10 | 2021-02-01 16:12:52.804436 | |
|
||||
| 3 | dpr | faiss_hnsw | 1000 | 1064 | 27.1677 | 39.1641 | 0.0255336 | 0.991541 | 0.929511 | 10 | 2021-02-01 11:30:02.684535 | |
|
||||
| 7 | dpr | faiss_hnsw | 10000 | 5637 | 167.552 | 33.6432 | 0.0297237 | 0.972503 | 0.896994 | 10 | 2021-02-01 11:46:07.130588 | |
|
||||
| 11 | dpr | faiss_hnsw | 100000 | 5637 | 167.482 | 33.6573 | 0.0297112 | 0.940216 | 0.850798 | 10 | 2021-02-01 12:43:21.697968 | |
|
||||
| 15 | dpr | faiss_hnsw | 500000 | 5637 | 164.456 | 34.2767 | 0.0291743 | 0.882562 | 0.769148 | 10 | 2021-02-01 16:47:01.710072 | |
|
||||
@ -16,7 +16,7 @@ RETRIEVER_TEMPLATE = {
|
||||
"chart_type": "BarChart",
|
||||
"title": "Retriever Performance",
|
||||
"subtitle": "Time and Accuracy Benchmarks",
|
||||
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
|
||||
"description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. We use a cosine similarity function with BM25 retrievers, and dot product with DPR. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
|
||||
"bars": "horizontal",
|
||||
"columns": [
|
||||
"Model",
|
||||
|
||||
@ -22,7 +22,7 @@ reader_types = ["farm"]
|
||||
doc_index = "eval_document"
|
||||
label_index = "label"
|
||||
|
||||
def get_document_store(document_store_type, es_similarity='cosine'):
|
||||
def get_document_store(document_store_type, similarity='dot_product'):
|
||||
""" TODO This method is taken from test/conftest.py but maybe should be within Haystack.
|
||||
Perhaps a class method of DocStore that just takes string for type of DocStore"""
|
||||
if document_store_type == "sql":
|
||||
@ -35,7 +35,7 @@ def get_document_store(document_store_type, es_similarity='cosine'):
|
||||
# make sure we start from a fresh index
|
||||
client = Elasticsearch()
|
||||
client.indices.delete(index='haystack_test*', ignore=[404])
|
||||
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=es_similarity, timeout=3000)
|
||||
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=similarity, timeout=3000)
|
||||
elif document_store_type in("faiss_flat", "faiss_hnsw"):
|
||||
if document_store_type == "faiss_flat":
|
||||
index_type = "Flat"
|
||||
@ -48,12 +48,13 @@ def get_document_store(document_store_type, es_similarity='cosine'):
|
||||
status = subprocess.run(
|
||||
['docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres'],
|
||||
shell=True)
|
||||
time.sleep(3)
|
||||
time.sleep(6)
|
||||
status = subprocess.run(
|
||||
['docker exec -it haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
|
||||
time.sleep(1)
|
||||
document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
|
||||
faiss_index_factory_str=index_type)
|
||||
faiss_index_factory_str=index_type,
|
||||
similarity=similarity)
|
||||
|
||||
else:
|
||||
raise Exception(f"No document store fixture for '{document_store_type}'")
|
||||
|
||||
@ -62,10 +62,10 @@ def test_eval_reader(reader, document_store: BaseDocumentStore):
|
||||
doc_index="haystack_test_eval_document",
|
||||
device="cpu",
|
||||
)
|
||||
assert reader_eval_results["f1"] > 0.65
|
||||
assert reader_eval_results["f1"] < 0.67
|
||||
assert reader_eval_results["EM"] == 0.5
|
||||
assert reader_eval_results["top_n_accuracy"] == 1.0
|
||||
assert reader_eval_results["f1"] > 66.65
|
||||
assert reader_eval_results["f1"] < 66.67
|
||||
assert reader_eval_results["EM"] == 50
|
||||
assert reader_eval_results["top_n_accuracy"] == 100.0
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user