mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-04 11:07:52 +00:00
Benchmark milvus (#850)
* Add milvus benchmarking support * Add latest docstring and tutorial changes * Edit config * Disable docker interactive mode * Add milvus index type support * Adjust FAISS and Milvus node branching * Remove duplicate in config * Revert method for speedup * Add latest docstring and tutorial changes * Add latest benchmark run * Add latest docstring and tutorial changes * Add json files * Revert "Add latest docstring and tutorial changes" This reverts commit e2efa5f08aa4fb55bbeeed42aa76817d63fc8923. * Add latest docstring and tutorial changes * Revert "Add latest docstring and tutorial changes" This reverts commit b085a679b9d5f175e91c2c59565e73c5dec1374b. * Fix typo Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
b87daed62b
commit
77d4c2ca1c
1
.gitignore
vendored
1
.gitignore
vendored
@ -149,6 +149,7 @@ tutorials/cache
|
||||
tutorials/mlruns
|
||||
tutorials/model
|
||||
models
|
||||
saved_models
|
||||
*_build
|
||||
|
||||
.DS_Store
|
||||
|
||||
@ -827,8 +827,9 @@ the vector embeddings are indexed in a FAISS Index.
|
||||
Recommended options:
|
||||
- "Flat" (default): Best accuracy (= exact). Becomes slow and RAM-intensive for > 1 million docs.
|
||||
- "HNSW": Graph-based heuristic. If not further specified,
|
||||
we use a RAM intense, but more accurate config:
|
||||
HNSW256, efConstruction=256 and efSearch=256
|
||||
we use the following config:
|
||||
HNSW64, efConstruction=80 and efSearch=20
|
||||
|
||||
- "IVFx,Flat": Inverted Index. Replace x with the number of centroids aka nlist.
|
||||
Rule of thumb: nlist = 10 * sqrt (num_docs) is a good starting point.
|
||||
For more details see:
|
||||
|
||||
@ -8,7 +8,10 @@
|
||||
"BM25 / ElasticSearch",
|
||||
"DPR / ElasticSearch",
|
||||
"DPR / FAISS (flat)",
|
||||
"DPR / FAISS (HSNW)"
|
||||
"DPR / FAISS (HNSW)",
|
||||
"DPR / Milvus (flat)",
|
||||
"DPR / Milvus (HNSW)"
|
||||
|
||||
],
|
||||
"axis": [
|
||||
{
|
||||
@ -17,25 +20,25 @@
|
||||
}
|
||||
],
|
||||
"data": [
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830888
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 89.87097014904354
|
||||
"map": 89.87097014904356
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54564090434241
|
||||
"map": 56.259591531012504
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 66.33019927857616
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 80.86137228234089
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830891
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
@ -43,24 +46,29 @@
|
||||
"map": 74.20444712972909
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"map": 66.20627317806674
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"map": 56.25959153101251
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 45.59452709000341
|
||||
"map": 45.60339705629754
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"map": 80.86137228234091
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830888
|
||||
"map": 92.95105322830891
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 80.86137228234091
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
@ -70,32 +78,67 @@
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54606328368972
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 80.8613722823409
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830888
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 10000,
|
||||
"map": 89.69941373746582
|
||||
"map": 89.49563682134192
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"map": 85.07984377595874
|
||||
"map": 84.33419639513305
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 500000,
|
||||
"map": 76.91475821598232
|
||||
"map": 75.73315903145605
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830891
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830891
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 10000,
|
||||
"map": 89.87097014904354
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 500000,
|
||||
"map": 80.86137228234091
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 1000,
|
||||
"map": 92.95105322830891
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 10000,
|
||||
"map": 89.87097014904354
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 500000,
|
||||
"map": 74.85616575291942
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"map": 86.54606328368973
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -21,33 +21,47 @@
|
||||
"time_label": "seconds"
|
||||
},
|
||||
"data": [
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 69.75508852811794,
|
||||
"query_speed": 4.5992769354707805,
|
||||
"map": 86.54564090434241
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 482.9993330442806,
|
||||
"query_speed": 162.42378943468643,
|
||||
"map": 56.25959153101251
|
||||
"index_speed": 485.5602670200369,
|
||||
"query_speed": 165.51512861040828,
|
||||
"map": 56.259591531012504
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 71.36964873196698,
|
||||
"query_speed": 5.355677072083696,
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 95.52108545730724,
|
||||
"query_speed": 6.511162294559942,
|
||||
"map": 86.54606328368972
|
||||
"index_speed": 100.01184910084558,
|
||||
"query_speed": 6.624479268751268,
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 84.11829911061136,
|
||||
"query_speed": 33.65729082116796,
|
||||
"map": 85.07984377595874
|
||||
"index_speed": 89.90389306648805,
|
||||
"query_speed": 40.68196225525062,
|
||||
"map": 84.33419639513305
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 116.00982709720004,
|
||||
"query_speed": 28.30393009791128,
|
||||
"map": 86.54606328368973
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"index_speed": 115.61076852516383,
|
||||
"query_speed": 28.076443272229284,
|
||||
"map": 86.54606328368973
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -8,7 +8,9 @@
|
||||
"BM25 / ElasticSearch",
|
||||
"DPR / ElasticSearch",
|
||||
"DPR / FAISS (flat)",
|
||||
"DPR / FAISS (HSNW)"
|
||||
"DPR / FAISS (HNSW)",
|
||||
"DPR / Milvus (flat)",
|
||||
"DPR / Milvus (HNSW)"
|
||||
],
|
||||
"axis": [
|
||||
{
|
||||
@ -19,83 +21,123 @@
|
||||
"data": [
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 30.68451185154913
|
||||
"n_docs": 10000,
|
||||
"query_speed": 22.92376153263135
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 165.51512861040828
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 260.2575025618042
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 19.568754413737462
|
||||
"n_docs": 1000,
|
||||
"query_speed": 34.64504401787953
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 357.3482189096959
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 4.5992769354707805
|
||||
"query_speed": 5.355677072083696
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 90.8126875640674
|
||||
},
|
||||
{
|
||||
"model": "DPR / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.0558140319761546
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 262.9405144288997
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 183.6070813438718
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 162.42378943468643
|
||||
},
|
||||
{
|
||||
"model": "BM25 / ElasticSearch",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 82.43179203331141
|
||||
"query_speed": 1.2322912620168611
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 35.40380445859966
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 25.78749025294445
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.511162294559942
|
||||
"query_speed": 40.40867245729847
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 1.5161593755666505
|
||||
"query_speed": 1.5414031869280982
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 28.034963597300674
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 6.624479268751268
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 42.12400556900196
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 40.68196225525062
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 39.42722374998517
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HNSW)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 39.16414272911727
|
||||
"query_speed": 44.31880791041422
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.48084417170779
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 33.6432023480111
|
||||
"query_speed": 38.132788329389
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 33.65729082116796
|
||||
"query_speed": 28.30393009791128
|
||||
},
|
||||
{
|
||||
"model": "DPR / FAISS (HSNW)",
|
||||
"model": "DPR / Milvus (flat)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 34.27671486454735
|
||||
"query_speed": 15.30425741318099
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 1000,
|
||||
"query_speed": 40.38894718145225
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 10000,
|
||||
"query_speed": 38.15488156137084
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 500000,
|
||||
"query_speed": 24.503220592922823
|
||||
},
|
||||
{
|
||||
"model": "DPR / Milvus (HNSW)",
|
||||
"n_docs": 100000,
|
||||
"query_speed": 28.076443272229284
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -51,8 +51,8 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
Recommended options:
|
||||
- "Flat" (default): Best accuracy (= exact). Becomes slow and RAM-intensive for > 1 million docs.
|
||||
- "HNSW": Graph-based heuristic. If not further specified,
|
||||
we use a RAM intense, but more accurate config:
|
||||
HNSW256, efConstruction=256 and efSearch=256
|
||||
we use the following config:
|
||||
HNSW64, efConstruction=80 and efSearch=20
|
||||
- "IVFx,Flat": Inverted Index. Replace x with the number of centroids aka nlist.
|
||||
Rule of thumb: nlist = 10 * sqrt (num_docs) is a good starting point.
|
||||
For more details see:
|
||||
@ -103,7 +103,7 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
if index_factory == "HNSW" and metric_type == faiss.METRIC_INNER_PRODUCT:
|
||||
# faiss index factory doesn't give the same results for HNSW IP, therefore direct init.
|
||||
# defaults here are similar to DPR codebase (good accuracy, but very high RAM consumption)
|
||||
n_links = kwargs.get("n_links", 128)
|
||||
n_links = kwargs.get("n_links", 64)
|
||||
index = faiss.IndexHNSWFlat(vector_dim, n_links, metric_type)
|
||||
index.hnsw.efSearch = kwargs.get("efSearch", 20)#20
|
||||
index.hnsw.efConstruction = kwargs.get("efConstruction", 80)#80
|
||||
|
||||
@ -99,7 +99,8 @@ class MilvusDocumentStore(SQLDocumentStore):
|
||||
self.index_file_size = index_file_size
|
||||
|
||||
if similarity == "dot_product":
|
||||
self.metric_type = MetricType.L2
|
||||
self.metric_type = MetricType.IP
|
||||
self.similarity = similarity
|
||||
else:
|
||||
raise ValueError("The Milvus document store can currently only support dot_product similarity. "
|
||||
"Please set similarity=\"dot_product\"")
|
||||
|
||||
@ -124,27 +124,19 @@ class SQLDocumentStore(BaseDocumentStore):
|
||||
|
||||
return documents
|
||||
|
||||
def get_documents_by_vector_ids(
|
||||
self,
|
||||
vector_ids: List[str],
|
||||
index: Optional[str] = None,
|
||||
batch_size: int = 10_000
|
||||
):
|
||||
"""
|
||||
Fetch documents by specifying a list of text vector id strings
|
||||
def get_documents_by_vector_ids(self, vector_ids: List[str], index: Optional[str] = None, batch_size: int = 10_000):
|
||||
"""Fetch documents by specifying a list of text vector id strings"""
|
||||
index = index or self.index
|
||||
|
||||
:param vector_ids: List of vector_id strings.
|
||||
:param index: Name of the index to get the documents from. If None, the
|
||||
DocumentStore's default index (self.index) will be used.
|
||||
:param batch_size: When working with large number of documents, batching can help reduce memory footprint.
|
||||
"""
|
||||
documents = []
|
||||
for i in range(0, len(vector_ids), batch_size):
|
||||
query = self.session.query(DocumentORM).filter(
|
||||
DocumentORM.vector_id.in_(vector_ids[i: i + batch_size]),
|
||||
DocumentORM.index == index
|
||||
)
|
||||
for row in query.all():
|
||||
documents.append(self._convert_sql_row_to_document(row))
|
||||
|
||||
result = self._query(
|
||||
index=index,
|
||||
vector_ids=vector_ids,
|
||||
batch_size=batch_size
|
||||
)
|
||||
documents = list(result)
|
||||
sorted_documents = sorted(documents, key=lambda doc: vector_ids.index(doc.meta["vector_id"]))
|
||||
return sorted_documents
|
||||
|
||||
|
||||
@ -2,6 +2,14 @@
|
||||
"params": {
|
||||
"full": {
|
||||
"retriever_doc_stores": [
|
||||
[
|
||||
"dpr",
|
||||
"milvus_flat"
|
||||
],
|
||||
[
|
||||
"dpr",
|
||||
"milvus_hnsw"
|
||||
],
|
||||
[
|
||||
"elastic",
|
||||
"elasticsearch"
|
||||
|
||||
@ -44,7 +44,9 @@ def retriever(index_csv="retriever_index_results.csv", query_csv="retriever_quer
|
||||
"elasticsearch": "ElasticSearch",
|
||||
"faiss": "FAISS",
|
||||
"faiss_flat": "FAISS (flat)",
|
||||
"faiss_hnsw": "FAISS (HSNW)"
|
||||
"faiss_hnsw": "FAISS (HNSW)",
|
||||
"milvus_flat": "Milvus (flat)",
|
||||
"milvus_hnsw": "Milvus (HNSW)"
|
||||
}
|
||||
|
||||
index = pd.read_csv(index_csv)
|
||||
|
||||
@ -1,17 +1,25 @@
|
||||
,retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,error
|
||||
1,dpr,elasticsearch,1000,15.336494209999728,65.20394989279743,2021-01-29 11:18:25.436371,
|
||||
5,dpr,elasticsearch,10000,144.0823780490009,69.40474008972218,2021-01-29 11:23:19.896920,
|
||||
9,dpr,elasticsearch,100000,1433.587170629,69.75508852811794,2021-01-29 11:56:22.502185,
|
||||
13,dpr,elasticsearch,500000,7196.396471723998,69.47921810097519,2021-01-29 14:54:08.769187,
|
||||
0,elastic,elasticsearch,1000,2.1182381880007597,472.0904408506686,2021-01-29 11:17:14.160560,
|
||||
4,elastic,elasticsearch,10000,20.23965223199957,494.0796356268248,2021-01-29 11:20:27.378846,
|
||||
8,elastic,elasticsearch,100000,207.03962336699988,482.9993330442806,2021-01-29 11:31:50.829072,
|
||||
12,elastic,elasticsearch,500000,1029.1638562459993,485.8312862091863,2021-01-29 12:52:45.994426,
|
||||
2,dpr,faiss_flat,1000,9.899907313998483,101.01104669798258,2021-01-29 11:19:11.304749,
|
||||
6,dpr,faiss_flat,10000,104.1660261320012,96.00059032037764,2021-01-29 11:25:43.069491,
|
||||
10,dpr,faiss_flat,100000,1046.8892760299987,95.52108545730724,2021-01-29 12:14:51.105055,
|
||||
14,dpr,faiss_flat,500000,5243.775349973999,95.35114810028603,2021-01-29 16:24:19.855339,
|
||||
3,dpr,faiss_hnsw,1000,10.329135104999295,96.81352696374361,2021-01-29 11:19:55.337391,
|
||||
7,dpr,faiss_hnsw,10000,112.53792207699917,88.85893586304122,2021-01-29 11:28:10.284866,
|
||||
11,dpr,faiss_hnsw,100000,1188.8019736170008,84.11829911061136,2021-01-29 12:35:16.166263,
|
||||
15,dpr,faiss_hnsw,500000,6125.295488232001,81.62871504903015,2021-01-29 18:07:08.100722,
|
||||
9,dpr,elasticsearch,10000,139.7465313429998,71.55812673057035,2021-04-12 13:06:34.024778,
|
||||
14,elastic,elasticsearch,100000,205.94765839000047,485.56026702003703,2021-04-12 13:44:31.464961,
|
||||
8,elastic,elasticsearch,10000,19.96974077699997,500.7576268349683,2021-04-12 13:03:44.944941,
|
||||
3,dpr,elasticsearch,1000,14.592372578999857,68.52895199777984,2021-04-12 12:58:01.128834,
|
||||
2,elastic,elasticsearch,1000,2.1051091760000418,475.034744706267,2021-04-12 12:57:18.604681,
|
||||
15,dpr,elasticsearch,100000,1401.1558383250003,71.36964873196699,2021-04-12 14:08:31.400192,
|
||||
20,elastic,elasticsearch,500000,1027.416534557,486.6575368242339,2021-04-12 17:30:22.080196,
|
||||
21,dpr,elasticsearch,500000,7010.269106937998,71.32393812174124,2021-04-12 19:28:39.657070,
|
||||
4,dpr,faiss_flat,1000,9.570316116999948,104.48975642755202,2021-04-12 12:58:47.918981,
|
||||
22,dpr,faiss_flat,500000,5041.962777018001,99.16772933728758,2021-04-12 20:55:28.443354,
|
||||
10,dpr,faiss_flat,10000,95.71089355200002,104.48131481049198,2021-04-12 13:08:50.343175,
|
||||
16,dpr,faiss_flat,100000,999.8815230299997,100.0118491008456,2021-04-12 14:26:14.495997,
|
||||
11,dpr,faiss_hnsw,10000,108.9302881550002,91.80183188142033,2021-04-12 13:11:13.117266,
|
||||
17,dpr,faiss_hnsw,100000,1112.2988848330006,89.90389306648807,2021-04-12 14:45:22.644624,
|
||||
23,dpr,faiss_hnsw,500000,5802.5877488399965,86.16845132586847,2021-04-12 22:32:53.095579,
|
||||
5,dpr,faiss_hnsw,1000,9.837438108000242,101.65248197970928,2021-04-12 12:59:30.777696,
|
||||
0,dpr,milvus_flat,1000,9.717840198999966,102.90352377917338,2021-04-12 12:56:32.363797,
|
||||
6,dpr,milvus_flat,10000,87.06480573199997,114.85697252666792,2021-04-12 13:01:21.834327,
|
||||
12,dpr,milvus_flat,100000,861.995940363,116.00982709720004,2021-04-12 13:26:00.742197,
|
||||
18,dpr,milvus_flat,500000,4364.3841063849995,114.56370195934652,2021-04-12 15:58:40.069278,
|
||||
1,dpr,milvus_hnsw,1000,8.522245804999784,117.33996212750934,2021-04-12 12:57:04.976604,
|
||||
7,dpr,milvus_hnsw,10000,87.128293364,114.77327988306308,2021-04-12 13:03:13.381764,
|
||||
19,dpr,milvus_hnsw,500000,4414.051032668,113.27463056035022,2021-04-12 17:12:50.943619,
|
||||
13,dpr,milvus_hnsw,100000,864.9713281529998,115.61076852516385,2021-04-12 13:40:51.875517,
|
||||
|
||||
|
@ -1,17 +1,27 @@
|
||||
,retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error
|
||||
1,dpr,elasticsearch,1000,1064,34.67547423102587,30.684511851549132,0.0325897314201371,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:27:43.048502,
|
||||
5,dpr,elasticsearch,10000,5637,288.06125728895495,19.568754413737462,0.05110187285594375,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:37:21.149887,
|
||||
9,dpr,elasticsearch,100000,5637,1225.6274364620313,4.5992769354707805,0.2174254810115365,0.9579563597658329,0.8654564090434241,10,2021-02-01 12:15:52.757320,
|
||||
13,dpr,elasticsearch,500000,5637,5339.008413678017,1.0558140319761546,0.947136493467805,0.9308142629058009,0.8086137228234089,10,2021-02-01 14:52:23.056230,
|
||||
0,elastic,elasticsearch,1000,1064,4.046542626992959,262.9405144288997,0.0038031415667227064,0.8909774436090225,0.742044471297291,10,2021-02-01 11:26:04.346134,
|
||||
4,elastic,elasticsearch,10000,5637,30.701430243001596,183.6070813438718,0.005446413028738974,0.8110697179350719,0.6620627317806674,10,2021-02-01 11:31:20.470092,
|
||||
8,elastic,elasticsearch,100000,5637,34.705507238930295,162.42378943468643,0.006156733588598598,0.7193542664537875,0.5625959153101251,10,2021-02-01 11:50:36.048887,
|
||||
12,elastic,elasticsearch,500000,5637,68.3838099470413,82.4317920333114,0.01213124178588634,0.6274614156466205,0.45594527090003406,10,2021-02-01 13:02:16.905187,
|
||||
2,dpr,faiss_flat,1000,1064,30.053267333012627,35.40380445859966,0.028245552004711117,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:28:55.544474,
|
||||
6,dpr,faiss_flat,10000,5637,218.59436279792135,25.78749025294445,0.038778492602079356,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:42:07.545869,
|
||||
10,dpr,faiss_flat,100000,5637,865.7440476809643,6.511162294559943,0.15358241044544338,0.9579563597658329,0.8654606328368972,10,2021-02-01 12:34:29.493598,
|
||||
14,dpr,faiss_flat,500000,5637,3717.9468668280497,1.5161593755666505,0.6595612678424783,0.9308142629058009,0.808613722823409,10,2021-02-01 16:12:52.804436,
|
||||
3,dpr,faiss_hnsw,1000,1064,27.167708159968242,39.164142729117266,0.02553356030072203,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:30:02.684535,
|
||||
7,dpr,faiss_hnsw,10000,5637,167.55242089293097,33.6432023480111,0.02972368651639719,0.972503104488203,0.8969941373746582,10,2021-02-01 11:46:07.130588,
|
||||
11,dpr,faiss_hnsw,100000,5637,167.48228578322596,33.65729082116796,0.029711244595214823,0.9402164271775767,0.8507984377595874,10,2021-02-01 12:43:21.697968,
|
||||
15,dpr,faiss_hnsw,500000,5637,164.45566683610014,34.27671486454735,0.029174324434291313,0.8825616462657442,0.7691475821598232,10,2021-02-01 16:47:01.710072,
|
||||
9,dpr,elasticsearch,10000,5637,245.9020519811238,22.92376153263135,0.043622858254589995,97.49866950505589,89.87097014904357,10,2021-04-12 22:50:49.396735,
|
||||
14,elastic,elasticsearch,100000,5637,34.05730972948368,165.51512861040828,0.006041743787384013,71.93542664537875,56.25959153101251,10,2021-04-12 23:13:45.908089,
|
||||
8,elastic,elasticsearch,10000,5637,21.6593179620686,260.2575025618042,0.0038423484055470285,81.1424516586837,66.33019927857616,10,2021-04-12 22:45:31.522940,
|
||||
3,dpr,elasticsearch,1000,1064,30.71146336113452,34.645044017879535,0.02886415729429936,99.15413533834587,92.9510532283089,10,2021-04-12 22:36:03.760187,
|
||||
2,elastic,elasticsearch,1000,1064,2.977487905904127,357.3482189096959,0.002798390889007638,89.09774436090225,74.2044471297291,10,2021-04-12 22:34:57.937655,
|
||||
15,dpr,elasticsearch,100000,5637,1052.5279855618428,5.3556770720836955,0.18671775511120148,95.7956359765833,86.54606328368972,10,2021-04-12 23:36:05.648808,
|
||||
20,elastic,elasticsearch,500000,5637,62.07282430688065,90.81268756406739,0.011011677187667313,62.746141564662054,45.60339705629754,10,2021-04-13 00:49:21.340846,
|
||||
21,dpr,elasticsearch,500000,5637,4574.405559586667,1.2322912620168613,0.8114964625841169,93.0814262905801,80.8613722823409,10,2021-04-13 02:26:34.009405,
|
||||
4,dpr,faiss_flat,1000,1064,26.330981328930648,40.408672457298465,0.024747162903130306,99.15413533834587,92.9510532283089,10,2021-04-12 22:37:12.563041,
|
||||
22,dpr,faiss_flat,500000,5637,3657.0574446742394,1.5414031869280982,0.6487595253990136,93.0814262905801,80.86137228234091,10,2021-04-13 03:45:25.432560,
|
||||
10,dpr,faiss_flat,10000,5637,201.07035204221756,28.03496359730067,0.03566974490725875,97.49866950505589,89.87097014904354,10,2021-04-12 22:55:15.966314,
|
||||
16,dpr,faiss_flat,100000,5637,850.93480880688,6.6244792687512675,0.150955261452347,95.7956359765833,86.54606328368972,10,2021-04-12 23:54:18.954839,
|
||||
11,dpr,faiss_hnsw,10000,5637,133.81918276423676,42.12400556900196,0.023739432812530912,96.96647152740819,89.49563682134192,10,2021-04-12 22:58:37.011857,
|
||||
17,dpr,faiss_hnsw,100000,5637,138.56263777621643,40.68196225525062,0.024580918534010367,93.18786588610963,84.33419639513305,10,2021-04-13 00:02:06.239176,
|
||||
23,dpr,faiss_hnsw,500000,5637,142.9722781331293,39.42722374998517,0.025363185760711247,86.90792974986695,75.73315903145605,10,2021-04-13 04:15:55.931738,
|
||||
5,dpr,faiss_hnsw,1000,1064,24.00786596405669,44.31880791041422,0.022563783800805162,99.15413533834587,92.9510532283089,10,2021-04-12 22:38:15.165568,
|
||||
0,dpr,milvus_flat,1000,1064,26.284036851771816,40.480844171707794,0.02470304215392088,99.15413533834587,92.9510532283089,10,2021-04-12 22:33:51.172786,
|
||||
6,dpr,milvus_flat,10000,5637,147.82553930512222,38.132788329389,0.0262241510209548,97.49866950505589,89.87097014904354,10,2021-04-12 22:41:19.833104,
|
||||
12,dpr,milvus_flat,100000,5637,199.15962131407287,28.30393009791128,0.03533078256414278,95.7956359765833,86.54606328368972,10,2021-04-12 23:03:55.360165,
|
||||
18,dpr,milvus_flat,500000,5637,368.32888050779,15.30425741318099,0.06534129510516055,93.0814262905801,80.8613722823409,10,2021-04-13 00:17:05.346842,
|
||||
1,dpr,milvus_hnsw,1000,1064,26.34384093301196,40.38894718145225,0.02475924899719169,99.15413533834587,92.9510532283089,10,2021-04-12 22:34:43.389192,
|
||||
7,dpr,milvus_hnsw,10000,5637,147.73994229108212,38.154881561370836,0.02620896616836653,97.49866950505589,89.87097014904354,10,2021-04-12 22:44:25.082029,
|
||||
19,dpr,milvus_hnsw,500000,5637,230.05139175982913,24.50322059292282,0.04081096181653879,85.71935426645379,74.85616575291942,10,2021-04-13 00:30:29.654851,
|
||||
13,dpr,milvus_hnsw,100000,5637,200.77329401532916,28.076443272229284,0.035617047013540744,95.7956359765833,86.54606328368972,10,2021-04-12 23:09:18.273909,
|
||||
|
||||
|
||||
|
||||
|
@ -3,6 +3,7 @@ from haystack.document_store.sql import SQLDocumentStore
|
||||
from haystack.document_store.memory import InMemoryDocumentStore
|
||||
from haystack.document_store.elasticsearch import Elasticsearch, ElasticsearchDocumentStore
|
||||
from haystack.document_store.faiss import FAISSDocumentStore
|
||||
from haystack.document_store.milvus import MilvusDocumentStore, IndexType
|
||||
from haystack.retriever.sparse import ElasticsearchRetriever, TfidfRetriever
|
||||
from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever
|
||||
from haystack.reader.farm import FARMReader
|
||||
@ -39,6 +40,16 @@ def get_document_store(document_store_type, similarity='dot_product'):
|
||||
client = Elasticsearch()
|
||||
client.indices.delete(index='haystack_test*', ignore=[404])
|
||||
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=similarity, timeout=3000)
|
||||
elif document_store_type in ("milvus_flat", "milvus_hnsw"):
|
||||
if document_store_type == "milvus_flat":
|
||||
index_type = IndexType.FLAT
|
||||
index_param = None
|
||||
search_param = None
|
||||
elif document_store_type == "milvus_hnsw":
|
||||
index_type = IndexType.HNSW
|
||||
index_param = {"M": 64, "efConstruction": 80}
|
||||
search_param = {"ef": 20}
|
||||
document_store = MilvusDocumentStore(similarity=similarity, index_type=index_type, index_param=index_param, search_param=search_param)
|
||||
assert document_store.get_document_count(index="eval_document") == 0
|
||||
elif document_store_type in("faiss_flat", "faiss_hnsw"):
|
||||
if document_store_type == "faiss_flat":
|
||||
@ -54,7 +65,7 @@ def get_document_store(document_store_type, similarity='dot_product'):
|
||||
shell=True)
|
||||
time.sleep(6)
|
||||
status = subprocess.run(
|
||||
['docker exec -it haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
|
||||
['docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
|
||||
time.sleep(1)
|
||||
document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
|
||||
faiss_index_factory_str=index_type,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user