Benchmark milvus (#850)

* Add milvus benchmarking support

* Add latest docstring and tutorial changes

* Edit config

* Disable docker interactive mode

* Add milvus index type support

* Adjust FAISS and Milvus node branching

* Remove duplicate in config

* Revert method for speedup

* Add latest docstring and tutorial changes

* Add latest benchmark run

* Add latest docstring and tutorial changes

* Add json files

* Revert "Add latest docstring and tutorial changes"

This reverts commit e2efa5f08aa4fb55bbeeed42aa76817d63fc8923.

* Add latest docstring and tutorial changes

* Revert "Add latest docstring and tutorial changes"

This reverts commit b085a679b9d5f175e91c2c59565e73c5dec1374b.

* Fix typo

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Branden Chan 2021-04-13 14:54:15 +02:00 committed by GitHub
parent b87daed62b
commit 77d4c2ca1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 293 additions and 160 deletions

1
.gitignore vendored
View File

@ -149,6 +149,7 @@ tutorials/cache
tutorials/mlruns
tutorials/model
models
saved_models
*_build
.DS_Store

View File

@ -827,8 +827,9 @@ the vector embeddings are indexed in a FAISS Index.
Recommended options:
- "Flat" (default): Best accuracy (= exact). Becomes slow and RAM-intensive for > 1 million docs.
- "HNSW": Graph-based heuristic. If not further specified,
we use a RAM intense, but more accurate config:
HNSW256, efConstruction=256 and efSearch=256
we use the following config:
HNSW64, efConstruction=80 and efSearch=20
- "IVFx,Flat": Inverted Index. Replace x with the number of centroids aka nlist.
Rule of thumb: nlist = 10 * sqrt (num_docs) is a good starting point.
For more details see:

View File

@ -8,7 +8,10 @@
"BM25 / ElasticSearch",
"DPR / ElasticSearch",
"DPR / FAISS (flat)",
"DPR / FAISS (HSNW)"
"DPR / FAISS (HNSW)",
"DPR / Milvus (flat)",
"DPR / Milvus (HNSW)"
],
"axis": [
{
@ -17,25 +20,25 @@
}
],
"data": [
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"map": 92.95105322830888
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"map": 89.87097014904354
"map": 89.87097014904356
},
{
"model": "DPR / ElasticSearch",
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"map": 86.54564090434241
"map": 56.259591531012504
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"map": 66.33019927857616
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"map": 80.86137228234089
"n_docs": 1000,
"map": 92.95105322830891
},
{
"model": "BM25 / ElasticSearch",
@ -43,24 +46,29 @@
"map": 74.20444712972909
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"map": 66.20627317806674
},
{
"model": "BM25 / ElasticSearch",
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"map": 56.25959153101251
"map": 86.54606328368973
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"map": 45.59452709000341
"map": 45.60339705629754
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"map": 80.86137228234091
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"map": 92.95105322830888
"map": 92.95105322830891
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 80.86137228234091
},
{
"model": "DPR / FAISS (flat)",
@ -70,32 +78,67 @@
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"map": 86.54606328368972
"map": 86.54606328368973
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"map": 80.8613722823409
},
{
"model": "DPR / FAISS (HSNW)",
"n_docs": 1000,
"map": 92.95105322830888
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / FAISS (HNSW)",
"n_docs": 10000,
"map": 89.69941373746582
"map": 89.49563682134192
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / FAISS (HNSW)",
"n_docs": 100000,
"map": 85.07984377595874
"map": 84.33419639513305
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / FAISS (HNSW)",
"n_docs": 500000,
"map": 76.91475821598232
"map": 75.73315903145605
},
{
"model": "DPR / FAISS (HNSW)",
"n_docs": 1000,
"map": 92.95105322830891
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 1000,
"map": 92.95105322830891
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 10000,
"map": 89.87097014904354
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 100000,
"map": 86.54606328368973
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 500000,
"map": 80.86137228234091
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 1000,
"map": 92.95105322830891
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 10000,
"map": 89.87097014904354
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 500000,
"map": 74.85616575291942
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 100000,
"map": 86.54606328368973
}
]
}

View File

@ -21,33 +21,47 @@
"time_label": "seconds"
},
"data": [
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"index_speed": 69.75508852811794,
"query_speed": 4.5992769354707805,
"map": 86.54564090434241
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"index_speed": 482.9993330442806,
"query_speed": 162.42378943468643,
"map": 56.25959153101251
"index_speed": 485.5602670200369,
"query_speed": 165.51512861040828,
"map": 56.259591531012504
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"index_speed": 71.36964873196698,
"query_speed": 5.355677072083696,
"map": 86.54606328368973
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"index_speed": 95.52108545730724,
"query_speed": 6.511162294559942,
"map": 86.54606328368972
"index_speed": 100.01184910084558,
"query_speed": 6.624479268751268,
"map": 86.54606328368973
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / FAISS (HNSW)",
"n_docs": 100000,
"index_speed": 84.11829911061136,
"query_speed": 33.65729082116796,
"map": 85.07984377595874
"index_speed": 89.90389306648805,
"query_speed": 40.68196225525062,
"map": 84.33419639513305
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 100000,
"index_speed": 116.00982709720004,
"query_speed": 28.30393009791128,
"map": 86.54606328368973
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 100000,
"index_speed": 115.61076852516383,
"query_speed": 28.076443272229284,
"map": 86.54606328368973
}
]
}

View File

@ -8,7 +8,9 @@
"BM25 / ElasticSearch",
"DPR / ElasticSearch",
"DPR / FAISS (flat)",
"DPR / FAISS (HSNW)"
"DPR / FAISS (HNSW)",
"DPR / Milvus (flat)",
"DPR / Milvus (HNSW)"
],
"axis": [
{
@ -19,83 +21,123 @@
"data": [
{
"model": "DPR / ElasticSearch",
"n_docs": 1000,
"query_speed": 30.68451185154913
"n_docs": 10000,
"query_speed": 22.92376153263135
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"query_speed": 165.51512861040828
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"query_speed": 260.2575025618042
},
{
"model": "DPR / ElasticSearch",
"n_docs": 10000,
"query_speed": 19.568754413737462
"n_docs": 1000,
"query_speed": 34.64504401787953
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"query_speed": 357.3482189096959
},
{
"model": "DPR / ElasticSearch",
"n_docs": 100000,
"query_speed": 4.5992769354707805
"query_speed": 5.355677072083696
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"query_speed": 90.8126875640674
},
{
"model": "DPR / ElasticSearch",
"n_docs": 500000,
"query_speed": 1.0558140319761546
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 1000,
"query_speed": 262.9405144288997
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 10000,
"query_speed": 183.6070813438718
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 100000,
"query_speed": 162.42378943468643
},
{
"model": "BM25 / ElasticSearch",
"n_docs": 500000,
"query_speed": 82.43179203331141
"query_speed": 1.2322912620168611
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 1000,
"query_speed": 35.40380445859966
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 25.78749025294445
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 6.511162294559942
"query_speed": 40.40867245729847
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 500000,
"query_speed": 1.5161593755666505
"query_speed": 1.5414031869280982
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / FAISS (flat)",
"n_docs": 10000,
"query_speed": 28.034963597300674
},
{
"model": "DPR / FAISS (flat)",
"n_docs": 100000,
"query_speed": 6.624479268751268
},
{
"model": "DPR / FAISS (HNSW)",
"n_docs": 10000,
"query_speed": 42.12400556900196
},
{
"model": "DPR / FAISS (HNSW)",
"n_docs": 100000,
"query_speed": 40.68196225525062
},
{
"model": "DPR / FAISS (HNSW)",
"n_docs": 500000,
"query_speed": 39.42722374998517
},
{
"model": "DPR / FAISS (HNSW)",
"n_docs": 1000,
"query_speed": 39.16414272911727
"query_speed": 44.31880791041422
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / Milvus (flat)",
"n_docs": 1000,
"query_speed": 40.48084417170779
},
{
"model": "DPR / Milvus (flat)",
"n_docs": 10000,
"query_speed": 33.6432023480111
"query_speed": 38.132788329389
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / Milvus (flat)",
"n_docs": 100000,
"query_speed": 33.65729082116796
"query_speed": 28.30393009791128
},
{
"model": "DPR / FAISS (HSNW)",
"model": "DPR / Milvus (flat)",
"n_docs": 500000,
"query_speed": 34.27671486454735
"query_speed": 15.30425741318099
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 1000,
"query_speed": 40.38894718145225
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 10000,
"query_speed": 38.15488156137084
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 500000,
"query_speed": 24.503220592922823
},
{
"model": "DPR / Milvus (HNSW)",
"n_docs": 100000,
"query_speed": 28.076443272229284
}
]
}

View File

@ -51,8 +51,8 @@ class FAISSDocumentStore(SQLDocumentStore):
Recommended options:
- "Flat" (default): Best accuracy (= exact). Becomes slow and RAM-intensive for > 1 million docs.
- "HNSW": Graph-based heuristic. If not further specified,
we use a RAM intense, but more accurate config:
HNSW256, efConstruction=256 and efSearch=256
we use the following config:
HNSW64, efConstruction=80 and efSearch=20
- "IVFx,Flat": Inverted Index. Replace x with the number of centroids aka nlist.
Rule of thumb: nlist = 10 * sqrt (num_docs) is a good starting point.
For more details see:
@ -103,7 +103,7 @@ class FAISSDocumentStore(SQLDocumentStore):
if index_factory == "HNSW" and metric_type == faiss.METRIC_INNER_PRODUCT:
# faiss index factory doesn't give the same results for HNSW IP, therefore direct init.
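# defaults here were chosen to be similar to the DPR codebase (good accuracy, but very high RAM consumption).
# NOTE(review): n_links now defaults to 64 (efSearch=20, efConstruction=80), so the RAM caveat may be outdated - confirm.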
n_links = kwargs.get("n_links", 128)
n_links = kwargs.get("n_links", 64)
index = faiss.IndexHNSWFlat(vector_dim, n_links, metric_type)
index.hnsw.efSearch = kwargs.get("efSearch", 20)#20
index.hnsw.efConstruction = kwargs.get("efConstruction", 80)#80

View File

@ -99,7 +99,8 @@ class MilvusDocumentStore(SQLDocumentStore):
self.index_file_size = index_file_size
if similarity == "dot_product":
self.metric_type = MetricType.L2
self.metric_type = MetricType.IP
self.similarity = similarity
else:
raise ValueError("The Milvus document store can currently only support dot_product similarity. "
"Please set similarity=\"dot_product\"")

View File

@ -124,27 +124,19 @@ class SQLDocumentStore(BaseDocumentStore):
return documents
def get_documents_by_vector_ids(
self,
vector_ids: List[str],
index: Optional[str] = None,
batch_size: int = 10_000
):
"""
Fetch documents by specifying a list of text vector id strings
def get_documents_by_vector_ids(self, vector_ids: List[str], index: Optional[str] = None, batch_size: int = 10_000):
"""Fetch documents by specifying a list of text vector id strings"""
index = index or self.index
:param vector_ids: List of vector_id strings.
:param index: Name of the index to get the documents from. If None, the
DocumentStore's default index (self.index) will be used.
:param batch_size: When working with large number of documents, batching can help reduce memory footprint.
"""
documents = []
for i in range(0, len(vector_ids), batch_size):
query = self.session.query(DocumentORM).filter(
DocumentORM.vector_id.in_(vector_ids[i: i + batch_size]),
DocumentORM.index == index
)
for row in query.all():
documents.append(self._convert_sql_row_to_document(row))
result = self._query(
index=index,
vector_ids=vector_ids,
batch_size=batch_size
)
documents = list(result)
sorted_documents = sorted(documents, key=lambda doc: vector_ids.index(doc.meta["vector_id"]))
return sorted_documents

View File

@ -2,6 +2,14 @@
"params": {
"full": {
"retriever_doc_stores": [
[
"dpr",
"milvus_flat"
],
[
"dpr",
"milvus_hnsw"
],
[
"elastic",
"elasticsearch"

View File

@ -44,7 +44,9 @@ def retriever(index_csv="retriever_index_results.csv", query_csv="retriever_quer
"elasticsearch": "ElasticSearch",
"faiss": "FAISS",
"faiss_flat": "FAISS (flat)",
"faiss_hnsw": "FAISS (HSNW)"
"faiss_hnsw": "FAISS (HNSW)",
"milvus_flat": "Milvus (flat)",
"milvus_hnsw": "Milvus (HNSW)"
}
index = pd.read_csv(index_csv)

View File

@ -1,17 +1,25 @@
,retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,error
1,dpr,elasticsearch,1000,15.336494209999728,65.20394989279743,2021-01-29 11:18:25.436371,
5,dpr,elasticsearch,10000,144.0823780490009,69.40474008972218,2021-01-29 11:23:19.896920,
9,dpr,elasticsearch,100000,1433.587170629,69.75508852811794,2021-01-29 11:56:22.502185,
13,dpr,elasticsearch,500000,7196.396471723998,69.47921810097519,2021-01-29 14:54:08.769187,
0,elastic,elasticsearch,1000,2.1182381880007597,472.0904408506686,2021-01-29 11:17:14.160560,
4,elastic,elasticsearch,10000,20.23965223199957,494.0796356268248,2021-01-29 11:20:27.378846,
8,elastic,elasticsearch,100000,207.03962336699988,482.9993330442806,2021-01-29 11:31:50.829072,
12,elastic,elasticsearch,500000,1029.1638562459993,485.8312862091863,2021-01-29 12:52:45.994426,
2,dpr,faiss_flat,1000,9.899907313998483,101.01104669798258,2021-01-29 11:19:11.304749,
6,dpr,faiss_flat,10000,104.1660261320012,96.00059032037764,2021-01-29 11:25:43.069491,
10,dpr,faiss_flat,100000,1046.8892760299987,95.52108545730724,2021-01-29 12:14:51.105055,
14,dpr,faiss_flat,500000,5243.775349973999,95.35114810028603,2021-01-29 16:24:19.855339,
3,dpr,faiss_hnsw,1000,10.329135104999295,96.81352696374361,2021-01-29 11:19:55.337391,
7,dpr,faiss_hnsw,10000,112.53792207699917,88.85893586304122,2021-01-29 11:28:10.284866,
11,dpr,faiss_hnsw,100000,1188.8019736170008,84.11829911061136,2021-01-29 12:35:16.166263,
15,dpr,faiss_hnsw,500000,6125.295488232001,81.62871504903015,2021-01-29 18:07:08.100722,
9,dpr,elasticsearch,10000,139.7465313429998,71.55812673057035,2021-04-12 13:06:34.024778,
14,elastic,elasticsearch,100000,205.94765839000047,485.56026702003703,2021-04-12 13:44:31.464961,
8,elastic,elasticsearch,10000,19.96974077699997,500.7576268349683,2021-04-12 13:03:44.944941,
3,dpr,elasticsearch,1000,14.592372578999857,68.52895199777984,2021-04-12 12:58:01.128834,
2,elastic,elasticsearch,1000,2.1051091760000418,475.034744706267,2021-04-12 12:57:18.604681,
15,dpr,elasticsearch,100000,1401.1558383250003,71.36964873196699,2021-04-12 14:08:31.400192,
20,elastic,elasticsearch,500000,1027.416534557,486.6575368242339,2021-04-12 17:30:22.080196,
21,dpr,elasticsearch,500000,7010.269106937998,71.32393812174124,2021-04-12 19:28:39.657070,
4,dpr,faiss_flat,1000,9.570316116999948,104.48975642755202,2021-04-12 12:58:47.918981,
22,dpr,faiss_flat,500000,5041.962777018001,99.16772933728758,2021-04-12 20:55:28.443354,
10,dpr,faiss_flat,10000,95.71089355200002,104.48131481049198,2021-04-12 13:08:50.343175,
16,dpr,faiss_flat,100000,999.8815230299997,100.0118491008456,2021-04-12 14:26:14.495997,
11,dpr,faiss_hnsw,10000,108.9302881550002,91.80183188142033,2021-04-12 13:11:13.117266,
17,dpr,faiss_hnsw,100000,1112.2988848330006,89.90389306648807,2021-04-12 14:45:22.644624,
23,dpr,faiss_hnsw,500000,5802.5877488399965,86.16845132586847,2021-04-12 22:32:53.095579,
5,dpr,faiss_hnsw,1000,9.837438108000242,101.65248197970928,2021-04-12 12:59:30.777696,
0,dpr,milvus_flat,1000,9.717840198999966,102.90352377917338,2021-04-12 12:56:32.363797,
6,dpr,milvus_flat,10000,87.06480573199997,114.85697252666792,2021-04-12 13:01:21.834327,
12,dpr,milvus_flat,100000,861.995940363,116.00982709720004,2021-04-12 13:26:00.742197,
18,dpr,milvus_flat,500000,4364.3841063849995,114.56370195934652,2021-04-12 15:58:40.069278,
1,dpr,milvus_hnsw,1000,8.522245804999784,117.33996212750934,2021-04-12 12:57:04.976604,
7,dpr,milvus_hnsw,10000,87.128293364,114.77327988306308,2021-04-12 13:03:13.381764,
19,dpr,milvus_hnsw,500000,4414.051032668,113.27463056035022,2021-04-12 17:12:50.943619,
13,dpr,milvus_hnsw,100000,864.9713281529998,115.61076852516385,2021-04-12 13:40:51.875517,

1 retriever doc_store n_docs indexing_time docs_per_second date_time error
2 1 9 dpr elasticsearch 1000 10000 15.336494209999728 139.7465313429998 65.20394989279743 71.55812673057035 2021-01-29 11:18:25.436371 2021-04-12 13:06:34.024778
3 5 14 dpr elastic elasticsearch 10000 100000 144.0823780490009 205.94765839000047 69.40474008972218 485.56026702003703 2021-01-29 11:23:19.896920 2021-04-12 13:44:31.464961
4 9 8 dpr elastic elasticsearch 100000 10000 1433.587170629 19.96974077699997 69.75508852811794 500.7576268349683 2021-01-29 11:56:22.502185 2021-04-12 13:03:44.944941
5 13 3 dpr elasticsearch 500000 1000 7196.396471723998 14.592372578999857 69.47921810097519 68.52895199777984 2021-01-29 14:54:08.769187 2021-04-12 12:58:01.128834
6 0 2 elastic elasticsearch 1000 2.1182381880007597 2.1051091760000418 472.0904408506686 475.034744706267 2021-01-29 11:17:14.160560 2021-04-12 12:57:18.604681
7 4 15 elastic dpr elasticsearch 10000 100000 20.23965223199957 1401.1558383250003 494.0796356268248 71.36964873196699 2021-01-29 11:20:27.378846 2021-04-12 14:08:31.400192
8 8 20 elastic elasticsearch 100000 500000 207.03962336699988 1027.416534557 482.9993330442806 486.6575368242339 2021-01-29 11:31:50.829072 2021-04-12 17:30:22.080196
9 12 21 elastic dpr elasticsearch 500000 1029.1638562459993 7010.269106937998 485.8312862091863 71.32393812174124 2021-01-29 12:52:45.994426 2021-04-12 19:28:39.657070
10 2 4 dpr faiss_flat 1000 9.899907313998483 9.570316116999948 101.01104669798258 104.48975642755202 2021-01-29 11:19:11.304749 2021-04-12 12:58:47.918981
11 6 22 dpr faiss_flat 10000 500000 104.1660261320012 5041.962777018001 96.00059032037764 99.16772933728758 2021-01-29 11:25:43.069491 2021-04-12 20:55:28.443354
12 10 dpr faiss_flat 100000 10000 1046.8892760299987 95.71089355200002 95.52108545730724 104.48131481049198 2021-01-29 12:14:51.105055 2021-04-12 13:08:50.343175
13 14 16 dpr faiss_flat 500000 100000 5243.775349973999 999.8815230299997 95.35114810028603 100.0118491008456 2021-01-29 16:24:19.855339 2021-04-12 14:26:14.495997
14 3 11 dpr faiss_hnsw 1000 10000 10.329135104999295 108.9302881550002 96.81352696374361 91.80183188142033 2021-01-29 11:19:55.337391 2021-04-12 13:11:13.117266
15 7 17 dpr faiss_hnsw 10000 100000 112.53792207699917 1112.2988848330006 88.85893586304122 89.90389306648807 2021-01-29 11:28:10.284866 2021-04-12 14:45:22.644624
16 11 23 dpr faiss_hnsw 100000 500000 1188.8019736170008 5802.5877488399965 84.11829911061136 86.16845132586847 2021-01-29 12:35:16.166263 2021-04-12 22:32:53.095579
17 15 5 dpr faiss_hnsw 500000 1000 6125.295488232001 9.837438108000242 81.62871504903015 101.65248197970928 2021-01-29 18:07:08.100722 2021-04-12 12:59:30.777696
18 0 dpr milvus_flat 1000 9.717840198999966 102.90352377917338 2021-04-12 12:56:32.363797
19 6 dpr milvus_flat 10000 87.06480573199997 114.85697252666792 2021-04-12 13:01:21.834327
20 12 dpr milvus_flat 100000 861.995940363 116.00982709720004 2021-04-12 13:26:00.742197
21 18 dpr milvus_flat 500000 4364.3841063849995 114.56370195934652 2021-04-12 15:58:40.069278
22 1 dpr milvus_hnsw 1000 8.522245804999784 117.33996212750934 2021-04-12 12:57:04.976604
23 7 dpr milvus_hnsw 10000 87.128293364 114.77327988306308 2021-04-12 13:03:13.381764
24 19 dpr milvus_hnsw 500000 4414.051032668 113.27463056035022 2021-04-12 17:12:50.943619
25 13 dpr milvus_hnsw 100000 864.9713281529998 115.61076852516385 2021-04-12 13:40:51.875517

View File

@ -1,17 +1,27 @@
,retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error
1,dpr,elasticsearch,1000,1064,34.67547423102587,30.684511851549132,0.0325897314201371,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:27:43.048502,
5,dpr,elasticsearch,10000,5637,288.06125728895495,19.568754413737462,0.05110187285594375,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:37:21.149887,
9,dpr,elasticsearch,100000,5637,1225.6274364620313,4.5992769354707805,0.2174254810115365,0.9579563597658329,0.8654564090434241,10,2021-02-01 12:15:52.757320,
13,dpr,elasticsearch,500000,5637,5339.008413678017,1.0558140319761546,0.947136493467805,0.9308142629058009,0.8086137228234089,10,2021-02-01 14:52:23.056230,
0,elastic,elasticsearch,1000,1064,4.046542626992959,262.9405144288997,0.0038031415667227064,0.8909774436090225,0.742044471297291,10,2021-02-01 11:26:04.346134,
4,elastic,elasticsearch,10000,5637,30.701430243001596,183.6070813438718,0.005446413028738974,0.8110697179350719,0.6620627317806674,10,2021-02-01 11:31:20.470092,
8,elastic,elasticsearch,100000,5637,34.705507238930295,162.42378943468643,0.006156733588598598,0.7193542664537875,0.5625959153101251,10,2021-02-01 11:50:36.048887,
12,elastic,elasticsearch,500000,5637,68.3838099470413,82.4317920333114,0.01213124178588634,0.6274614156466205,0.45594527090003406,10,2021-02-01 13:02:16.905187,
2,dpr,faiss_flat,1000,1064,30.053267333012627,35.40380445859966,0.028245552004711117,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:28:55.544474,
6,dpr,faiss_flat,10000,5637,218.59436279792135,25.78749025294445,0.038778492602079356,0.9749866950505588,0.8987097014904354,10,2021-02-01 11:42:07.545869,
10,dpr,faiss_flat,100000,5637,865.7440476809643,6.511162294559943,0.15358241044544338,0.9579563597658329,0.8654606328368972,10,2021-02-01 12:34:29.493598,
14,dpr,faiss_flat,500000,5637,3717.9468668280497,1.5161593755666505,0.6595612678424783,0.9308142629058009,0.808613722823409,10,2021-02-01 16:12:52.804436,
3,dpr,faiss_hnsw,1000,1064,27.167708159968242,39.164142729117266,0.02553356030072203,0.9915413533834586,0.9295105322830889,10,2021-02-01 11:30:02.684535,
7,dpr,faiss_hnsw,10000,5637,167.55242089293097,33.6432023480111,0.02972368651639719,0.972503104488203,0.8969941373746582,10,2021-02-01 11:46:07.130588,
11,dpr,faiss_hnsw,100000,5637,167.48228578322596,33.65729082116796,0.029711244595214823,0.9402164271775767,0.8507984377595874,10,2021-02-01 12:43:21.697968,
15,dpr,faiss_hnsw,500000,5637,164.45566683610014,34.27671486454735,0.029174324434291313,0.8825616462657442,0.7691475821598232,10,2021-02-01 16:47:01.710072,
9,dpr,elasticsearch,10000,5637,245.9020519811238,22.92376153263135,0.043622858254589995,97.49866950505589,89.87097014904357,10,2021-04-12 22:50:49.396735,
14,elastic,elasticsearch,100000,5637,34.05730972948368,165.51512861040828,0.006041743787384013,71.93542664537875,56.25959153101251,10,2021-04-12 23:13:45.908089,
8,elastic,elasticsearch,10000,5637,21.6593179620686,260.2575025618042,0.0038423484055470285,81.1424516586837,66.33019927857616,10,2021-04-12 22:45:31.522940,
3,dpr,elasticsearch,1000,1064,30.71146336113452,34.645044017879535,0.02886415729429936,99.15413533834587,92.9510532283089,10,2021-04-12 22:36:03.760187,
2,elastic,elasticsearch,1000,1064,2.977487905904127,357.3482189096959,0.002798390889007638,89.09774436090225,74.2044471297291,10,2021-04-12 22:34:57.937655,
15,dpr,elasticsearch,100000,5637,1052.5279855618428,5.3556770720836955,0.18671775511120148,95.7956359765833,86.54606328368972,10,2021-04-12 23:36:05.648808,
20,elastic,elasticsearch,500000,5637,62.07282430688065,90.81268756406739,0.011011677187667313,62.746141564662054,45.60339705629754,10,2021-04-13 00:49:21.340846,
21,dpr,elasticsearch,500000,5637,4574.405559586667,1.2322912620168613,0.8114964625841169,93.0814262905801,80.8613722823409,10,2021-04-13 02:26:34.009405,
4,dpr,faiss_flat,1000,1064,26.330981328930648,40.408672457298465,0.024747162903130306,99.15413533834587,92.9510532283089,10,2021-04-12 22:37:12.563041,
22,dpr,faiss_flat,500000,5637,3657.0574446742394,1.5414031869280982,0.6487595253990136,93.0814262905801,80.86137228234091,10,2021-04-13 03:45:25.432560,
10,dpr,faiss_flat,10000,5637,201.07035204221756,28.03496359730067,0.03566974490725875,97.49866950505589,89.87097014904354,10,2021-04-12 22:55:15.966314,
16,dpr,faiss_flat,100000,5637,850.93480880688,6.6244792687512675,0.150955261452347,95.7956359765833,86.54606328368972,10,2021-04-12 23:54:18.954839,
11,dpr,faiss_hnsw,10000,5637,133.81918276423676,42.12400556900196,0.023739432812530912,96.96647152740819,89.49563682134192,10,2021-04-12 22:58:37.011857,
17,dpr,faiss_hnsw,100000,5637,138.56263777621643,40.68196225525062,0.024580918534010367,93.18786588610963,84.33419639513305,10,2021-04-13 00:02:06.239176,
23,dpr,faiss_hnsw,500000,5637,142.9722781331293,39.42722374998517,0.025363185760711247,86.90792974986695,75.73315903145605,10,2021-04-13 04:15:55.931738,
5,dpr,faiss_hnsw,1000,1064,24.00786596405669,44.31880791041422,0.022563783800805162,99.15413533834587,92.9510532283089,10,2021-04-12 22:38:15.165568,
0,dpr,milvus_flat,1000,1064,26.284036851771816,40.480844171707794,0.02470304215392088,99.15413533834587,92.9510532283089,10,2021-04-12 22:33:51.172786,
6,dpr,milvus_flat,10000,5637,147.82553930512222,38.132788329389,0.0262241510209548,97.49866950505589,89.87097014904354,10,2021-04-12 22:41:19.833104,
12,dpr,milvus_flat,100000,5637,199.15962131407287,28.30393009791128,0.03533078256414278,95.7956359765833,86.54606328368972,10,2021-04-12 23:03:55.360165,
18,dpr,milvus_flat,500000,5637,368.32888050779,15.30425741318099,0.06534129510516055,93.0814262905801,80.8613722823409,10,2021-04-13 00:17:05.346842,
1,dpr,milvus_hnsw,1000,1064,26.34384093301196,40.38894718145225,0.02475924899719169,99.15413533834587,92.9510532283089,10,2021-04-12 22:34:43.389192,
7,dpr,milvus_hnsw,10000,5637,147.73994229108212,38.154881561370836,0.02620896616836653,97.49866950505589,89.87097014904354,10,2021-04-12 22:44:25.082029,
19,dpr,milvus_hnsw,500000,5637,230.05139175982913,24.50322059292282,0.04081096181653879,85.71935426645379,74.85616575291942,10,2021-04-13 00:30:29.654851,
13,dpr,milvus_hnsw,100000,5637,200.77329401532916,28.076443272229284,0.035617047013540744,95.7956359765833,86.54606328368972,10,2021-04-12 23:09:18.273909,

1 retriever doc_store n_docs n_queries retrieve_time queries_per_second seconds_per_query recall map top_k date_time error
2 1 9 dpr elasticsearch 1000 10000 1064 5637 34.67547423102587 245.9020519811238 30.684511851549132 22.92376153263135 0.0325897314201371 0.043622858254589995 0.9915413533834586 97.49866950505589 0.9295105322830889 89.87097014904357 10 2021-02-01 11:27:43.048502 2021-04-12 22:50:49.396735
3 5 14 dpr elastic elasticsearch 10000 100000 5637 288.06125728895495 34.05730972948368 19.568754413737462 165.51512861040828 0.05110187285594375 0.006041743787384013 0.9749866950505588 71.93542664537875 0.8987097014904354 56.25959153101251 10 2021-02-01 11:37:21.149887 2021-04-12 23:13:45.908089
4 9 8 dpr elastic elasticsearch 100000 10000 5637 1225.6274364620313 21.6593179620686 4.5992769354707805 260.2575025618042 0.2174254810115365 0.0038423484055470285 0.9579563597658329 81.1424516586837 0.8654564090434241 66.33019927857616 10 2021-02-01 12:15:52.757320 2021-04-12 22:45:31.522940
5 13 3 dpr elasticsearch 500000 1000 5637 1064 5339.008413678017 30.71146336113452 1.0558140319761546 34.645044017879535 0.947136493467805 0.02886415729429936 0.9308142629058009 99.15413533834587 0.8086137228234089 92.9510532283089 10 2021-02-01 14:52:23.056230 2021-04-12 22:36:03.760187
6 0 2 elastic elasticsearch 1000 1064 4.046542626992959 2.977487905904127 262.9405144288997 357.3482189096959 0.0038031415667227064 0.002798390889007638 0.8909774436090225 89.09774436090225 0.742044471297291 74.2044471297291 10 2021-02-01 11:26:04.346134 2021-04-12 22:34:57.937655
7 4 15 elastic dpr elasticsearch 10000 100000 5637 30.701430243001596 1052.5279855618428 183.6070813438718 5.3556770720836955 0.005446413028738974 0.18671775511120148 0.8110697179350719 95.7956359765833 0.6620627317806674 86.54606328368972 10 2021-02-01 11:31:20.470092 2021-04-12 23:36:05.648808
8 8 20 elastic elasticsearch 100000 500000 5637 34.705507238930295 62.07282430688065 162.42378943468643 90.81268756406739 0.006156733588598598 0.011011677187667313 0.7193542664537875 62.746141564662054 0.5625959153101251 45.60339705629754 10 2021-02-01 11:50:36.048887 2021-04-13 00:49:21.340846
9 12 21 elastic dpr elasticsearch 500000 5637 68.3838099470413 4574.405559586667 82.4317920333114 1.2322912620168613 0.01213124178588634 0.8114964625841169 0.6274614156466205 93.0814262905801 0.45594527090003406 80.8613722823409 10 2021-02-01 13:02:16.905187 2021-04-13 02:26:34.009405
10 2 4 dpr faiss_flat 1000 1064 30.053267333012627 26.330981328930648 35.40380445859966 40.408672457298465 0.028245552004711117 0.024747162903130306 0.9915413533834586 99.15413533834587 0.9295105322830889 92.9510532283089 10 2021-02-01 11:28:55.544474 2021-04-12 22:37:12.563041
11 6 22 dpr faiss_flat 10000 500000 5637 218.59436279792135 3657.0574446742394 25.78749025294445 1.5414031869280982 0.038778492602079356 0.6487595253990136 0.9749866950505588 93.0814262905801 0.8987097014904354 80.86137228234091 10 2021-02-01 11:42:07.545869 2021-04-13 03:45:25.432560
12 10 dpr faiss_flat 100000 10000 5637 865.7440476809643 201.07035204221756 6.511162294559943 28.03496359730067 0.15358241044544338 0.03566974490725875 0.9579563597658329 97.49866950505589 0.8654606328368972 89.87097014904354 10 2021-02-01 12:34:29.493598 2021-04-12 22:55:15.966314
13 14 16 dpr faiss_flat 500000 100000 5637 3717.9468668280497 850.93480880688 1.5161593755666505 6.6244792687512675 0.6595612678424783 0.150955261452347 0.9308142629058009 95.7956359765833 0.808613722823409 86.54606328368972 10 2021-02-01 16:12:52.804436 2021-04-12 23:54:18.954839
14 3 11 dpr faiss_hnsw 1000 10000 1064 5637 27.167708159968242 133.81918276423676 39.164142729117266 42.12400556900196 0.02553356030072203 0.023739432812530912 0.9915413533834586 96.96647152740819 0.9295105322830889 89.49563682134192 10 2021-02-01 11:30:02.684535 2021-04-12 22:58:37.011857
15 7 17 dpr faiss_hnsw 10000 100000 5637 167.55242089293097 138.56263777621643 33.6432023480111 40.68196225525062 0.02972368651639719 0.024580918534010367 0.972503104488203 93.18786588610963 0.8969941373746582 84.33419639513305 10 2021-02-01 11:46:07.130588 2021-04-13 00:02:06.239176
16 11 23 dpr faiss_hnsw 100000 500000 5637 167.48228578322596 142.9722781331293 33.65729082116796 39.42722374998517 0.029711244595214823 0.025363185760711247 0.9402164271775767 86.90792974986695 0.8507984377595874 75.73315903145605 10 2021-02-01 12:43:21.697968 2021-04-13 04:15:55.931738
17 15 5 dpr faiss_hnsw 500000 1000 5637 1064 164.45566683610014 24.00786596405669 34.27671486454735 44.31880791041422 0.029174324434291313 0.022563783800805162 0.8825616462657442 99.15413533834587 0.7691475821598232 92.9510532283089 10 2021-02-01 16:47:01.710072 2021-04-12 22:38:15.165568
18 0 dpr milvus_flat 1000 1064 26.284036851771816 40.480844171707794 0.02470304215392088 99.15413533834587 92.9510532283089 10 2021-04-12 22:33:51.172786
19 6 dpr milvus_flat 10000 5637 147.82553930512222 38.132788329389 0.0262241510209548 97.49866950505589 89.87097014904354 10 2021-04-12 22:41:19.833104
20 12 dpr milvus_flat 100000 5637 199.15962131407287 28.30393009791128 0.03533078256414278 95.7956359765833 86.54606328368972 10 2021-04-12 23:03:55.360165
21 18 dpr milvus_flat 500000 5637 368.32888050779 15.30425741318099 0.06534129510516055 93.0814262905801 80.8613722823409 10 2021-04-13 00:17:05.346842
22 1 dpr milvus_hnsw 1000 1064 26.34384093301196 40.38894718145225 0.02475924899719169 99.15413533834587 92.9510532283089 10 2021-04-12 22:34:43.389192
23 7 dpr milvus_hnsw 10000 5637 147.73994229108212 38.154881561370836 0.02620896616836653 97.49866950505589 89.87097014904354 10 2021-04-12 22:44:25.082029
24 19 dpr milvus_hnsw 500000 5637 230.05139175982913 24.50322059292282 0.04081096181653879 85.71935426645379 74.85616575291942 10 2021-04-13 00:30:29.654851
25 13 dpr milvus_hnsw 100000 5637 200.77329401532916 28.076443272229284 0.035617047013540744 95.7956359765833 86.54606328368972 10 2021-04-12 23:09:18.273909
26
27

View File

@ -3,6 +3,7 @@ from haystack.document_store.sql import SQLDocumentStore
from haystack.document_store.memory import InMemoryDocumentStore
from haystack.document_store.elasticsearch import Elasticsearch, ElasticsearchDocumentStore
from haystack.document_store.faiss import FAISSDocumentStore
from haystack.document_store.milvus import MilvusDocumentStore, IndexType
from haystack.retriever.sparse import ElasticsearchRetriever, TfidfRetriever
from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever
from haystack.reader.farm import FARMReader
@ -39,6 +40,16 @@ def get_document_store(document_store_type, similarity='dot_product'):
client = Elasticsearch()
client.indices.delete(index='haystack_test*', ignore=[404])
document_store = ElasticsearchDocumentStore(index="eval_document", similarity=similarity, timeout=3000)
elif document_store_type in ("milvus_flat", "milvus_hnsw"):
if document_store_type == "milvus_flat":
index_type = IndexType.FLAT
index_param = None
search_param = None
elif document_store_type == "milvus_hnsw":
index_type = IndexType.HNSW
index_param = {"M": 64, "efConstruction": 80}
search_param = {"ef": 20}
document_store = MilvusDocumentStore(similarity=similarity, index_type=index_type, index_param=index_param, search_param=search_param)
assert document_store.get_document_count(index="eval_document") == 0
elif document_store_type in("faiss_flat", "faiss_hnsw"):
if document_store_type == "faiss_flat":
@ -54,7 +65,7 @@ def get_document_store(document_store_type, similarity='dot_product'):
shell=True)
time.sleep(6)
status = subprocess.run(
['docker exec -it haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
['docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'], shell=True)
time.sleep(1)
document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
faiss_index_factory_str=index_type,