diff --git a/docs/_src/benchmarks/reader_performance.json b/docs/_src/benchmarks/reader_performance.json
index 41595406f..24209b35b 100644
--- a/docs/_src/benchmarks/reader_performance.json
+++ b/docs/_src/benchmarks/reader_performance.json
@@ -10,10 +10,35 @@
         "Speed (passages/sec)"
     ],
     "data": [
-        {"F1": 80.67985794671885, "Model": "RoBERTa", "Speed": 92.3039712094936},
-        {"F1": 78.23306265318686, "Model": "MiniLM", "Speed": 98.62387044489223},
-        {"F1": 74.90271600053505, "Model": "BERT base", "Speed": 99.92750782409666},
-        {"F1": 82.64545708097472, "Model": "BERT large", "Speed": 39.529824033964466},
-        {"F1": 85.26275190954586, "Model": "XLM-RoBERTa", "Speed": 39.29142006004379}
+        {
+            "F1": 82.62983412843887,
+            "Speed": 98.86638639776464,
+            "Model": "RoBERTa"
+        },
+        {
+            "F1": 78.90026641413856,
+            "Speed": 181.96379531485616,
+            "Model": "MiniLM"
+        },
+        {
+            "F1": 74.32668866064459,
+            "Speed": 106.04748306200683,
+            "Model": "BERT base"
+        },
+        {
+            "F1": 83.29492827667042,
+            "Speed": 40.408497243719076,
+            "Model": "BERT large"
+        },
+        {
+            "F1": 84.62174414643722,
+            "Speed": 40.483264542292716,
+            "Model": "XLM-RoBERTa"
+        },
+        {
+            "F1": 42.342513261953935,
+            "Speed": 160.41712955027901,
+            "Model": "DistilBERT"
+        }
     ]
 }
\ No newline at end of file
diff --git a/docs/_src/benchmarks/retriever_map.json b/docs/_src/benchmarks/retriever_map.json
index 3acda44be..8584e7065 100644
--- a/docs/_src/benchmarks/retriever_map.json
+++ b/docs/_src/benchmarks/retriever_map.json
@@ -1,77 +1,101 @@
 {
-  "chart_type": "LineChart",
-  "title": "Retriever Accuracy",
-  "subtitle": "mAP at different number of docs",
-  "description": "Here you can see how the mean avg. precision (mAP) of the retriever decays as the number of documents increases. The set up is the same as the above querying benchmark except that a varying number of negative documents are used to fill the document store.",
-  "columns": [
-    "n_docs",
-    "BM25 / ElasticSearch",
-    "DPR / ElasticSearch or FAISS (flat)",
-    "DPR / FAISS (HSNW)"
-  ],
-  "axis": [
-     { "x": "Number of docs", "y": "mAP" }
-   ],
-  "data": [
-    {
-        "model": "DPR / ElasticSearch or FAISS (flat)",
-        "n_docs": 1000,
-        "map": 0.929
-    },
-    {
-        "model": "DPR / ElasticSearch or FAISS (flat)",
-        "n_docs": 10000,
-        "map": 0.898
-    },
-    {
-        "model": "DPR / ElasticSearch or FAISS (flat)",
-        "n_docs": 100000,
-        "map": 0.863
-    },
-    {
-        "model": "DPR / ElasticSearch or FAISS (flat)",
-        "n_docs": 500000,
-        "map": 0.805
-    },
-    {
-        "model": "BM25 / ElasticSearch",
-        "n_docs": 1000,
-        "map": 0.748
-    },
-    {
-        "model": "BM25 / ElasticSearch",
-        "n_docs": 10000,
-        "map": 0.6609999999999999
-    },
-    {
-        "model": "BM25 / ElasticSearch",
-        "n_docs": 100000,
-        "map": 0.56
-    },
-    {
-        "model": "BM25 / ElasticSearch",
-        "n_docs": 500000,
-        "map": 0.452
-    },
-    {
-        "model": "DPR / FAISS (HSNW)",
-        "n_docs": 1000,
-        "map": 0.929
-    },
-    {
-        "model": "DPR / FAISS (HSNW)",
-        "n_docs": 10000,
-        "map": 0.8959999999999999
-    },
-    {
-        "model": "DPR / FAISS (HSNW)",
-        "n_docs": 100000,
-        "map": 0.8490000000000001
-    },
-    {
-        "model": "DPR / FAISS (HSNW)",
-        "n_docs": 500000,
-        "map": 0.7659999999999999
-    }
-  ]
+    "chart_type": "LineChart",
+    "title": "Retriever Accuracy",
+    "subtitle": "mAP at different number of docs",
+    "description": "Here you can see how the mean avg. precision (mAP) of the retriever decays as the number of documents increases. The set up is the same as the above querying benchmark except that a varying number of negative documents are used to fill the document store.",
+    "columns": [
+        "n_docs",
+        "BM25 / ElasticSearch",
+        "DPR / ElasticSearch",
+        "DPR / FAISS (flat)",
+        "DPR / FAISS (HSNW)"
+    ],
+    "axis": [
+        {
+            "x": "Number of docs",
+            "y": "mAP"
+        }
+    ],
+    "data": [
+        {
+            "model": "DPR / ElasticSearch",
+            "n_docs": 1000,
+            "map": 0.929
+        },
+        {
+            "model": "DPR / ElasticSearch",
+            "n_docs": 10000,
+            "map": 0.881
+        },
+        {
+            "model": "DPR / ElasticSearch",
+            "n_docs": 100000,
+            "map": 0.821
+        },
+        {
+            "model": "DPR / ElasticSearch",
+            "n_docs": 500000,
+            "map": 0.730
+        },
+        {
+            "model": "DPR / FAISS (flat)",
+            "n_docs": 1000,
+            "map": 0.929
+        },
+        {
+            "model": "DPR / FAISS (flat)",
+            "n_docs": 10000,
+            "map": 0.898
+        },
+        {
+            "model": "DPR / FAISS (flat)",
+            "n_docs": 100000,
+            "map": 0.863
+        },
+        {
+            "model": "DPR / FAISS (flat)",
+            "n_docs": 500000,
+            "map": 0.805
+        },
+        {
+            "model": "BM25 / ElasticSearch",
+            "n_docs": 1000,
+            "map": 0.748
+        },
+        {
+            "model": "BM25 / ElasticSearch",
+            "n_docs": 10000,
+            "map": 0.6609999999999999
+        },
+        {
+            "model": "BM25 / ElasticSearch",
+            "n_docs": 100000,
+            "map": 0.56
+        },
+        {
+            "model": "BM25 / ElasticSearch",
+            "n_docs": 500000,
+            "map": 0.452
+        },
+        {
+            "model": "DPR / FAISS (HSNW)",
+            "n_docs": 1000,
+            "map": 0.929
+        },
+        {
+            "model": "DPR / FAISS (HSNW)",
+            "n_docs": 10000,
+            "map": 0.896
+        },
+        {
+            "model": "DPR / FAISS (HSNW)",
+            "n_docs": 100000,
+            "map": 0.849
+        },
+        {
+            "model": "DPR / FAISS (HSNW)",
+            "n_docs": 500000,
+            "map": 0.766
+        }
+    ]
 }
\ No newline at end of file
diff --git a/docs/_src/benchmarks/retriever_performance.json b/docs/_src/benchmarks/retriever_performance.json
index 944eac526..f4985dade 100644
--- a/docs/_src/benchmarks/retriever_performance.json
+++ b/docs/_src/benchmarks/retriever_performance.json
@@ -1,54 +1,53 @@
 {
-  "chart_type": "BarChart",
-  "title": "Retriever Performance",
-  "subtitle": "Time and Accuracy Benchmarks",
-  "description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>)). \n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embedding are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
-  "bars": "horizontal",
-  "columns": [
-    "Model",
-    "mAP",
-    "Index Speed (docs/sec)",
-    "Query Speed (queries/sec)"
-  ],
-  "series": {
-    "s0": "map",
-    "s1": "time",
-    "s2": "time"
-  },
-  "axes": {
-    "label": "map",
-    "time_side": "top",
-    "time_label": "seconds"
-  },
-  "data": [
-    {
-      "model": "DPR / ElasticSearch",
-      "n_docs": 100000,
-      "index_speed": 79.54165185,
-      "query_speed": 6.5360000000000005,
-      "map": 86.3
+    "chart_type": "BarChart",
+    "title": "Retriever Performance",
+    "subtitle": "Time and Accuracy Benchmarks",
+    "description": "Comparison of the speed and accuracy of different DocumentStore / Retriever combinations on 100k documents. <b>Indexing speed</b> (in docs/sec) refers to how quickly Documents can be inserted into a DocumentStore. <b>Querying speed</b> (in queries/sec) refers to the speed at which the system returns relevant Documents when presented with a query.\n\nThe dataset used is Wikipedia, split into 100 word passages (from <a href='https://github.com/facebookresearch/DPR/blob/master/data/download_data.py'>here</a>).\n\nFor querying, we use the Natural Questions development set in combination with the wiki passages. The Document Store is populated with the 100 word passages in which the answer spans occur (i.e. gold passages) as well as a random selection of 100 word passages in which the answer spans do not occur (i.e. negative passages). We take a total of 100k gold and negative passages. Query and document embeddings are generated by the <i>\"facebook/dpr-question_encoder-single-nq-base\"</i> and <i>\"facebook/dpr-ctx_encoder-single-nq-base\"</i> models. The retriever returns 10 candidates and both the recall and mAP scores are calculated on these 10.\n\nFor FAISS HNSW, we use <i>n_links=128</i>, <i>efSearch=20</i> and <i>efConstruction=80</i>. Both index and query benchmarks are performed on an AWS P3.2xlarge instance which is accelerated by an Nvidia V100 GPU.",
+    "bars": "horizontal",
+    "columns": [
+        "Model",
+        "mAP",
+        "Index Speed (docs/sec)",
+        "Query Speed (queries/sec)"
+    ],
+    "series": {
+        "s0": "map",
+        "s1": "time",
+        "s2": "time"
     },
-    {
-      "model": "DPR / FAISS (flat)",
-      "n_docs": 100000,
-      "index_speed": 107.8662479,
-      "query_speed": 5.044,
-      "map": 86.3
+    "axes": {
+        "label": "map",
+        "time_side": "top",
+        "time_label": "seconds"
     },
-    {
-      "model": "BM25 / ElasticSearch",
-      "n_docs": 100000,
-      "index_speed": 476.9143596,
-      "query_speed": 162.996,
-      "map": 56.0
-    },
-    {
-      "model": "DPR / FAISS (HSNW)",
-      "n_docs": 100000,
-      "index_speed": 92.24548333,
-      "query_speed": 12.815,
-      "map": 84.9
-    }
-  ]
-  
-}
+    "data": [
+        {
+            "model": "DPR / ElasticSearch",
+            "n_docs": 100000,
+            "index_speed": 73.93635160290218,
+            "query_speed": 6.23,
+            "map": 82.1
+        },
+        {
+            "model": "DPR / FAISS (flat)",
+            "n_docs": 100000,
+            "index_speed": 104.77116699738369,
+            "query_speed": 4.90,
+            "map": 86.3
+        },
+        {
+            "model": "BM25 / ElasticSearch",
+            "n_docs": 100000,
+            "index_speed": 484.32931514144724,
+            "query_speed": 162.59,
+            "map": 56
+        },
+        {
+            "model": "DPR / FAISS (HSNW)",
+            "n_docs": 100000,
+            "index_speed": 91.41086878008392,
+            "query_speed": 12.85,
+            "map": 84.9
+        }
+    ]
+}
\ No newline at end of file
diff --git a/docs/_src/benchmarks/retriever_speed.json b/docs/_src/benchmarks/retriever_speed.json
index 568cf68cc..8c68cea84 100644
--- a/docs/_src/benchmarks/retriever_speed.json
+++ b/docs/_src/benchmarks/retriever_speed.json
@@ -5,16 +5,18 @@
     "description": "Here you can see how the query speed of different Retriever / DocumentStore combinations scale as the number of documents increases. The set up is the same as the above querying benchmark except that a varying number of negative documents are used to fill the document store.",
     "columns": [
         "n_docs",
-        "BM25 / ElasticSearch", 
+        "BM25 / ElasticSearch",
         "DPR / ElasticSearch",
         "DPR / FAISS (flat)",
         "DPR / FAISS (HSNW)"
     ],
-   "axis": [
-     { "x": "Number of docs", "y": "Queries/sec" }
-   ],
-    "data":
-    [
+    "axis": [
+        {
+            "x": "Number of docs",
+            "y": "Queries/sec"
+        }
+    ],
+    "data": [
         {
             "model": "DPR / ElasticSearch",
             "n_docs": 1000,
@@ -23,17 +25,17 @@
         {
             "model": "DPR / ElasticSearch",
             "n_docs": 10000,
-            "query_speed": 27.006999999999998
+            "query_speed": 24.8
         },
         {
             "model": "DPR / ElasticSearch",
             "n_docs": 100000,
-            "query_speed": 6.5360000000000005
+            "query_speed": 6.23
         },
         {
             "model": "DPR / ElasticSearch",
             "n_docs": 500000,
-            "query_speed": 1.514
+            "query_speed": 1.45
         },
         {
             "model": "DPR / FAISS (flat)",
@@ -43,17 +45,17 @@
         {
             "model": "DPR / FAISS (flat)",
             "n_docs": 10000,
-            "query_speed": 23.976999999999997
+            "query_speed": 22.47
         },
         {
             "model": "DPR / FAISS (flat)",
             "n_docs": 100000,
-            "query_speed": 5.044
+            "query_speed": 4.90
         },
         {
             "model": "DPR / FAISS (flat)",
             "n_docs": 500000,
-            "query_speed": 1.091
+            "query_speed": 1.08
         },
         {
             "model": "BM25 / ElasticSearch",
@@ -63,17 +65,17 @@
         {
             "model": "BM25 / ElasticSearch",
             "n_docs": 10000,
-            "query_speed": 167.81
+            "query_speed": 248.97
         },
         {
             "model": "BM25 / ElasticSearch",
             "n_docs": 100000,
-            "query_speed": 162.996
+            "query_speed": 162.59
         },
         {
             "model": "BM25 / ElasticSearch",
             "n_docs": 500000,
-            "query_speed": 95.491
+            "query_speed": 91.39
         },
         {
             "model": "DPR / FAISS (HSNW)",
@@ -83,17 +85,17 @@
         {
             "model": "DPR / FAISS (HSNW)",
             "n_docs": 10000,
-            "query_speed": 33.421
+            "query_speed": 31.34
         },
         {
             "model": "DPR / FAISS (HSNW)",
             "n_docs": 100000,
-            "query_speed": 12.815
+            "query_speed": 12.85
         },
         {
             "model": "DPR / FAISS (HSNW)",
             "n_docs": 500000,
-            "query_speed": 3.259
+            "query_speed": 3.32
         }
     ]
 }
\ No newline at end of file
diff --git a/test/benchmarks/reader_results.csv b/test/benchmarks/reader_results.csv
index 5fc081050..9b9411a83 100644
--- a/test/benchmarks/reader_results.csv
+++ b/test/benchmarks/reader_results.csv
@@ -1,6 +1,7 @@
 ,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error
-0,0.7589752233271532,0.8067985794671885,0.9671329849991572,5,133.79706027999998,0.011275666634080564,92.30397120949361,deepset/roberta-base-squad2,
-1,0.7359683128265633,0.7823306265318686,0.9714309792684982,5,125.22323393199997,0.010553112584864317,98.62387044489225,deepset/minilm-uncased-squad2,
-2,0.700825889094893,0.7490271600053505,0.9585369964604753,5,123.58959278499992,0.010415438461570867,99.92750782409666,deepset/bert-base-cased-squad2,
-3,0.7821506826226192,0.8264545708097472,0.9762346199224675,5,312.42233685099995,0.026329204184308102,39.529824033964466,deepset/bert-large-uncased-whole-word-masking-squad2,
-4,0.8099612337771785,0.8526275190954586,0.9772459126917242,5,314.3179854819998,0.026488958830439897,39.29142006004379,deepset/xlm-roberta-large-squad2,
\ No newline at end of file
+0,0.7836676217765043,0.8262983412843887,0.9742963087813922,5,124.91606550999859,0.01052722615118815,98.86638639776463,deepset/roberta-base-squad2,
+1,0.7439743805831789,0.7890026641413856,0.9720209000505646,5,67.87064415001078,0.005719757639475036,181.96379531485616,deepset/minilm-uncased-squad2,
+2,0.6947581324793528,0.7432668866064459,0.9557559413450194,5,116.45726653200109,0.009814365964267747,106.04748306200683,deepset/bert-base-cased-squad2,
+3,0.7900724759817968,0.8329492827667042,0.976908815101972,5,305.62878707199707,0.02575668187021718,40.40849724371908,deepset/bert-large-uncased-whole-word-masking-squad2,
+4,0.803472105174448,0.846217441464372,0.9742120343839542,5,305.06433064700104,0.025709112645120602,40.48326454229272,deepset/xlm-roberta-large-squad2,
+5,0.3730827574582842,0.42342513261953935,0.9539019046013821,5,76.98679084099422,0.006488015408814615,160.417129550279,distilbert-base-uncased-distilled-squad,
diff --git a/test/benchmarks/retriever.py b/test/benchmarks/retriever.py
index 4bd5d8671..f871dfaca 100644
--- a/test/benchmarks/retriever.py
+++ b/test/benchmarks/retriever.py
@@ -167,8 +167,12 @@ def benchmark_querying(n_docs_options,
                     "error": None
                 }
 
-                doc_store.delete_all_documents(index=doc_index)
-                doc_store.delete_all_documents(index=label_index)
+                logger.info("Deleting all docs from this run ...")
+                if isinstance(doc_store, FAISSDocumentStore):
+                    doc_store.session.close()
+                else:
+                    doc_store.delete_all_documents(index=doc_index)
+                    doc_store.delete_all_documents(index=label_index)
                 time.sleep(5)
                 del doc_store
                 del retriever
@@ -190,8 +194,12 @@ def benchmark_querying(n_docs_options,
                     "date_time": datetime.datetime.now(),
                     "error": str(tb)
                 }
-                doc_store.delete_all_documents(index=doc_index)
-                doc_store.delete_all_documents(index=label_index)
+                logger.info("Deleting all docs from this run ...")
+                if isinstance(doc_store, FAISSDocumentStore):
+                    doc_store.session.close()
+                else:
+                    doc_store.delete_all_documents(index=doc_index)
+                    doc_store.delete_all_documents(index=label_index)
                 time.sleep(5)
                 del doc_store
                 del retriever
diff --git a/test/benchmarks/retriever_index_results.csv b/test/benchmarks/retriever_index_results.csv
index 85b368e76..1a9253e30 100644
--- a/test/benchmarks/retriever_index_results.csv
+++ b/test/benchmarks/retriever_index_results.csv
@@ -1,13 +1,13 @@
-retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,Notes
-dpr,elasticsearch,1000,14.16526405,70.59522482,2020-10-08 10:30:56,
-elastic,elasticsearch,1000,5.805040058,172.2640998,2020-10-08 10:30:25,
-elastic,elasticsearch,10000,22.56448254,443.1743553,2020-10-08 13:01:09,
-dpr,elasticsearch,10000,126.2442168,79.21154929,2020-10-08 13:03:32,
-dpr,elasticsearch,100000,1257.202958,79.54165185,2020-10-08 13:28:16,
-elastic,elasticsearch,100000,209.681252,476.9143596,2020-10-08 13:07:05,
-dpr,faiss_flat,1000,8.223732258,121.5992895,44112.24392,
-dpr,faiss_flat,10000,89.72649358,111.4498026,44112.24663,
-dpr,faiss_flat,100000,927.0740565,107.8662479,44112.56656,
-dpr,faiss_hnsw,1000,8.86507699,112.8021788,44113.37262,"hnsw 128,20,80"
-dpr,faiss_hnsw,10000,100.1804832,99.81984193,44113.37413,"hnsw 128,20,80"
-dpr,faiss_hnsw,100000,1084.063917,92.24548333,44113.38721,"hnsw 128,20,80"
+,retriever,doc_store,n_docs,indexing_time,docs_per_second,date_time,error
+1,dpr,elasticsearch,10000,135.8048727600035,73.63506033890373,2020-12-02 06:51:48.587178,
+5,dpr,elasticsearch,100000,1352.514667440999,73.93635160290218,2020-12-02 07:23:04.264694,
+9,dpr,elasticsearch,500000,6781.024389943996,73.7351720400064,2020-12-02 10:10:42.147031,
+0,elastic,elasticsearch,10000,20.694342684997537,483.223852635317,2020-12-02 06:49:00.317977,
+4,elastic,elasticsearch,100000,206.47108666299755,484.32931514144724,2020-12-02 06:59:54.055199,
+8,elastic,elasticsearch,500000,1032.1480222880054,484.4266415311529,2020-12-02 08:16:15.828533,
+2,dpr,faiss_flat,10000,95.10171413100034,105.15057579535569,2020-12-02 06:53:59.472952,
+6,dpr,faiss_flat,100000,954.4610684969957,104.77116699738367,2020-12-02 07:39:56.194345,
+10,dpr,faiss_flat,500000,4865.149988802004,102.77175444761984,2020-12-02 11:34:34.726687,
+3,dpr,faiss_hnsw,10000,103.25490099400486,96.84770314757859,2020-12-02 06:56:14.230579,
+7,dpr,faiss_hnsw,100000,1093.9618158599915,91.41086878008392,2020-12-02 07:58:43.508489,
+11,dpr,faiss_hnsw,500000,5784.850161597002,86.43266221816312,2020-12-02 13:11:43.328380,
diff --git a/test/benchmarks/retriever_query_results.csv b/test/benchmarks/retriever_query_results.csv
index 7af04f620..c5759ff1a 100644
--- a/test/benchmarks/retriever_query_results.csv
+++ b/test/benchmarks/retriever_query_results.csv
@@ -1,17 +1,13 @@
-retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error,name,note
-dpr,elasticsearch,1000,1085,26.592,40.802,0.025,0.991,0.929,10,2020-10-07 15:06:57,,dpr-elasticsearch,
-dpr,elasticsearch,10000,5791,214.425,27.007,0.037,0.975,0.898,10,2020-10-07 15:11:35,,dpr-elasticsearch,
-dpr,elasticsearch,100000,5791,886.045,6.536,0.153,0.958,0.863,10,2020-10-07 15:30:52,,dpr-elasticsearch,
-dpr,elasticsearch,500000,5791,3824.624,1.514,0.660,0.930,0.805,10,2020-10-07 17:44:02,,dpr-elasticsearch,
-dpr,faiss_flat,1000,1085,27.092,40.048,0.025,0.991,0.929,10,2020-10-07 13:06:35,,dpr-faiss_flat,
-dpr,faiss_flat,10000,5791,241.524,23.977,0.042,0.975,0.898,10,2020-10-07 13:17:21,,dpr-faiss_flat,
-dpr,faiss_flat,100000,5791,1148.181,5.044,0.198,0.958,0.863,10,2020-10-07 14:04:51,,dpr-faiss_flat,
-dpr,faiss_flat,500000,5791,5308.016,1.091,0.917,0.930,0.805,10,2020-10-08 10:01:32,,dpr-faiss_flat,
-elastic,elasticsearch,1000,1085,4.657,232.978,0.004,0.891,0.748,10,2020-10-07 13:04:47,,elastic-elasticsearch,
-elastic,elasticsearch,10000,5791,34.509,167.810,0.006,0.811,0.661,10,2020-10-07 13:07:52,,elastic-elasticsearch,
-elastic,elasticsearch,100000,5791,35.529,162.996,0.006,0.717,0.560,10,2020-10-07 13:21:48,,elastic-elasticsearch,
-elastic,elasticsearch,500000,5791,60.645,95.491,0.010,0.624,0.452,10,2020-10-07 16:14:52,,elastic-elasticsearch,
-dpr,faiss_hnsw,1000,1085,28.640,37.884,0.026,0.991,0.929,10,2020-10-09 07:19:29,,dpr-faiss_hnsw,"128,20,80"
-dpr,faiss_hnsw,10000,5791,173.272,33.421,0.030,0.972,0.896,10,2020-10-09 07:23:28,,dpr-faiss_hnsw,"128,20,80"
-dpr,faiss_hnsw,100000,5791,451.884,12.815,0.078,0.940,0.849,10,2020-10-09 07:37:56,,dpr-faiss_hnsw,"128,20,80"
-dpr,faiss_hnsw,500000,5791,1777.023,3.259,0.307,0.882,0.766,10,2020-10-09,,dpr-faiss_hnsw,"128,20,80"
+,retriever,doc_store,n_docs,n_queries,retrieve_time,queries_per_second,seconds_per_query,recall,map,top_k,date_time,error
+1,dpr,elasticsearch,10000,5791,233.54168710688828,24.796429587106445,0.040328386652890395,0.9690899671904679,0.8808447974826822,10,2020-12-02 13:18:27.808539,
+5,dpr,elasticsearch,100000,5791,928.9148432369257,6.234155953220104,0.1604066384453334,0.9397340701087895,0.8212235461156204,10,2020-12-02 13:53:44.689757,
+9,dpr,elasticsearch,500000,5791,3992.798643678747,1.45036114184423,0.6894834473629333,0.8919012260404076,0.7302081363253893,10,2020-12-02 17:35:25.795083,
+0,elastic,elasticsearch,10000,5791,23.260322959773475,248.9647289083211,0.00401663321702184,0.8103954412018649,0.6609973604361457,10,2020-12-02 13:13:03.957613,
+4,elastic,elasticsearch,100000,5791,35.61682877641579,162.59167924109505,0.006150376234918976,0.7168019340355725,0.559593430418849,10,2020-12-02 13:33:30.417021,
+8,elastic,elasticsearch,500000,5791,63.36918604133825,91.38510941614904,0.010942701785760362,0.6238991538594371,0.45245893326535686,10,2020-12-02 16:08:13.070376,
+2,dpr,faiss_flat,10000,5791,257.67369354520633,22.474160712040344,0.044495543696288435,0.9746157831117251,0.8978985590667505,10,2020-12-02 13:23:51.002905,
+6,dpr,faiss_flat,100000,5791,1182.7107160334417,4.896379073508164,0.2042325532780939,0.9575202901053359,0.8630120493486063,10,2020-12-02 14:18:14.837806,
+3,dpr,faiss_hnsw,10000,5791,184.7552210999711,31.34417509568776,0.03190385444655001,0.972198238646175,0.8961883245210815,10,2020-12-02 13:28:33.415220,
+7,dpr,faiss_hnsw,100000,5791,450.7693457186833,12.84692505158515,0.0778396383558424,0.9399067518563288,0.8486882354392283,10,2020-12-02 15:10:44.114148,
+10,dpr,faiss_flat,500000,5791,5365.806154628852,1.0792413727067556,0.9265767837383616,0.9295458470039717,0.8045832613826054,10,2020-12-02 23:14:44.503864,
+11,dpr,faiss_hnsw,500000,5791,1745.922715222303,3.3168707580865915,0.30148898553312087,0.8820583664306683,0.765677378416975,10,2020-12-03 00:18:53.376265,
diff --git a/test/benchmarks/templates.py b/test/benchmarks/templates.py
index bef937439..c33e858d7 100644
--- a/test/benchmarks/templates.py
+++ b/test/benchmarks/templates.py
@@ -25,12 +25,12 @@ RETRIEVER_TEMPLATE = {
     "Query Speed (queries/sec)"
   ],
   "series": {
-    "s0": "recall",
+    "s0": "map",
     "s1": "time",
     "s2": "time"
   },
   "axes": {
-    "label": "recall",
+    "label": "map",
     "time_side": "top",
     "time_label": "seconds"
   },