haystack/test/benchmarks/results_to_json.py
Branden Chan 1cebcb7dda
Create time and performance benchmarks for all readers and retrievers (#339)
* add time and perf benchmark for es

* Add retriever benchmarking

* Add Reader benchmarking

* add nq to squad conversion

* add conversion stats

* clean benchmarks

* Add link to dataset

* Update imports

* add first support for neg psgs

* Refactor test

* set max_seq_len

* cleanup benchmark

* begin retriever speed benchmarking

* Add support for retriever query index benchmarking

* improve reader eval, retriever speed benchmarking

* improve retriever speed benchmarking

* Add retriever accuracy benchmark

* Add neg doc shuffling

* Add top_n

* 3x speedup of SQL. add postgres docker run. make shuffle neg a param. add more logging

* Add models to sweep

* add option for faiss index type

* remove unneeded line

* change faiss to faiss_flat

* begin automatic benchmark script

* remove existing postgres docker for benchmarking

* Add data processing scripts

* Remove shuffle in script bc data already shuffled

* switch hnsw setup from 256 to 128

* change es similarity to dot product by default

* Error includes stack trace

* Change ES default timeout

* remove delete_docs() from timing for indexing

* Add support for website export

* update website on push to benchmarks

* add complete benchmarks results

* new json format

* removed NaN as it is not a valid JSON token

* fix benchmarking for faiss hnsw queries. do sql calls in update_embeddings() as batches

* update benchmarks for hnsw 128,20,80

* don't delete full index in delete_all_documents()

* update texts for charts

* update recall column for retriever

* change scale and add units to desc

* add units to legend

* add axis titles. update desc

* add html tags

Co-authored-by: deepset <deepset@Crenolape.localdomain>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
Co-authored-by: PiffPaffM <markuspaff.mp@gmail.com>
2020-10-12 13:34:42 +02:00

101 lines
2.6 KiB
Python

import json
import pandas as pd
from pprint import pprint
def reader():
    """Print the reader benchmark results as a JSON list of records.

    Reads ``reader_results.csv`` from the working directory, keeps only
    the F1 / speed / model columns, maps the full model identifiers to
    short display names, renames the columns for presentation, and dumps
    the rows to stdout as JSON.
    """
    model_rename_map = {
        'deepset/roberta-base-squad2': "RoBERTa",
        'deepset/minilm-uncased-squad2': "MiniLM",
        'deepset/bert-base-cased-squad2': "BERT base",
        'deepset/bert-large-uncased-whole-word-masking-squad2': "BERT large",
        'deepset/xlm-roberta-large-squad2': "XLM-RoBERTa",
    }

    # source column -> display column; also defines the column order kept below
    column_name_map = {
        "f1": "F1",
        "passages_per_second": "Speed",
        "reader": "Model"
    }

    df = pd.read_csv("reader_results.csv")
    # A single selection on list(column_name_map) suffices; the original
    # selected the same three columns twice.
    df = df[list(column_name_map)]
    df["reader"] = df["reader"].map(model_rename_map)
    df = df.rename(columns=column_name_map)

    ret = [dict(row) for _, row in df.iterrows()]
    print("Reader overview")
    print(json.dumps(ret, indent=2))
def retriever():
    """Merge the retriever index/query benchmark CSVs and print three JSON summaries.

    Reads ``retriever_index_results.csv`` and ``retriever_query_results.csv``,
    joins them on (retriever, doc_store, n_docs), maps internal names to
    display names, and prints the overview, MAP and speed tables to stdout.
    """
    # source column -> output column; also defines the column order kept below
    column_name_map = {
        "model": "model",
        "n_docs": "n_docs",
        "docs_per_second": "index_speed",
        "queries_per_second": "query_speed",
        "map": "map"
    }

    # internal identifiers -> display names (for both retriever and doc_store)
    name_cleaning = {
        "dpr": "DPR",
        "elastic": "BM25",
        "elasticsearch": "ElasticSearch",
        "faiss": "FAISS",
        "faiss_flat": "FAISS (flat)",
        "faiss_hnsw": "FAISS (HNSW)"  # fixed typo: was "HSNW"
    }

    index = pd.read_csv("retriever_index_results.csv")
    query = pd.read_csv("retriever_query_results.csv")
    # Right merge keeps every query-benchmark row even when the matching
    # indexing stats are absent. Keys are identical on both sides, so a
    # single `on=` replaces the duplicated left_on/right_on lists.
    df = pd.merge(index, query, how="right",
                  on=["retriever", "doc_store", "n_docs"])
    df["retriever"] = df["retriever"].map(name_cleaning)
    df["doc_store"] = df["doc_store"].map(name_cleaning)
    df["model"] = df["retriever"] + " / " + df["doc_store"]
    df = df[list(column_name_map)]
    df = df.rename(columns=column_name_map)

    print("Retriever overview")
    print(retriever_overview(df))
    print("Retriever MAP")
    print(retriever_map(df))
    print("Retriever Speed")
    print(retriever_speed(df))
def retriever_map(df):
    """Return a JSON table (header row followed by data rows) of mAP per model and corpus size."""
    header = ["model", "n_docs", "map"]
    body = df[header].values.tolist()
    return json.dumps([header] + body, indent=4)
def retriever_speed(df):
    """Return a JSON table (header row followed by data rows) of query speed per model and corpus size."""
    header = ["model", "n_docs", "query_speed"]
    table = [header]
    for _, record in df[header].iterrows():
        table.append(list(record))
    return json.dumps(table, indent=4)
def retriever_overview(df, chosen_n_docs=100_000):
    """Return the rows benchmarked at ``chosen_n_docs`` documents as a JSON list of records."""
    at_scale = df.loc[df["n_docs"] == chosen_n_docs]
    records = []
    for _, record in at_scale.iterrows():
        records.append(dict(record))
    return json.dumps(records, indent=2)
# Script entry point: print the reader and retriever benchmark summaries.
if __name__ == "__main__":
    reader()
    retriever()