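# Benchmark script for Haystack readers: evaluates a set of extractive QA
# models on the SQuAD 2.0 dev set and records accuracy and speed metrics.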
from utils import get_document_store, index_to_doc_store, get_reader
from haystack.preprocessor.utils import eval_data_from_file
from pathlib import Path

import pandas as pd
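# get_document_store, index_to_doc_store and get_reader are shared helpers,
# presumably defined in the utils.py that sits next to this script in the
# benchmarks directory.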
reader_models = ["deepset/roberta-base-squad2", "deepset/minilm-uncased-squad2",
                 "deepset/bert-base-cased-squad2", "deepset/bert-large-uncased-whole-word-masking-squad2",
                 "deepset/xlm-roberta-large-squad2", "distilbert-base-uncased-distilled-squad"]
reader_types = ["farm"]
data_dir = Path("../../data/squad20")
filename = "dev-v2.0.json"
# Note that this number is approximate - it was calculated using Bert Base Cased
# This number could vary when using a different tokenizer
n_passages = 12350
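# (Assumption for context: the passage count comes from splitting each document
# into windows of at most max_seq_len tokens, so it depends on how many tokens
# a given tokenizer produces per document.)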

doc_index = "eval_document"
label_index = "label"

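# benchmark_reader(): index the SQuAD 2.0 dev set (documents and labels) into
# Elasticsearch, run reader.eval() for every model in reader_models, attach
# throughput (passages_per_second), and write all results to reader_results.csv.
# If a model fails, a zero-score row is recorded with the error message in the
# "error" column so one failure does not abort the whole sweep.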
def benchmark_reader():
    reader_results = []
    doc_store = get_document_store("elasticsearch")
    docs, labels = eval_data_from_file(data_dir / filename)
    index_to_doc_store(doc_store, docs, None, labels)
    for reader_name in reader_models:
        for reader_type in reader_types:
            try:
                reader = get_reader(reader_name, reader_type)
                results = reader.eval(document_store=doc_store,
                                      doc_index=doc_index,
                                      label_index=label_index,
                                      device="cuda")
                results["passages_per_second"] = n_passages / results["reader_time"]
                results["reader"] = reader_name
                results["error"] = ""
                reader_results.append(results)
            except Exception as e:
                results = {"EM": 0.,
                           "f1": 0.,
                           "top_n_accuracy": 0.,
                           "top_n": 0,
                           "reader_time": 0.,
                           "passages_per_second": 0.,
                           "seconds_per_query": 0.,
                           "reader": reader_name,
                           "error": str(e)}
                reader_results.append(results)
    reader_df = pd.DataFrame.from_records(reader_results)
    reader_df.to_csv("reader_results.csv")


if __name__ == "__main__":
    benchmark_reader()
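
# Example of inspecting the output afterwards (a minimal sketch, assuming the
# script has been run and reader_results.csv sits in the working directory):
#
#   import pandas as pd
#   df = pd.read_csv("reader_results.csv", index_col=0)
#   print(df[["reader", "f1", "passages_per_second", "error"]]
#         .sort_values("f1", ascending=False))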