haystack/test/benchmarks/retriever.py

from pathlib import Path
from time import perf_counter
import logging
import datetime
import traceback
from typing import Dict
from haystack.nodes import BaseRetriever
from haystack import Pipeline
from haystack.utils import aggregate_labels
from utils import load_eval_data, get_retriever_config


def benchmark_retriever(
    indexing_pipeline: Pipeline, querying_pipeline: Pipeline, documents_directory: Path, eval_set: Path
) -> Dict:
"""
Benchmark indexing and querying on retriever pipelines on a given dataset.
:param indexing_pipeline: Pipeline for indexing documents.
:param querying_pipeline: Pipeline for querying documents.
:param documents_directory: Directory containing files to index.
:param eval_set: Path to evaluation set.
"""
# Indexing
indexing_results = benchmark_indexing(indexing_pipeline, documents_directory)

    # Querying
    querying_results = benchmark_querying(querying_pipeline, eval_set)
results = {"indexing": indexing_results, "querying": querying_results}
return results


def benchmark_indexing(pipeline: Pipeline, documents_directory: Path) -> Dict:
    """
    Benchmark indexing.

    :param pipeline: Pipeline for indexing documents.
    :param documents_directory: Directory containing files to index.
    """
    try:
        # Indexing Pipelines take a list of file paths as input
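        # (hidden files such as ".DS_Store" are skipped by the dot-file filter below)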
        file_paths = [str(fp) for fp in documents_directory.iterdir() if fp.is_file() and not fp.name.startswith(".")]
        # Indexing
        start_time = perf_counter()
        pipeline.run_batch(file_paths=file_paths)
        end_time = perf_counter()
        indexing_time = end_time - start_time
        n_docs = len(file_paths)
        retrievers = pipeline.get_nodes_by_class(BaseRetriever)
        retriever_type = retrievers[0].__class__.__name__ if retrievers else "No component of type BaseRetriever found"
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": n_docs,
            "indexing_time": indexing_time,
            "docs_per_second": n_docs / indexing_time,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": None,
        }
    except Exception:
        tb = traceback.format_exc()
        logging.error("##### The following error was raised during the indexing run:")
        logging.error(tb)
        retrievers = pipeline.get_nodes_by_class(BaseRetriever)
        retriever_type = retrievers[0].__class__.__name__ if retrievers else "No component of type BaseRetriever found"
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": 0,
            "indexing_time": 0,
            "docs_per_second": 0,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(tb),
        }
    return results


def benchmark_querying(pipeline: Pipeline, eval_set: Path) -> Dict:
    """
    Benchmark querying. This method should only be called if indexing has already been done.

    :param pipeline: Pipeline for querying documents.
    :param eval_set: Path to evaluation set.
    """
    try:
        # Load eval data
        labels, _ = load_eval_data(eval_set)
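        # aggregate_labels groups individual labels that share the same query into MultiLabel
        # objects, so each query is issued and evaluated only once below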
        multi_labels = aggregate_labels(labels)
        queries = [label.query for label in multi_labels]
        # Run querying
        start_time = perf_counter()
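        # debug=True makes run_batch() return per-node inputs/outputs ("_debug"), which the
        # eval-result generation below relies on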
        predictions = pipeline.run_batch(queries=queries, labels=multi_labels, debug=True)
        end_time = perf_counter()
        querying_time = end_time - start_time
        # Evaluate predictions
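        # calculate_metrics() returns metrics per pipeline node; "recall_single_hit" and "map"
        # are read from the Retriever node's entry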
        eval_result = pipeline._generate_eval_result_from_batch_preds(predictions_batches=predictions)
        metrics = eval_result.calculate_metrics()["Retriever"]
        retriever_type, retriever_top_k = get_retriever_config(pipeline)
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": doc_store.get_document_count(),
            "n_queries": len(labels),
            "querying_time": querying_time,
            "queries_per_second": len(labels) / querying_time,
            "seconds_per_query": querying_time / len(labels),
            "recall": metrics["recall_single_hit"],
            "map": metrics["map"],
            "top_k": retriever_top_k,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": None,
        }
    except Exception:
        tb = traceback.format_exc()
        logging.error("##### The following error was raised during the querying run:")
        logging.error(tb)
        retriever_type, retriever_top_k = get_retriever_config(pipeline)
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": 0,
            "n_queries": 0,
            "querying_time": 0,
            "queries_per_second": 0,
            "seconds_per_query": 0,
            "recall": 0,
            "map": 0,
            "top_k": retriever_top_k,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(tb),
        }
    return results
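

# Minimal usage sketch (illustrative only, not part of the benchmark suite): the YAML files
# and data paths below are hypothetical placeholders. Pipeline.load_from_yaml() is one way to
# obtain indexing/querying pipelines; adapt the paths and eval set to your own setup.
if __name__ == "__main__":
    import json

    indexing_pipeline = Pipeline.load_from_yaml(Path("indexing_pipeline.yaml"))
    querying_pipeline = Pipeline.load_from_yaml(Path("querying_pipeline.yaml"))

    benchmark_results = benchmark_retriever(
        indexing_pipeline=indexing_pipeline,
        querying_pipeline=querying_pipeline,
        documents_directory=Path("data/documents"),
        eval_set=Path("data/eval_set.json"),
    )
    print(json.dumps(benchmark_results, indent=2))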