mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-23 22:09:00 +00:00
149 lines
5.7 KiB
Python
149 lines
5.7 KiB
Python
from pathlib import Path
|
|
from time import perf_counter
|
|
import logging
|
|
import datetime
|
|
import traceback
|
|
from typing import Dict
|
|
|
|
from haystack.nodes import BaseRetriever
|
|
from haystack import Pipeline
|
|
from haystack.utils import aggregate_labels
|
|
|
|
from utils import load_eval_data, get_retriever_config
|
|
|
|
|
|
def benchmark_retriever(
    indexing_pipeline: Pipeline, querying_pipeline: Pipeline, documents_directory: Path, eval_set: Path
) -> Dict:
    """
    Run the indexing benchmark and then the querying benchmark for a retriever setup.

    :param indexing_pipeline: Pipeline for indexing documents.
    :param querying_pipeline: Pipeline for querying documents.
    :param documents_directory: Directory containing files to index.
    :param eval_set: Path to evaluation set.
    :return: Dict holding the indexing results under "indexing" and the querying results under "querying".
    """
    # Indexing goes first: the querying benchmark expects indexing to have been done already.
    indexing_report = benchmark_indexing(indexing_pipeline, documents_directory)
    querying_report = benchmark_querying(querying_pipeline, eval_set)
    return {"indexing": indexing_report, "querying": querying_report}
|
|
|
|
|
|
def benchmark_indexing(pipeline: Pipeline, documents_directory: Path) -> Dict:
    """
    Time one indexing run of the pipeline over the files found in a directory.

    An exception raised inside the measured section is logged and reported through the
    "error" entry of the returned dict; the numeric fields are set to 0 in that case.

    :param pipeline: Pipeline for indexing documents.
    :param documents_directory: Directory containing files to index.
    :return: Dict with the retriever and document store class names, the number of files passed to
        the pipeline ("n_docs"), the elapsed time, the throughput, a timestamp and the error
        traceback (None on success).
    """

    def component_names():
        # Class names of the first retriever node and of the document store,
        # falling back to placeholder strings when the pipeline has none.
        retriever_nodes = pipeline.get_nodes_by_class(BaseRetriever)
        if retriever_nodes:
            retriever_name = retriever_nodes[0].__class__.__name__
        else:
            retriever_name = "No component of type BaseRetriever found"
        document_store = pipeline.get_document_store()
        if document_store:
            store_name = document_store.__class__.__name__
        else:
            store_name = "No DocumentStore found"
        return retriever_name, store_name

    try:
        # Indexing Pipelines take a list of file paths as input; entries starting with "." are skipped
        visible_files = []
        for entry in documents_directory.iterdir():
            if entry.is_file() and not entry.name.startswith("."):
                visible_files.append(str(entry))

        # Only the run_batch call itself is timed
        started = perf_counter()
        pipeline.run_batch(file_paths=visible_files)
        elapsed = perf_counter() - started

        file_count = len(visible_files)
        retriever_name, store_name = component_names()
        report = {
            "retriever": retriever_name,
            "doc_store": store_name,
            "n_docs": file_count,
            "indexing_time": elapsed,
            "docs_per_second": file_count / elapsed,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": None,
        }
    except Exception:
        trace = traceback.format_exc()
        logging.error("##### The following Error was raised while running indexing run:")
        logging.error(trace)
        retriever_name, store_name = component_names()
        report = {
            "retriever": retriever_name,
            "doc_store": store_name,
            "n_docs": 0,
            "indexing_time": 0,
            "docs_per_second": 0,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(trace),
        }

    return report
|
|
|
|
|
|
def benchmark_querying(pipeline: Pipeline, eval_set: Path) -> Dict:
    """
    Benchmark querying. This method should only be called if indexing has already been done.

    An exception raised while loading the data, running the queries or evaluating them is logged
    and reported through the "error" entry of the returned dict; the numeric fields are set to 0
    in that case.

    :param pipeline: Pipeline for querying documents.
    :param eval_set: Path to evaluation set.
    :return: Dict with the retriever and document store names, document and query counts, timing
        figures ("querying_time", "queries_per_second", "seconds_per_query"), the retriever's
        "recall" and "map" metrics, the configured "top_k", a timestamp and the error traceback
        (None on success).
    """
    try:
        # Load eval data
        labels, _ = load_eval_data(eval_set)
        multi_labels = aggregate_labels(labels)
        queries = [label.query for label in multi_labels]

        # Run querying; only the run_batch call is timed
        start_time = perf_counter()
        predictions = pipeline.run_batch(queries=queries, labels=multi_labels, debug=True)
        end_time = perf_counter()
        querying_time = end_time - start_time

        # Evaluate predictions
        eval_result = pipeline._generate_eval_result_from_batch_preds(predictions_batches=predictions)
        metrics = eval_result.calculate_metrics()["Retriever"]

        retriever_type, retriever_top_k = get_retriever_config(pipeline)
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        # NOTE(review): the query count is len(labels), while the queries actually run are built
        # from the aggregated multi_labels — confirm these are meant to be the same number.
        n_queries = len(labels)
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": doc_store.get_document_count(),
            "n_queries": n_queries,
            "querying_time": querying_time,
            "queries_per_second": n_queries / querying_time,
            "seconds_per_query": querying_time / n_queries,
            "recall": metrics["recall_single_hit"],
            "map": metrics["map"],
            "top_k": retriever_top_k,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": None,
        }

    except Exception:
        tb = traceback.format_exc()
        logging.error("##### The following Error was raised while running querying run:")
        logging.error(tb)
        retriever_type, retriever_top_k = get_retriever_config(pipeline)
        doc_store = pipeline.get_document_store()
        doc_store_type = doc_store.__class__.__name__ if doc_store else "No DocumentStore found"
        results = {
            "retriever": retriever_type,
            "doc_store": doc_store_type,
            "n_docs": 0,
            "n_queries": 0,
            # Same key as the success path, so consumers can read "querying_time" unconditionally.
            "querying_time": 0,
            # Legacy key previously emitted only on the error path; kept for backward compatibility.
            "retrieve_time": 0,
            "queries_per_second": 0,
            "seconds_per_query": 0,
            "recall": 0,
            "map": 0,
            "top_k": retriever_top_k,
            "date_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(tb),
        }

    return results
|