haystack/test/benchmarks/run.py

from pathlib import Path
from typing import Dict
import argparse
import json

from haystack import Pipeline
from haystack.pipelines.config import read_pipeline_config_from_yaml

from utils import prepare_environment, contains_reader, contains_retriever
from reader import benchmark_reader
from retriever import benchmark_retriever
from retriever_reader import benchmark_retriever_reader


def run_benchmark(pipeline_yaml: Path) -> Dict:
    """
    Run benchmarking on a given pipeline. Pipeline can be a retriever, reader, or retriever-reader pipeline.
    In case of retriever or retriever-reader pipelines, indexing is also benchmarked, so the config file must
    contain an indexing pipeline as well.

    :param pipeline_yaml: Path to pipeline YAML config. The config file should contain a benchmark_config section where
                          the following parameters are specified:
                            - documents_directory: Directory containing files to index.
                            - labels_file: Path to evaluation set.
                            - data_url (optional): URL to download the data from. Downloaded data will be stored in
                                                   the directory `data/`.
    """
    pipeline_config = read_pipeline_config_from_yaml(pipeline_yaml)
    benchmark_config = pipeline_config.pop("benchmark_config", {})

    # Prepare environment
    prepare_environment(pipeline_config, benchmark_config)
    labels_file = Path(benchmark_config["labels_file"])

    querying_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="querying")
    pipeline_contains_reader = contains_reader(querying_pipeline)
    pipeline_contains_retriever = contains_retriever(querying_pipeline)

    # Retriever-Reader pipeline
    if pipeline_contains_retriever and pipeline_contains_reader:
        documents_dir = Path(benchmark_config["documents_directory"])
        indexing_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="indexing")

        results = benchmark_retriever_reader(indexing_pipeline, querying_pipeline, documents_dir, labels_file)

    # Retriever pipeline
    elif pipeline_contains_retriever:
        documents_dir = Path(benchmark_config["documents_directory"])
        indexing_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="indexing")

        results = benchmark_retriever(indexing_pipeline, querying_pipeline, documents_dir, labels_file)

    # Reader pipeline
    elif pipeline_contains_reader:
        results = benchmark_reader(querying_pipeline, labels_file)

    # Unsupported pipeline type
    else:
        raise ValueError("Pipeline must be a retriever, reader, or retriever-reader pipeline.")

    pipeline_config["benchmark_config"] = benchmark_config
    results["config"] = pipeline_config
    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("config", type=str, help="Path to pipeline YAML config.")
    parser.add_argument("--output", type=str, help="Path to output file.")
    args = parser.parse_args()

    config_file = Path(args.config)
    output_file = f"{config_file.stem}_results.json" if args.output is None else args.output
    results = run_benchmark(config_file)
    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
refactor: Adapt running benchmarks (#5007) * Generate eval result in separate method * Adapt benchmarking utils * Adapt running retriever benchmarks * Adapt error message * Adapt running reader benchmarks * Adapt retriever reader benchmark script * Adapt running benchmarks script * Adapt README.md * Raise error if file doesn't exist * Raise error if path doesn't exist or is a directory * minor readme update * Create separate methods for checking if pipeline contains reader or retriever * Fix reader pipeline case --------- Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-26 18:48:11 +02:00			`from pathlib import Path`
			`from typing import Dict`
			`import argparse`
			`import json`

			`from haystack import Pipeline`
			`from haystack.pipelines.config import read_pipeline_config_from_yaml`
Integrate sentence transformers into benchmarks (#843) * Integrate sentence transformers into benchmarks * Add doc store asserts * switch data downloads from s3 client to https. add license info * Fix mypy, revert config Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> 2021-04-09 17:24:16 +02:00
refactor: Adapt running benchmarks (#5007) * Generate eval result in separate method * Adapt benchmarking utils * Adapt running retriever benchmarks * Adapt error message * Adapt running reader benchmarks * Adapt retriever reader benchmark script * Adapt running benchmarks script * Adapt README.md * Raise error if file doesn't exist * Raise error if path doesn't exist or is a directory * minor readme update * Create separate methods for checking if pipeline contains reader or retriever * Fix reader pipeline case --------- Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-26 18:48:11 +02:00			`from utils import prepare_environment, contains_reader, contains_retriever`
Create time and performance benchmarks for all readers and retrievers (#339) * add time and perf benchmark for es * Add retriever benchmarking * Add Reader benchmarking * add nq to squad conversion * add conversion stats * clean benchmarks * Add link to dataset * Update imports * add first support for neg psgs * Refactor test * set max_seq_len * cleanup benchmark * begin retriever speed benchmarking * Add support for retriever query index benchmarking * improve reader eval, retriever speed benchmarking * improve retriever speed benchmarking * Add retriever accuracy benchmark * Add neg doc shuffling * Add top_n * 3x speedup of SQL. add postgres docker run. make shuffle neg a param. add more logging * Add models to sweep * add option for faiss index type * remove unneeded line * change faiss to faiss_flat * begin automatic benchmark script * remove existing postgres docker for benchmarking * Add data processing scripts * Remove shuffle in script bc data already shuffled * switch hnsw setup from 256 to 128 * change es similarity to dot product by default * Error includes stack trace * Change ES default timeout * remove delete_docs() from timing for indexing * Add support for website export * update website on push to benchmarks * add complete benchmarks results * new json format * removed NaN as is not a valid json token * fix benchmarking for faiss hnsw queries. do sql calls in update_embeddings() as batches * update benchmarks for hnsw 128,20,80 * don't delete full index in delete_all_documents() * update texts for charts * update recall column for retriever * change scale and add units to desc * add units to legend * add axis titles. update desc * add html tags Co-authored-by: deepset <deepset@Crenolape.localdomain> Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai> Co-authored-by: PiffPaffM <markuspaff.mp@gmail.com> 2020-10-12 13:34:42 +02:00			`from reader import benchmark_reader`
refactor: Adapt running benchmarks (#5007) * Generate eval result in separate method * Adapt benchmarking utils * Adapt running retriever benchmarks * Adapt error message * Adapt running reader benchmarks * Adapt retriever reader benchmark script * Adapt running benchmarks script * Adapt README.md * Raise error if file doesn't exist * Raise error if path doesn't exist or is a directory * minor readme update * Create separate methods for checking if pipeline contains reader or retriever * Fix reader pipeline case --------- Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-26 18:48:11 +02:00			`from retriever import benchmark_retriever`
			`from retriever_reader import benchmark_retriever_reader`


			`def run_benchmark(pipeline_yaml: Path) -> Dict:`
			`"""`
			`Run benchmarking on a given pipeline. Pipeline can be a retriever, reader, or retriever-reader pipeline.`
			`In case of retriever or retriever-reader pipelines, indexing is also benchmarked, so the config file must`
			`contain an indexing pipeline as well.`

			`:param pipeline_yaml: Path to pipeline YAML config. The config file should contain a benchmark_config section where`
			`the following parameters are specified:`
			`- documents_directory: Directory containing files to index.`
			`- labels_file: Path to evaluation set.`
			`- data_url (optional): URL to download the data from. Downloaded data will be stored in`
			the directory `data/`.
			`"""`
			`pipeline_config = read_pipeline_config_from_yaml(pipeline_yaml)`
			`benchmark_config = pipeline_config.pop("benchmark_config", {})`

			`# Prepare environment`
			`prepare_environment(pipeline_config, benchmark_config)`
			`labels_file = Path(benchmark_config["labels_file"])`

			`querying_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="querying")`
			`pipeline_contains_reader = contains_reader(querying_pipeline)`
			`pipeline_contains_retriever = contains_retriever(querying_pipeline)`

			`# Retriever-Reader pipeline`
			`if pipeline_contains_retriever and pipeline_contains_reader:`
			`documents_dir = Path(benchmark_config["documents_directory"])`
			`indexing_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="indexing")`

			`results = benchmark_retriever_reader(indexing_pipeline, querying_pipeline, documents_dir, labels_file)`

			`# Retriever pipeline`
			`elif pipeline_contains_retriever:`
			`documents_dir = Path(benchmark_config["documents_directory"])`
			`indexing_pipeline = Pipeline.load_from_config(pipeline_config, pipeline_name="indexing")`

			`results = benchmark_retriever(indexing_pipeline, querying_pipeline, documents_dir, labels_file)`

			`# Reader pipeline`
			`elif pipeline_contains_reader:`
			`results = benchmark_reader(querying_pipeline, labels_file)`

			`# Unsupported pipeline type`
			`else:`
			`raise ValueError("Pipeline must be a retriever, reader, or retriever-reader pipeline.")`

fix: Use queries from aggregated labels in benchmarks (#5054) * Include benchmark config in output * Use queries from aggregated labels 2023-06-01 10:49:54 +02:00			`pipeline_config["benchmark_config"] = benchmark_config`
			`results["config"] = pipeline_config`
refactor: Adapt running benchmarks (#5007) * Generate eval result in separate method * Adapt benchmarking utils * Adapt running retriever benchmarks * Adapt error message * Adapt running reader benchmarks * Adapt retriever reader benchmark script * Adapt running benchmarks script * Adapt README.md * Raise error if file doesn't exist * Raise error if path doesn't exist or is a directory * minor readme update * Create separate methods for checking if pipeline contains reader or retriever * Fix reader pipeline case --------- Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-26 18:48:11 +02:00			`return results`


			`if __name__ == "__main__":`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument("config", type=str, help="Path to pipeline YAML config.")`
			`parser.add_argument("--output", type=str, help="Path to output file.")`
			`args = parser.parse_args()`
Create time and performance benchmarks for all readers and retrievers (#339) * add time and perf benchmark for es * Add retriever benchmarking * Add Reader benchmarking * add nq to squad conversion * add conversion stats * clean benchmarks * Add link to dataset * Update imports * add first support for neg psgs * Refactor test * set max_seq_len * cleanup benchmark * begin retriever speed benchmarking * Add support for retriever query index benchmarking * improve reader eval, retriever speed benchmarking * improve retriever speed benchmarking * Add retriever accuracy benchmark * Add neg doc shuffling * Add top_n * 3x speedup of SQL. add postgres docker run. make shuffle neg a param. add more logging * Add models to sweep * add option for faiss index type * remove unneeded line * change faiss to faiss_flat * begin automatic benchmark script * remove existing postgres docker for benchmarking * Add data processing scripts * Remove shuffle in script bc data already shuffled * switch hnsw setup from 256 to 128 * change es similarity to dot product by default * Error includes stack trace * Change ES default timeout * remove delete_docs() from timing for indexing * Add support for website export * update website on push to benchmarks * add complete benchmarks results * new json format * removed NaN as is not a valid json token * fix benchmarking for faiss hnsw queries. do sql calls in update_embeddings() as batches * update benchmarks for hnsw 128,20,80 * don't delete full index in delete_all_documents() * update texts for charts * update recall column for retriever * change scale and add units to desc * add units to legend * add axis titles. update desc * add html tags Co-authored-by: deepset <deepset@Crenolape.localdomain> Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai> Co-authored-by: PiffPaffM <markuspaff.mp@gmail.com> 2020-10-12 13:34:42 +02:00
refactor: Adapt running benchmarks (#5007) * Generate eval result in separate method * Adapt benchmarking utils * Adapt running retriever benchmarks * Adapt error message * Adapt running reader benchmarks * Adapt retriever reader benchmark script * Adapt running benchmarks script * Adapt README.md * Raise error if file doesn't exist * Raise error if path doesn't exist or is a directory * minor readme update * Create separate methods for checking if pipeline contains reader or retriever * Fix reader pipeline case --------- Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-26 18:48:11 +02:00			`config_file = Path(args.config)`
			`output_file = f"{config_file.stem}_results.json" if args.output is None else args.output`
			`results = run_benchmark(config_file)`
			`with open(output_file, "w") as f:`
			`json.dump(results, f, indent=2)`