diff --git a/docker-compose-gpu.yml b/docker-compose-gpu.yml index 18ff10626..351adcabf 100644 --- a/docker-compose-gpu.yml +++ b/docker-compose-gpu.yml @@ -23,7 +23,7 @@ services: environment: # See rest_api/pipelines.yaml for configurations of Search & Indexing Pipeline. - DOCUMENTSTORE_PARAMS_HOST=elasticsearch - - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines_dpr.yaml + - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml - CONCURRENT_REQUEST_PER_WORKER depends_on: - elasticsearch diff --git a/docker-compose.yml b/docker-compose.yml index 24854f306..2a49246fb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: environment: # See rest_api/pipelines.yaml for configurations of Search & Indexing Pipeline. - DOCUMENTSTORE_PARAMS_HOST=elasticsearch - - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines.yaml + - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines.haystack-pipeline.yml - CONCURRENT_REQUEST_PER_WORKER depends_on: - elasticsearch diff --git a/docs/_src/api/api/evaluation.md b/docs/_src/api/api/evaluation.md index 2cbf0b4dd..fed855e80 100644 --- a/docs/_src/api/api/evaluation.md +++ b/docs/_src/api/api/evaluation.md @@ -123,7 +123,7 @@ Print the evaluation results #### semantic\_answer\_similarity ```python -def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2") -> Tuple[List[float], List[float]] +def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, use_gpu: bool = True) -> Tuple[List[float], List[float]] ``` Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1. @@ -137,6 +137,9 @@ Returns per QA pair a) the similarity of the most likely prediction (top 1) to a - `gold_labels`: Labels as list of multiple possible answers per question - `sas_model_name_or_path`: SentenceTransformers semantic textual similarity model, should be path or string pointing to downloadable models. +- `batch_size`: Number of prediction label pairs to encode at once. +- `use_gpu`: Whether to use a GPU or the CPU for calculating semantic answer similarity. +Falls back to CPU if no GPU is available. **Returns**: diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 91f401853..59a61c701 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -466,6 +466,9 @@ If you use custom cross encoders please make sure they work with sentence_transf - Good default for multiple languages: "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" - Large, powerful, but slow model for English only: "cross-encoder/stsb-roberta-large" - Large model for German only: "deepset/gbert-large-sts" +- `sas_batch_size`: Number of prediction label pairs to encode at once by CrossEncoder or SentenceTransformer while calculating SAS. +- `sas_use_gpu`: Whether to use a GPU or the CPU for calculating semantic answer similarity. +Falls back to CPU if no GPU is available. - `add_isolated_node_eval`: If set to True, in addition to the integrated evaluation of the pipeline, each node is evaluated in isolated evaluation mode. This mode helps to understand the bottlenecks of a pipeline in terms of output quality of each individual node. If a node performs much better in the isolated evaluation than in the integrated evaluation, the previous node needs to be optimized to improve the pipeline's performance. diff --git a/haystack/nodes/evaluator/evaluator.py b/haystack/nodes/evaluator/evaluator.py index 134c855ed..6e4ac3184 100644 --- a/haystack/nodes/evaluator/evaluator.py +++ b/haystack/nodes/evaluator/evaluator.py @@ -394,7 +394,7 @@ def semantic_answer_similarity( gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, - use_gpu: bool = True + use_gpu: bool = True, ) -> Tuple[List[float], List[float]]: """ Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1. @@ -416,8 +416,8 @@ def semantic_answer_similarity( cross_encoder_used = False if config.architectures is not None: cross_encoder_used = any(arch.endswith("ForSequenceClassification") for arch in config.architectures) - - device = None if use_gpu else 'cpu' + + device = None if use_gpu else "cpu" # Compute similarities top_1_sas = [] diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 29ef83108..c45655f45 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -768,8 +768,11 @@ class Pipeline(BasePipeline): gold_labels = df["gold_answers"].values predictions = [[a] for a in df["answer"].values] sas, _ = semantic_answer_similarity( - predictions=predictions, gold_labels=gold_labels, sas_model_name_or_path=sas_model_name_or_path, - batch_size=sas_batch_size, use_gpu=sas_use_gpu + predictions=predictions, + gold_labels=gold_labels, + sas_model_name_or_path=sas_model_name_or_path, + batch_size=sas_batch_size, + use_gpu=sas_use_gpu, ) df["sas"] = sas diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 6e8c77626..f4df76aa2 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -69,7 +69,13 @@ def get_component_definitions(pipeline_config: Dict[str, Any], overwrite_with_en return component_definitions -def read_pipeline_config_from_yaml(path: Path): +def read_pipeline_config_from_yaml(path: Path) -> Dict[str, Any]: + """ + Parses YAML files into Python objects. + Fails if the file does not exist. + """ + if not os.path.isfile(path): + raise FileNotFoundError(f"Not found: {path}") with open(path, "r", encoding="utf-8") as stream: return yaml.safe_load(stream) diff --git a/rest_api/controller/file_upload.py b/rest_api/controller/file_upload.py index 2c1b97c8a..262c2f535 100644 --- a/rest_api/controller/file_upload.py +++ b/rest_api/controller/file_upload.py @@ -12,7 +12,12 @@ from pydantic import BaseModel from haystack.pipelines.base import Pipeline from haystack.errors import PipelineConfigError -from haystack.pipelines.config import get_component_definitions, get_pipeline_definition, read_pipeline_config_from_yaml +from haystack.pipelines.config import ( + get_component_definitions, + get_pipeline_definition, + read_pipeline_config_from_yaml, + validate_config, +) from rest_api.config import PIPELINE_YAML_PATH, FILE_UPLOAD_PATH, INDEXING_PIPELINE_NAME from rest_api.controller.utils import as_form @@ -22,6 +27,7 @@ router = APIRouter() try: pipeline_config = read_pipeline_config_from_yaml(Path(PIPELINE_YAML_PATH)) + validate_config(pipeline_config) pipeline_definition = get_pipeline_definition(pipeline_config=pipeline_config, pipeline_name=INDEXING_PIPELINE_NAME) component_definitions = get_component_definitions( pipeline_config=pipeline_config, overwrite_with_env_variables=True diff --git a/rest_api/pipeline/pipelines.haystack-pipeline.yml b/rest_api/pipeline/pipelines.haystack-pipeline.yml index 72565a41f..b32a62c66 100644 --- a/rest_api/pipeline/pipelines.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines.haystack-pipeline.yml @@ -1,3 +1,5 @@ +# To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml + version: 'unstable' components: # define all the building-blocks for Pipeline diff --git a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml index eaf426921..72cdf0f63 100644 --- a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml @@ -1,3 +1,5 @@ +# To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml + version: 'unstable' components: # define all the building-blocks for Pipeline