Mirror of https://github.com/deepset-ai/haystack.git
Fix YAML pipeline paths in docker-compose.yml (#2335)
* Rename YAML files in docker-compose files
* Make read_pipeline_config_from_yaml fail on wrong path
* Validate indexing config in rest api
* Update Documentation & Code Style
* Add note about autocompletion of YAML

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
parent 853c360a1f
commit 5454d57bfa
@@ -23,7 +23,7 @@ services:
     environment:
       # See rest_api/pipelines.yaml for configurations of Search & Indexing Pipeline.
      - DOCUMENTSTORE_PARAMS_HOST=elasticsearch
-      - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines_dpr.yaml
+      - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml
      - CONCURRENT_REQUEST_PER_WORKER
     depends_on:
      - elasticsearch
@@ -14,7 +14,7 @@ services:
     environment:
       # See rest_api/pipelines.yaml for configurations of Search & Indexing Pipeline.
      - DOCUMENTSTORE_PARAMS_HOST=elasticsearch
-      - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines.yaml
+      - PIPELINE_YAML_PATH=/home/user/rest_api/pipeline/pipelines.haystack-pipeline.yml
      - CONCURRENT_REQUEST_PER_WORKER
     depends_on:
      - elasticsearch
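Only the value of `PIPELINE_YAML_PATH` changes in both compose files. As a rough, hedged sketch of how such a variable is typically consumed on the Python side (not part of this diff; the fallback path is a placeholder):

```python
import os
from pathlib import Path

# Illustrative sketch only: rest_api/config.py is not part of this diff, and the
# fallback path below is a placeholder, not a value taken from the commit.
PIPELINE_YAML_PATH = Path(
    os.getenv("PIPELINE_YAML_PATH", "rest_api/pipeline/pipelines.haystack-pipeline.yml")
)

# DOCUMENTSTORE_PARAMS_HOST=elasticsearch is not read here directly; it is applied when
# component definitions are built with overwrite_with_env_variables=True and then
# overrides the DocumentStore's `host` parameter.
print(f"Loading pipelines from {PIPELINE_YAML_PATH}")
```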
@@ -123,7 +123,7 @@ Print the evaluation results
 #### semantic\_answer\_similarity

 ```python
-def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2") -> Tuple[List[float], List[float]]
+def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, use_gpu: bool = True) -> Tuple[List[float], List[float]]
 ```

 Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1.
@@ -137,6 +137,9 @@ Returns per QA pair a) the similarity of the most likely prediction (top 1) to a
 - `gold_labels`: Labels as list of multiple possible answers per question
 - `sas_model_name_or_path`: SentenceTransformers semantic textual similarity model, should be path or string
   pointing to downloadable models.
+- `batch_size`: Number of prediction label pairs to encode at once.
+- `use_gpu`: Whether to use a GPU or the CPU for calculating semantic answer similarity.
+  Falls back to CPU if no GPU is available.

 **Returns**:

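For orientation, a minimal usage sketch of the extended signature documented above. The import path is an assumption (this diff does not show which module defines the function), and the prediction/gold-label data is made up:

```python
from typing import List

# Import path assumed -- not shown in this diff; adjust to your Haystack version.
from haystack.modeling.evaluation.metrics import semantic_answer_similarity

# One list of predicted answers and one list of acceptable gold answers per question.
predictions: List[List[str]] = [["Berlin"], ["in 1991"]]
gold_labels: List[List[str]] = [["Berlin", "berlin"], ["1991"]]

top_1_sas, top_k_sas = semantic_answer_similarity(
    predictions=predictions,
    gold_labels=gold_labels,
    sas_model_name_or_path="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    batch_size=32,   # new: prediction/label pairs encoded per forward pass
    use_gpu=True,    # new: falls back to CPU if no GPU is available
)
print(top_1_sas, top_k_sas)
```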
@@ -466,6 +466,9 @@ If you use custom cross encoders please make sure they work with sentence_transf
 - Good default for multiple languages: "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
 - Large, powerful, but slow model for English only: "cross-encoder/stsb-roberta-large"
 - Large model for German only: "deepset/gbert-large-sts"
+- `sas_batch_size`: Number of prediction label pairs to encode at once by CrossEncoder or SentenceTransformer while calculating SAS.
+- `sas_use_gpu`: Whether to use a GPU or the CPU for calculating semantic answer similarity.
+  Falls back to CPU if no GPU is available.
 - `add_isolated_node_eval`: If set to True, in addition to the integrated evaluation of the pipeline, each node is evaluated in isolated evaluation mode.
   This mode helps to understand the bottlenecks of a pipeline in terms of output quality of each individual node.
   If a node performs much better in the isolated evaluation than in the integrated evaluation, the previous node needs to be optimized to improve the pipeline's performance.
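The `sas_batch_size` and `sas_use_gpu` arguments documented above are forwarded to `semantic_answer_similarity` during `Pipeline.eval()`. A rough sketch of such an eval call (the YAML path, pipeline name, and `eval_labels` are placeholders, and the exact `eval()` signature should be checked against your Haystack version):

```python
from pathlib import Path

from haystack.pipelines import Pipeline

# Placeholder pipeline definition and labels -- not part of this diff.
pipeline = Pipeline.load_from_yaml(
    Path("rest_api/pipeline/pipelines.haystack-pipeline.yml"), pipeline_name="query"
)
eval_labels = []  # would normally be a list of MultiLabel objects built from annotated QA pairs

eval_result = pipeline.eval(
    labels=eval_labels,
    sas_model_name_or_path="cross-encoder/stsb-roberta-large",
    sas_batch_size=32,            # pairs encoded at once while computing SAS
    sas_use_gpu=True,             # falls back to CPU if no GPU is available
    add_isolated_node_eval=True,  # additionally evaluate each node in isolation
)
print(eval_result.calculate_metrics())
```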
@@ -394,7 +394,7 @@ def semantic_answer_similarity(
     gold_labels: List[List[str]],
     sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
     batch_size: int = 32,
-    use_gpu: bool = True
+    use_gpu: bool = True,
 ) -> Tuple[List[float], List[float]]:
     """
     Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1.
@@ -416,8 +416,8 @@ def semantic_answer_similarity(
     cross_encoder_used = False
     if config.architectures is not None:
         cross_encoder_used = any(arch.endswith("ForSequenceClassification") for arch in config.architectures)

-    device = None if use_gpu else 'cpu'
+    device = None if use_gpu else "cpu"

     # Compute similarities
     top_1_sas = []
@@ -768,8 +768,11 @@ class Pipeline(BasePipeline):
                 gold_labels = df["gold_answers"].values
                 predictions = [[a] for a in df["answer"].values]
                 sas, _ = semantic_answer_similarity(
-                    predictions=predictions, gold_labels=gold_labels, sas_model_name_or_path=sas_model_name_or_path,
-                    batch_size=sas_batch_size, use_gpu=sas_use_gpu
+                    predictions=predictions,
+                    gold_labels=gold_labels,
+                    sas_model_name_or_path=sas_model_name_or_path,
+                    batch_size=sas_batch_size,
+                    use_gpu=sas_use_gpu,
                 )
                 df["sas"] = sas

@@ -69,7 +69,13 @@ def get_component_definitions(pipeline_config: Dict[str, Any], overwrite_with_en
     return component_definitions


-def read_pipeline_config_from_yaml(path: Path):
+def read_pipeline_config_from_yaml(path: Path) -> Dict[str, Any]:
+    """
+    Parses YAML files into Python objects.
+    Fails if the file does not exist.
+    """
+    if not os.path.isfile(path):
+        raise FileNotFoundError(f"Not found: {path}")
     with open(path, "r", encoding="utf-8") as stream:
         return yaml.safe_load(stream)

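With the added check, a wrong path now fails immediately with a FileNotFoundError rather than surfacing later as a confusing parsing or validation error. A minimal sketch (the YAML path is a placeholder):

```python
from pathlib import Path

from haystack.pipelines.config import read_pipeline_config_from_yaml

try:
    # Placeholder path -- any non-existent file triggers the new early failure.
    config = read_pipeline_config_from_yaml(Path("rest_api/pipeline/pipelines.haystack-pipeline.yml"))
except FileNotFoundError as err:
    print(f"Pipeline YAML not found: {err}")
```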
@@ -12,7 +12,12 @@ from pydantic import BaseModel

 from haystack.pipelines.base import Pipeline
 from haystack.errors import PipelineConfigError
-from haystack.pipelines.config import get_component_definitions, get_pipeline_definition, read_pipeline_config_from_yaml
+from haystack.pipelines.config import (
+    get_component_definitions,
+    get_pipeline_definition,
+    read_pipeline_config_from_yaml,
+    validate_config,
+)
 from rest_api.config import PIPELINE_YAML_PATH, FILE_UPLOAD_PATH, INDEXING_PIPELINE_NAME
 from rest_api.controller.utils import as_form

@@ -22,6 +27,7 @@ router = APIRouter()

 try:
     pipeline_config = read_pipeline_config_from_yaml(Path(PIPELINE_YAML_PATH))
+    validate_config(pipeline_config)
     pipeline_definition = get_pipeline_definition(pipeline_config=pipeline_config, pipeline_name=INDEXING_PIPELINE_NAME)
     component_definitions = get_component_definitions(
         pipeline_config=pipeline_config, overwrite_with_env_variables=True
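The same pair of calls can be reused outside the REST API to fail fast on a broken indexing config before deployment. A minimal sketch, assuming validate_config raises PipelineConfigError on an invalid config (as the import of that error class above suggests); the path is a placeholder:

```python
from pathlib import Path

from haystack.errors import PipelineConfigError
from haystack.pipelines.config import read_pipeline_config_from_yaml, validate_config


def check_pipeline_yaml(path: str) -> None:
    """Fail fast if the pipeline YAML is missing or structurally invalid."""
    config = read_pipeline_config_from_yaml(Path(path))  # raises FileNotFoundError on a wrong path
    try:
        validate_config(config)
    except PipelineConfigError as err:
        raise SystemExit(f"Invalid pipeline config in {path}: {err}")


check_pipeline_yaml("rest_api/pipeline/pipelines.haystack-pipeline.yml")  # placeholder path
```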
@@ -1,3 +1,5 @@
+# To allow your IDE to autocomplete and validate your YAML pipelines, name them as <name of your choice>.haystack-pipeline.yml
+
 version: 'unstable'

 components: # define all the building-blocks for Pipeline
@@ -1,3 +1,5 @@
+# To allow your IDE to autocomplete and validate your YAML pipelines, name them as <name of your choice>.haystack-pipeline.yml
+
 version: 'unstable'

 components: # define all the building-blocks for Pipeline