YAML versioning (#2209)

* Make YAML files get the same version as Haystack and throw warning at load in case of mismatch

* Update version of most YAMLs in the codebase (aesthetic change, only to avoid the warning).

* Remove quotes from version in tests

* Fix version in generate_json_schema.py

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan 2022-02-21 12:22:37 +01:00 committed by GitHub
parent 2a674eaff7
commit 2a840ee248
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 2651 additions and 25 deletions

View File

@ -4,6 +4,7 @@ import subprocess
from pathlib import Path
from typing import Any, Dict, Optional, Set, Tuple
from haystack import __version__
import haystack.document_stores
import haystack.nodes
import pydantic.schema
@ -15,8 +16,8 @@ from pydantic.schema import field_singleton_schema as _field_singleton_schema
from pydantic.typing import is_callable_type
from pydantic.utils import lenient_issubclass
schema_version = "0.7"
filename = f"haystack-pipeline.{schema_version}.schema.json"
schema_version = __version__
filename = f"haystack-pipeline-{schema_version}.schema.json"
destination_path = Path(__file__).parent.parent.parent / "json-schemas" / filename

View File

@ -55,7 +55,7 @@ Here's a sample configuration:
```python
| {
| "version": "0.9",
| "version": "1.0",
| "components": [
| { # define all the building-blocks for Pipeline
| "name": "MyReader", # custom-name for the component; helpful for visualization & debugging
@ -110,7 +110,7 @@ be passed.
Here's a sample configuration:
```yaml
| version: '0.9'
| version: '1.0'
|
| components: # define all the building-blocks for Pipeline
| - name: MyReader # custom-name for the component; helpful for visualization & debugging
@ -137,6 +137,9 @@ Here's a sample configuration:
| inputs: [MyESRetriever]
```
Note that, in case of a mismatch in version between Haystack and the YAML, a warning will be printed.
If the pipeline loads correctly regardless, save the pipeline again using `Pipeline.save_to_yaml()` to remove the warning.
**Arguments**:
- `path`: path of the YAML file.
@ -605,6 +608,10 @@ Here's a sample configuration:
| inputs: [MyESRetriever]
```
Note that, in case of a mismatch in version between Haystack and the YAML, a warning will be printed.
If the pipeline loads correctly regardless, save the pipeline again using `RayPipeline.save_to_yaml()` to remove the warning.
**Arguments**:
- `path`: path of the YAML file.

View File

@ -29,6 +29,7 @@ except:
ray = None # type: ignore
serve = None # type: ignore
from haystack import __version__
from haystack.schema import EvaluationResult, MultiLabel, Document
from haystack.nodes.base import BaseComponent
from haystack.document_stores.base import BaseDocumentStore
@ -81,7 +82,7 @@ class BasePipeline:
```python
| {
| "version": "0.9",
| "version": "1.0",
| "components": [
| { # define all the building-blocks for Pipeline
| "name": "MyReader", # custom-name for the component; helpful for visualization & debugging
@ -146,7 +147,7 @@ class BasePipeline:
Here's a sample configuration:
```yaml
| version: '0.9'
| version: '1.0'
|
| components: # define all the building-blocks for Pipeline
| - name: MyReader # custom-name for the component; helpful for visualization & debugging
@ -173,6 +174,9 @@ class BasePipeline:
| inputs: [MyESRetriever]
```
Note that, in case of a mismatch in version between Haystack and the YAML, a warning will be printed.
If the pipeline loads correctly regardless, save the pipeline again using `Pipeline.save_to_yaml()` to remove the warning.
:param path: path of the YAML file.
:param pipeline_name: if the YAML contains multiple pipelines, the pipeline_name to load must be set.
:param overwrite_with_env_variables: Overwrite the YAML configuration with environment variables. For example,
@ -182,6 +186,14 @@ class BasePipeline:
"""
pipeline_config = cls._read_pipeline_config_from_yaml(path)
# Warn (without failing) when the YAML declares a version different from the
# installed Haystack version; loading is still attempted afterwards.
if pipeline_config["version"] != __version__:
    logger.warning(
        f"YAML version ({pipeline_config['version']}) does not match with Haystack version ({__version__}). "
        "Issues may occur during loading. "
        "To fix this warning, save again this pipeline with the current Haystack version using Pipeline.save_to_yaml(), "
        "check out our migration guide at https://haystack.deepset.ai/overview/migration "
        # Name the version the YAML declares: the installed version is the one that mismatches.
        f"or downgrade to haystack version {pipeline_config['version']}."
    )
return cls.load_from_config(
pipeline_config=pipeline_config,
pipeline_name=pipeline_name,
@ -1037,7 +1049,11 @@ class Pipeline(BasePipeline):
# create the Pipeline definition with how the Component are connected
pipelines[pipeline_name]["nodes"].append({"name": node, "inputs": list(self.graph.predecessors(node))})
config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"}
config = {
"components": list(components.values()),
"pipelines": list(pipelines.values()),
"version": __version__,
}
return config
def _format_document_answer(self, document_or_answer: dict):
@ -1290,6 +1306,10 @@ class RayPipeline(Pipeline):
| inputs: [MyESRetriever]
```
Note that, in case of a mismatch in version between Haystack and the YAML, a warning will be printed.
If the pipeline loads correctly regardless, save the pipeline again using `RayPipeline.save_to_yaml()` to remove the warning.
:param path: path of the YAML file.
:param pipeline_name: if the YAML contains multiple pipelines, the pipeline_name to load must be set.
:param overwrite_with_env_variables: Overwrite the YAML configuration with environment variables. For example,
@ -1299,6 +1319,14 @@ class RayPipeline(Pipeline):
:param address: The IP address for the Ray cluster. If set to None, a local Ray instance is started.
"""
pipeline_config = cls._read_pipeline_config_from_yaml(path)
# Warn (without failing) when the YAML declares a version different from the
# installed Haystack version; loading is still attempted afterwards.
if pipeline_config["version"] != __version__:
    logger.warning(
        f"YAML version ({pipeline_config['version']}) does not match with Haystack version ({__version__}). "
        "Issues may occur during loading. "
        "To fix this warning, save again this pipeline with the current Haystack version using Pipeline.save_to_yaml(), "
        "check out our migration guide at https://haystack.deepset.ai/overview/migration "
        # Name the version the YAML declares: the installed version is the one that mismatches.
        f"or downgrade to haystack version {pipeline_config['version']}."
    )
return RayPipeline.load_from_config(
pipeline_config=pipeline_config,
pipeline_name=pipeline_name,

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
# Dummy pipeline, used when the CI needs to load the REST API to
# extract the OpenAPI specs. DO NOT USE.
# Dummy pipeline, used when the CI needs to load the REST API to extract the OpenAPI specs. DO NOT USE.
version: '0.9'
version: '1.1.0'
components:
- name: FileTypeClassifier

View File

@ -1,4 +1,4 @@
version: '0.9'
version: '1.1.0'
components: # define all the building-blocks for Pipeline
- name: DocumentStore

View File

@ -1,4 +1,4 @@
version: '0.9'
version: '1.1.0'
components: # define all the building-blocks for Pipeline
- name: DocumentStore

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: Reader

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: DPRRetriever

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: DPRRetriever

View File

@ -1,4 +1,4 @@
version: '0.9'
version: '1.1.0'
components:
- name: Reader

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: Reader

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: DPRRetriever

View File

@ -1,4 +1,4 @@
version: '0.7'
version: '1.1.0'
components:
- name: DPRRetriever

View File

@ -1,4 +1,4 @@
version: '0.9'
version: '1.1.0'
components:
- name: Reader

View File

@ -5,8 +5,9 @@ import json
from unittest.mock import Mock
import pytest
import responses
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
from haystack import __version__
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.nodes.retriever.sparse import ElasticsearchRetriever
from haystack.pipelines import (
@ -44,7 +45,7 @@ def test_load_and_save_yaml(document_store, tmp_path):
pipeline.save_to_yaml(tmp_path / "test.yaml")
with open(tmp_path / "test.yaml", "r", encoding="utf-8") as stream:
saved_yaml = stream.read()
expected_yaml = """
expected_yaml = f"""
components:
- name: ESRetriever
params:
@ -71,7 +72,7 @@ def test_load_and_save_yaml(document_store, tmp_path):
- ESRetriever
name: Reader
type: Pipeline
version: '0.8'
version: {__version__}
"""
assert saved_yaml.replace(" ", "").replace("\n", "") == expected_yaml.replace(" ", "").replace("\n", "")
@ -104,7 +105,7 @@ def test_load_and_save_yaml_prebuilt_pipelines(document_store, tmp_path):
pipeline.save_to_yaml(tmp_path / "test.yaml")
with open(tmp_path / "test.yaml", "r", encoding="utf-8") as stream:
saved_yaml = stream.read()
expected_yaml = """
expected_yaml = f"""
components:
- name: ESRetriever
params:
@ -131,7 +132,7 @@ def test_load_and_save_yaml_prebuilt_pipelines(document_store, tmp_path):
- ESRetriever
name: Reader
type: Pipeline
version: '0.8'
version: {__version__}
"""
assert saved_yaml.replace(" ", "").replace("\n", "") == expected_yaml.replace(" ", "").replace("\n", "")