refactor: remove YAML save/load methods for subclasses of BaseStandardPipeline (#3443)

* remove methods & update docstring

* remove irrelevant test
This commit is contained in:
Sara Zan 2022-11-02 10:14:33 +01:00 committed by GitHub
parent 0b2e71daf6
commit bb1d9983b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1 addition and 71 deletions

View File

@ -1845,19 +1845,17 @@ class Pipeline:
Here's a sample configuration:
```yaml
| version: '1.0.0'
| version: '1.9.0'
|
| components: # define all the building-blocks for Pipeline
| - name: MyReader # custom-name for the component; helpful for visualization & debugging
| type: FARMReader # Haystack Class name for the component
| params:
| no_ans_boost: -10
| model_name_or_path: deepset/roberta-base-squad2
| - name: MyESRetriever
| type: BM25Retriever
| params:
| document_store: MyDocumentStore # params can reference other components defined in the YAML
| custom_query: null
| - name: MyDocumentStore
| type: ElasticsearchDocumentStore
| params:

View File

@ -78,65 +78,6 @@ class BaseStandardPipeline(ABC):
"""
self.pipeline.draw(path)
def save_to_yaml(self, path: Path, return_defaults: bool = False):
    """
    Write the YAML configuration of the wrapped Pipeline to disk.

    This is a thin convenience wrapper: the call is forwarded unchanged to `self.pipeline.save_to_yaml()`,
    and whatever that call returns is returned to the caller. The resulting file is meant to be usable
    with `Pipeline.load_from_yaml()`.

    :param path: path of the output YAML file.
    :param return_defaults: whether to output parameters that have the default values.
    """
    wrapped_pipeline = self.pipeline
    return wrapped_pipeline.save_to_yaml(path, return_defaults)
@classmethod
def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True):
    """
    Build an instance of this class from a YAML file that describes the individual components and how
    they are wired together into a Pipeline. One YAML file may declare several Pipelines; in that case
    an explicit `pipeline_name` must be passed.

    Here's a sample configuration:

        ```yaml
        |   version: '1.0.0'
        |
        |    components:    # define all the building-blocks for Pipeline
        |    - name: MyReader       # custom-name for the component; helpful for visualization & debugging
        |      type: FARMReader    # Haystack Class name for the component
        |      params:
        |        no_ans_boost: -10
        |        model_name_or_path: deepset/roberta-base-squad2
        |    - name: MyESRetriever
        |      type: BM25Retriever
        |      params:
        |        document_store: MyDocumentStore    # params can reference other components defined in the YAML
        |        custom_query: null
        |    - name: MyDocumentStore
        |      type: ElasticsearchDocumentStore
        |      params:
        |        index: haystack_test
        |
        |    pipelines:    # multiple Pipelines can be defined using the components from above
        |    - name: my_query_pipeline    # a simple extractive-qa Pipeline
        |      nodes:
        |      - name: MyESRetriever
        |        inputs: [Query]
        |      - name: MyReader
        |        inputs: [MyESRetriever]
        ```

    :param path: path of the YAML file.
    :param pipeline_name: if the YAML contains multiple pipelines, the pipeline_name to load must be set.
    :param overwrite_with_env_variables: Overwrite the YAML configuration with environment variables. For example,
                                         to change index name param for an ElasticsearchDocumentStore, an env
                                         variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an
                                         `_` sign must be used to specify nested hierarchical properties.
    :return: a new instance of `cls` whose `pipeline` attribute holds the loaded Pipeline.
    """
    # __init__ is deliberately bypassed: it cannot be called here because its
    # parameters cannot be provided, so the bare object is allocated directly.
    instance = cls.__new__(cls)
    loaded_pipeline = Pipeline.load_from_yaml(path, pipeline_name, overwrite_with_env_variables)
    instance.pipeline = loaded_pipeline
    return instance
def get_nodes_by_class(self, class_type) -> List[Any]:
"""
Gets all nodes in the pipeline that are an instance of a certain class (incl. subclasses).

View File

@ -293,15 +293,6 @@ def test_most_similar_documents_pipeline_with_filters_batch(retriever, document_
assert document.meta["source"] in ["wiki3", "wiki4", "wiki5"]
@pytest.mark.integration
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
def test_most_similar_documents_pipeline_save(tmpdir, document_store_with_docs):
    """
    Check that saving a MostSimilarDocumentsPipeline to YAML produces a file at the requested path.

    :param tmpdir: pytest-provided temporary directory that receives the YAML file.
    :param document_store_with_docs: document store fixture, parametrized indirectly with "memory".
    """
    pipeline = MostSimilarDocumentsPipeline(document_store=document_store_with_docs)
    path = Path(tmpdir, "most_similar_document_pipeline.yml")
    pipeline.save_to_yaml(path)
    # The existence check must be asserted: as a bare expression its result is
    # discarded and the test would pass even when no file was written.
    assert path.exists()
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store_dot_product_with_docs", ["elasticsearch"], indirect=True)
def test_join_merge_no_weights(document_store_dot_product_with_docs):