From 99990e7249fe4fc4ff717fea40f09c84b95253ff Mon Sep 17 00:00:00 2001
From: oryx1729 <78848855+oryx1729@users.noreply.github.com>
Date: Fri, 30 Apr 2021 12:23:29 +0200
Subject: [PATCH] Add export of Pipeline YAML config (#1003)

---
 haystack/document_store/elasticsearch.py   | 10 +++++
 haystack/document_store/faiss.py           |  9 ++++
 haystack/document_store/memory.py          |  7 +++
 haystack/document_store/milvus.py          |  9 ++++
 haystack/document_store/sql.py             |  6 +++
 haystack/file_converter/base.py            |  4 ++
 haystack/file_converter/pdf.py             |  4 ++
 haystack/file_converter/tika.py            |  6 +++
 haystack/file_converter/txt.py             | 16 -------
 haystack/generator/transformers.py         |  7 +++
 haystack/graph_retriever/text_to_sparql.py |  5 ++-
 haystack/knowledge_graph/graphdb.py        |  7 ++-
 haystack/pipeline.py                       | 52 +++++++++++++++++++++-
 haystack/preprocessor/preprocessor.py      |  8 ++++
 haystack/reader/farm.py                    |  8 ++++
 haystack/reader/transformers.py            |  8 ++++
 haystack/retriever/dense.py                | 18 ++++++++
 haystack/retriever/sparse.py               |  8 ++++
 haystack/schema.py                         | 19 +++++++-
 haystack/summarizer/transformers.py        |  8 ++++
 haystack/translator/base.py                |  8 ++--
 haystack/translator/transformers.py        |  6 +++
 test/test_pipeline.py                      | 39 ++++++++++++++--
 23 files changed, 245 insertions(+), 27 deletions(-)

diff --git a/haystack/document_store/elasticsearch.py b/haystack/document_store/elasticsearch.py
index 9ec567e20..e22e14e6c 100644
--- a/haystack/document_store/elasticsearch.py
+++ b/haystack/document_store/elasticsearch.py
@@ -94,6 +94,16 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
         :param return_embedding: To return document embedding
         """
+        # save init parameters to enable export of component config as YAML
+        self.set_config(
+            host=host, port=port, username=username, password=password, api_key_id=api_key_id, api_key=api_key,
+            aws4auth=aws4auth, index=index, label_index=label_index, search_fields=search_fields, text_field=text_field,
+            name_field=name_field, embedding_field=embedding_field, embedding_dim=embedding_dim,
+            custom_mapping=custom_mapping, excluded_meta_data=excluded_meta_data, analyzer=analyzer, scheme=scheme,
+            ca_certs=ca_certs, verify_certs=verify_certs, create_index=create_index,
+            update_existing_documents=update_existing_documents, refresh_type=refresh_type, similarity=similarity,
+            timeout=timeout, return_embedding=return_embedding,
+        )
 
         self.client = self._init_elastic_client(host=host, port=port, username=username, password=password,
                                                 api_key=api_key, api_key_id=api_key_id, aws4auth=aws4auth, scheme=scheme,
diff --git a/haystack/document_store/faiss.py b/haystack/document_store/faiss.py
index 26e2ef5fe..25979ebcd 100644
--- a/haystack/document_store/faiss.py
+++ b/haystack/document_store/faiss.py
@@ -77,6 +77,15 @@ class FAISSDocumentStore(SQLDocumentStore):
         :param progress_bar: Whether to show a tqdm progress bar or not.
                              Can be helpful to disable in production deployments to keep the logs clean.
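+
+        Example (a minimal sketch; the SQLite URL and index factory below are assumed
+        illustrative values, not defaults introduced by this patch):
+
+        ```python
+        document_store = FAISSDocumentStore(sql_url="sqlite:///faiss_store.db",
+                                            faiss_index_factory_str="Flat")
+        ```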
""" + + # save init parameters to enable export of component config as YAML + self.set_config( + sql_url=sql_url, vector_dim=vector_dim, faiss_index_factory_str=faiss_index_factory_str, + faiss_index=faiss_index, return_embedding=return_embedding, + update_existing_documents=update_existing_documents, index=index, similarity=similarity, + embedding_field=embedding_field, progress_bar=progress_bar + ) + self.vector_dim = vector_dim self.faiss_index_factory_str = faiss_index_factory_str self.faiss_indexes: Dict[str, faiss.swigfaiss.Index] = {} diff --git a/haystack/document_store/memory.py b/haystack/document_store/memory.py index 3a6c417d2..3ae9659d5 100644 --- a/haystack/document_store/memory.py +++ b/haystack/document_store/memory.py @@ -44,6 +44,13 @@ class InMemoryDocumentStore(BaseDocumentStore): :param progress_bar: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + index=index, label_index=label_index, embedding_field=embedding_field, embedding_dim=embedding_dim, + return_embedding=return_embedding, similarity=similarity, progress_bar=progress_bar, + ) + self.indexes: Dict[str, Dict] = defaultdict(dict) self.index: str = index self.label_index: str = label_index diff --git a/haystack/document_store/milvus.py b/haystack/document_store/milvus.py index 549f73693..b3ecc89e8 100644 --- a/haystack/document_store/milvus.py +++ b/haystack/document_store/milvus.py @@ -94,6 +94,15 @@ class MilvusDocumentStore(SQLDocumentStore): :param progress_bar: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + sql_url=sql_url, milvus_url=milvus_url, connection_pool=connection_pool, index=index, vector_dim=vector_dim, + index_file_size=index_file_size, similarity=similarity, index_type=index_type, index_param=index_param, + search_param=search_param, update_existing_documents=update_existing_documents, + return_embedding=return_embedding, embedding_field=embedding_field, progress_bar=progress_bar, + ) + self.milvus_server = Milvus(uri=milvus_url, pool=connection_pool) self.vector_dim = vector_dim self.index_file_size = index_file_size diff --git a/haystack/document_store/sql.py b/haystack/document_store/sql.py index ab2fabd91..58fdf5c65 100644 --- a/haystack/document_store/sql.py +++ b/haystack/document_store/sql.py @@ -88,6 +88,12 @@ class SQLDocumentStore(BaseDocumentStore): added already exists. Using this parameter could cause performance degradation for document insertion. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + url=url, index=index, label_index=label_index, update_existing_documents=update_existing_documents + ) + engine = create_engine(url) ORMBase.metadata.create_all(engine) Session = sessionmaker(bind=engine) diff --git a/haystack/file_converter/base.py b/haystack/file_converter/base.py index 09c524f0f..c66331c16 100644 --- a/haystack/file_converter/base.py +++ b/haystack/file_converter/base.py @@ -27,6 +27,10 @@ class BaseConverter(BaseComponent): not one of the valid languages, then it might likely be encoding error resulting in garbled text. 
""" + + # save init parameters to enable export of component config as YAML + self.set_config(remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages) + self.remove_numeric_tables = remove_numeric_tables self.valid_languages = valid_languages diff --git a/haystack/file_converter/pdf.py b/haystack/file_converter/pdf.py index 03c9bd3fe..17ea44ed3 100644 --- a/haystack/file_converter/pdf.py +++ b/haystack/file_converter/pdf.py @@ -22,6 +22,10 @@ class PDFToTextConverter(BaseConverter): not one of the valid languages, then it might likely be encoding error resulting in garbled text. """ + + # save init parameters to enable export of component config as YAML + self.set_config(remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages) + verify_installation = subprocess.run(["pdftotext -v"], shell=True) if verify_installation.returncode == 127: raise Exception( diff --git a/haystack/file_converter/tika.py b/haystack/file_converter/tika.py index 8c1f27ae4..42ec2e459 100644 --- a/haystack/file_converter/tika.py +++ b/haystack/file_converter/tika.py @@ -58,6 +58,12 @@ class TikaConverter(BaseConverter): not one of the valid languages, then it might likely be encoding error resulting in garbled text. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + tika_url=tika_url, remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages + ) + ping = requests.get(tika_url) if ping.status_code != 200: raise Exception(f"Apache Tika server is not reachable at the URL '{tika_url}'. To run it locally" diff --git a/haystack/file_converter/txt.py b/haystack/file_converter/txt.py index fa1160fee..2e08c9915 100644 --- a/haystack/file_converter/txt.py +++ b/haystack/file_converter/txt.py @@ -8,22 +8,6 @@ logger = logging.getLogger(__name__) class TextConverter(BaseConverter): - def __init__(self, remove_numeric_tables: bool = False, valid_languages: Optional[List[str]] = None): - """ - :param remove_numeric_tables: This option uses heuristics to remove numeric rows from the tables. - The tabular structures in documents might be noise for the reader model if it - does not have table parsing capability for finding answers. However, tables - may also have long strings that could possible candidate for searching answers. - The rows containing strings are thus retained in this option. - :param valid_languages: validate languages from a list of languages specified in the ISO 639-1 - (https://en.wikipedia.org/wiki/ISO_639-1) format. - This option can be used to add test for encoding errors. If the extracted text is - not one of the valid languages, then it might likely be encoding error resulting - in garbled text. 
- """ - - super().__init__(remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages) - def convert( self, file_path: Path, diff --git a/haystack/generator/transformers.py b/haystack/generator/transformers.py index d32dd7bec..d163843af 100644 --- a/haystack/generator/transformers.py +++ b/haystack/generator/transformers.py @@ -94,6 +94,13 @@ class RAGenerator(BaseGenerator): :param use_gpu: Whether to use GPU (if available) """ + # save init parameters to enable export of component config as YAML + self.set_config( + model_name_or_path=model_name_or_path, model_version=model_version, retriever=retriever, + generator_type=generator_type, top_k=top_k, max_length=max_length, min_length=min_length, + num_beams=num_beams, embed_title=embed_title, prefix=prefix, use_gpu=use_gpu, + ) + self.model_name_or_path = model_name_or_path self.max_length = max_length self.min_length = min_length diff --git a/haystack/graph_retriever/text_to_sparql.py b/haystack/graph_retriever/text_to_sparql.py index 5e9c16833..ff230456e 100644 --- a/haystack/graph_retriever/text_to_sparql.py +++ b/haystack/graph_retriever/text_to_sparql.py @@ -22,7 +22,10 @@ class Text2SparqlRetriever(BaseGraphRetriever): :param model_name_or_path: Name of or path to a pre-trained BartForConditionalGeneration model. :param top_k: How many SPARQL queries to generate per text query. """ - + + # save init parameters to enable export of component config as YAML + self.set_config(knowledge_graph=knowledge_graph, model_name_or_path=model_name_or_path, top_k=top_k) + self.knowledge_graph = knowledge_graph # TODO We should extend this to any seq2seq models and use the AutoModel class self.model = BartForConditionalGeneration.from_pretrained(model_name_or_path, force_bos_token_to_be_generated=True) diff --git a/haystack/knowledge_graph/graphdb.py b/haystack/knowledge_graph/graphdb.py index 42303628b..9e6f8bea3 100644 --- a/haystack/knowledge_graph/graphdb.py +++ b/haystack/knowledge_graph/graphdb.py @@ -33,7 +33,12 @@ class GraphDBKnowledgeGraph(BaseKnowledgeGraph): :param prefixes: definitions of namespaces with a new line after each namespace, e.g., PREFIX hp: """ - + + # save init parameters to enable export of component config as YAML + self.set_config( + host=host, port=port, username=username, password=password, index=index, prefixes=prefixes + ) + self.url = f"http://{host}:{port}" self.index = index self.username = username diff --git a/haystack/pipeline.py b/haystack/pipeline.py index c1560395b..42a33de12 100644 --- a/haystack/pipeline.py +++ b/haystack/pipeline.py @@ -1,3 +1,4 @@ +import inspect import logging import os import traceback @@ -180,7 +181,7 @@ class Pipeline: Here's a sample configuration: ```yaml - | version: '0.7' + | version: '0.8' | | components: # define all the building-blocks for Pipeline | - name: MyReader # custom-name for the component; helpful for visualization & debugging @@ -291,6 +292,55 @@ class Pipeline: param_name = key.replace(env_prefix, "").lower() definition["params"][param_name] = value + def save_to_yaml(self, path: Path, return_defaults: bool = False): + """ + Save a YAML configuration for the Pipeline that can be used with `Pipeline.load_from_yaml()`. + + :param path: path of the output YAML file. + :param return_defaults: whether to output parameters that have the default values. 
+ """ + nodes = self.graph.nodes + + pipeline_name = self.pipeline_type.lower() + pipeline_type = self.pipeline_type + pipelines: dict = {pipeline_name: {"name": pipeline_name, "type": pipeline_type, "nodes": []}} + + components = {} + for node in nodes: + if node == self.root_node_id: + continue + component_instance = self.graph.nodes.get(node)["component"] + component_type = component_instance.pipeline_config["type"] + component_params = component_instance.pipeline_config["params"] + components[node] = {"name": node, "type": component_type, "params": {}} + component_signature = inspect.signature(type(component_instance)).parameters + for key, value in component_params.items(): + # A parameter for a Component could be another Component. For instance, a Retriever has + # the DocumentStore as a parameter. + # Component configs must be a dict with a "type" key. The "type" keys distinguishes between + # other parameters like "custom_mapping" that are dicts. + # This currently only checks for the case single-level nesting case, wherein, "a Component has another + # Component as a parameter". For deeper nesting cases, this function should be made recursive. + if isinstance(value, dict) and "type" in value.keys(): # the parameter is a Component + components[node]["params"][key] = value["type"] + sub_component_signature = inspect.signature(BaseComponent.subclasses[value["type"]]).parameters + params = { + k: v for k, v in value["params"].items() + if sub_component_signature[k].default != v or return_defaults is True + } + components[value["type"]] = {"name": value["type"], "type": value["type"], "params": params} + else: + if component_signature[key].default != value or return_defaults is True: + components[node]["params"][key] = value + + # create the Pipeline definition with how the Component are connected + pipelines[pipeline_name]["nodes"].append({"name": node, "inputs": list(self.graph.predecessors(node))}) + + config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"} + + with open(path, 'w') as outfile: + yaml.dump(config, outfile, default_flow_style=False) + class BaseStandardPipeline(ABC): pipeline: Pipeline diff --git a/haystack/preprocessor/preprocessor.py b/haystack/preprocessor/preprocessor.py index 7252f9640..a6a34ceec 100644 --- a/haystack/preprocessor/preprocessor.py +++ b/haystack/preprocessor/preprocessor.py @@ -44,6 +44,14 @@ class PreProcessor(BasePreProcessor): to True, the individual split will always have complete sentences & the number of words will be <= split_length. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + clean_whitespace=clean_whitespace, clean_header_footer=clean_header_footer, + clean_empty_lines=clean_empty_lines, split_by=split_by, split_length=split_length, + split_overlap=split_overlap, split_respect_sentence_boundary=split_respect_sentence_boundary, + ) + try: nltk.data.find('tokenizers/punkt') except LookupError: diff --git a/haystack/reader/farm.py b/haystack/reader/farm.py index ecb4040c1..2fd57274e 100644 --- a/haystack/reader/farm.py +++ b/haystack/reader/farm.py @@ -93,6 +93,14 @@ class FARMReader(BaseReader): Can be helpful to disable in production deployments to keep the logs clean. 
""" + # save init parameters to enable export of component config as YAML + self.set_config( + model_name_or_path=model_name_or_path, model_version=model_version, context_window_size=context_window_size, + batch_size=batch_size, use_gpu=use_gpu, no_ans_boost=no_ans_boost, return_no_answer=return_no_answer, + top_k=top_k, top_k_per_candidate=top_k_per_candidate, top_k_per_sample=top_k_per_sample, + num_processes=num_processes, max_seq_len=max_seq_len, doc_stride=doc_stride, progress_bar=progress_bar, + ) + self.return_no_answers = return_no_answer self.top_k = top_k self.top_k_per_candidate = top_k_per_candidate diff --git a/haystack/reader/transformers.py b/haystack/reader/transformers.py index c194aec6c..f55ecca6e 100644 --- a/haystack/reader/transformers.py +++ b/haystack/reader/transformers.py @@ -57,6 +57,14 @@ class TransformersReader(BaseReader): :param doc_stride: length of striding window for splitting long texts (used if len(text) > max_seq_len) """ + + # save init parameters to enable export of component config as YAML + self.set_config( + model_name_or_path=model_name_or_path, model_version=model_version, tokenizer=tokenizer, + context_window_size=context_window_size, use_gpu=use_gpu, top_k=top_k, doc_stride=doc_stride, + top_k_per_candidate=top_k_per_candidate, return_no_answers=return_no_answers, max_seq_len=max_seq_len, + ) + self.model = pipeline('question-answering', model=model_name_or_path, tokenizer=tokenizer, device=use_gpu, revision=model_version) self.context_window_size = context_window_size self.top_k = top_k diff --git a/haystack/retriever/dense.py b/haystack/retriever/dense.py index cef64f9d7..ffde8e98f 100644 --- a/haystack/retriever/dense.py +++ b/haystack/retriever/dense.py @@ -98,6 +98,16 @@ class DensePassageRetriever(BaseRetriever): Can be helpful to disable in production deployments to keep the logs clean. """ + # save init parameters to enable export of component config as YAML + self.set_config( + document_store=document_store, query_embedding_model=query_embedding_model, + passage_embedding_model=passage_embedding_model, single_model_path=single_model_path, + model_version=model_version, max_seq_len_query=max_seq_len_query, max_seq_len_passage=max_seq_len_passage, + top_k=top_k, use_gpu=use_gpu, batch_size=batch_size, embed_title=embed_title, + use_fast_tokenizers=use_fast_tokenizers, infer_tokenizer_classes=infer_tokenizer_classes, + similarity_function=similarity_function, progress_bar=progress_bar, + ) + self.document_store = document_store self.batch_size = batch_size self.progress_bar = progress_bar @@ -461,6 +471,14 @@ class EmbeddingRetriever(BaseRetriever): Default: -1 (very last layer). :param top_k: How many documents to return per query. """ + + # save init parameters to enable export of component config as YAML + self.set_config( + document_store=document_store, embedding_model=embedding_model, model_version=model_version, + use_gpu=use_gpu, model_format=model_format, pooling_strategy=pooling_strategy, + emb_extraction_layer=emb_extraction_layer, top_k=top_k, + ) + self.document_store = document_store self.model_format = model_format self.pooling_strategy = pooling_strategy diff --git a/haystack/retriever/sparse.py b/haystack/retriever/sparse.py index 5b30461b4..2dfbf698b 100644 --- a/haystack/retriever/sparse.py +++ b/haystack/retriever/sparse.py @@ -52,6 +52,10 @@ class ElasticsearchRetriever(BaseRetriever): ``` :param top_k: How many documents to return per query. 
""" + + # save init parameters to enable export of component config as YAML + self.set_config(document_store=document_store, top_k=top_k, custom_query=custom_query) + self.document_store: ElasticsearchDocumentStore = document_store self.top_k = top_k self.custom_query = custom_query @@ -118,6 +122,10 @@ class TfidfRetriever(BaseRetriever): :param document_store: an instance of a DocumentStore to retrieve documents from. :param top_k: How many documents to return per query. """ + + # save init parameters to enable export of component config as YAML + self.set_config(document_store=document_store, top_k=top_k) + self.vectorizer = TfidfVectorizer( lowercase=True, stop_words=None, diff --git a/haystack/schema.py b/haystack/schema.py index 6d2dcc076..af8361aac 100644 --- a/haystack/schema.py +++ b/haystack/schema.py @@ -3,6 +3,7 @@ from uuid import uuid4 import numpy as np from abc import abstractmethod + class Document: def __init__(self, text: str, id: Optional[str] = None, @@ -227,6 +228,7 @@ class BaseComponent: outgoing_edges: int subclasses: dict = {} + pipeline_config: dict = {} def __init_subclass__(cls, **kwargs): """ This automatically keeps track of all available subclasses. @@ -258,4 +260,19 @@ class BaseComponent: :param kwargs: :return: """ - pass \ No newline at end of file + pass + + def set_config(self, **kwargs): + """ + Save the init parameters of a component that later can be used with exporting + YAML configuration of a Pipeline. + + :param kwargs: all parameters passed to the __init__() of the Component. + """ + if not self.pipeline_config: + self.pipeline_config = {"params": {}, "type": type(self).__name__} + for k, v in kwargs.items(): + if isinstance(v, BaseComponent): + self.pipeline_config["params"][k] = v.pipeline_config + elif v is not None: + self.pipeline_config["params"][k] = v diff --git a/haystack/summarizer/transformers.py b/haystack/summarizer/transformers.py index 671a76685..1802db5f1 100644 --- a/haystack/summarizer/transformers.py +++ b/haystack/summarizer/transformers.py @@ -81,6 +81,14 @@ class TransformersSummarizer(BaseSummarizer): Important: The summary will depend on the order of the supplied documents! """ + # save init parameters to enable export of component config as YAML + self.set_config( + model_name_or_path=model_name_or_path, model_version=model_version, tokenizer=tokenizer, + max_length=max_length, min_length=min_length, use_gpu=use_gpu, + clean_up_tokenization_spaces=clean_up_tokenization_spaces, + separator_for_single_summary=separator_for_single_summary, generate_single_summary=generate_single_summary, + ) + # TODO AutoModelForSeq2SeqLM is only necessary with transformers==4.1.1, with newer versions use the pipeline directly if tokenizer is None: tokenizer = model_name_or_path diff --git a/haystack/translator/base.py b/haystack/translator/base.py index d6c75efce..f284e7605 100644 --- a/haystack/translator/base.py +++ b/haystack/translator/base.py @@ -1,10 +1,10 @@ -from abc import ABC, abstractmethod +from abc import abstractmethod from typing import Any, Dict, List, Mapping, Optional, Union -from haystack import Document +from haystack import Document, BaseComponent -class BaseTranslator(ABC): +class BaseTranslator(BaseComponent): """ Abstract class for a Translator component that translates either a query or a doc from language A to language B. 
""" @@ -24,7 +24,7 @@ class BaseTranslator(ABC): """ pass - def run( + def run( # type: ignore self, query: Optional[str] = None, documents: Optional[Union[List[Document], List[str], List[Dict[str, Any]]]] = None, diff --git a/haystack/translator/transformers.py b/haystack/translator/transformers.py index 1ba88f16b..006fd136a 100644 --- a/haystack/translator/transformers.py +++ b/haystack/translator/transformers.py @@ -56,6 +56,12 @@ class TransformersTranslator(BaseTranslator): :param clean_up_tokenization_spaces: Whether or not to clean up the tokenization spaces. (default True) """ + # save init parameters to enable export of component config as YAML + self.set_config( + model_name_or_path=model_name_or_path, tokenizer_name=tokenizer_name, max_seq_len=max_seq_len, + clean_up_tokenization_spaces=clean_up_tokenization_spaces, + ) + self.max_seq_len = max_seq_len self.clean_up_tokenization_spaces = clean_up_tokenization_spaces tokenizer_name = tokenizer_name or model_name_or_path diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 8a8d44634..576a2e42e 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -10,10 +10,9 @@ from haystack.retriever.sparse import ElasticsearchRetriever @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True) -def test_load_yaml(document_store_with_docs): +def test_load_and_save_yaml(document_store_with_docs, tmp_path): # test correct load of indexing pipeline from yaml - pipeline = Pipeline.load_from_yaml(Path("samples/pipeline/test_pipeline.yaml"), - pipeline_name="indexing_pipeline") + pipeline = Pipeline.load_from_yaml(Path("samples/pipeline/test_pipeline.yaml"), pipeline_name="indexing_pipeline") pipeline.run(file_path=Path("samples/pdf/sample_pdf_1.pdf"), top_k_retriever=10, top_k_reader=3) # test correct load of query pipeline from yaml @@ -26,6 +25,40 @@ def test_load_yaml(document_store_with_docs): with pytest.raises(Exception): Pipeline.load_from_yaml(path=Path("samples/pipeline/test_pipeline.yaml"), pipeline_name="invalid") + # test config export + pipeline.save_to_yaml(tmp_path / "test.yaml") + with open(tmp_path/"test.yaml", "r", encoding='utf-8') as stream: + saved_yaml = stream.read() + expected_yaml = ''' + components: + - name: ESRetriever + params: + document_store: ElasticsearchDocumentStore + type: ElasticsearchRetriever + - name: ElasticsearchDocumentStore + params: + index: haystack_test_document + label_index: haystack_test_label + type: ElasticsearchDocumentStore + - name: Reader + params: + model_name_or_path: deepset/roberta-base-squad2 + no_ans_boost: -10 + type: FARMReader + pipelines: + - name: query + nodes: + - inputs: + - Query + name: ESRetriever + - inputs: + - ESRetriever + name: Reader + type: Query + version: '0.8' + ''' + assert saved_yaml.replace(" ", "").replace("\n", "") == expected_yaml.replace(" ", "").replace("\n", "") + @pytest.mark.slow @pytest.mark.elasticsearch