Fix building Pipeline with YAML (#800)

2025-12-30 00:30:09 +00:00 · 2021-02-04 11:53:51 +01:00 · 2021-02-04 11:53:51 +01:00 · 7b18e324f2
commit 7b18e324f2
parent f3a3b73d9b
11 changed files with 22 additions and 18 deletions
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -344,7 +344,7 @@ Return a summary of the documents in the document store
 #### update\_embeddings

 ```python
- | update_embeddings(retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000)
+ | update_embeddings(retriever, index: Optional[str] = None, batch_size: int = 10_000)
 ```

 Updates the embeddings in the the document store using the encoding model specified in the retriever.
--- a/haystack/document_store/__init__.py
+++ b/haystack/document_store/__init__.py
@@ -0,0 +1,5 @@
+from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
+from haystack.document_store.faiss import FAISSDocumentStore
+from haystack.document_store.memory import InMemoryDocumentStore
+from haystack.document_store.milvus import MilvusDocumentStore
+from haystack.document_store.sql import SQLDocumentStore
--- a/haystack/document_store/elasticsearch.py
+++ b/haystack/document_store/elasticsearch.py
@@ -12,7 +12,6 @@ from scipy.special import expit

 from haystack.document_store.base import BaseDocumentStore
 from haystack import Document, Label
-from haystack.retriever.base import BaseRetriever
 from haystack.utils import get_batches_from_generator

 logger = logging.getLogger(__name__)
@@ -755,7 +754,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
                 }
        return stats

-    def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000):
+    def update_embeddings(self, retriever, index: Optional[str] = None, batch_size: int = 10_000):
        """
        Updates the embeddings in the the document store using the encoding model specified in the retriever.
        This can be useful if want to add or change the embeddings for your documents (e.g. after changing the retriever config).
--- a/haystack/file_converter/__init__.py
+++ b/haystack/file_converter/__init__.py
@@ -0,0 +1,4 @@
+from haystack.file_converter.docx import DocxToTextConverter
+from haystack.file_converter.pdf import PDFToTextConverter
+from haystack.file_converter.tika import TikaConverter
+from haystack.file_converter.txt import TextConverter
--- a/haystack/generator/__init__.py
+++ b/haystack/generator/__init__.py
@@ -0,0 +1 @@
+from haystack.generator.transformers import RAGenerator
--- a/haystack/pipeline.py
+++ b/haystack/pipeline.py
@@ -1,7 +1,7 @@
 import os
 from copy import deepcopy
 from pathlib import Path
-from typing import List, Optional, Dict, Type
+from typing import List, Optional, Dict

 import networkx as nx
 import yaml
@@ -9,7 +9,6 @@ from networkx import DiGraph
 from networkx.drawing.nx_agraph import to_agraph

 from haystack import BaseComponent
-from haystack.document_store.base import BaseDocumentStore
 from haystack.generator.base import BaseGenerator
 from haystack.reader.base import BaseReader
 from haystack.retriever.base import BaseRetriever
@@ -240,19 +239,7 @@ class Pipeline:
                    cls._load_or_get_component(name=value, definitions=definitions, components=components)
                component_params[key] = components[value]  # substitute reference (string) with the component object.

-        if "DocumentStore" in component_type:
-            ComponentClass: Type[BaseComponent] = BaseDocumentStore
-        elif "Reader" in component_type:
-            ComponentClass = BaseReader
-        elif "Retriever" in component_type:
-            ComponentClass = BaseRetriever
-        elif "Generator" in component_type:
-            ComponentClass = BaseGenerator
-        elif "Summarizer" in component_type:
-            ComponentClass = BaseSummarizer
-        else:
-            raise NotImplementedError(f"Component of type '{component_type}' is not implemented for pipelines.")
-        instance = ComponentClass.load_from_args(component_type=component_type, **component_params)
+        instance = BaseComponent.load_from_args(component_type=component_type, **component_params)
        components[name] = instance
        return instance

--- a/haystack/preprocessor/__init__.py
+++ b/haystack/preprocessor/__init__.py
@@ -0,0 +1 @@
+from haystack.preprocessor.preprocessor import PreProcessor
--- a/haystack/reader/__init__.py
+++ b/haystack/reader/__init__.py
@@ -0,0 +1,2 @@
+from haystack.reader.farm import FARMReader
+from haystack.reader.transformers import TransformersReader
--- a/haystack/retriever/__init__.py
+++ b/haystack/retriever/__init__.py
@@ -0,0 +1,2 @@
+from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever
+from haystack.retriever.sparse import ElasticsearchRetriever
--- a/haystack/schema.py
+++ b/haystack/schema.py
@@ -233,5 +233,7 @@ class BaseComponent:
        :param component_type: name of the component class to load.
        :param kwargs: parameters to pass to the __init__() for the component. 
        """
+        if component_type not in cls.subclasses.keys():
+            raise Exception(f"Haystack component with the name '{component_type}' does not exist.")
        instance = cls.subclasses[component_type](**kwargs)
        return instance
--- a/haystack/summarizer/__init__.py
+++ b/haystack/summarizer/__init__.py
@@ -0,0 +1 @@
+from haystack.summarizer.transformers import TransformersSummarizer
				`@ -0,0 +1 @@`
				`from haystack.generator.transformers import RAGenerator`
				`@ -0,0 +1 @@`
				`from haystack.preprocessor.preprocessor import PreProcessor`
				`@ -0,0 +1 @@`
				`from haystack.summarizer.transformers import TransformersSummarizer`