From 7b18e324f22eced9bacedbcedbebb9f92f17a331 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Thu, 4 Feb 2021 11:53:51 +0100 Subject: [PATCH] Fix building Pipeline with YAML (#800) --- docs/_src/api/api/document_store.md | 2 +- haystack/document_store/__init__.py | 5 +++++ haystack/document_store/elasticsearch.py | 3 +-- haystack/file_converter/__init__.py | 4 ++++ haystack/generator/__init__.py | 1 + haystack/pipeline.py | 17 ++--------------- haystack/preprocessor/__init__.py | 1 + haystack/reader/__init__.py | 2 ++ haystack/retriever/__init__.py | 2 ++ haystack/schema.py | 2 ++ haystack/summarizer/__init__.py | 1 + 11 files changed, 22 insertions(+), 18 deletions(-) diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md index 56e85e104..7c20dabc9 100644 --- a/docs/_src/api/api/document_store.md +++ b/docs/_src/api/api/document_store.md @@ -344,7 +344,7 @@ Return a summary of the documents in the document store #### update\_embeddings ```python - | update_embeddings(retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000) + | update_embeddings(retriever, index: Optional[str] = None, batch_size: int = 10_000) ``` Updates the embeddings in the the document store using the encoding model specified in the retriever. diff --git a/haystack/document_store/__init__.py b/haystack/document_store/__init__.py index e69de29bb..67d2c49fd 100644 --- a/haystack/document_store/__init__.py +++ b/haystack/document_store/__init__.py @@ -0,0 +1,5 @@ +from haystack.document_store.elasticsearch import ElasticsearchDocumentStore +from haystack.document_store.faiss import FAISSDocumentStore +from haystack.document_store.memory import InMemoryDocumentStore +from haystack.document_store.milvus import MilvusDocumentStore +from haystack.document_store.sql import SQLDocumentStore diff --git a/haystack/document_store/elasticsearch.py b/haystack/document_store/elasticsearch.py index 2cb2fd3f8..998ed6309 100644 --- a/haystack/document_store/elasticsearch.py +++ b/haystack/document_store/elasticsearch.py @@ -12,7 +12,6 @@ from scipy.special import expit from haystack.document_store.base import BaseDocumentStore from haystack import Document, Label -from haystack.retriever.base import BaseRetriever from haystack.utils import get_batches_from_generator logger = logging.getLogger(__name__) @@ -755,7 +754,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore): } return stats - def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000): + def update_embeddings(self, retriever, index: Optional[str] = None, batch_size: int = 10_000): """ Updates the embeddings in the the document store using the encoding model specified in the retriever. This can be useful if want to add or change the embeddings for your documents (e.g. after changing the retriever config). diff --git a/haystack/file_converter/__init__.py b/haystack/file_converter/__init__.py index e69de29bb..d55c5b39c 100644 --- a/haystack/file_converter/__init__.py +++ b/haystack/file_converter/__init__.py @@ -0,0 +1,4 @@ +from haystack.file_converter.docx import DocxToTextConverter +from haystack.file_converter.pdf import PDFToTextConverter +from haystack.file_converter.tika import TikaConverter +from haystack.file_converter.txt import TextConverter diff --git a/haystack/generator/__init__.py b/haystack/generator/__init__.py index e69de29bb..3a265b99a 100644 --- a/haystack/generator/__init__.py +++ b/haystack/generator/__init__.py @@ -0,0 +1 @@ +from haystack.generator.transformers import RAGenerator diff --git a/haystack/pipeline.py b/haystack/pipeline.py index 975395a61..d2ce30e65 100644 --- a/haystack/pipeline.py +++ b/haystack/pipeline.py @@ -1,7 +1,7 @@ import os from copy import deepcopy from pathlib import Path -from typing import List, Optional, Dict, Type +from typing import List, Optional, Dict import networkx as nx import yaml @@ -9,7 +9,6 @@ from networkx import DiGraph from networkx.drawing.nx_agraph import to_agraph from haystack import BaseComponent -from haystack.document_store.base import BaseDocumentStore from haystack.generator.base import BaseGenerator from haystack.reader.base import BaseReader from haystack.retriever.base import BaseRetriever @@ -240,19 +239,7 @@ class Pipeline: cls._load_or_get_component(name=value, definitions=definitions, components=components) component_params[key] = components[value] # substitute reference (string) with the component object. - if "DocumentStore" in component_type: - ComponentClass: Type[BaseComponent] = BaseDocumentStore - elif "Reader" in component_type: - ComponentClass = BaseReader - elif "Retriever" in component_type: - ComponentClass = BaseRetriever - elif "Generator" in component_type: - ComponentClass = BaseGenerator - elif "Summarizer" in component_type: - ComponentClass = BaseSummarizer - else: - raise NotImplementedError(f"Component of type '{component_type}' is not implemented for pipelines.") - instance = ComponentClass.load_from_args(component_type=component_type, **component_params) + instance = BaseComponent.load_from_args(component_type=component_type, **component_params) components[name] = instance return instance diff --git a/haystack/preprocessor/__init__.py b/haystack/preprocessor/__init__.py index e69de29bb..6b9310f7a 100644 --- a/haystack/preprocessor/__init__.py +++ b/haystack/preprocessor/__init__.py @@ -0,0 +1 @@ +from haystack.preprocessor.preprocessor import PreProcessor diff --git a/haystack/reader/__init__.py b/haystack/reader/__init__.py index e69de29bb..4b0800ef1 100644 --- a/haystack/reader/__init__.py +++ b/haystack/reader/__init__.py @@ -0,0 +1,2 @@ +from haystack.reader.farm import FARMReader +from haystack.reader.transformers import TransformersReader diff --git a/haystack/retriever/__init__.py b/haystack/retriever/__init__.py index e69de29bb..962775758 100644 --- a/haystack/retriever/__init__.py +++ b/haystack/retriever/__init__.py @@ -0,0 +1,2 @@ +from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever +from haystack.retriever.sparse import ElasticsearchRetriever diff --git a/haystack/schema.py b/haystack/schema.py index 7a4062026..8297f8ac4 100644 --- a/haystack/schema.py +++ b/haystack/schema.py @@ -233,5 +233,7 @@ class BaseComponent: :param component_type: name of the component class to load. :param kwargs: parameters to pass to the __init__() for the component. """ + if component_type not in cls.subclasses.keys(): + raise Exception(f"Haystack component with the name '{component_type}' does not exist.") instance = cls.subclasses[component_type](**kwargs) return instance diff --git a/haystack/summarizer/__init__.py b/haystack/summarizer/__init__.py index e69de29bb..139c067fe 100644 --- a/haystack/summarizer/__init__.py +++ b/haystack/summarizer/__init__.py @@ -0,0 +1 @@ +from haystack.summarizer.transformers import TransformersSummarizer