Fix building Pipeline with YAML (#800)

This commit is contained in:
Tanay Soni 2021-02-04 11:53:51 +01:00 committed by GitHub
parent f3a3b73d9b
commit 7b18e324f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 22 additions and 18 deletions

View File

@ -344,7 +344,7 @@ Return a summary of the documents in the document store
#### update\_embeddings
```python
| update_embeddings(retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000)
| update_embeddings(retriever, index: Optional[str] = None, batch_size: int = 10_000)
```
Updates the embeddings in the document store using the encoding model specified in the retriever.

View File

@ -0,0 +1,5 @@
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.document_store.faiss import FAISSDocumentStore
from haystack.document_store.memory import InMemoryDocumentStore
from haystack.document_store.milvus import MilvusDocumentStore
from haystack.document_store.sql import SQLDocumentStore

View File

@ -12,7 +12,6 @@ from scipy.special import expit
from haystack.document_store.base import BaseDocumentStore
from haystack import Document, Label
from haystack.retriever.base import BaseRetriever
from haystack.utils import get_batches_from_generator
logger = logging.getLogger(__name__)
@ -755,7 +754,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
}
return stats
def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000):
def update_embeddings(self, retriever, index: Optional[str] = None, batch_size: int = 10_000):
"""
Updates the embeddings in the document store using the encoding model specified in the retriever.
This can be useful if you want to add or change the embeddings for your documents (e.g. after changing the retriever config).

View File

@ -0,0 +1,4 @@
from haystack.file_converter.docx import DocxToTextConverter
from haystack.file_converter.pdf import PDFToTextConverter
from haystack.file_converter.tika import TikaConverter
from haystack.file_converter.txt import TextConverter

View File

@ -0,0 +1 @@
from haystack.generator.transformers import RAGenerator

View File

@ -1,7 +1,7 @@
import os
from copy import deepcopy
from pathlib import Path
from typing import List, Optional, Dict, Type
from typing import List, Optional, Dict
import networkx as nx
import yaml
@ -9,7 +9,6 @@ from networkx import DiGraph
from networkx.drawing.nx_agraph import to_agraph
from haystack import BaseComponent
from haystack.document_store.base import BaseDocumentStore
from haystack.generator.base import BaseGenerator
from haystack.reader.base import BaseReader
from haystack.retriever.base import BaseRetriever
@ -240,19 +239,7 @@ class Pipeline:
cls._load_or_get_component(name=value, definitions=definitions, components=components)
component_params[key] = components[value] # substitute reference (string) with the component object.
if "DocumentStore" in component_type:
ComponentClass: Type[BaseComponent] = BaseDocumentStore
elif "Reader" in component_type:
ComponentClass = BaseReader
elif "Retriever" in component_type:
ComponentClass = BaseRetriever
elif "Generator" in component_type:
ComponentClass = BaseGenerator
elif "Summarizer" in component_type:
ComponentClass = BaseSummarizer
else:
raise NotImplementedError(f"Component of type '{component_type}' is not implemented for pipelines.")
instance = ComponentClass.load_from_args(component_type=component_type, **component_params)
instance = BaseComponent.load_from_args(component_type=component_type, **component_params)
components[name] = instance
return instance

View File

@ -0,0 +1 @@
from haystack.preprocessor.preprocessor import PreProcessor

View File

@ -0,0 +1,2 @@
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader

View File

@ -0,0 +1,2 @@
from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever
from haystack.retriever.sparse import ElasticsearchRetriever

View File

@ -233,5 +233,7 @@ class BaseComponent:
:param component_type: name of the component class to load.
:param kwargs: parameters to pass to the __init__() for the component.
"""
if component_type not in cls.subclasses.keys():
raise Exception(f"Haystack component with the name '{component_type}' does not exist.")
instance = cls.subclasses[component_type](**kwargs)
return instance

View File

@ -0,0 +1 @@
from haystack.summarizer.transformers import TransformersSummarizer