Add support for Dense Retrievers in REST API Indexing Pipeline (#1430)

commit 1f859694f1 (parent 9dd7c74f4f)
@@ -17,10 +17,24 @@ logger = logging.getLogger(__name__)
 
 router = APIRouter()
 
 try:
-    INDEXING_PIPELINE = Pipeline.load_from_yaml(Path(PIPELINE_YAML_PATH), pipeline_name=INDEXING_PIPELINE_NAME)
+    _, pipeline_config, definitions = Pipeline._read_yaml(
+        path=Path(PIPELINE_YAML_PATH), pipeline_name=INDEXING_PIPELINE_NAME, overwrite_with_env_variables=True
+    )
+    # Since each instance of FAISSDocumentStore creates an in-memory FAISS index, the Indexing & Query Pipelines would
+    # end up with different indices. The check below prevents creation of Indexing Pipelines with FAISSDocumentStore.
+    is_faiss_present = False
+    for node in pipeline_config["nodes"]:
+        if definitions[node["name"]]["type"] == "FAISSDocumentStore":
+            is_faiss_present = True
+            break
+    if is_faiss_present:
+        logger.warning("Indexing Pipeline with FAISSDocumentStore is not supported with the REST APIs.")
+        INDEXING_PIPELINE = None
+    else:
+        INDEXING_PIPELINE = Pipeline.load_from_yaml(Path(PIPELINE_YAML_PATH), pipeline_name=INDEXING_PIPELINE_NAME)
 except KeyError:
     INDEXING_PIPELINE = None
-    logger.info("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.")
+    logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.")
 
 os.makedirs(FILE_UPLOAD_PATH, exist_ok=True)  # create directory for uploading files
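To see the guard in isolation, here is a minimal, self-contained sketch of the same check (not part of the commit). It assumes, as the lookups in the diff imply, that definitions maps component names to their YAML entries and pipeline_config is the selected pipeline's entry; the inline YAML is a made-up example in the same format as the configs below.

# Stand-alone sketch of the FAISS check above.
# Assumption: `definitions` is keyed by component name and `pipeline_config`
# is the entry of the chosen pipeline, as definitions[node["name"]]["type"]
# and pipeline_config["nodes"] in the diff suggest.
import yaml

PIPELINE_YAML = """
components:
  - name: DocumentStore
    type: FAISSDocumentStore
  - name: Retriever
    type: DensePassageRetriever
    params:
      document_store: DocumentStore
pipelines:
  - name: indexing
    type: Indexing
    nodes:
      - name: Retriever
        inputs: [File]
      - name: DocumentStore
        inputs: [Retriever]
"""

config = yaml.safe_load(PIPELINE_YAML)
definitions = {c["name"]: c for c in config["components"]}
pipeline_config = next(p for p in config["pipelines"] if p["name"] == "indexing")

is_faiss_present = any(
    definitions[node["name"]]["type"] == "FAISSDocumentStore"
    for node in pipeline_config["nodes"]
)
print(is_faiss_present)  # True -> the REST API would skip loading this Indexing Pipeline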
@@ -1,14 +1,14 @@
-version: '0.7'
+version: '0.9'
 
 components:    # define all the building-blocks for Pipeline
-  - name: ElasticsearchDocumentStore
+  - name: DocumentStore
     type: ElasticsearchDocumentStore
     params:
       host: localhost
-  - name: ESRetriever
+  - name: Retriever
     type: ElasticsearchRetriever
     params:
-      document_store: ElasticsearchDocumentStore    # params can reference other components defined in the YAML
+      document_store: DocumentStore    # params can reference other components defined in the YAML
       top_k: 5
   - name: Reader    # custom-name for the component; helpful for visualization & debugging
     type: FARMReader    # Haystack Class name for the component
@@ -30,11 +30,10 @@ pipelines:
   - name: query    # a sample extractive-qa Pipeline
     type: Query
     nodes:
-      - name: ESRetriever
+      - name: Retriever
         inputs: [Query]
       - name: Reader
-        inputs: [ESRetriever]
-
+        inputs: [Retriever]
   - name: indexing
     type: Indexing
     nodes:
@@ -46,5 +45,7 @@ pipelines:
         inputs: [FileTypeClassifier.output_2]
       - name: Preprocessor
         inputs: [PDFFileConverter, TextFileConverter]
-      - name: ElasticsearchDocumentStore
+      - name: Retriever
         inputs: [Preprocessor]
+      - name: DocumentStore
+        inputs: [Retriever]
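The renamed components make the YAML retriever-agnostic: swapping ElasticsearchRetriever for a dense retriever no longer requires touching the pipeline graphs, and the indexing route now ends in Retriever -> DocumentStore so a dense retriever can attach embeddings before documents are written. A rough sketch of loading both pipelines outside the REST API; the YAML path and the import path are assumptions (the path mirrors the new pipelines_dpr.yaml below, and Haystack 0.x exposes Pipeline under haystack.pipeline, while later releases move it).

# Sketch only: load the renamed pipelines directly with the same call the
# controller uses. Path and import are assumptions, not shown in this commit.
from pathlib import Path
from haystack.pipeline import Pipeline  # Haystack 0.x import path

yaml_path = Path("rest_api/pipeline/pipelines.yaml")  # assumed location of the file edited above

# Query pipeline: Query -> Retriever -> Reader
query_pipeline = Pipeline.load_from_yaml(yaml_path, pipeline_name="query")

# Indexing pipeline: File -> FileTypeClassifier -> converters -> Preprocessor -> Retriever -> DocumentStore
indexing_pipeline = Pipeline.load_from_yaml(yaml_path, pipeline_name="indexing")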
rest_api/pipeline/pipelines_dpr.yaml (new file, 51 lines)

@@ -0,0 +1,51 @@
+version: '0.9'
+
+components:    # define all the building-blocks for Pipeline
+  - name: DocumentStore
+    type: ElasticsearchDocumentStore    # consider using MilvusDocumentStore or WeaviateDocumentStore for scaling to large number of documents
+    params:
+      host: localhost
+  - name: Retriever
+    type: DensePassageRetriever
+    params:
+      document_store: DocumentStore    # params can reference other components defined in the YAML
+      top_k: 5
+  - name: Reader    # custom-name for the component; helpful for visualization & debugging
+    type: FARMReader    # Haystack Class name for the component
+    params:
+      model_name_or_path: deepset/roberta-base-squad2
+  - name: TextFileConverter
+    type: TextConverter
+  - name: PDFFileConverter
+    type: PDFToTextConverter
+  - name: Preprocessor
+    type: PreProcessor
+    params:
+      split_by: word
+      split_length: 1000
+  - name: FileTypeClassifier
+    type: FileTypeClassifier
+
+pipelines:
+  - name: query    # a sample extractive-qa Pipeline
+    type: Query
+    nodes:
+      - name: Retriever
+        inputs: [Query]
+      - name: Reader
+        inputs: [Retriever]
+  - name: indexing
+    type: Indexing
+    nodes:
+      - name: FileTypeClassifier
+        inputs: [File]
+      - name: TextFileConverter
+        inputs: [FileTypeClassifier.output_1]
+      - name: PDFFileConverter
+        inputs: [FileTypeClassifier.output_2]
+      - name: Preprocessor
+        inputs: [PDFFileConverter, TextFileConverter]
+      - name: Retriever
+        inputs: [Preprocessor]
+      - name: DocumentStore
+        inputs: [Retriever]
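To serve this DPR configuration, the REST API only needs to be pointed at the file: the controller diff above reads the config through PIPELINE_YAML_PATH and INDEXING_PIPELINE_NAME. Whether those settings can be overridden via environment variables of the same name is an assumption about the repo's config module, so treat the snippet below as a sketch rather than the documented switch.

# Hypothetical switch to the DPR config before starting the REST API process.
# PIPELINE_YAML_PATH / INDEXING_PIPELINE_NAME are the settings referenced in the
# controller diff; reading them from identically named environment variables is
# an assumption, not something this commit shows.
import os

os.environ["PIPELINE_YAML_PATH"] = "rest_api/pipeline/pipelines_dpr.yaml"
os.environ["INDEXING_PIPELINE_NAME"] = "indexing"
# Start the API process as usual afterwards; uploaded files will now pass through
# the DensePassageRetriever node, so embeddings are computed before documents
# reach the DocumentStore.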