Update indexing and rag default templates to use InMemoryDocumentStore (#7782)

This commit is contained in:
Silvano Cerza 2024-06-04 12:57:33 +02:00 committed by GitHub
parent 55a657ba81
commit fd838fc573
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 24 additions and 23 deletions

View File

@ -45,10 +45,12 @@ components:
init_parameters: init_parameters:
document_store: document_store:
init_parameters: init_parameters:
collection_name: documents bm25_tokenization_regex: (?u)\b\w\w+\b
embedding_function: default bm25_algorithm: BM25L
persist_path: . bm25_parameters: {}
type: haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore embedding_similarity_function: dot_product
index: documents
type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore
policy: NONE policy: NONE
type: haystack.components.writers.document_writer.DocumentWriter type: haystack.components.writers.document_writer.DocumentWriter

View File

@ -34,13 +34,15 @@ components:
init_parameters: init_parameters:
document_store: document_store:
init_parameters: init_parameters:
collection_name: documents bm25_tokenization_regex: (?u)\b\w\w+\b
embedding_function: default bm25_algorithm: BM25L
persist_path: . bm25_parameters: {}
type: haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore embedding_similarity_function: dot_product
index: documents
type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore
filters: null filters: null
top_k: 10 top_k: 10
type: haystack_integrations.components.retrievers.chroma.retriever.ChromaEmbeddingRetriever type: haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever
text_embedder: text_embedder:
init_parameters: init_parameters:

View File

@ -117,7 +117,6 @@ extra-dependencies = [
"langdetect", # TextLanguageRouter and DocumentLanguageClassifier "langdetect", # TextLanguageRouter and DocumentLanguageClassifier
"sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
"openai-whisper>=20231106", # LocalWhisperTranscriber "openai-whisper>=20231106", # LocalWhisperTranscriber
"chroma-haystack", # pipeline predefined templates
# OpenAPI # OpenAPI
"jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions

View File

@ -595,9 +595,8 @@ class TestPipeline:
def test_from_template(self, monkeypatch): def test_from_template(self, monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "fake_key") monkeypatch.setenv("OPENAI_API_KEY", "fake_key")
with patch("haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore"): pipe = Pipeline.from_template(PredefinedPipeline.INDEXING)
pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) assert pipe.get_component("cleaner")
assert pipe.get_component("cleaner")
def test_walk_pipeline_with_no_cycles(self): def test_walk_pipeline_with_no_cycles(self):
""" """

View File

@ -50,15 +50,14 @@ class TestPipelineTemplate:
# Building a pipeline directly using all default components specified in a predefined or custom template. # Building a pipeline directly using all default components specified in a predefined or custom template.
def test_build_pipeline_with_default_components(self, monkeypatch): def test_build_pipeline_with_default_components(self, monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "fake_key") monkeypatch.setenv("OPENAI_API_KEY", "fake_key")
with mock.patch("haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore"): rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render()
rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render() pipeline = Pipeline.loads(rendered)
pipeline = Pipeline.loads(rendered)
# pipeline has components # pipeline has components
assert pipeline.get_component("cleaner") assert pipeline.get_component("cleaner")
assert pipeline.get_component("writer") assert pipeline.get_component("writer")
assert pipeline.get_component("embedder") assert pipeline.get_component("embedder")
# pipeline should have inputs and outputs # pipeline should have inputs and outputs
assert len(pipeline.inputs()) > 0 assert len(pipeline.inputs()) > 0
assert len(pipeline.outputs()) > 0 assert len(pipeline.outputs()) > 0