From fd838fc573258177c9546eb616ee873d55b4a183 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Tue, 4 Jun 2024 12:57:33 +0200 Subject: [PATCH] Update indexing and rag default templates to use InMemoryDocumentStore (#7782) --- .../pipeline/predefined/indexing.yaml.jinja2 | 10 ++++++---- .../core/pipeline/predefined/rag.yaml.jinja2 | 12 +++++++----- pyproject.toml | 1 - test/core/pipeline/test_pipeline.py | 5 ++--- test/core/pipeline/test_templates.py | 19 +++++++++---------- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/haystack/core/pipeline/predefined/indexing.yaml.jinja2 b/haystack/core/pipeline/predefined/indexing.yaml.jinja2 index 5f46efaea..69f500845 100644 --- a/haystack/core/pipeline/predefined/indexing.yaml.jinja2 +++ b/haystack/core/pipeline/predefined/indexing.yaml.jinja2 @@ -45,10 +45,12 @@ components: init_parameters: document_store: init_parameters: - collection_name: documents - embedding_function: default - persist_path: . - type: haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore + bm25_tokenization_regex: (?u)\b\w\w+\b + bm25_algorithm: BM25L + bm25_parameters: {} + embedding_similarity_function: dot_product + index: documents + type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore policy: NONE type: haystack.components.writers.document_writer.DocumentWriter diff --git a/haystack/core/pipeline/predefined/rag.yaml.jinja2 b/haystack/core/pipeline/predefined/rag.yaml.jinja2 index 979829619..97b9ae76d 100644 --- a/haystack/core/pipeline/predefined/rag.yaml.jinja2 +++ b/haystack/core/pipeline/predefined/rag.yaml.jinja2 @@ -34,13 +34,15 @@ components: init_parameters: document_store: init_parameters: - collection_name: documents - embedding_function: default - persist_path: . - type: haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore + bm25_tokenization_regex: (?u)\b\w\w+\b + bm25_algorithm: BM25L + bm25_parameters: {} + embedding_similarity_function: dot_product + index: documents + type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore filters: null top_k: 10 - type: haystack_integrations.components.retrievers.chroma.retriever.ChromaEmbeddingRetriever + type: haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever text_embedder: init_parameters: diff --git a/pyproject.toml b/pyproject.toml index 2b3054011..2e17bf5c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,6 @@ extra-dependencies = [ "langdetect", # TextLanguageRouter and DocumentLanguageClassifier "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder "openai-whisper>=20231106", # LocalWhisperTranscriber - "chroma-haystack", # pipeline predefined templates # OpenAPI "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions diff --git a/test/core/pipeline/test_pipeline.py b/test/core/pipeline/test_pipeline.py index 648b0cf26..de7a2c78a 100644 --- a/test/core/pipeline/test_pipeline.py +++ b/test/core/pipeline/test_pipeline.py @@ -595,9 +595,8 @@ class TestPipeline: def test_from_template(self, monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "fake_key") - with patch("haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore"): - pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) - assert pipe.get_component("cleaner") + pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) + assert pipe.get_component("cleaner") def test_walk_pipeline_with_no_cycles(self): """ diff --git a/test/core/pipeline/test_templates.py b/test/core/pipeline/test_templates.py index d1157de47..5e54c241b 100644 --- a/test/core/pipeline/test_templates.py +++ b/test/core/pipeline/test_templates.py @@ -50,15 +50,14 @@ class TestPipelineTemplate: # Building a pipeline directly using all default components specified in a predefined or custom template. def test_build_pipeline_with_default_components(self, monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "fake_key") - with mock.patch("haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore"): - rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render() - pipeline = Pipeline.loads(rendered) + rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render() + pipeline = Pipeline.loads(rendered) - # pipeline has components - assert pipeline.get_component("cleaner") - assert pipeline.get_component("writer") - assert pipeline.get_component("embedder") + # pipeline has components + assert pipeline.get_component("cleaner") + assert pipeline.get_component("writer") + assert pipeline.get_component("embedder") - # pipeline should have inputs and outputs - assert len(pipeline.inputs()) > 0 - assert len(pipeline.outputs()) > 0 + # pipeline should have inputs and outputs + assert len(pipeline.inputs()) > 0 + assert len(pipeline.outputs()) > 0