From e1ec4e5e4d5978a32f242fe0fe3eed6691379e34 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Wed, 10 Jan 2024 21:20:42 +0100 Subject: [PATCH] refact!: Remove symbols under the `haystack.document_stores` namespace (#6714) * remove symbols under the haystack.document_stores namespace * Update haystack/document_stores/types/protocol.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> * fix * same for retrievers * leftovers * more leftovers * add relnote * leftovers * one more * fix examples --------- Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- docs/pydoc/config/retriever.yml | 4 ++-- e2e/pipelines/test_dense_doc_search.py | 4 ++-- e2e/pipelines/test_eval_dense_doc_search.py | 4 ++-- .../test_eval_extractive_qa_pipeline.py | 4 ++-- .../test_eval_hybrid_doc_search_pipeline.py | 4 ++-- e2e/pipelines/test_eval_rag_pipelines.py | 4 ++-- e2e/pipelines/test_extractive_qa_pipeline.py | 4 ++-- .../test_hybrid_doc_search_pipeline.py | 4 ++-- e2e/pipelines/test_preprocessing_pipeline.py | 2 +- e2e/pipelines/test_rag_pipelines.py | 4 ++-- examples/getting_started/rag.py | 2 +- examples/getting_started/rag_custom_data.py | 2 +- examples/pipelines/indexing_pipeline.py | 2 +- .../pipelines/indexing_pipeline_with_meta.py | 5 +++-- examples/pipelines/rag_pipeline.py | 4 ++-- examples/rag/rag_self_correction.py | 8 ++++---- .../retrievers/in_memory_bm25_documentsearch.py | 4 ++-- examples/retrievers/in_memory_bm25_rag.py | 4 ++-- haystack/components/caching/cache_checker.py | 2 +- haystack/components/retrievers/__init__.py | 4 ---- .../components/retrievers/in_memory/__init__.py | 4 ++++ .../bm25_retriever.py} | 2 +- .../embedding_retriever.py} | 2 +- haystack/components/writers/document_writer.py | 2 +- haystack/document_stores/__init__.py | 12 ------------ haystack/document_stores/errors/__init__.py | 3 +++ haystack/document_stores/{ => errors}/errors.py | 0 .../document_stores/in_memory/document_store.py | 2 
+- haystack/document_stores/types/__init__.py | 4 ++++ haystack/document_stores/types/policy.py | 8 ++++++++ .../document_stores/{ => types}/protocol.py | 9 +-------- haystack/pipeline_utils/indexing.py | 2 +- haystack/pipeline_utils/rag.py | 5 +++-- haystack/testing/document_store.py | 2 +- haystack/testing/factory.py | 2 +- ...cument-store-namespace-33f4433a2b121efc.yaml | 17 +++++++++++++++++ .../retrievers/test_in_memory_bm25_retriever.py | 12 ++++++------ .../test_in_memory_embedding_retriever.py | 16 ++++++++-------- test/components/writers/test_document_writer.py | 2 +- test/document_stores/test_in_memory.py | 5 ++--- test/pipelines/test_indexing_pipeline.py | 2 +- test/pipelines/test_rag_pipelines.py | 2 +- 42 files changed, 102 insertions(+), 88 deletions(-) create mode 100644 haystack/components/retrievers/in_memory/__init__.py rename haystack/components/retrievers/{in_memory_bm25_retriever.py => in_memory/bm25_retriever.py} (98%) rename haystack/components/retrievers/{in_memory_embedding_retriever.py => in_memory/embedding_retriever.py} (98%) create mode 100644 haystack/document_stores/errors/__init__.py rename haystack/document_stores/{ => errors}/errors.py (100%) create mode 100644 haystack/document_stores/types/__init__.py create mode 100644 haystack/document_stores/types/policy.py rename haystack/document_stores/{ => types}/protocol.py (97%) create mode 100644 releasenotes/notes/refactor-document-store-namespace-33f4433a2b121efc.yaml diff --git a/docs/pydoc/config/retriever.yml b/docs/pydoc/config/retriever.yml index 1a6f04431..d8c47ab44 100644 --- a/docs/pydoc/config/retriever.yml +++ b/docs/pydoc/config/retriever.yml @@ -1,7 +1,7 @@ loaders: - type: loaders.CustomPythonLoader - search_path: [../../../haystack/components/retrievers] - modules: ["in_memory_bm25_retriever", "in_memory_embedding_retriever"] + search_path: [../../../haystack/components/retrievers/in_memory] + modules: ["bm25_retriever", "embedding_retriever"] ignore_when_discovered: 
["__init__"] processors: - type: filter diff --git a/e2e/pipelines/test_dense_doc_search.py b/e2e/pipelines/test_dense_doc_search.py index 80aec5d27..b83722b44 100644 --- a/e2e/pipelines/test_dense_doc_search.py +++ b/e2e/pipelines/test_dense_doc_search.py @@ -7,8 +7,8 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter from haystack.components.routers import FileTypeRouter from haystack.components.joiners import DocumentJoiner from haystack.components.writers import DocumentWriter -from haystack.document_stores import InMemoryDocumentStore -from haystack.components.retrievers import InMemoryEmbeddingRetriever +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever def test_dense_doc_search_pipeline(tmp_path, samples_path): diff --git a/e2e/pipelines/test_eval_dense_doc_search.py b/e2e/pipelines/test_eval_dense_doc_search.py index c4320c103..d1631f4e2 100644 --- a/e2e/pipelines/test_eval_dense_doc_search.py +++ b/e2e/pipelines/test_eval_dense_doc_search.py @@ -2,12 +2,12 @@ from haystack import Pipeline from haystack.components.converters import PyPDFToDocument, TextFileToDocument from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter -from haystack.components.retrievers import InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever from haystack.components.routers import FileTypeRouter from haystack.components.joiners import DocumentJoiner from haystack.components.writers import DocumentWriter from haystack.dataclasses import Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.evaluation.eval import eval diff --git 
a/e2e/pipelines/test_eval_extractive_qa_pipeline.py b/e2e/pipelines/test_eval_extractive_qa_pipeline.py index c0e171af1..5b5df26b5 100644 --- a/e2e/pipelines/test_eval_extractive_qa_pipeline.py +++ b/e2e/pipelines/test_eval_extractive_qa_pipeline.py @@ -2,9 +2,9 @@ import json from haystack import Pipeline from haystack.components.readers import ExtractiveReader -from haystack.components.retrievers import InMemoryBM25Retriever +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.dataclasses import Document, ExtractedAnswer -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.evaluation.eval import eval from haystack.evaluation.metrics import Metric diff --git a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py index ed1f0b1e9..8d942583b 100644 --- a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py +++ b/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py @@ -1,9 +1,9 @@ from haystack import Document, Pipeline from haystack.components.embedders import SentenceTransformersTextEmbedder from haystack.components.rankers import TransformersSimilarityRanker -from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever from haystack.components.joiners.document_joiner import DocumentJoiner -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.evaluation.eval import eval diff --git a/e2e/pipelines/test_eval_rag_pipelines.py b/e2e/pipelines/test_eval_rag_pipelines.py index 68251d328..1a9f57f6b 100644 --- a/e2e/pipelines/test_eval_rag_pipelines.py +++ b/e2e/pipelines/test_eval_rag_pipelines.py @@ -5,10 +5,10 @@ from haystack.components.builders.answer_builder 
import AnswerBuilder from haystack.components.builders.prompt_builder import PromptBuilder from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder from haystack.components.generators import HuggingFaceLocalGenerator -from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever from haystack.components.writers import DocumentWriter from haystack.dataclasses import Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.evaluation.eval import eval from haystack.evaluation.metrics import Metric diff --git a/e2e/pipelines/test_extractive_qa_pipeline.py b/e2e/pipelines/test_extractive_qa_pipeline.py index fd101c88b..71b540d0f 100644 --- a/e2e/pipelines/test_extractive_qa_pipeline.py +++ b/e2e/pipelines/test_extractive_qa_pipeline.py @@ -1,8 +1,8 @@ import json from haystack import Pipeline, Document -from haystack.document_stores import InMemoryDocumentStore -from haystack.components.retrievers import InMemoryBM25Retriever +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.components.readers import ExtractiveReader diff --git a/e2e/pipelines/test_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_hybrid_doc_search_pipeline.py index fc6f6070e..cca572b28 100644 --- a/e2e/pipelines/test_hybrid_doc_search_pipeline.py +++ b/e2e/pipelines/test_hybrid_doc_search_pipeline.py @@ -4,8 +4,8 @@ from haystack import Pipeline, Document from haystack.components.embedders import SentenceTransformersTextEmbedder from haystack.components.rankers import TransformersSimilarityRanker from haystack.components.joiners.document_joiner import DocumentJoiner -from haystack.document_stores import 
InMemoryDocumentStore -from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever def test_hybrid_doc_search_pipeline(tmp_path): diff --git a/e2e/pipelines/test_preprocessing_pipeline.py b/e2e/pipelines/test_preprocessing_pipeline.py index ae56324ea..ebc246fb2 100644 --- a/e2e/pipelines/test_preprocessing_pipeline.py +++ b/e2e/pipelines/test_preprocessing_pipeline.py @@ -7,7 +7,7 @@ from haystack.components.preprocessors import DocumentSplitter, DocumentCleaner from haystack.components.classifiers import DocumentLanguageClassifier from haystack.components.routers import FileTypeRouter, MetadataRouter from haystack.components.writers import DocumentWriter -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore def test_preprocessing_pipeline(tmp_path): diff --git a/e2e/pipelines/test_rag_pipelines.py b/e2e/pipelines/test_rag_pipelines.py index fd804d833..2d4cb3006 100644 --- a/e2e/pipelines/test_rag_pipelines.py +++ b/e2e/pipelines/test_rag_pipelines.py @@ -3,9 +3,9 @@ import json import pytest from haystack import Pipeline, Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.components.writers import DocumentWriter -from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder from haystack.components.generators import OpenAIGenerator from haystack.components.builders.answer_builder import AnswerBuilder diff --git 
a/examples/getting_started/rag.py b/examples/getting_started/rag.py index 443a9dd25..d64ee7073 100644 --- a/examples/getting_started/rag.py +++ b/examples/getting_started/rag.py @@ -1,6 +1,6 @@ import os from haystack import Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.pipeline_utils import build_rag_pipeline API_KEY = "SET YOUR OPENAI API KEY HERE" diff --git a/examples/getting_started/rag_custom_data.py b/examples/getting_started/rag_custom_data.py index 294b45017..904a45a42 100644 --- a/examples/getting_started/rag_custom_data.py +++ b/examples/getting_started/rag_custom_data.py @@ -1,4 +1,4 @@ -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.pipeline_utils import build_rag_pipeline, build_indexing_pipeline from haystack.pipeline_utils.indexing import download_files diff --git a/examples/pipelines/indexing_pipeline.py b/examples/pipelines/indexing_pipeline.py index ba61d0270..38ed6e081 100644 --- a/examples/pipelines/indexing_pipeline.py +++ b/examples/pipelines/indexing_pipeline.py @@ -7,7 +7,7 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter from haystack.components.routers import FileTypeRouter from haystack.components.joiners import DocumentJoiner from haystack.components.writers import DocumentWriter -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore # Create components and an indexing pipeline that converts txt and pdf files to documents, cleans and splits them, and diff --git a/examples/pipelines/indexing_pipeline_with_meta.py b/examples/pipelines/indexing_pipeline_with_meta.py index 01990564d..de5cbe488 100644 --- a/examples/pipelines/indexing_pipeline_with_meta.py +++ b/examples/pipelines/indexing_pipeline_with_meta.py @@ -6,9 +6,10 @@ from 
haystack import Pipeline from haystack.components.others import Multiplexer from haystack.components.converters import PyPDFToDocument, TextFileToDocument from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter -from haystack.components.routers import FileTypeRouter, DocumentJoiner +from haystack.components.routers import FileTypeRouter +from haystack.components.joiners import DocumentJoiner from haystack.components.writers import DocumentWriter -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore document_store = InMemoryDocumentStore() diff --git a/examples/pipelines/rag_pipeline.py b/examples/pipelines/rag_pipeline.py index eebc88661..32bbabacd 100644 --- a/examples/pipelines/rag_pipeline.py +++ b/examples/pipelines/rag_pipeline.py @@ -1,7 +1,7 @@ import os from haystack import Pipeline, Document -from haystack.document_stores import InMemoryDocumentStore -from haystack.components.retrievers import InMemoryBM25Retriever +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.components.generators import OpenAIGenerator from haystack.components.builders.answer_builder import AnswerBuilder from haystack.components.builders.prompt_builder import PromptBuilder diff --git a/examples/rag/rag_self_correction.py b/examples/rag/rag_self_correction.py index 83046ec4f..d24f14bc8 100644 --- a/examples/rag/rag_self_correction.py +++ b/examples/rag/rag_self_correction.py @@ -3,14 +3,14 @@ from typing import List, Any, Optional, Dict import logging from pprint import pprint -from canals.component.types import Variadic from haystack import Pipeline, Document, component, default_to_dict, default_from_dict, DeserializationError -from haystack.document_stores import InMemoryDocumentStore -from haystack.components.retrievers import InMemoryBM25Retriever +from 
haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.components.generators import OpenAIGenerator from haystack.components.builders.prompt_builder import PromptBuilder from haystack.components.others import Multiplexer from haystack.components.routers.conditional_router import ConditionalRouter +from haystack.core.component.types import Variadic logging.getLogger().setLevel(logging.DEBUG) @@ -64,7 +64,7 @@ class PaginatedRetriever: if self.retrieved_documents is None: self.retrieved_documents = self.retriever.run( - query=query[0], filters=filters, top_k=top_k, scale_score=scale_score + query=query[0], filters=filters, top_k=top_k, scale_score=scale_score # type: ignore )["documents"] if not self.retrieved_documents: diff --git a/examples/retrievers/in_memory_bm25_documentsearch.py b/examples/retrievers/in_memory_bm25_documentsearch.py index 734e8bde3..8e88f39f0 100644 --- a/examples/retrievers/in_memory_bm25_documentsearch.py +++ b/examples/retrievers/in_memory_bm25_documentsearch.py @@ -1,6 +1,6 @@ from haystack import Document -from haystack.components.retrievers import InMemoryBM25Retriever -from haystack.document_stores import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever +from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.pipeline import Pipeline # Create components and a query pipeline diff --git a/examples/retrievers/in_memory_bm25_rag.py b/examples/retrievers/in_memory_bm25_rag.py index 64214bca5..d59bfe386 100644 --- a/examples/retrievers/in_memory_bm25_rag.py +++ b/examples/retrievers/in_memory_bm25_rag.py @@ -6,8 +6,8 @@ from haystack import Pipeline from haystack.components.builders.answer_builder import AnswerBuilder from haystack.components.builders.prompt_builder import PromptBuilder from haystack.components.generators import OpenAIGenerator -from 
haystack.components.retrievers import InMemoryBM25Retriever -from haystack.document_stores import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever +from haystack.document_stores.in_memory import InMemoryDocumentStore # Create a RAG query pipeline prompt_template = """ diff --git a/haystack/components/caching/cache_checker.py b/haystack/components/caching/cache_checker.py index 09cf18acb..3ef71e25f 100644 --- a/haystack/components/caching/cache_checker.py +++ b/haystack/components/caching/cache_checker.py @@ -5,7 +5,7 @@ import importlib import logging from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError -from haystack.document_stores import DocumentStore +from haystack.document_stores.types import DocumentStore logger = logging.getLogger(__name__) diff --git a/haystack/components/retrievers/__init__.py b/haystack/components/retrievers/__init__.py index b65de67a3..e69de29bb 100644 --- a/haystack/components/retrievers/__init__.py +++ b/haystack/components/retrievers/__init__.py @@ -1,4 +0,0 @@ -from haystack.components.retrievers.in_memory_bm25_retriever import InMemoryBM25Retriever -from haystack.components.retrievers.in_memory_embedding_retriever import InMemoryEmbeddingRetriever - -__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"] diff --git a/haystack/components/retrievers/in_memory/__init__.py b/haystack/components/retrievers/in_memory/__init__.py new file mode 100644 index 000000000..e7385414d --- /dev/null +++ b/haystack/components/retrievers/in_memory/__init__.py @@ -0,0 +1,4 @@ +from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever +from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever + +__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"] diff --git a/haystack/components/retrievers/in_memory_bm25_retriever.py 
b/haystack/components/retrievers/in_memory/bm25_retriever.py similarity index 98% rename from haystack/components/retrievers/in_memory_bm25_retriever.py rename to haystack/components/retrievers/in_memory/bm25_retriever.py index 79ced8d0b..653ad1003 100644 --- a/haystack/components/retrievers/in_memory_bm25_retriever.py +++ b/haystack/components/retrievers/in_memory/bm25_retriever.py @@ -1,7 +1,7 @@ from typing import Dict, List, Any, Optional from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore @component diff --git a/haystack/components/retrievers/in_memory_embedding_retriever.py b/haystack/components/retrievers/in_memory/embedding_retriever.py similarity index 98% rename from haystack/components/retrievers/in_memory_embedding_retriever.py rename to haystack/components/retrievers/in_memory/embedding_retriever.py index f3433703a..e8b3ff720 100644 --- a/haystack/components/retrievers/in_memory_embedding_retriever.py +++ b/haystack/components/retrievers/in_memory/embedding_retriever.py @@ -1,7 +1,7 @@ from typing import Dict, List, Any, Optional from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore @component diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index 170ee24da..2703bedc1 100644 --- a/haystack/components/writers/document_writer.py +++ b/haystack/components/writers/document_writer.py @@ -4,7 +4,7 @@ import importlib import logging from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError -from haystack.document_stores import DocumentStore, DuplicatePolicy +from haystack.document_stores.types import DocumentStore, 
DuplicatePolicy logger = logging.getLogger(__name__) diff --git a/haystack/document_stores/__init__.py b/haystack/document_stores/__init__.py index 7d5080ddb..e69de29bb 100644 --- a/haystack/document_stores/__init__.py +++ b/haystack/document_stores/__init__.py @@ -1,12 +0,0 @@ -from haystack.document_stores.protocol import DocumentStore, DuplicatePolicy -from haystack.document_stores.in_memory.document_store import InMemoryDocumentStore -from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError - -__all__ = [ - "DocumentStore", - "DuplicatePolicy", - "InMemoryDocumentStore", - "DocumentStoreError", - "DuplicateDocumentError", - "MissingDocumentError", -] diff --git a/haystack/document_stores/errors/__init__.py b/haystack/document_stores/errors/__init__.py new file mode 100644 index 000000000..6f2c83ebe --- /dev/null +++ b/haystack/document_stores/errors/__init__.py @@ -0,0 +1,3 @@ +from .errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError + +__all__ = ["DocumentStoreError", "DuplicateDocumentError", "MissingDocumentError"] diff --git a/haystack/document_stores/errors.py b/haystack/document_stores/errors/errors.py similarity index 100% rename from haystack/document_stores/errors.py rename to haystack/document_stores/errors/errors.py diff --git a/haystack/document_stores/in_memory/document_store.py b/haystack/document_stores/in_memory/document_store.py index 24b86214d..027e9c4bd 100644 --- a/haystack/document_stores/in_memory/document_store.py +++ b/haystack/document_stores/in_memory/document_store.py @@ -9,7 +9,7 @@ from tqdm.auto import tqdm from haystack import default_from_dict, default_to_dict from haystack.dataclasses import Document -from haystack.document_stores.protocol import DuplicatePolicy +from haystack.document_stores.types import DuplicatePolicy from haystack.utils.filters import document_matches_filter, convert from haystack.document_stores.errors import 
DuplicateDocumentError, DocumentStoreError from haystack.utils import expit diff --git a/haystack/document_stores/types/__init__.py b/haystack/document_stores/types/__init__.py new file mode 100644 index 000000000..1033ccd63 --- /dev/null +++ b/haystack/document_stores/types/__init__.py @@ -0,0 +1,4 @@ +from .protocol import DocumentStore +from .policy import DuplicatePolicy + +__all__ = ["DocumentStore", "DuplicatePolicy"] diff --git a/haystack/document_stores/types/policy.py b/haystack/document_stores/types/policy.py new file mode 100644 index 000000000..8406d0018 --- /dev/null +++ b/haystack/document_stores/types/policy.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class DuplicatePolicy(Enum): + NONE = "none" + SKIP = "skip" + OVERWRITE = "overwrite" + FAIL = "fail" diff --git a/haystack/document_stores/protocol.py b/haystack/document_stores/types/protocol.py similarity index 97% rename from haystack/document_stores/protocol.py rename to haystack/document_stores/types/protocol.py index 802beca94..409fc1168 100644 --- a/haystack/document_stores/protocol.py +++ b/haystack/document_stores/types/protocol.py @@ -1,9 +1,9 @@ from typing import Protocol, Optional, Dict, Any, List import logging -from enum import Enum from haystack.dataclasses import Document +from haystack.document_stores.types.policy import DuplicatePolicy # Ellipsis are needed for the type checker, it's safe to disable module-wide # pylint: disable=unnecessary-ellipsis @@ -11,13 +11,6 @@ from haystack.dataclasses import Document logger = logging.getLogger(__name__) -class DuplicatePolicy(Enum): - NONE = "none" - SKIP = "skip" - OVERWRITE = "overwrite" - FAIL = "fail" - - class DocumentStore(Protocol): """ Stores Documents to be used by the components of a Pipeline. 
diff --git a/haystack/pipeline_utils/indexing.py b/haystack/pipeline_utils/indexing.py index f42210502..21d5ad79b 100644 --- a/haystack/pipeline_utils/indexing.py +++ b/haystack/pipeline_utils/indexing.py @@ -14,7 +14,7 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter from haystack.components.routers import FileTypeRouter from haystack.components.joiners import DocumentJoiner from haystack.components.writers import DocumentWriter -from haystack.document_stores.protocol import DocumentStore +from haystack.document_stores.types import DocumentStore def download_files(sources: List[str]) -> List[str]: diff --git a/haystack/pipeline_utils/rag.py b/haystack/pipeline_utils/rag.py index 34e21aba3..133a49640 100644 --- a/haystack/pipeline_utils/rag.py +++ b/haystack/pipeline_utils/rag.py @@ -9,9 +9,10 @@ from haystack.components.builders.answer_builder import AnswerBuilder from haystack.components.builders.prompt_builder import PromptBuilder from haystack.components.embedders import SentenceTransformersTextEmbedder from haystack.components.generators import OpenAIGenerator, HuggingFaceTGIGenerator -from haystack.components.retrievers import InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever from haystack.dataclasses import Answer -from haystack.document_stores import InMemoryDocumentStore, DocumentStore +from haystack.document_stores.types import DocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore def build_rag_pipeline( diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py index f06573932..e3ab20d89 100644 --- a/haystack/testing/document_store.py +++ b/haystack/testing/document_store.py @@ -7,7 +7,7 @@ import pytest import pandas as pd from haystack.dataclasses import Document -from haystack.document_stores import DocumentStore, DuplicatePolicy +from haystack.document_stores.types import DocumentStore, DuplicatePolicy 
from haystack.document_stores.errors import DuplicateDocumentError from haystack.errors import FilterError diff --git a/haystack/testing/factory.py b/haystack/testing/factory.py index 35b937e81..07af4f57f 100644 --- a/haystack/testing/factory.py +++ b/haystack/testing/factory.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple, Type, List, Union from haystack.dataclasses import Document -from haystack.document_stores import DocumentStore, DuplicatePolicy +from haystack.document_stores.types import DocumentStore, DuplicatePolicy from haystack.core.component import component, Component from haystack.core.serialization import default_to_dict, default_from_dict diff --git a/releasenotes/notes/refactor-document-store-namespace-33f4433a2b121efc.yaml b/releasenotes/notes/refactor-document-store-namespace-33f4433a2b121efc.yaml new file mode 100644 index 000000000..e003254d7 --- /dev/null +++ b/releasenotes/notes/refactor-document-store-namespace-33f4433a2b121efc.yaml @@ -0,0 +1,17 @@ +--- +upgrade: + - | + Change the imports for in_memory document store and retrievers from: + + from haystack.document_stores import InMemoryDocumentStore + from haystack.components.retrievers import InMemoryEmbeddingRetriever + + to: + + from haystack.document_stores.in_memory import InMemoryDocumentStore + from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever + +enhancements: + - | + Stop exposing `in_memory` package symbols in the `haystack.document_stores` and + `haystack.components.retrievers` root namespaces. 
diff --git a/test/components/retrievers/test_in_memory_bm25_retriever.py b/test/components/retrievers/test_in_memory_bm25_retriever.py index 1ca21d57b..db5e82a3f 100644 --- a/test/components/retrievers/test_in_memory_bm25_retriever.py +++ b/test/components/retrievers/test_in_memory_bm25_retriever.py @@ -4,9 +4,9 @@ import pytest from haystack import Pipeline, DeserializationError from haystack.testing.factory import document_store_class -from haystack.components.retrievers.in_memory_bm25_retriever import InMemoryBM25Retriever +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.dataclasses import Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore @pytest.fixture() @@ -47,7 +47,7 @@ class TestMemoryBM25Retriever: data = component.to_dict() assert data == { - "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", + "type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever", "init_parameters": { "document_store": {"type": "MyFakeStore", "init_parameters": {}}, "filters": None, @@ -65,7 +65,7 @@ class TestMemoryBM25Retriever: ) data = component.to_dict() assert data == { - "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", + "type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever", "init_parameters": { "document_store": serialized_ds, "filters": {"name": "test.txt"}, @@ -78,7 +78,7 @@ class TestMemoryBM25Retriever: def test_from_dict(self): data = { - "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", + "type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever", "init_parameters": { "document_store": { "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", @@ -106,7 +106,7 @@ class TestMemoryBM25Retriever: def 
test_from_dict_nonexisting_docstore(self): data = { - "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", + "type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever", "init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}}, } with pytest.raises(DeserializationError): diff --git a/test/components/retrievers/test_in_memory_embedding_retriever.py b/test/components/retrievers/test_in_memory_embedding_retriever.py index e3282cd9b..f2828fead 100644 --- a/test/components/retrievers/test_in_memory_embedding_retriever.py +++ b/test/components/retrievers/test_in_memory_embedding_retriever.py @@ -5,9 +5,9 @@ import numpy as np from haystack import Pipeline, DeserializationError from haystack.testing.factory import document_store_class -from haystack.components.retrievers.in_memory_embedding_retriever import InMemoryEmbeddingRetriever +from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever from haystack.dataclasses import Document -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore class TestMemoryEmbeddingRetriever: @@ -37,7 +37,7 @@ class TestMemoryEmbeddingRetriever: data = component.to_dict() assert data == { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": { "document_store": {"type": "test_module.MyFakeStore", "init_parameters": {}}, "filters": None, @@ -60,7 +60,7 @@ class TestMemoryEmbeddingRetriever: ) data = component.to_dict() assert data == { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": { 
"document_store": {"type": "test_module.MyFakeStore", "init_parameters": {}}, "filters": {"name": "test.txt"}, @@ -72,7 +72,7 @@ class TestMemoryEmbeddingRetriever: def test_from_dict(self): data = { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": { "document_store": { "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", @@ -90,7 +90,7 @@ class TestMemoryEmbeddingRetriever: def test_from_dict_without_docstore(self): data = { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": {}, } with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"): @@ -98,7 +98,7 @@ class TestMemoryEmbeddingRetriever: def test_from_dict_without_docstore_type(self): data = { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": {"document_store": {"init_parameters": {}}}, } with pytest.raises(DeserializationError): @@ -106,7 +106,7 @@ class TestMemoryEmbeddingRetriever: def test_from_dict_nonexisting_docstore(self): data = { - "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", + "type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}}, } with pytest.raises(DeserializationError): diff --git a/test/components/writers/test_document_writer.py b/test/components/writers/test_document_writer.py index 623d5ee56..1cd07a4f9 100644 --- 
a/test/components/writers/test_document_writer.py +++ b/test/components/writers/test_document_writer.py @@ -3,7 +3,7 @@ import pytest from haystack import Document, DeserializationError from haystack.testing.factory import document_store_class from haystack.components.writers.document_writer import DocumentWriter -from haystack.document_stores import DuplicatePolicy +from haystack.document_stores.types import DuplicatePolicy from haystack.document_stores.in_memory import InMemoryDocumentStore diff --git a/test/document_stores/test_in_memory.py b/test/document_stores/test_in_memory.py index 6e778a3fc..1d3a36131 100644 --- a/test/document_stores/test_in_memory.py +++ b/test/document_stores/test_in_memory.py @@ -5,9 +5,8 @@ import pandas as pd import pytest from haystack import Document -from haystack.document_stores import InMemoryDocumentStore, DocumentStoreError, DuplicatePolicy, DuplicateDocumentError - - +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.testing.document_store import DocumentStoreBaseTests diff --git a/test/pipelines/test_indexing_pipeline.py b/test/pipelines/test_indexing_pipeline.py index ffca4ca80..ea9b33025 100644 --- a/test/pipelines/test_indexing_pipeline.py +++ b/test/pipelines/test_indexing_pipeline.py @@ -3,7 +3,7 @@ import os import pytest from haystack.pipeline_utils.indexing import build_indexing_pipeline -from haystack.document_stores import InMemoryDocumentStore +from haystack.document_stores.in_memory import InMemoryDocumentStore class TestIndexingPipeline: diff --git a/test/pipelines/test_rag_pipelines.py b/test/pipelines/test_rag_pipelines.py index 6c15c7503..2cd2027dd 100644 --- a/test/pipelines/test_rag_pipelines.py +++ b/test/pipelines/test_rag_pipelines.py @@ -3,7 +3,7 @@ import os import pytest from haystack.dataclasses import Answer -from haystack.document_stores import InMemoryDocumentStore +from 
haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.pipeline_utils.rag import build_rag_pipeline from haystack.testing.factory import document_store_class