refact!: Remove symbols under the haystack.document_stores namespace (#6714)

* remove symbols under the haystack.document_stores namespace

* Update haystack/document_stores/types/protocol.py

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>

* fix

* same for retrievers

* leftovers

* more leftovers

* add relnote

* leftovers

* one more

* fix examples

---------

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
This commit is contained in:
Massimiliano Pippi 2024-01-10 21:20:42 +01:00 committed by GitHub
parent 374a937663
commit e1ec4e5e4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
42 changed files with 102 additions and 88 deletions

View File

@ -1,7 +1,7 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/components/retrievers]
modules: ["in_memory_bm25_retriever", "in_memory_embedding_retriever"]
search_path: [../../../haystack/components/retrievers/in_memory]
modules: ["bm25_retriever", "embedding_retriever"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter

View File

@ -7,8 +7,8 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
def test_dense_doc_search_pipeline(tmp_path, samples_path):

View File

@ -2,12 +2,12 @@ from haystack import Pipeline
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval

View File

@ -2,9 +2,9 @@ import json
from haystack import Pipeline
from haystack.components.readers import ExtractiveReader
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.dataclasses import Document, ExtractedAnswer
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval
from haystack.evaluation.metrics import Metric

View File

@ -1,9 +1,9 @@
from haystack import Document, Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval

View File

@ -5,10 +5,10 @@ from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval
from haystack.evaluation.metrics import Metric

View File

@ -1,8 +1,8 @@
import json
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.readers import ExtractiveReader

View File

@ -4,8 +4,8 @@ from haystack import Pipeline, Document
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
def test_hybrid_doc_search_pipeline(tmp_path):

View File

@ -7,7 +7,7 @@ from haystack.components.preprocessors import DocumentSplitter, DocumentCleaner
from haystack.components.classifiers import DocumentLanguageClassifier
from haystack.components.routers import FileTypeRouter, MetadataRouter
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
def test_preprocessing_pipeline(tmp_path):

View File

@ -3,9 +3,9 @@ import json
import pytest
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.answer_builder import AnswerBuilder

View File

@ -1,6 +1,6 @@
import os
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline
API_KEY = "SET YOUR OPENAI API KEY HERE"

View File

@ -1,4 +1,4 @@
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline, build_indexing_pipeline
from haystack.pipeline_utils.indexing import download_files

View File

@ -7,7 +7,7 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
# Create components and an indexing pipeline that converts txt and pdf files to documents, cleans and splits them, and

View File

@ -6,9 +6,10 @@ from haystack import Pipeline
from haystack.components.others import Multiplexer
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
document_store = InMemoryDocumentStore()

View File

@ -1,7 +1,7 @@
import os
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder

View File

@ -3,14 +3,14 @@ from typing import List, Any, Optional, Dict
import logging
from pprint import pprint
from canals.component.types import Variadic
from haystack import Pipeline, Document, component, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.others import Multiplexer
from haystack.components.routers.conditional_router import ConditionalRouter
from haystack.core.component.types import Variadic
logging.getLogger().setLevel(logging.DEBUG)
@ -64,7 +64,7 @@ class PaginatedRetriever:
if self.retrieved_documents is None:
self.retrieved_documents = self.retriever.run(
query=query[0], filters=filters, top_k=top_k, scale_score=scale_score
query=query[0], filters=filters, top_k=top_k, scale_score=scale_score # type: ignore
)["documents"]
if not self.retrieved_documents:

View File

@ -1,6 +1,6 @@
from haystack import Document
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline import Pipeline
# Create components and a query pipeline

View File

@ -6,8 +6,8 @@ from haystack import Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
# Create a RAG query pipeline
prompt_template = """

View File

@ -5,7 +5,7 @@ import importlib
import logging
from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError
from haystack.document_stores import DocumentStore
from haystack.document_stores.types import DocumentStore
logger = logging.getLogger(__name__)

View File

@ -1,4 +0,0 @@
from haystack.components.retrievers.in_memory_bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory_embedding_retriever import InMemoryEmbeddingRetriever
__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"]

View File

@ -0,0 +1,4 @@
from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever
__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"]

View File

@ -1,7 +1,7 @@
from typing import Dict, List, Any, Optional
from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
@component

View File

@ -1,7 +1,7 @@
from typing import Dict, List, Any, Optional
from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
@component

View File

@ -4,7 +4,7 @@ import importlib
import logging
from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
logger = logging.getLogger(__name__)

View File

@ -1,12 +0,0 @@
from haystack.document_stores.protocol import DocumentStore, DuplicatePolicy
from haystack.document_stores.in_memory.document_store import InMemoryDocumentStore
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError
__all__ = [
"DocumentStore",
"DuplicatePolicy",
"InMemoryDocumentStore",
"DocumentStoreError",
"DuplicateDocumentError",
"MissingDocumentError",
]

View File

@ -0,0 +1,3 @@
from .errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError
__all__ = ["DocumentStoreError", "DuplicateDocumentError", "MissingDocumentError"]

View File

@ -9,7 +9,7 @@ from tqdm.auto import tqdm
from haystack import default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.protocol import DuplicatePolicy
from haystack.document_stores.types import DuplicatePolicy
from haystack.utils.filters import document_matches_filter, convert
from haystack.document_stores.errors import DuplicateDocumentError, DocumentStoreError
from haystack.utils import expit

View File

@ -0,0 +1,4 @@
from .protocol import DocumentStore
from .policy import DuplicatePolicy
__all__ = ["DocumentStore", "DuplicatePolicy"]

View File

@ -0,0 +1,8 @@
from enum import Enum
class DuplicatePolicy(Enum):
NONE = "none"
SKIP = "skip"
OVERWRITE = "overwrite"
FAIL = "fail"

View File

@ -1,9 +1,9 @@
from typing import Protocol, Optional, Dict, Any, List
import logging
from enum import Enum
from haystack.dataclasses import Document
from haystack.document_stores.types.policy import DuplicatePolicy
# Ellipses are needed for the type checker; it's safe to disable this check module-wide
# pylint: disable=unnecessary-ellipsis
@ -11,13 +11,6 @@ from haystack.dataclasses import Document
logger = logging.getLogger(__name__)
class DuplicatePolicy(Enum):
NONE = "none"
SKIP = "skip"
OVERWRITE = "overwrite"
FAIL = "fail"
class DocumentStore(Protocol):
"""
Stores Documents to be used by the components of a Pipeline.

View File

@ -14,7 +14,7 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores.protocol import DocumentStore
from haystack.document_stores.types import DocumentStore
def download_files(sources: List[str]) -> List[str]:

View File

@ -9,9 +9,10 @@ from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator, HuggingFaceTGIGenerator
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.dataclasses import Answer
from haystack.document_stores import InMemoryDocumentStore, DocumentStore
from haystack.document_stores.types import DocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
def build_rag_pipeline(

View File

@ -7,7 +7,7 @@ import pytest
import pandas as pd
from haystack.dataclasses import Document
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.document_stores.errors import DuplicateDocumentError
from haystack.errors import FilterError

View File

@ -1,7 +1,7 @@
from typing import Any, Dict, Optional, Tuple, Type, List, Union
from haystack.dataclasses import Document
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.core.component import component, Component
from haystack.core.serialization import default_to_dict, default_from_dict

View File

@ -0,0 +1,17 @@
---
upgrade:
- |
Change the imports for in_memory document store and retrievers from:
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever
to:
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
enhancements:
- |
Stop exposing `in_memory` package symbols in the `haystack.document_stores` and
`haystack.components.retrievers` root namespaces.

View File

@ -4,9 +4,9 @@ import pytest
from haystack import Pipeline, DeserializationError
from haystack.testing.factory import document_store_class
from haystack.components.retrievers.in_memory_bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
@pytest.fixture()
@ -47,7 +47,7 @@ class TestMemoryBM25Retriever:
data = component.to_dict()
assert data == {
"type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever",
"type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever",
"init_parameters": {
"document_store": {"type": "MyFakeStore", "init_parameters": {}},
"filters": None,
@ -65,7 +65,7 @@ class TestMemoryBM25Retriever:
)
data = component.to_dict()
assert data == {
"type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever",
"type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever",
"init_parameters": {
"document_store": serialized_ds,
"filters": {"name": "test.txt"},
@ -78,7 +78,7 @@ class TestMemoryBM25Retriever:
def test_from_dict(self):
data = {
"type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever",
"type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever",
"init_parameters": {
"document_store": {
"type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore",
@ -106,7 +106,7 @@ class TestMemoryBM25Retriever:
def test_from_dict_nonexisting_docstore(self):
data = {
"type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever",
"type": "haystack.components.retrievers.in_memory.bm25_retriever.InMemoryBM25Retriever",
"init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}},
}
with pytest.raises(DeserializationError):

View File

@ -5,9 +5,9 @@ import numpy as np
from haystack import Pipeline, DeserializationError
from haystack.testing.factory import document_store_class
from haystack.components.retrievers.in_memory_embedding_retriever import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
class TestMemoryEmbeddingRetriever:
@ -37,7 +37,7 @@ class TestMemoryEmbeddingRetriever:
data = component.to_dict()
assert data == {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {
"document_store": {"type": "test_module.MyFakeStore", "init_parameters": {}},
"filters": None,
@ -60,7 +60,7 @@ class TestMemoryEmbeddingRetriever:
)
data = component.to_dict()
assert data == {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {
"document_store": {"type": "test_module.MyFakeStore", "init_parameters": {}},
"filters": {"name": "test.txt"},
@ -72,7 +72,7 @@ class TestMemoryEmbeddingRetriever:
def test_from_dict(self):
data = {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {
"document_store": {
"type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore",
@ -90,7 +90,7 @@ class TestMemoryEmbeddingRetriever:
def test_from_dict_without_docstore(self):
data = {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {},
}
with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"):
@ -98,7 +98,7 @@ class TestMemoryEmbeddingRetriever:
def test_from_dict_without_docstore_type(self):
data = {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {"document_store": {"init_parameters": {}}},
}
with pytest.raises(DeserializationError):
@ -106,7 +106,7 @@ class TestMemoryEmbeddingRetriever:
def test_from_dict_nonexisting_docstore(self):
data = {
"type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever",
"type": "haystack.components.retrievers.in_memory.embedding_retriever.InMemoryEmbeddingRetriever",
"init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}},
}
with pytest.raises(DeserializationError):

View File

@ -3,7 +3,7 @@ import pytest
from haystack import Document, DeserializationError
from haystack.testing.factory import document_store_class
from haystack.components.writers.document_writer import DocumentWriter
from haystack.document_stores import DuplicatePolicy
from haystack.document_stores.types import DuplicatePolicy
from haystack.document_stores.in_memory import InMemoryDocumentStore

View File

@ -5,9 +5,8 @@ import pandas as pd
import pytest
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore, DocumentStoreError, DuplicatePolicy, DuplicateDocumentError
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.testing.document_store import DocumentStoreBaseTests

View File

@ -3,7 +3,7 @@ import os
import pytest
from haystack.pipeline_utils.indexing import build_indexing_pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
class TestIndexingPipeline:

View File

@ -3,7 +3,7 @@ import os
import pytest
from haystack.dataclasses import Answer
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils.rag import build_rag_pipeline
from haystack.testing.factory import document_store_class