mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-26 22:48:29 +00:00
chore: move DocumentJoiner to new joiners package (#6692)
* move DocumentJoiner to new joiners package * relnote * leftovers * fix docstrings generation * fix unrelated pydoc misconfiguration * more unrelated work, yay! * fix assertions
This commit is contained in:
parent
9445b2d466
commit
93b2aaee09
@ -1,7 +1,7 @@
|
||||
loaders:
|
||||
- type: loaders.CustomPythonLoader
|
||||
search_path: [../../../haystack/components/caching]
|
||||
modules: ["url_cache_checker"]
|
||||
modules: ["cache_checker"]
|
||||
ignore_when_discovered: ["__init__"]
|
||||
processors:
|
||||
- type: filter
|
||||
|
||||
26
docs/pydoc/config/joiner.yml
Normal file
26
docs/pydoc/config/joiner.yml
Normal file
@ -0,0 +1,26 @@
|
||||
loaders:
|
||||
- type: loaders.CustomPythonLoader
|
||||
search_path: [../../../haystack/components/joiners]
|
||||
modules: ["document_joiner"]
|
||||
ignore_when_discovered: ["__init__"]
|
||||
processors:
|
||||
- type: filter
|
||||
expression:
|
||||
documented_only: true
|
||||
do_not_filter_modules: false
|
||||
skip_empty_modules: true
|
||||
- type: smart
|
||||
- type: crossref
|
||||
renderer:
|
||||
type: renderers.ReadmePreviewRenderer
|
||||
excerpt: Components that join multiple inputs, such as lists of documents, into a single output.
|
||||
category_slug: haystack-classes
|
||||
title: Joiner API
|
||||
slug: joiner-api
|
||||
order: 140
|
||||
markdown:
|
||||
descriptive_class_title: false
|
||||
descriptive_module_title: true
|
||||
add_method_class_prefix: true
|
||||
add_member_class_prefix: false
|
||||
filename: joiner_api.md
|
||||
@ -1,7 +1,7 @@
|
||||
loaders:
|
||||
- type: loaders.CustomPythonLoader
|
||||
search_path: [../../../haystack/components/routers]
|
||||
modules: ["document_joiner", "conditional_router", "file_type_router", "metadata_router", "text_language_router"]
|
||||
modules: ["conditional_router", "file_type_router", "metadata_router", "text_language_router"]
|
||||
ignore_when_discovered: ["__init__"]
|
||||
processors:
|
||||
- type: filter
|
||||
|
||||
@ -4,7 +4,8 @@ from haystack import Pipeline
|
||||
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
||||
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
|
||||
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
|
||||
from haystack.components.routers import FileTypeRouter, DocumentJoiner
|
||||
from haystack.components.routers import FileTypeRouter
|
||||
from haystack.components.joiners import DocumentJoiner
|
||||
from haystack.components.writers import DocumentWriter
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
from haystack.components.retrievers import InMemoryEmbeddingRetriever
|
||||
|
||||
@ -3,7 +3,8 @@ from haystack.components.converters import PyPDFToDocument, TextFileToDocument
|
||||
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
||||
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
|
||||
from haystack.components.retrievers import InMemoryEmbeddingRetriever
|
||||
from haystack.components.routers import DocumentJoiner, FileTypeRouter
|
||||
from haystack.components.routers import FileTypeRouter
|
||||
from haystack.components.joiners import DocumentJoiner
|
||||
from haystack.components.writers import DocumentWriter
|
||||
from haystack.dataclasses import Document
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
|
||||
@ -2,7 +2,7 @@ from haystack import Document, Pipeline
|
||||
from haystack.components.embedders import SentenceTransformersTextEmbedder
|
||||
from haystack.components.rankers import TransformersSimilarityRanker
|
||||
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
|
||||
from haystack.components.routers.document_joiner import DocumentJoiner
|
||||
from haystack.components.joiners.document_joiner import DocumentJoiner
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
from haystack.evaluation.eval import eval
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ import json
|
||||
from haystack import Pipeline, Document
|
||||
from haystack.components.embedders import SentenceTransformersTextEmbedder
|
||||
from haystack.components.rankers import TransformersSimilarityRanker
|
||||
from haystack.components.routers.document_joiner import DocumentJoiner
|
||||
from haystack.components.joiners.document_joiner import DocumentJoiner
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
|
||||
|
||||
|
||||
@ -4,7 +4,8 @@ from haystack import Pipeline
|
||||
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
|
||||
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
|
||||
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
|
||||
from haystack.components.routers import FileTypeRouter, DocumentJoiner
|
||||
from haystack.components.routers import FileTypeRouter
|
||||
from haystack.components.joiners import DocumentJoiner
|
||||
from haystack.components.writers import DocumentWriter
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
|
||||
|
||||
3
haystack/components/joiners/__init__.py
Normal file
3
haystack/components/joiners/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from haystack.components.joiners.document_joiner import DocumentJoiner
|
||||
|
||||
__all__ = ["DocumentJoiner"]
|
||||
@ -1,7 +1,6 @@
|
||||
from haystack.components.routers.document_joiner import DocumentJoiner
|
||||
from haystack.components.routers.file_type_router import FileTypeRouter
|
||||
from haystack.components.routers.metadata_router import MetadataRouter
|
||||
from haystack.components.routers.conditional_router import ConditionalRouter
|
||||
from haystack.components.routers.text_language_router import TextLanguageRouter
|
||||
|
||||
__all__ = ["DocumentJoiner", "FileTypeRouter", "MetadataRouter", "TextLanguageRouter", "ConditionalRouter"]
|
||||
__all__ = ["FileTypeRouter", "MetadataRouter", "TextLanguageRouter", "ConditionalRouter"]
|
||||
|
||||
@ -11,7 +11,8 @@ from haystack.components.converters import TextFileToDocument
|
||||
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, OpenAIDocumentEmbedder
|
||||
from haystack.components.fetchers import LinkContentFetcher
|
||||
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
|
||||
from haystack.components.routers import FileTypeRouter, DocumentJoiner
|
||||
from haystack.components.routers import FileTypeRouter
|
||||
from haystack.components.joiners import DocumentJoiner
|
||||
from haystack.components.writers import DocumentWriter
|
||||
from haystack.document_stores.protocol import DocumentStore
|
||||
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
---
|
||||
upgrade:
|
||||
- |
|
||||
Change any occurrence of:
|
||||
from haystack.components.routers.document_joiner import DocumentJoiner
|
||||
to:
|
||||
from haystack.components.joiners.document_joiner import DocumentJoiner
|
||||
enhancements:
|
||||
- |
|
||||
Create a new package called `joiners` and move `DocumentJoiner` there for clarity.
|
||||
@ -172,7 +172,7 @@ class TestOpenAIDocumentEmbedder:
|
||||
Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
|
||||
]
|
||||
|
||||
model = "text-similarity-ada-001"
|
||||
model = "text-embedding-ada-002"
|
||||
|
||||
embedder = OpenAIDocumentEmbedder(model_name=model, meta_fields_to_embed=["topic"], embedding_separator=" | ")
|
||||
|
||||
@ -185,6 +185,6 @@ class TestOpenAIDocumentEmbedder:
|
||||
for doc in documents_with_embeddings:
|
||||
assert isinstance(doc, Document)
|
||||
assert isinstance(doc.embedding, list)
|
||||
assert len(doc.embedding) == 1024
|
||||
assert len(doc.embedding) == 1536
|
||||
assert all(isinstance(x, float) for x in doc.embedding)
|
||||
assert metadata == {"model": "text-similarity-ada:001", "usage": {"prompt_tokens": 15, "total_tokens": 15}}
|
||||
assert metadata == {"model": "text-embedding-ada-002-v2", "usage": {"prompt_tokens": 15, "total_tokens": 15}}
|
||||
|
||||
@ -79,11 +79,14 @@ class TestOpenAITextEmbedder:
|
||||
@pytest.mark.skipif(os.environ.get("OPENAI_API_KEY", "") == "", reason="OPENAI_API_KEY is not set")
|
||||
@pytest.mark.integration
|
||||
def test_run(self):
|
||||
model = "text-similarity-ada-001"
|
||||
model = "text-embedding-ada-002"
|
||||
|
||||
embedder = OpenAITextEmbedder(model_name=model, prefix="prefix ", suffix=" suffix")
|
||||
result = embedder.run(text="The food was delicious")
|
||||
|
||||
assert len(result["embedding"]) == 1024
|
||||
assert len(result["embedding"]) == 1536
|
||||
assert all(isinstance(x, float) for x in result["embedding"])
|
||||
assert result["meta"] == {"model": "text-similarity-ada:001", "usage": {"prompt_tokens": 6, "total_tokens": 6}}
|
||||
assert result["meta"] == {
|
||||
"model": "text-embedding-ada-002-v2",
|
||||
"usage": {"prompt_tokens": 6, "total_tokens": 6},
|
||||
}
|
||||
|
||||
0
test/components/joiners/__init__.py
Normal file
0
test/components/joiners/__init__.py
Normal file
@ -3,7 +3,7 @@ import logging
|
||||
import pytest
|
||||
|
||||
from haystack import Document
|
||||
from haystack.components.routers.document_joiner import DocumentJoiner
|
||||
from haystack.components.joiners.document_joiner import DocumentJoiner
|
||||
|
||||
|
||||
class TestDocumentJoiner:
|
||||
Loading…
x
Reference in New Issue
Block a user