chore: move DocumentJoiner to new joiners package (#6692)

* move DocumentJoiner to new joiners package

* relnote

* leftovers

* fix docstrings generation

* fix unrelated pydoc misconfiguration

* more unrelated work, yay!

* fix assertions
This commit is contained in:
Massimiliano Pippi 2024-01-08 22:06:27 +01:00 committed by GitHub
parent 9445b2d466
commit 93b2aaee09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 62 additions and 17 deletions

View File

@ -1,7 +1,7 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/components/caching]
modules: ["url_cache_checker"]
modules: ["cache_checker"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter

View File

@ -0,0 +1,26 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/components/joiners]
modules: ["document_joiner"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
expression:
documented_only: true
do_not_filter_modules: false
skip_empty_modules: true
- type: smart
- type: crossref
renderer:
type: renderers.ReadmePreviewRenderer
excerpt: Components that join lists of documents from multiple sources into a single list.
category_slug: haystack-classes
title: Joiner API
slug: joiner-api
order: 140
markdown:
descriptive_class_title: false
descriptive_module_title: true
add_method_class_prefix: true
add_member_class_prefix: false
filename: joiner_api.md

View File

@ -1,7 +1,7 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/components/routers]
modules: ["document_joiner", "conditional_router", "file_type_router", "metadata_router", "text_language_router"]
modules: ["conditional_router", "file_type_router", "metadata_router", "text_language_router"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter

View File

@ -4,7 +4,8 @@ from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever

View File

@ -3,7 +3,8 @@ from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.routers import DocumentJoiner, FileTypeRouter
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore

View File

@ -2,7 +2,7 @@ from haystack import Document, Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.routers.document_joiner import DocumentJoiner
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval

View File

@ -3,7 +3,7 @@ import json
from haystack import Pipeline, Document
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.routers.document_joiner import DocumentJoiner
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever

View File

@ -4,7 +4,8 @@ from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore

View File

@ -0,0 +1,3 @@
from haystack.components.joiners.document_joiner import DocumentJoiner
__all__ = ["DocumentJoiner"]

View File

@ -1,7 +1,6 @@
from haystack.components.routers.document_joiner import DocumentJoiner
from haystack.components.routers.file_type_router import FileTypeRouter
from haystack.components.routers.metadata_router import MetadataRouter
from haystack.components.routers.conditional_router import ConditionalRouter
from haystack.components.routers.text_language_router import TextLanguageRouter
__all__ = ["DocumentJoiner", "FileTypeRouter", "MetadataRouter", "TextLanguageRouter", "ConditionalRouter"]
__all__ = ["FileTypeRouter", "MetadataRouter", "TextLanguageRouter", "ConditionalRouter"]

View File

@ -11,7 +11,8 @@ from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, OpenAIDocumentEmbedder
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores.protocol import DocumentStore

View File

@ -0,0 +1,10 @@
---
upgrade:
- |
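Change any occurrence of:
from haystack.components.routers.document_joiner import DocumentJoiner
to:
from haystack.components.joiners.document_joiner import DocumentJoiner
The package-level import moved as well, so also change any occurrence of:
from haystack.components.routers import DocumentJoiner
to:
from haystack.components.joiners import DocumentJoiner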
enhancements:
- |
Create a new package called `joiners` and move `DocumentJoiner` there for clarity.

View File

@ -172,7 +172,7 @@ class TestOpenAIDocumentEmbedder:
Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
]
model = "text-similarity-ada-001"
model = "text-embedding-ada-002"
embedder = OpenAIDocumentEmbedder(model_name=model, meta_fields_to_embed=["topic"], embedding_separator=" | ")
@ -185,6 +185,6 @@ class TestOpenAIDocumentEmbedder:
for doc in documents_with_embeddings:
assert isinstance(doc, Document)
assert isinstance(doc.embedding, list)
assert len(doc.embedding) == 1024
assert len(doc.embedding) == 1536
assert all(isinstance(x, float) for x in doc.embedding)
assert metadata == {"model": "text-similarity-ada:001", "usage": {"prompt_tokens": 15, "total_tokens": 15}}
assert metadata == {"model": "text-embedding-ada-002-v2", "usage": {"prompt_tokens": 15, "total_tokens": 15}}

View File

@ -79,11 +79,14 @@ class TestOpenAITextEmbedder:
@pytest.mark.skipif(os.environ.get("OPENAI_API_KEY", "") == "", reason="OPENAI_API_KEY is not set")
@pytest.mark.integration
def test_run(self):
model = "text-similarity-ada-001"
model = "text-embedding-ada-002"
embedder = OpenAITextEmbedder(model_name=model, prefix="prefix ", suffix=" suffix")
result = embedder.run(text="The food was delicious")
assert len(result["embedding"]) == 1024
assert len(result["embedding"]) == 1536
assert all(isinstance(x, float) for x in result["embedding"])
assert result["meta"] == {"model": "text-similarity-ada:001", "usage": {"prompt_tokens": 6, "total_tokens": 6}}
assert result["meta"] == {
"model": "text-embedding-ada-002-v2",
"usage": {"prompt_tokens": 6, "total_tokens": 6},
}

View File

View File

@ -3,7 +3,7 @@ import logging
import pytest
from haystack import Document
from haystack.components.routers.document_joiner import DocumentJoiner
from haystack.components.joiners.document_joiner import DocumentJoiner
class TestDocumentJoiner: