Add a DocumentWriter component to the Haystack v2 preview package.

Review notes (commentary placed before the first "diff --git" header, not part of any hunk):
  * DocumentWriter.run() writes the given documents to self.document_store, returns an empty
    dict, and raises ValueError when no document store has been set.
  * NOTE(review): the new test module is named document_writer.py. pytest's default discovery
    only collects test_*.py / *_test.py files, so confirm the project configuration picks it up.

diff --git a/haystack/preview/components/__init__.py b/haystack/preview/components/__init__.py
index 5831a72d6..a8acd34c7 100644
--- a/haystack/preview/components/__init__.py
+++ b/haystack/preview/components/__init__.py
@@ -2,3 +2,4 @@ from haystack.preview.components.audio.whisper_local import LocalWhisperTranscri
 from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
 from haystack.preview.components.file_converters import TextFileToDocument
 from haystack.preview.components.classifiers import FileExtensionClassifier
+from haystack.preview.components.writers.document_writer import DocumentWriter
diff --git a/haystack/preview/components/writers/document_writer.py b/haystack/preview/components/writers/document_writer.py
new file mode 100644
index 000000000..a39675dc2
--- /dev/null
+++ b/haystack/preview/components/writers/document_writer.py
@@ -0,0 +1,42 @@
+from typing import List, Optional
+
+from haystack.preview import component, Document
+from haystack.preview.document_stores import DocumentStoreAwareMixin, DocumentStore, DuplicatePolicy
+
+
+@component
+class DocumentWriter(DocumentStoreAwareMixin):
+    """
+    A component for writing documents to a DocumentStore.
+    """
+
+    supported_document_stores = [DocumentStore]  # type: ignore
+
+    def __init__(self, policy: DuplicatePolicy = DuplicatePolicy.FAIL):
+        """
+        Create a DocumentWriter component.
+
+        :param policy: The policy to use when encountering duplicate documents (default is DuplicatePolicy.FAIL).
+        """
+        self.policy = policy
+
+    def run(self, documents: List[Document], policy: Optional[DuplicatePolicy] = None):
+        """
+        Run DocumentWriter on the given input data.
+
+        :param documents: A list of documents to write to the store.
+        :param policy: Duplicate-handling policy for this call; if None, the policy given to __init__ is used.
+        :return: An empty dictionary.
+
+        :raises ValueError: If no DocumentStore has been set on the self.document_store attribute.
+        """
+        if not self.document_store:
+            raise ValueError(
+                "DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the self.document_store attribute."
+            )
+
+        if policy is None:
+            policy = self.policy
+
+        self.document_store.write_documents(documents=documents, policy=policy)
+        return {}
diff --git a/releasenotes/notes/document-writer-v2-bbe0a62b3066f9cf.yaml b/releasenotes/notes/document-writer-v2-bbe0a62b3066f9cf.yaml
new file mode 100644
index 000000000..83950893a
--- /dev/null
+++ b/releasenotes/notes/document-writer-v2-bbe0a62b3066f9cf.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Added new DocumentWriter component to Haystack v2 preview so that documents can be written to stores.
diff --git a/test/preview/components/writers/__init__.py b/test/preview/components/writers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/preview/components/writers/document_writer.py b/test/preview/components/writers/document_writer.py
new file mode 100644
index 000000000..57366d4ed
--- /dev/null
+++ b/test/preview/components/writers/document_writer.py
@@ -0,0 +1,36 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from haystack.preview import Document
+from haystack.preview.components.writers.document_writer import DocumentWriter
+from haystack.preview.document_stores import DuplicatePolicy
+from test.preview.components.base import BaseTestComponent
+
+
+class TestDocumentWriter(BaseTestComponent):
+    @pytest.mark.unit
+    def test_run(self):
+        writer = DocumentWriter()
+        documents = [
+            Document(content="This is the text of a document."),
+            Document(content="This is the text of another document."),
+        ]
+
+        mocked_document_store = MagicMock()
+        mocked_document_store.__haystack_document_store__ = True
+        writer.document_store = mocked_document_store
+        writer.run(documents=documents)
+
+        mocked_document_store.write_documents.assert_called_once_with(documents=documents, policy=DuplicatePolicy.FAIL)
+
+    @pytest.mark.unit
+    def test_run_without_store(self):
+        writer = DocumentWriter()
+        documents = [Document(content="test")]
+        with pytest.raises(
+            ValueError,
+            match="DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the "
+            "self.document_store attribute",
+        ):
+            writer.run(documents=documents)