mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-07 05:14:08 +00:00
feat: Add DocumentWriter v2 (#5435)
* add draft of WriteToStore and basic test
* add DocumentWriter implementation
* draft unit and integration tests
* add release note
* mock Store in unit tests
* pylint
* Update haystack/preview/components/writers/document_writer.py
  Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>
* Remove unnecessary test
* Rework DocumentWriter to support new Component I/O definition

---------

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>
Co-authored-by: Silvano Cerza <silvanocerza@gmail.com>
This commit is contained in:
parent
d4c1a0508a
commit
22c7601729
@ -2,3 +2,4 @@ from haystack.preview.components.audio.whisper_local import LocalWhisperTranscri
|
|||||||
from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
|
from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
|
||||||
from haystack.preview.components.file_converters import TextFileToDocument
|
from haystack.preview.components.file_converters import TextFileToDocument
|
||||||
from haystack.preview.components.classifiers import FileExtensionClassifier
|
from haystack.preview.components.classifiers import FileExtensionClassifier
|
||||||
|
from haystack.preview.components.writers.document_writer import DocumentWriter
|
||||||
|
|||||||
42
haystack/preview/components/writers/document_writer.py
Normal file
42
haystack/preview/components/writers/document_writer.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from haystack.preview import component, Document
|
||||||
|
from haystack.preview.document_stores import DocumentStoreAwareMixin, DocumentStore, DuplicatePolicy
|
||||||
|
|
||||||
|
|
||||||
|
@component
class DocumentWriter(DocumentStoreAwareMixin):
    """
    Component that writes a list of documents into the DocumentStore attached to it.

    The store is not a constructor argument: it is read from the `document_store`
    attribute, which has to be set before `run` is called.
    """

    supported_document_stores = [DocumentStore]  # type: ignore

    def __init__(self, policy: DuplicatePolicy = DuplicatePolicy.FAIL):
        """
        Create a DocumentWriter component.

        :param policy: The duplicate-handling policy used by `run` whenever the caller does not
            pass one explicitly (default is DuplicatePolicy.FAIL).
        """
        self.policy = policy

    def run(self, documents: List[Document], policy: Optional[DuplicatePolicy] = None):
        """
        Write the given documents to the attached document store.

        :param documents: A list of documents to write to the store.
        :param policy: The policy to use when encountering duplicate documents. When left as
            None, the policy given at construction time is used instead.
        :return: An empty dictionary.

        :raises ValueError: If `self.document_store` is not set (evaluates as falsy).
        """
        # Guard clause: without a store there is nowhere to write to.
        if not self.document_store:
            message = (
                "DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the self.document_store attribute."
            )
            raise ValueError(message)

        # A per-call policy takes precedence over the one configured in __init__.
        effective_policy = self.policy if policy is None else policy

        self.document_store.write_documents(documents=documents, policy=effective_policy)
        return {}
|
||||||
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Added a new DocumentWriter component to the Haystack v2 preview so that documents can be written to document stores.
|
||||||
0
test/preview/components/writers/__init__.py
Normal file
0
test/preview/components/writers/__init__.py
Normal file
36
test/preview/components/writers/document_writer.py
Normal file
36
test/preview/components/writers/document_writer.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from haystack.preview import Document
|
||||||
|
from haystack.preview.components.writers.document_writer import DocumentWriter
|
||||||
|
from haystack.preview.document_stores import DuplicatePolicy
|
||||||
|
from test.preview.components.base import BaseTestComponent
|
||||||
|
|
||||||
|
|
||||||
|
class TestDocumentWriter(BaseTestComponent):
    """Unit tests for the DocumentWriter component."""

    @pytest.mark.unit
    def test_run(self):
        """run() forwards the documents to the store with the default FAIL policy."""
        texts = ("This is the text of a document.", "This is the text of another document.")
        docs = [Document(content=text) for text in texts]

        # A MagicMock stands in for the store; the marker attribute is set so the mock
        # is accepted when assigned to `document_store`.
        store = MagicMock()
        store.__haystack_document_store__ = True

        document_writer = DocumentWriter()
        document_writer.document_store = store
        document_writer.run(documents=docs)

        store.write_documents.assert_called_once_with(documents=docs, policy=DuplicatePolicy.FAIL)

    @pytest.mark.unit
    def test_run_without_store(self):
        """run() raises ValueError when no document store has been attached."""
        document_writer = DocumentWriter()
        docs = [Document(content="test")]
        expected_message = (
            "DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the "
            "self.document_store attribute"
        )

        with pytest.raises(ValueError, match=expected_message):
            document_writer.run(documents=docs)
|
||||||
Loading…
x
Reference in New Issue
Block a user