mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-01 02:09:39 +00:00
feat: Add DocumentWriter v2 (#5435)
* add draft of WriteToStore and basic test * add DocumentWriter implementation * draft unit and integration tests * add release note * mock Store in unit tests * pylint * Update haystack/preview/components/writers/document_writer.py Co-authored-by: Daria Fokina <daria.fokina@deepset.ai> * Remove unnecessary test * Rework DocumentWriter to support new Component I/O definition --------- Co-authored-by: Daria Fokina <daria.fokina@deepset.ai> Co-authored-by: Silvano Cerza <silvanocerza@gmail.com>
This commit is contained in:
parent
d4c1a0508a
commit
22c7601729
@ -2,3 +2,4 @@ from haystack.preview.components.audio.whisper_local import LocalWhisperTranscri
|
||||
from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
|
||||
from haystack.preview.components.file_converters import TextFileToDocument
|
||||
from haystack.preview.components.classifiers import FileExtensionClassifier
|
||||
from haystack.preview.components.writers.document_writer import DocumentWriter
|
||||
|
||||
42
haystack/preview/components/writers/document_writer.py
Normal file
42
haystack/preview/components/writers/document_writer.py
Normal file
@ -0,0 +1,42 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from haystack.preview import component, Document
|
||||
from haystack.preview.document_stores import DocumentStoreAwareMixin, DocumentStore, DuplicatePolicy
|
||||
|
||||
|
||||
@component
class DocumentWriter(DocumentStoreAwareMixin):
    """
    A component for writing documents to a DocumentStore.

    The store is not passed to the constructor: it is read from the
    `document_store` attribute, which must be set before `run` is called.
    """

    supported_document_stores = [DocumentStore]  # type: ignore

    def __init__(self, policy: DuplicatePolicy = DuplicatePolicy.FAIL):
        """
        Create a DocumentWriter component.

        :param policy: The policy to use when encountering duplicate documents (default is DuplicatePolicy.FAIL).
        """
        self.policy = policy

    def run(self, documents: List[Document], policy: Optional[DuplicatePolicy] = None):
        """
        Run DocumentWriter on the given input data.

        :param documents: A list of documents to write to the store.
        :param policy: The policy to use when encountering duplicate documents. When None, the policy
            given to the constructor is used instead.
        :return: An empty dictionary.

        :raises ValueError: If no document store has been set on this component.
        """
        # Check for "not set" explicitly rather than via truthiness: a store object that
        # happens to evaluate as falsy (e.g. one defining __len__ while empty) is still a
        # valid target for writing and must not be reported as missing.
        # NOTE(review): assumes DocumentStoreAwareMixin exposes an unset store as None - confirm.
        if self.document_store is None:
            raise ValueError(
                "DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the self.document_store attribute."
            )

        # A per-call policy overrides the one configured at construction time.
        if policy is None:
            policy = self.policy

        self.document_store.write_documents(documents=documents, policy=policy)
        return {}
|
||||
@ -0,0 +1,4 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Added new DocumentWriter component to Haystack v2 preview so that documents can be written to stores.
|
||||
0
test/preview/components/writers/__init__.py
Normal file
0
test/preview/components/writers/__init__.py
Normal file
36
test/preview/components/writers/document_writer.py
Normal file
36
test/preview/components/writers/document_writer.py
Normal file
@ -0,0 +1,36 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from haystack.preview import Document
|
||||
from haystack.preview.components.writers.document_writer import DocumentWriter
|
||||
from haystack.preview.document_stores import DuplicatePolicy
|
||||
from test.preview.components.base import BaseTestComponent
|
||||
|
||||
|
||||
class TestDocumentWriter(BaseTestComponent):
    """Unit tests for the DocumentWriter component."""

    @pytest.mark.unit
    def test_run(self):
        """With a store attached, run() forwards the documents and the default policy to it."""
        writer = DocumentWriter()
        docs = [
            Document(content=text)
            for text in ("This is the text of a document.", "This is the text of another document.")
        ]

        store = MagicMock()
        # Presumably the marker the document_store setter looks for - verify against the mixin.
        store.__haystack_document_store__ = True
        writer.document_store = store

        writer.run(documents=docs)

        store.write_documents.assert_called_once_with(documents=docs, policy=DuplicatePolicy.FAIL)

    @pytest.mark.unit
    def test_run_without_store(self):
        """Without a store attached, run() raises a ValueError explaining how to set one."""
        writer = DocumentWriter()
        docs = [Document(content="test")]
        expected_message = (
            "DocumentWriter needs a DocumentStore to run: set the DocumentStore instance to the "
            "self.document_store attribute"
        )

        with pytest.raises(ValueError, match=expected_message):
            writer.run(documents=docs)
|
||||
Loading…
x
Reference in New Issue
Block a user