haystack/test/preview/components/writers/test_document_writer.py
ZanSara b1daa7c647
chore: migrate to canals==0.7.0 (#5647)
* add default_to_dict and default_from_dict placeholders to ease migration to canals 0.7.0

* canals==0.7.0

* whisper components

* add to_dict/from_dict stubs

* import serialization methods in init to hide canals imports

* reno

* export deserializationerror too

* Update haystack/preview/__init__.py

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>

* serialization methods for LocalWhisperTranscriber (#5648)

* chore: serialization methods for `FileExtensionClassifier` (#5651)

* serialization methods for FileExtensionClassifier

* Update test_file_classifier.py

* chore: serialization methods for `SentenceTransformersDocumentEmbedder` (#5652)

* serialization methods for SentenceTransformersDocumentEmbedder

* fix device management

* serialization methods for SentenceTransformersTextEmbedder (#5653)

* serialization methods for TextFileToDocument (#5654)

* chore: serialization methods for `RemoteWhisperTranscriber` (#5650)

* serialization methods for RemoteWhisperTranscriber

* remove patches

* Add default to_dict and from_dict in document stores built with factory (#5674)

* fix tests (#5671)

* chore: simplify serialization methods for `MemoryDocumentStore` (#5667)

* simplify serialization for MemoryDocumentStore

* remove redundant tests

* pylint

* chore: serialization methods for `MemoryRetriever` (#5663)

* serialization method for MemoryRetriever

* more tests

* remove hash from default_document_store_to_dict

* remove diff in factory.py

* chore: serialization methods for `DocumentWriter` (#5661)

* serialization methods for DocumentWriter

* more tests

* use factory

* black

---------

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
2023-08-29 18:15:07 +02:00

84 lines
3.3 KiB
Python

from unittest.mock import MagicMock
import pytest
from haystack.preview import Document, DeserializationError
from haystack.preview.testing.factory import document_store_class
from haystack.preview.components.writers.document_writer import DocumentWriter
from haystack.preview.document_stores import DuplicatePolicy
class TestDocumentWriter:
    """Unit tests for ``DocumentWriter``: ``to_dict``, ``from_dict`` and ``run``."""

    @pytest.mark.unit
    def test_to_dict(self):
        """A writer created with only a document store serializes with policy ``"FAIL"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls())
        expected = {
            "type": "DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "policy": "FAIL",
            },
        }
        assert writer.to_dict() == expected

    @pytest.mark.unit
    def test_to_dict_with_custom_init_parameters(self):
        """An explicitly passed ``DuplicatePolicy.SKIP`` is serialized as ``"SKIP"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls(), policy=DuplicatePolicy.SKIP)
        expected = {
            "type": "DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "policy": "SKIP",
            },
        }
        assert writer.to_dict() == expected

    @pytest.mark.unit
    def test_from_dict(self):
        """``from_dict`` rebuilds the writer with the named store type and the given policy."""
        # The store class is created before deserializing; presumably the factory
        # makes it discoverable by name, so keep this call ahead of from_dict.
        store_cls = document_store_class("MockedDocumentStore")
        serialized_store = {"type": "MockedDocumentStore", "init_parameters": {}}
        payload = {
            "type": "DocumentWriter",
            "init_parameters": {"document_store": serialized_store, "policy": "SKIP"},
        }
        writer = DocumentWriter.from_dict(payload)
        assert isinstance(writer.document_store, store_cls)
        assert writer.policy == DuplicatePolicy.SKIP

    @pytest.mark.unit
    def test_from_dict_without_docstore(self):
        """Deserializing without a ``document_store`` entry raises ``DeserializationError``."""
        payload = {"type": "DocumentWriter", "init_parameters": {}}
        expected_message = "Missing 'document_store' in serialization data"
        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_without_docstore_type(self):
        """A ``document_store`` entry lacking ``type`` raises ``DeserializationError``."""
        untyped_store = {"init_parameters": {}}
        payload = {"type": "DocumentWriter", "init_parameters": {"document_store": untyped_store}}
        expected_message = "Missing 'type' in document store's serialization data"
        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_nonexisting_docstore(self):
        """A document store type that cannot be found raises ``DeserializationError``."""
        unknown_store = {"type": "NonexistingDocumentStore", "init_parameters": {}}
        payload = {"type": "DocumentWriter", "init_parameters": {"document_store": unknown_store}}
        expected_message = "DocumentStore of type 'NonexistingDocumentStore' not found."
        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_run(self):
        """``run`` calls the store's ``write_documents`` once with the documents and ``FAIL`` policy."""
        store_mock = MagicMock()
        writer = DocumentWriter(store_mock)
        texts = ("This is the text of a document.", "This is the text of another document.")
        docs = [Document(content=text) for text in texts]
        writer.run(documents=docs)
        store_mock.write_documents.assert_called_once_with(documents=docs, policy=DuplicatePolicy.FAIL)