haystack/test/preview/components/writers/test_document_writer.py
ZanSara 6e70d403f8
feat: Improve Document for Haystack 2.0 (#5738)
* initial draft

* tests

* add proposal

* proposal number

* reno

* fix tests and usage of content and content_type

* update branch & fix more tests

* mypy

* add docstring

* fix more tests

* review feedback

* improve __str__

* Apply suggestions from code review

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/preview/dataclasses/document.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* improve __str__

* fix tests

* fix more tests

* Update haystack/preview/document_stores/memory/document_store.py

---------

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>
2023-09-11 17:40:00 +02:00

84 lines
3.3 KiB
Python

from unittest.mock import MagicMock
import pytest
from haystack.preview import Document, DeserializationError
from haystack.preview.testing.factory import document_store_class
from haystack.preview.components.writers.document_writer import DocumentWriter
from haystack.preview.document_stores import DuplicatePolicy
class TestDocumentWriter:
    """Unit tests for ``DocumentWriter``: ``to_dict``, ``from_dict`` and ``run``."""

    @pytest.mark.unit
    def test_to_dict(self):
        """A writer built without an explicit policy serializes with policy ``"FAIL"``."""
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        component = DocumentWriter(document_store=mocked_docstore_class())
        data = component.to_dict()
        assert data == {
            "type": "DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "policy": "FAIL",
            },
        }

    @pytest.mark.unit
    def test_to_dict_with_custom_init_parameters(self):
        """A writer built with ``DuplicatePolicy.SKIP`` serializes with policy ``"SKIP"``."""
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        component = DocumentWriter(document_store=mocked_docstore_class(), policy=DuplicatePolicy.SKIP)
        data = component.to_dict()
        assert data == {
            "type": "DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "policy": "SKIP",
            },
        }

    @pytest.mark.unit
    def test_from_dict(self):
        """``from_dict`` rebuilds the document store instance and the duplicate policy."""
        # The class must be created before deserializing: the assertion below checks
        # that the restored store is an instance of exactly this class.
        # NOTE(review): presumably the factory also makes the type name resolvable
        # for ``from_dict`` -- confirm against the testing factory.
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        data = {
            "type": "DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "policy": "SKIP",
            },
        }
        component = DocumentWriter.from_dict(data)
        assert isinstance(component.document_store, mocked_docstore_class)
        assert component.policy == DuplicatePolicy.SKIP

    @pytest.mark.unit
    def test_from_dict_without_docstore(self):
        """``from_dict`` raises ``DeserializationError`` when ``document_store`` is absent."""
        data = {"type": "DocumentWriter", "init_parameters": {}}
        with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"):
            DocumentWriter.from_dict(data)

    @pytest.mark.unit
    def test_from_dict_without_docstore_type(self):
        """``from_dict`` raises ``DeserializationError`` when the store entry has no ``type``."""
        data = {"type": "DocumentWriter", "init_parameters": {"document_store": {"init_parameters": {}}}}
        with pytest.raises(DeserializationError, match="Missing 'type' in document store's serialization data"):
            DocumentWriter.from_dict(data)

    @pytest.mark.unit
    def test_from_dict_nonexisting_docstore(self):
        """``from_dict`` raises ``DeserializationError`` for an unknown document store type."""
        data = {
            "type": "DocumentWriter",
            "init_parameters": {"document_store": {"type": "NonexistingDocumentStore", "init_parameters": {}}},
        }
        with pytest.raises(DeserializationError, match="DocumentStore of type 'NonexistingDocumentStore' not found."):
            DocumentWriter.from_dict(data)

    @pytest.mark.unit
    def test_run(self):
        """``run`` calls ``write_documents`` once with the documents and the ``FAIL`` policy."""
        # A MagicMock stands in for the store so the call can be inspected afterwards.
        mocked_document_store = MagicMock()
        # Keyword form, matching how every other test in this class builds the writer.
        writer = DocumentWriter(document_store=mocked_document_store)
        documents = [
            Document(text="This is the text of a document."),
            Document(text="This is the text of another document."),
        ]

        writer.run(documents=documents)

        mocked_document_store.write_documents.assert_called_once_with(documents=documents, policy=DuplicatePolicy.FAIL)