2023-08-29 18:15:07 +02:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
from haystack.preview import Document, DeserializationError
|
|
|
|
from haystack.preview.testing.factory import document_store_class
|
|
|
|
from haystack.preview.components.writers.document_writer import DocumentWriter
|
|
|
|
from haystack.preview.document_stores import DuplicatePolicy
|
2023-11-21 15:54:25 +01:00
|
|
|
from haystack.preview.document_stores.in_memory import InMemoryDocumentStore
|
2023-08-29 18:15:07 +02:00
|
|
|
|
|
|
|
|
|
|
|
class TestDocumentWriter:
    """Unit tests covering DocumentWriter's to_dict / from_dict round trip and its run method."""

    @pytest.mark.unit
    def test_to_dict(self):
        """A writer created without an explicit policy serializes with policy "FAIL"."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls())

        serialized_store = {
            "type": "haystack.preview.testing.factory.MockedDocumentStore",
            "init_parameters": {},
        }
        expected = {
            "type": "haystack.preview.components.writers.document_writer.DocumentWriter",
            "init_parameters": {"document_store": serialized_store, "policy": "FAIL"},
        }

        serialized = writer.to_dict()
        assert serialized == expected

    @pytest.mark.unit
    def test_to_dict_with_custom_init_parameters(self):
        """A writer created with DuplicatePolicy.SKIP serializes with policy "SKIP"."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls(), policy=DuplicatePolicy.SKIP)

        serialized_store = {
            "type": "haystack.preview.testing.factory.MockedDocumentStore",
            "init_parameters": {},
        }
        expected = {
            "type": "haystack.preview.components.writers.document_writer.DocumentWriter",
            "init_parameters": {"document_store": serialized_store, "policy": "SKIP"},
        }

        serialized = writer.to_dict()
        assert serialized == expected

    @pytest.mark.unit
    def test_from_dict(self):
        """from_dict yields a writer holding the mocked store type and the SKIP policy."""
        # The mocked store class is created before deserializing, as in the
        # serialization tests, and is the type the assertion below checks against.
        store_cls = document_store_class("MockedDocumentStore")

        serialized_store = {
            "type": "haystack.preview.testing.factory.MockedDocumentStore",
            "init_parameters": {},
        }
        payload = {
            "type": "haystack.preview.components.writers.document_writer.DocumentWriter",
            "init_parameters": {"document_store": serialized_store, "policy": "SKIP"},
        }

        writer = DocumentWriter.from_dict(payload)

        assert isinstance(writer.document_store, store_cls)
        assert writer.policy == DuplicatePolicy.SKIP

    @pytest.mark.unit
    def test_from_dict_without_docstore(self):
        """from_dict raises DeserializationError when no document store entry is given."""
        payload = {"type": "DocumentWriter", "init_parameters": {}}
        expected_message = "Missing 'document_store' in serialization data"

        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_without_docstore_type(self):
        """from_dict raises DeserializationError when the document store entry has no type."""
        payload = {"type": "DocumentWriter", "init_parameters": {"document_store": {"init_parameters": {}}}}
        expected_message = "Missing 'type' in document store's serialization data"

        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_nonexisting_docstore(self):
        """from_dict raises DeserializationError when the document store type cannot be found."""
        unknown_store = {"type": "NonexistingDocumentStore", "init_parameters": {}}
        payload = {
            "type": "DocumentWriter",
            "init_parameters": {"document_store": unknown_store},
        }
        expected_message = "DocumentStore of type 'NonexistingDocumentStore' not found."

        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(payload)

    @pytest.mark.unit
    def test_run(self):
        """Running the writer on two documents reports two documents written."""
        writer = DocumentWriter(InMemoryDocumentStore())
        texts = (
            "This is the text of a document.",
            "This is the text of another document.",
        )
        docs = [Document(content=text) for text in texts]

        outcome = writer.run(documents=docs)

        assert outcome["documents_written"] == 2

    @pytest.mark.unit
    def test_run_skip_policy(self):
        """With the SKIP policy, a second run over the same documents writes nothing."""
        writer = DocumentWriter(InMemoryDocumentStore(), policy=DuplicatePolicy.SKIP)
        texts = (
            "This is the text of a document.",
            "This is the text of another document.",
        )
        docs = [Document(content=text) for text in texts]

        # First pass: the store starts empty, so both documents are written.
        first_pass = writer.run(documents=docs)
        assert first_pass["documents_written"] == 2

        # Second pass: the very same Document objects are submitted again.
        second_pass = writer.run(documents=docs)
        assert second_pass["documents_written"] == 0
|