mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-04 07:26:15 +00:00

* Rework Document serialisation; Make Document backward compatible; Fix InMemoryDocumentStore filters; Fix InMemoryDocumentStore.bm25_retrieval; Add release notes; Fix pylint failures; Enhance Document kwargs handling and docstrings; Rename Document's text field to content; Fix e2e tests; Fix SimilarityRanker tests; Fix typo in release notes; Rename Document's metadata field to meta (#6183) * fix bugs * make linters happy * fix * more fix * match regex --------- Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
84 lines
3.6 KiB
Python
84 lines
3.6 KiB
Python
import pytest
|
|
|
|
from haystack.preview import Document, DeserializationError
|
|
from haystack.preview.testing.factory import document_store_class
|
|
from haystack.preview.document_stores.in_memory import InMemoryDocumentStore
|
|
from haystack.preview.components.caching.url_cache_checker import UrlCacheChecker
|
|
|
|
|
|
class TestUrlCacheChecker:
    """Unit tests covering ``UrlCacheChecker`` (de)serialization and its ``run`` method."""

    @pytest.mark.unit
    def test_to_dict(self):
        """``to_dict`` on a checker built without ``url_field`` is expected to report ``"url"``."""
        store_cls = document_store_class("MockedDocumentStore")
        checker = UrlCacheChecker(document_store=store_cls())
        serialized = checker.to_dict()

        expected = {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "url",
            },
        }
        assert serialized == expected

    @pytest.mark.unit
    def test_to_dict_with_custom_init_parameters(self):
        """``to_dict`` is expected to echo back a custom ``url_field`` passed at construction."""
        store_cls = document_store_class("MockedDocumentStore")
        checker = UrlCacheChecker(document_store=store_cls(), url_field="my_url_field")
        serialized = checker.to_dict()

        expected = {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "my_url_field",
            },
        }
        assert serialized == expected

    @pytest.mark.unit
    def test_from_dict(self):
        """``from_dict`` is expected to rebuild both the document store and the ``url_field``."""
        # The mocked store class is created before deserializing, as in the other tests that use it.
        store_cls = document_store_class("MockedDocumentStore")
        payload = {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "my_url_field",
            },
        }

        restored = UrlCacheChecker.from_dict(payload)

        assert isinstance(restored.document_store, store_cls)
        assert restored.url_field == "my_url_field"

    @pytest.mark.unit
    def test_from_dict_without_docstore(self):
        """``from_dict`` is expected to raise when ``document_store`` is absent from the payload."""
        payload = {"type": "UrlCacheChecker", "init_parameters": {}}

        with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"):
            UrlCacheChecker.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_without_docstore_type(self):
        """``from_dict`` is expected to raise when the document store entry has no ``type``."""
        payload = {"type": "UrlCacheChecker", "init_parameters": {"document_store": {"init_parameters": {}}}}

        with pytest.raises(DeserializationError, match="Missing 'type' in document store's serialization data"):
            UrlCacheChecker.from_dict(payload)

    @pytest.mark.unit
    def test_from_dict_nonexisting_docstore(self):
        """``from_dict`` is expected to raise when the document store ``type`` is unknown."""
        payload = {
            "type": "UrlCacheChecker",
            "init_parameters": {"document_store": {"type": "NonexistingDocumentStore", "init_parameters": {}}},
        }

        with pytest.raises(DeserializationError, match="DocumentStore of type 'NonexistingDocumentStore' not found."):
            UrlCacheChecker.from_dict(payload)

    @pytest.mark.unit
    def test_run(self):
        """``run`` is expected to split URLs into stored documents (hits) and unknown URLs (misses)."""
        store = InMemoryDocumentStore()
        # Two documents per URL, so a single URL lookup should yield more than one hit.
        docs = [
            Document(content="doc1", meta={"url": "https://example.com/1"}),
            Document(content="doc2", meta={"url": "https://example.com/2"}),
            Document(content="doc3", meta={"url": "https://example.com/1"}),
            Document(content="doc4", meta={"url": "https://example.com/2"}),
        ]
        store.write_documents(docs)

        checker = UrlCacheChecker(store)
        outcome = checker.run(urls=["https://example.com/1", "https://example.com/5"])

        expected_hits = [docs[0], docs[2]]
        expected_misses = ["https://example.com/5"]
        assert outcome == {"hits": expected_hits, "misses": expected_misses}
|