haystack/test/preview/components/caching/test_url_cache_checker.py
Silvano Cerza 7287657f0e
refactor: Rename Document's text field to content (#6181)
* Rework Document serialisation

Make Document backward compatible

Fix InMemoryDocumentStore filters

Fix InMemoryDocumentStore.bm25_retrieval

Add release notes

Fix pylint failures

Enhance Document kwargs handling and docstrings

Rename Document's text field to content

Fix e2e tests

Fix SimilarityRanker tests

Fix typo in release notes

Rename Document's metadata field to meta (#6183)

* fix bugs

* make linters happy

* fix

* more fix

* match regex

---------

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
2023-10-31 12:44:04 +01:00

84 lines
3.6 KiB
Python

import pytest
from haystack.preview import Document, DeserializationError
from haystack.preview.testing.factory import document_store_class
from haystack.preview.document_stores.in_memory import InMemoryDocumentStore
from haystack.preview.components.caching.url_cache_checker import UrlCacheChecker
class TestUrlCacheChecker:
    """Unit tests for ``UrlCacheChecker``: ``to_dict``, ``from_dict`` and ``run``."""

    @pytest.mark.unit
    def test_to_dict(self):
        """A component built with only a document store serializes ``url_field`` as ``"url"``."""
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        component = UrlCacheChecker(document_store=mocked_docstore_class())
        data = component.to_dict()
        assert data == {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "url",
            },
        }

    @pytest.mark.unit
    def test_to_dict_with_custom_init_parameters(self):
        """A custom ``url_field`` passed to the constructor appears in the serialized data."""
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        component = UrlCacheChecker(document_store=mocked_docstore_class(), url_field="my_url_field")
        data = component.to_dict()
        assert data == {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "my_url_field",
            },
        }

    @pytest.mark.unit
    def test_from_dict(self):
        """``from_dict`` rebuilds the document store instance and restores ``url_field``."""
        # NOTE(review): creating the mocked class presumably also makes it discoverable
        # by name for ``from_dict`` -- confirm against the ``document_store_class`` factory.
        mocked_docstore_class = document_store_class("MockedDocumentStore")
        data = {
            "type": "UrlCacheChecker",
            "init_parameters": {
                "document_store": {"type": "MockedDocumentStore", "init_parameters": {}},
                "url_field": "my_url_field",
            },
        }
        component = UrlCacheChecker.from_dict(data)
        assert isinstance(component.document_store, mocked_docstore_class)
        assert component.url_field == "my_url_field"

    @pytest.mark.unit
    def test_from_dict_without_docstore(self):
        """``from_dict`` raises ``DeserializationError`` when ``document_store`` is absent."""
        data = {"type": "UrlCacheChecker", "init_parameters": {}}
        with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"):
            UrlCacheChecker.from_dict(data)

    @pytest.mark.unit
    def test_from_dict_without_docstore_type(self):
        """``from_dict`` raises ``DeserializationError`` when the document store entry has no ``type``."""
        data = {"type": "UrlCacheChecker", "init_parameters": {"document_store": {"init_parameters": {}}}}
        with pytest.raises(DeserializationError, match="Missing 'type' in document store's serialization data"):
            UrlCacheChecker.from_dict(data)

    @pytest.mark.unit
    def test_from_dict_nonexisting_docstore(self):
        """``from_dict`` raises ``DeserializationError`` for an unknown document store type."""
        data = {
            "type": "UrlCacheChecker",
            "init_parameters": {"document_store": {"type": "NonexistingDocumentStore", "init_parameters": {}}},
        }
        # ``match`` is treated as a regular expression, so the trailing period is escaped
        # to require a literal "." instead of accepting any character in that position.
        expected_message = r"DocumentStore of type 'NonexistingDocumentStore' not found\."
        with pytest.raises(DeserializationError, match=expected_message):
            UrlCacheChecker.from_dict(data)

    @pytest.mark.unit
    def test_run(self):
        """``run`` returns stored documents for known URLs as hits and unknown URLs as misses."""
        docstore = InMemoryDocumentStore()
        documents = [
            Document(content="doc1", meta={"url": "https://example.com/1"}),
            Document(content="doc2", meta={"url": "https://example.com/2"}),
            Document(content="doc3", meta={"url": "https://example.com/1"}),
            Document(content="doc4", meta={"url": "https://example.com/2"}),
        ]
        docstore.write_documents(documents)
        checker = UrlCacheChecker(docstore)
        # Only ".../1" is queried among the stored URLs, so both documents carrying it
        # (indices 0 and 2) are expected as hits; ".../5" was never written.
        results = checker.run(urls=["https://example.com/1", "https://example.com/5"])
        assert results == {"hits": [documents[0], documents[2]], "misses": ["https://example.com/5"]}