From c4b54bcac0beefd543da3d9b56ea88c5c1f04823 Mon Sep 17 00:00:00 2001 From: Madeesh Kannan Date: Wed, 28 Feb 2024 17:35:16 +0100 Subject: [PATCH] docs: Update docstrings for `haystack.components.caching` (#7247) * docs: Update docstrings for `haystack.components.caching` * Update cache_checker.py * `black` --- haystack/components/caching/cache_checker.py | 59 ++++++++++++++++---- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/haystack/components/caching/cache_checker.py b/haystack/components/caching/cache_checker.py index 75b97d6f6..77fe2599d 100644 --- a/haystack/components/caching/cache_checker.py +++ b/haystack/components/caching/cache_checker.py @@ -14,27 +14,63 @@ logger = logging.getLogger(__name__) @component class CacheChecker: """ - CacheChecker is a component that checks for the presence of documents in a Document Store based on a specified - cache field. + Checks for the presence of documents in a Document Store based on a specified + field in each document's metadata. + + If matching documents are found, they are returned as hits. If not, the items + are returned as misses, indicating they are not in the cache. + + Usage example: + ```python + from haystack import Document + from haystack.document_stores.in_memory import InMemoryDocumentStore + from haystack.components.caching.cache_checker import CacheChecker + + docstore = InMemoryDocumentStore() + documents = [ + Document(content="doc1", meta={"url": "https://example.com/1"}), + Document(content="doc2", meta={"url": "https://example.com/2"}), + Document(content="doc3", meta={"url": "https://example.com/1"}), + Document(content="doc4", meta={"url": "https://example.com/2"}), + ] + docstore.write_documents(documents) + checker = CacheChecker(docstore, cache_field="url") + results = checker.run(items=["https://example.com/1", "https://example.com/5"]) + assert results == {"hits": [documents[0], documents[2]], "misses": ["https://example.com/5"]} + ``` """ def __init__(self, document_store: DocumentStore, cache_field: str): """ - Create a UrlCacheChecker component. + Create a CacheChecker component. + + :param document_store: + Document store to check. + :param cache_field: + Name of the Document metadata field + to check for cache hits. """ self.document_store = document_store self.cache_field = cache_field def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ return default_to_dict(self, document_store=self.document_store.to_dict(), cache_field=self.cache_field) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker": """ - Deserialize this component from a dictionary. + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. """ init_params = data.get("init_parameters", {}) if "document_store" not in init_params: @@ -60,12 +96,15 @@ class CacheChecker: @component.output_types(hits=List[Document], misses=List) def run(self, items: List[Any]): """ - Checks if any document associated with the specified field is already present in the store. If matching documents - are found, they are returned as hits. If not, the items are returned as misses, indicating they are not in the cache. + Checks if any document associated with the specified cache field + is already present in the store. - :param items: A list of values associated with the cache_field to be checked against the cache. - :return: A dictionary with two keys: "hits" and "misses". The values are lists of documents that were found in - the cache and items that were not, respectively. + :param items: + Values to be checked against the cache field. + :return: + A dictionary with two keys: + - `hits` - Documents that matched with any of the items. + - `misses` - Items that were not present in any documents. """ found_documents = [] misses = []