diff --git a/haystack/document_stores/deepsetcloud.py b/haystack/document_stores/deepsetcloud.py index 384929545..83358ce74 100644 --- a/haystack/document_stores/deepsetcloud.py +++ b/haystack/document_stores/deepsetcloud.py @@ -46,6 +46,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore): return_embedding: bool = False, label_index: str = "default", embedding_dim: int = 768, + use_prefiltering: bool = False, ): """ A DocumentStore facade enabling you to interact with the documents stored in deepset Cloud. @@ -86,6 +87,9 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore): :param label_index: index for the evaluation set interface :param return_embedding: To return document embedding. :param embedding_dim: Specifies the dimensionality of the embedding vector (only needed when using a dense retriever, for example, DensePassageRetriever pr EmbeddingRetriever, on top). + :param use_prefiltering: By default, DeepsetCloudDocumentStore uses post-filtering when querying with filters. + To use pre-filtering instead, set this parameter to `True`. Note that pre-filtering + comes at the cost of higher latency. """ self.index = index self.label_index = label_index @@ -93,6 +97,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore): self.similarity = similarity self.return_embedding = return_embedding self.embedding_dim = embedding_dim + self.use_prefiltering = use_prefiltering self.client = DeepsetCloud.get_index_client( api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, index=index ) @@ -419,6 +424,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore): index=index, scale_score=scale_score, headers=headers, + use_prefiltering=self.use_prefiltering, ) docs = [Document.from_dict(doc) for doc in doc_dicts] return docs diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py index f62d8908c..bb62471bb 100644 --- a/haystack/utils/deepsetcloud.py +++ b/haystack/utils/deepsetcloud.py @@ -379,6 +379,7 @@ class IndexClient: all_terms_must_match: Optional[bool] = None, scale_score: bool = True, headers: Optional[dict] = None, + use_prefiltering: Optional[bool] = None, ) -> List[dict]: index_url = self._build_index_url(workspace=workspace, index=index) query_url = f"{index_url}/documents-query" @@ -391,6 +392,7 @@ class IndexClient: "return_embedding": return_embedding, "all_terms_must_match": all_terms_must_match, "scale_score": scale_score, + "use_prefiltering": use_prefiltering, } response = self.client.post(url=query_url, json=request, headers=headers) return response.json() diff --git a/test/document_stores/test_deepsetcloud.py b/test/document_stores/test_deepsetcloud.py index 4e5c2995e..d0d33a8c6 100644 --- a/test/document_stores/test_deepsetcloud.py +++ b/test/document_stores/test_deepsetcloud.py @@ -379,7 +379,13 @@ class TestDeepsetCloudDocumentStore: url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query", match=[ matchers.json_params_matcher( - {"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True} + { + "query_emb": query_emb.tolist(), + "top_k": 10, + "return_embedding": False, + "scale_score": True, + "use_prefiltering": False, + } ) ], json=[],