feat: Add use_prefiltering parameter to DeepsetCloudDocumentStore (#3969)

* Add `use_prefiltering` parameter

* Adapt doc string

* Pass use_prefiltering via API to dC

* Adapt doc string

* Adapt test
This commit is contained in:
bogdankostic 2023-01-30 15:12:34 +01:00 committed by GitHub
parent b4c5bb7de4
commit 1a8fe0031d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 1 deletions

View File

@ -46,6 +46,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
return_embedding: bool = False,
label_index: str = "default",
embedding_dim: int = 768,
use_prefiltering: bool = False,
):
"""
A DocumentStore facade enabling you to interact with the documents stored in deepset Cloud.
@ -86,6 +87,9 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
:param label_index: index for the evaluation set interface
:param return_embedding: To return document embedding.
:param embedding_dim: Specifies the dimensionality of the embedding vector (only needed when using a dense retriever, for example, DensePassageRetriever or EmbeddingRetriever, on top).
:param use_prefiltering: By default, DeepsetCloudDocumentStore uses post-filtering when querying with filters.
To use pre-filtering instead, set this parameter to `True`. Note that pre-filtering
comes at the cost of higher latency.
"""
self.index = index
self.label_index = label_index
@ -93,6 +97,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
self.similarity = similarity
self.return_embedding = return_embedding
self.embedding_dim = embedding_dim
self.use_prefiltering = use_prefiltering
self.client = DeepsetCloud.get_index_client(
api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, index=index
)
@ -419,6 +424,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
index=index,
scale_score=scale_score,
headers=headers,
use_prefiltering=self.use_prefiltering,
)
docs = [Document.from_dict(doc) for doc in doc_dicts]
return docs

View File

@ -379,6 +379,7 @@ class IndexClient:
all_terms_must_match: Optional[bool] = None,
scale_score: bool = True,
headers: Optional[dict] = None,
use_prefiltering: Optional[bool] = None,
) -> List[dict]:
index_url = self._build_index_url(workspace=workspace, index=index)
query_url = f"{index_url}/documents-query"
@ -391,6 +392,7 @@ class IndexClient:
"return_embedding": return_embedding,
"all_terms_must_match": all_terms_must_match,
"scale_score": scale_score,
"use_prefiltering": use_prefiltering,
}
response = self.client.post(url=query_url, json=request, headers=headers)
return response.json()

View File

@ -379,7 +379,13 @@ class TestDeepsetCloudDocumentStore:
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
match=[
matchers.json_params_matcher(
{"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True}
{
"query_emb": query_emb.tolist(),
"top_k": 10,
"return_embedding": False,
"scale_score": True,
"use_prefiltering": False,
}
)
],
json=[],