mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-27 23:18:37 +00:00
feat: Add use_prefiltering parameter to DeepsetCloudDocumentStore (#3969)
* Add `use_prefiltering` parameter
* Adapt doc string
* Pass use_prefiltering via API to dC
* Adapt doc string
* Adapt test
This commit is contained in:
parent
b4c5bb7de4
commit
1a8fe0031d
@ -46,6 +46,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
return_embedding: bool = False,
|
||||
label_index: str = "default",
|
||||
embedding_dim: int = 768,
|
||||
use_prefiltering: bool = False,
|
||||
):
|
||||
"""
|
||||
A DocumentStore facade enabling you to interact with the documents stored in deepset Cloud.
|
||||
@ -86,6 +87,9 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
:param label_index: index for the evaluation set interface
|
||||
:param return_embedding: To return document embedding.
|
||||
:param embedding_dim: Specifies the dimensionality of the embedding vector (only needed when using a dense retriever, for example, DensePassageRetriever or EmbeddingRetriever, on top).
|
||||
:param use_prefiltering: By default, DeepsetCloudDocumentStore uses post-filtering when querying with filters.
|
||||
To use pre-filtering instead, set this parameter to `True`. Note that pre-filtering
|
||||
comes at the cost of higher latency.
|
||||
"""
|
||||
self.index = index
|
||||
self.label_index = label_index
|
||||
@ -93,6 +97,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
self.similarity = similarity
|
||||
self.return_embedding = return_embedding
|
||||
self.embedding_dim = embedding_dim
|
||||
self.use_prefiltering = use_prefiltering
|
||||
self.client = DeepsetCloud.get_index_client(
|
||||
api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, index=index
|
||||
)
|
||||
@ -419,6 +424,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
|
||||
index=index,
|
||||
scale_score=scale_score,
|
||||
headers=headers,
|
||||
use_prefiltering=self.use_prefiltering,
|
||||
)
|
||||
docs = [Document.from_dict(doc) for doc in doc_dicts]
|
||||
return docs
|
||||
|
||||
@ -379,6 +379,7 @@ class IndexClient:
|
||||
all_terms_must_match: Optional[bool] = None,
|
||||
scale_score: bool = True,
|
||||
headers: Optional[dict] = None,
|
||||
use_prefiltering: Optional[bool] = None,
|
||||
) -> List[dict]:
|
||||
index_url = self._build_index_url(workspace=workspace, index=index)
|
||||
query_url = f"{index_url}/documents-query"
|
||||
@ -391,6 +392,7 @@ class IndexClient:
|
||||
"return_embedding": return_embedding,
|
||||
"all_terms_must_match": all_terms_must_match,
|
||||
"scale_score": scale_score,
|
||||
"use_prefiltering": use_prefiltering,
|
||||
}
|
||||
response = self.client.post(url=query_url, json=request, headers=headers)
|
||||
return response.json()
|
||||
|
||||
@ -379,7 +379,13 @@ class TestDeepsetCloudDocumentStore:
|
||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
||||
match=[
|
||||
matchers.json_params_matcher(
|
||||
{"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True}
|
||||
{
|
||||
"query_emb": query_emb.tolist(),
|
||||
"top_k": 10,
|
||||
"return_embedding": False,
|
||||
"scale_score": True,
|
||||
"use_prefiltering": False,
|
||||
}
|
||||
)
|
||||
],
|
||||
json=[],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user