feat: support search_fields in DeepsetCloudDocumentStore (#5455)

* feat: support search_fields in DeepsetCloudDocumentStore

* add reno file

* make search_fields plain init arg

* Update lg

* Update releasenotes/notes/deepset-cloud-document-store-search-fields-40b2322466f808a3.yaml

* Update haystack/document_stores/deepsetcloud.py

---------

Co-authored-by: agnieszka-m <amarzec13@gmail.com>
This commit is contained in:
tstadel 2023-08-04 11:13:05 +02:00 committed by GitHub
parent d96c963bc4
commit d26d4201fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 0 deletions

View File

@ -47,6 +47,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
label_index: str = "default",
embedding_dim: int = 768,
use_prefiltering: bool = False,
search_fields: Union[str, list] = "content",
):
"""
A DocumentStore facade enabling you to interact with the documents stored in deepset Cloud.
@ -90,6 +91,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
:param use_prefiltering: By default, DeepsetCloudDocumentStore uses post-filtering when querying with filters.
To use pre-filtering instead, set this parameter to `True`. Note that pre-filtering
comes at the cost of higher latency.
:param search_fields: Name of the field, or a list of field names, that BM25Retriever uses to find matches to the incoming query in the documents, for example: ["content", "title"]. Defaults to "content".
"""
self.index = index
self.label_index = label_index
@ -98,6 +100,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
self.return_embedding = return_embedding
self.embedding_dim = embedding_dim
self.use_prefiltering = use_prefiltering
self.search_fields = search_fields
self.client = DeepsetCloud.get_index_client(
api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, index=index
)

View File

@ -0,0 +1,4 @@
---
features:
- |
`DeepsetCloudDocumentStore` supports searching multiple fields in sparse queries. This enables you to search meta fields as well when using `BM25Retriever`. For example, set `search_fields=["content", "title"]` to search the `title` meta field along with the document `content`.