From 32091d66cb8bec877612edf3745dee6d910314bc Mon Sep 17 00:00:00 2001 From: Zoltan Fedor Date: Wed, 29 Mar 2023 10:51:22 -0400 Subject: [PATCH] Adding filtering support for Weaviate when used for BM25 querying (#4385) --- haystack/document_stores/weaviate.py | 29 ++++++++-------------------- test/nodes/test_retriever.py | 3 +-- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/haystack/document_stores/weaviate.py b/haystack/document_stores/weaviate.py index fea933d0a..4590cea96 100644 --- a/haystack/document_stores/weaviate.py +++ b/haystack/document_stores/weaviate.py @@ -999,29 +999,16 @@ class WeaviateDocumentStore(KeywordDocumentStore): # Default Retrieval via BM25 using the user's query on `self.content_field` else: - logger.warning( - "As of v1.14.1 Weaviate's BM25 retrieval is still in experimental phase, " - "so use it with care! To turn on the BM25 experimental feature in Weaviate " - "you need to start it with the `ENABLE_EXPERIMENTAL_BM25='true'` " - "environmental variable." - ) - # Retrieval with BM25 AND filtering - if filters: # pylint: disable=no-else-raise - raise NotImplementedError( - "Weaviate currently does not support filters WITH inverted index text query (eg BM25)!" + if filters: + filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate() + gql_query = ( + gql.get.GetBuilder(class_name=index, properties=properties, connection=self.weaviate_client) + .with_limit(top_k) + .with_bm25({"query": query, "properties": self.content_field}) + .with_where(filter_dict) + .build() ) - # # Once Weaviate starts supporting filters with BM25: - # filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate() - # gql_query = ( - # weaviate.gql.get.GetBuilder( - # class_name=index, properties=properties, connection=self.weaviate_client - # ) - # .with_near_vector({"vector": [0, 0]}) - # .with_where(filter_dict) - # .with_limit(top_k) - # .build() - # ) else: # BM25 retrieval without filtering gql_query = ( diff --git a/test/nodes/test_retriever.py b/test/nodes/test_retriever.py index 46ae69fb7..eec6b8428 100644 --- a/test/nodes/test_retriever.py +++ b/test/nodes/test_retriever.py @@ -86,8 +86,7 @@ def test_retrieval_without_filters(retriever_with_docs: BaseRetriever, document_ ("embedding", "elasticsearch"), ("embedding", "memory"), ("bm25", "elasticsearch"), - # TODO - add once Weaviate starts supporting filters with BM25 in Weaviate v1.18+ - # ("bm25", "weaviate"), + ("bm25", "weaviate"), ("es_filter_only", "elasticsearch"), ], indirect=True,