From fc89f6ea74307b163f3282afd5ed612f5ce60eb0 Mon Sep 17 00:00:00 2001 From: Sara Zan Date: Tue, 6 Dec 2022 14:48:58 +0100 Subject: [PATCH] fix: revert Weaviate query with filters and improve tests (#3646) * revert weaviate query with filters and improve tests * pylint * upgrade weaviate container * use latest docker tag * fix text * fix text --- .github/workflows/tests.yml | 2 +- haystack/document_stores/weaviate.py | 34 ++++++++++++++------------- test/document_stores/test_weaviate.py | 5 ++-- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 79712e486..bd57a42a7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -335,7 +335,7 @@ jobs: runs-on: ${{ matrix.os }} services: weaviate: - image: semitechnologies/weaviate:1.16.0 + image: semitechnologies/weaviate:latest env: AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true" PERSISTENCE_DATA_PATH: "/var/lib/weaviate" diff --git a/haystack/document_stores/weaviate.py b/haystack/document_stores/weaviate.py index f3dab95ec..c16d4b071 100644 --- a/haystack/document_stores/weaviate.py +++ b/haystack/document_stores/weaviate.py @@ -985,28 +985,30 @@ class WeaviateDocumentStore(BaseDocumentStore): ) # Retrieval with BM25 AND filtering - if filters: - - # Once Weaviate starts supporting filters with BM25: - filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate() + if filters: # pylint: disable=no-else-raise + raise NotImplementedError( + "Weaviate currently does not support filters WITH inverted index text query (eg BM25)!" + ) + # # Once Weaviate starts supporting filters with BM25: + # filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate() + # gql_query = ( + # weaviate.gql.get.GetBuilder( + # class_name=index, properties=properties, connection=self.weaviate_client + # ) + # .with_near_vector({"vector": [0, 0]}) + # .with_where(filter_dict) + # .with_limit(top_k) + # .build() + # ) + else: + # BM25 retrieval without filtering gql_query = ( - weaviate.gql.get.GetBuilder( - class_name=index, properties=properties, connection=self.weaviate_client - ) + gql.get.GetBuilder(class_name=index, properties=properties, connection=self.weaviate_client) .with_near_vector({"vector": [0, 0]}) - .with_where(filter_dict) .with_limit(top_k) .build() ) - # BM25 retrieval without filtering - gql_query = ( - gql.get.GetBuilder(class_name=index, properties=properties, connection=self.weaviate_client) - .with_near_vector({"vector": [0, 0]}) - .with_limit(top_k) - .build() - ) - # Build the BM25 part of the GQL manually. # Currently the GetBuilder of the Weaviate-client (v3.6.0) # does not support the BM25 part of GQL building, so diff --git a/test/document_stores/test_weaviate.py b/test/document_stores/test_weaviate.py index a92ad9846..80c206345 100644 --- a/test/document_stores/test_weaviate.py +++ b/test/document_stores/test_weaviate.py @@ -180,8 +180,9 @@ class TestWeaviateDocumentStore(DocumentStoreBaseTestAbstract): assert len(docs) == 3 # BM25 retrieval WITH filters is not yet supported as of Weaviate v1.14.1 - # with pytest.raises(Exception): - docs = ds.query(query_text, filters={"name": ["filename2"]}) + # Should be from 1.18: https://github.com/semi-technologies/weaviate/issues/2393 + # docs = ds.query(query_text, filters={"name": ["name_1"]}) + # assert len(docs) == 1 docs = ds.query(filters={"name": ["name_0"]}) assert len(docs) == 3