Upgrade weaviate-client to 3.3.3 and fix get_all_documents (#1895)

* Fix 'bug' on Weaviate only returning max. 100 docs on get_all_documents * Add type * Update Weaviate version on the CI * Fix bug on get_document_count where there are no documents * Add more info in the docstrings of get_all_documents and get_all_documents_generator * Add latest docstring and tutorial changes * Apply Black * Update Documentation & Code Style * Trigger pipeline * Update Documentation & Code Style * Include StefanBogdan feedback * Fix mypy issues and LogicalFilterClause * Add more types * Update Documentation & Code Style * update setup.cfg * Upgrade weaviate containers too * Allow to filter for content field in Weaviate * Use convert_to_weaviate instead of convert_to_pinecone * Fix _get_all_documents_in_index * Update docstrings and docs * Catching an exception in get_document(s)_by_id Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: bogdankostic <bogdankostic@web.de>
2025-11-02 02:39:51 +00:00 · 2022-04-01 14:37:34 +02:00 · 2022-04-01 14:37:34 +02:00 · ae712fe6bf
commit ae712fe6bf
parent 3459020600
17 changed files with 216 additions and 110 deletions
--- a/.github/workflows/linux_ci.yml
+++ b/.github/workflows/linux_ci.yml
@ -261,7 +261,7 @@ jobs:
        sudo docker-compose ps

    - name: Run Weaviate
-      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0

    - name: Run GraphDB
      run: docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -54,7 +54,7 @@ wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standal
 docker-compose up -d

 # Weaviate
-docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0

 # GraphDB
 docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@ -3144,7 +3144,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -3157,7 +3157,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 Limitations:
@ -3174,7 +3174,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -3188,11 +3188,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -3295,6 +3295,22 @@ def get_all_documents(index: Optional[str] = None, filters: Optional[Dict[str, U

 Get documents from the document store.

+Note this limitation from the changelog of Weaviate 1.8.0:
+
+.. quote::
+    Due to the increasing cost of each page outlined above, there is a limit to
+    how many objects can be retrieved using pagination. By default setting the sum
+    of offset and limit to higher than 10,000 objects, will lead to an error.
+    If you must retrieve more than 10,000 objects, you can increase this limit by
+    setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+    Warning: Setting this to arbitrarily high values can make the memory consumption
+    of a single query explode and single queries can slow down the entire cluster.
+    We recommend setting this value to the lowest possible value that does not
+    interfere with your users' expectations.
+
+(https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
 **Arguments**:

 - `index`: Name of the index to get the documents from. If None, the
@ -3341,6 +3357,22 @@ Get documents from the document store. Under-the-hood, documents are fetched in
 document store and yielded as individual documents. This method can be used to iteratively process
 a large number of documents without having to load all documents in memory.

+Note this limitation from the changelog of Weaviate 1.8.0:
+
+.. quote::
+    Due to the increasing cost of each page outlined above, there is a limit to
+    how many objects can be retrieved using pagination. By default setting the sum
+    of offset and limit to higher than 10,000 objects, will lead to an error.
+    If you must retrieve more than 10,000 objects, you can increase this limit by
+    setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+    Warning: Setting this to arbitrarily high values can make the memory consumption
+    of a single query explode and single queries can slow down the entire cluster.
+    We recommend setting this value to the lowest possible value that does not
+    interfere with your users' expectations.
+
+(https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
 **Arguments**:

 - `index`: Name of the index to get the documents from. If None, the
@ -3454,7 +3486,7 @@ operation.
    ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/_src/usage/usage/document_store.md
+++ b/docs/_src/usage/usage/document_store.md
@ -128,9 +128,9 @@ document_store = SQLDocumentStore()
 <div class="tabcontent">

 The `WeaviateDocumentStore` requires a running Weaviate Server. 
-You can start a basic instance like this (see the [Weaviate docs](https://www.semi.technology/developers/weaviate/current/) for details):
+You can start a basic instance like this (see the [Weaviate docs](https://weaviate.io/developers/weaviate/current/) for details):
 ```
-    docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+    docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0
 ```
  
 Afterwards, you can use it in Haystack:
--- a/docs/v0.10.0/_src/api/api/document_store.md
+++ b/docs/v0.10.0/_src/api/api/document_store.md
@ -1522,7 +1522,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -1533,7 +1533,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 <a name="weaviate.WeaviateDocumentStore.__init__"></a>
@ -1546,7 +1546,7 @@ Usage:
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -1560,11 +1560,11 @@ Usage:
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                   Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -1695,7 +1695,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v0.9.0/_src/api/api/document_store.md
+++ b/docs/v0.9.0/_src/api/api/document_store.md
@ -1477,7 +1477,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -1488,7 +1488,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 <a name="weaviate.WeaviateDocumentStore.__init__"></a>
@ -1501,7 +1501,7 @@ Usage:
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -1515,11 +1515,11 @@ Usage:
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                   Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -1650,7 +1650,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v1.0.0/_src/api/api/document_store.md
+++ b/docs/v1.0.0/_src/api/api/document_store.md
@ -1678,7 +1678,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -1690,7 +1690,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 Limitations:
@ -1706,7 +1706,7 @@ The current implementation is not supporting the storage of labels, so you canno
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -1720,11 +1720,11 @@ The current implementation is not supporting the storage of labels, so you canno
                   'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                   Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -1863,7 +1863,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v1.1.0/_src/api/api/document_store.md
+++ b/docs/v1.1.0/_src/api/api/document_store.md
@ -1757,7 +1757,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -1770,7 +1770,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 Limitations:
@ -1786,7 +1786,7 @@ The current implementation is not supporting the storage of labels, so you canno
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -1800,11 +1800,11 @@ The current implementation is not supporting the storage of labels, so you canno
                   'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                   Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -1943,7 +1943,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v1.2.0/_src/api/api/document_store.md
+++ b/docs/v1.2.0/_src/api/api/document_store.md
@ -2755,7 +2755,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -2768,7 +2768,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 Limitations:
@ -2785,7 +2785,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -2799,11 +2799,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -3065,7 +3065,7 @@ operation.
    ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v1.3.0/_src/api/api/document_store.md
+++ b/docs/v1.3.0/_src/api/api/document_store.md
@ -3136,7 +3136,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```

 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -3149,7 +3149,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack

 Limitations:
@ -3166,7 +3166,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:

 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@ -3180,11 +3180,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@ -3446,7 +3446,7 @@ operation.
    ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents

 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
--- a/docs/v1.3.0/_src/usage/usage/document_store.md
+++ b/docs/v1.3.0/_src/usage/usage/document_store.md
@ -128,7 +128,7 @@ document_store = SQLDocumentStore()
 <div class="tabcontent">

 The `WeaviateDocumentStore` requires a running Weaviate Server. 
-You can start a basic instance like this (see the [Weaviate docs](https://www.semi.technology/developers/weaviate/current/) for details):
+You can start a basic instance like this (see the [Weaviate docs](https://weaviate.io/developers/weaviate/current/) for details):
 ```
    docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
 ```
--- a/haystack/document_stores/filter_utils.py
+++ b/haystack/document_stores/filter_utils.py
@ -277,7 +277,10 @@ class ComparisonOperation(ABC):
                data_type = "valueDate"
            # Comparison value is a plain string
            except ValueError:
-                data_type = "valueString"
+                if self.field_name == "content":
+                    data_type = "valueText"
+                else:
+                    data_type = "valueString"
        elif isinstance(value, int):
            data_type = "valueInt"
        elif isinstance(value, float):
--- a/haystack/document_stores/weaviate.py
+++ b/haystack/document_stores/weaviate.py
@ -1,13 +1,21 @@
-import hashlib
+from typing import Any, Dict, Generator, List, Optional, Union
 import re
 import uuid
-from typing import Dict, Generator, List, Optional, Union
+import json
+import hashlib
+import logging
 from datetime import datetime

-import logging
-import json
 import numpy as np
 from tqdm import tqdm
+import weaviate
+
+try:
+    from weaviate import client, AuthClientPassword
+except (ImportError, ModuleNotFoundError) as ie:
+    from haystack.utils.import_utils import _optional_component_not_installed
+
+    _optional_component_not_installed(__name__, "weaviate", ie)

 from haystack.schema import Document
 from haystack.document_stores import BaseDocumentStore
@ -15,14 +23,6 @@ from haystack.document_stores.base import get_batches_from_generator
 from haystack.document_stores.filter_utils import LogicalFilterClause
 from haystack.document_stores.utils import convert_date_to_rfc3339

-try:
-    from weaviate import client, AuthClientPassword
-    from weaviate import ObjectsBatchRequest
-except (ImportError, ModuleNotFoundError) as ie:
-    from haystack.utils.import_utils import _optional_component_not_installed
-
-    _optional_component_not_installed(__name__, "weaviate", ie)
-

 logger = logging.getLogger(__name__)
 UUID_PATTERN = re.compile(r"^[\da-f]{8}-([\da-f]{4}-){3}[\da-f]{12}$", re.IGNORECASE)
@ -32,7 +32,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
    """

    Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-    (See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+    (See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)

    Some of the key differences in contrast to FAISS & Milvus:
    1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@ -45,7 +45,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
    https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html

    Usage:
-    1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+    1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
    2. Init a WeaviateDocumentStore in Haystack

    Limitations:
@ -74,7 +74,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
    ):
        """
        :param host: Weaviate server connection URL for storing and processing documents and vectors.
-                             For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                             For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
        :param port: port of Weaviate instance
        :param timeout_config: Weaviate Timeout config as a tuple of (retries, time out seconds).
        :param username: username (standard authentication via http_auth)
@ -88,11 +88,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
                           'cosine' is recommended for Sentence Transformers.
        :param index_type: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                           Currently, HSNW is only supported.
-                           See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                           See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
        :param custom_schema: Allows to create custom schema in Weaviate, for more details
-                           See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                           See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
        :param module_name: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                            For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                            For more details, See https://weaviate.io/developers/weaviate/current/modules/
        :param return_embedding: To return document embedding.
        :param embedding_field: Name of field containing an embedding vector.
        :param progress_bar: Whether to show a tqdm progress bar or not.
@ -265,8 +265,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
        document = None

        id = self._sanitize_id(id=id, index=index)
-
-        result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+        result = None
+        try:
+            result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+        except weaviate.exceptions.UnexpectedStatusCodeException as usce:
+            logging.debug(f"Weaviate could not get the document requested: {usce}")
        if result:
            document = self._convert_weaviate_result_to_document(result, return_embedding=True)
        return document
@ -289,7 +292,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
        # TODO: better implementation with multiple where filters instead of chatty call below?
        for id in ids:
            id = self._sanitize_id(id=id, index=index)
-            result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+            result = None
+            try:
+                result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+            except weaviate.exceptions.UnexpectedStatusCodeException as usce:
+                logging.debug(f"Weaviate could not get the document requested: {usce}")
            if result:
                document = self._convert_weaviate_result_to_document(result, return_embedding=True)
                documents.append(document)
@ -458,7 +465,6 @@ class WeaviateDocumentStore(BaseDocumentStore):
        batched_documents = get_batches_from_generator(document_objects, batch_size)
        with tqdm(total=len(document_objects), disable=not self.progress_bar) as progress_bar:
            for document_batch in batched_documents:
-                docs_batch = ObjectsBatchRequest()
                for idx, doc in enumerate(document_batch):
                    _doc = {**doc.to_dict(field_map=self._create_document_field_map())}
                    _ = _doc.pop("score", None)
@ -492,10 +498,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
                    for date_field in date_fields:
                        _doc[date_field] = convert_date_to_rfc3339(_doc[date_field])

-                    docs_batch.add(_doc, class_name=index, uuid=doc_id, vector=vector)
-
+                    self.weaviate_client.batch.add_data_object(
+                        data_object=_doc, class_name=index, uuid=doc_id, vector=vector
+                    )
                # Ingest a batch of documents
-                results = self.weaviate_client.batch.create(docs_batch)
+                results = self.weaviate_client.batch.create_objects()
                # Weaviate returns errors for every failed document in the batch
                if results is not None:
                    for result in results:
@ -563,15 +570,14 @@ class WeaviateDocumentStore(BaseDocumentStore):
        doc_count = 0
        if filters:
            filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate()
-            result = (
-                self.weaviate_client.query.aggregate(index).with_fields("meta { count }").with_where(filter_dict).do()
-            )
+            result = self.weaviate_client.query.aggregate(index).with_meta_count().with_where(filter_dict).do()
        else:
-            result = self.weaviate_client.query.aggregate(index).with_fields("meta { count }").do()
+            result = self.weaviate_client.query.aggregate(index).with_meta_count().do()

        if "data" in result:
            if "Aggregate" in result.get("data"):
-                doc_count = result.get("data").get("Aggregate").get(index)[0]["meta"]["count"]
+                if result.get("data").get("Aggregate").get(index):
+                    doc_count = result.get("data").get("Aggregate").get(index)[0]["meta"]["count"]

        return doc_count

@ -586,6 +592,22 @@ class WeaviateDocumentStore(BaseDocumentStore):
        """
        Get documents from the document store.

+        Note this limitation from the changelog of Weaviate 1.8.0:
+
+        .. quote::
+            Due to the increasing cost of each page outlined above, there is a limit to
+            how many objects can be retrieved using pagination. By default setting the sum
+            of offset and limit to higher than 10,000 objects, will lead to an error.
+            If you must retrieve more than 10,000 objects, you can increase this limit by
+            setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+            Warning: Setting this to arbitrarily high values can make the memory consumption
+            of a single query explode and single queries can slow down the entire cluster.
+            We recommend setting this value to the lowest possible value that does not
+            interfere with your users' expectations.
+
+        (https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
        :param index: Name of the index to get the documents from. If None, the
                      DocumentStore's default index (self.index) will be used.
        :param filters: Optional filters to narrow down the search space to documents whose metadata fulfill certain
@ -651,10 +673,30 @@ class WeaviateDocumentStore(BaseDocumentStore):
        else:
            result = self.weaviate_client.query.get(class_name=index, properties=properties).do()

-        all_docs = {}
-        if result and "data" in result and "Get" in result.get("data"):
-            if result.get("data").get("Get").get(index):
-                all_docs = result.get("data").get("Get").get(index)
+        # Inherent Weaviate limitation to 100 elements forces us to loop here:
+        #   https://weaviate-python-client.readthedocs.io/en/latest/weaviate.data.html?highlight=100#weaviate.data.DataObject.get
+        base_query = self.weaviate_client.query.get(class_name=index, properties=properties)
+        all_docs: List[Any] = []
+        num_of_documents = self.get_document_count(index=index, filters=filters)
+
+        while len(all_docs) < num_of_documents:
+            query = base_query
+            if filters:
+                filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate()
+                query = query.with_where(filter_dict)
+
+            if all_docs:
+                # .with_limit() must be used together with .with_offset(), or the latter won't work properly
+                #   https://weaviate-python-client.readthedocs.io/en/latest/weaviate.gql.html?highlight=offset#weaviate.gql.get.GetBuilder.with_offset
+                query = query.with_limit(100).with_offset(offset=len(all_docs))
+
+            result = query.do()
+
+            if result and "data" in result and "Get" in result.get("data"):
+                if result.get("data").get("Get").get(index):
+                    all_docs += result.get("data").get("Get").get(index)
+            else:
+                raise ValueError(f"Weaviate returned ad exception: {result}")

        yield from all_docs

@ -671,6 +713,22 @@ class WeaviateDocumentStore(BaseDocumentStore):
        document store and yielded as individual documents. This method can be used to iteratively process
        a large number of documents without having to load all documents in memory.

+        Note this limitation from the changelog of Weaviate 1.8.0:
+
+        .. quote::
+            Due to the increasing cost of each page outlined above, there is a limit to
+            how many objects can be retrieved using pagination. By default setting the sum
+            of offset and limit to higher than 10,000 objects, will lead to an error.
+            If you must retrieve more than 10,000 objects, you can increase this limit by
+            setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+            Warning: Setting this to arbitrarily high values can make the memory consumption
+            of a single query explode and single queries can slow down the entire cluster.
+            We recommend setting this value to the lowest possible value that does not
+            interfere with your users' expectations.
+
+        (https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
        :param index: Name of the index to get the documents from. If None, the
                      DocumentStore's default index (self.index) will be used.
        :param filters: Optional filters to narrow down the search space to documents whose metadata fulfill certain
@ -793,7 +851,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
                            ```
        :param top_k: How many documents to return per query.
        :param custom_query: Custom query that will executed using query.raw method, for more details refer
-                            https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                            https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
        :param index: The name of the index in the DocumentStore from which to retrieve documents
        """
        index = self._sanitize_index_name(index) or self.index
--- a/haystack/utils/doc_store.py
+++ b/haystack/utils/doc_store.py
@ -63,7 +63,7 @@ def launch_weaviate(sleep=15):
    logger.debug("Starting Weaviate ...")
    status = subprocess.run(
        [
-            f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:1.7.2"
+            f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:1.11.0"
        ],
        shell=True,
    )
--- a/setup.cfg
+++ b/setup.cfg
@ -142,7 +142,7 @@ only-milvus =
 milvus = 
    farm-haystack[sql,only-milvus]
 weaviate =
-    weaviate-client==2.5.0
+    weaviate-client==3.3.3
 only-pinecone = 
    pinecone-client
 pinecone =
--- a/test/conftest.py
+++ b/test/conftest.py
@ -302,7 +302,7 @@ def weaviate_fixture():
        print("Starting Weaviate servers ...")
        status = subprocess.run(["docker rm haystack_test_weaviate"], shell=True)
        status = subprocess.run(
-            ["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:1.7.2"], shell=True
+            ["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:1.11.0"], shell=True
        )
        if status.returncode:
            raise Exception("Failed to launch Weaviate. Please check docker container logs.")
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@ -117,7 +117,7 @@ def test_init_elastic_doc_store_with_index_recreation():
    assert len(labels) == 0


-def test_write_with_duplicate_doc_ids(document_store):
+def test_write_with_duplicate_doc_ids(document_store: BaseDocumentStore):
    duplicate_documents = [
        Document(content="Doc1", id_hash_keys=["content"]),
        Document(content="Doc1", id_hash_keys=["content"]),
@ -131,7 +131,7 @@ def test_write_with_duplicate_doc_ids(document_store):
@pytest.mark.parametrize(
    "document_store", ["elasticsearch", "faiss", "memory", "milvus1", "weaviate", "pinecone"], indirect=True
 )
-def test_write_with_duplicate_doc_ids_custom_index(document_store):
+def test_write_with_duplicate_doc_ids_custom_index(document_store: BaseDocumentStore):
    duplicate_documents = [
        Document(content="Doc1", id_hash_keys=["content"]),
        Document(content="Doc1", id_hash_keys=["content"]),
@ -164,7 +164,20 @@ def test_get_all_documents_without_filters(document_store_with_docs):
    assert {d.meta["meta_field"] for d in documents} == {"test1", "test2", "test3", "test4", "test5"}


-def test_get_all_document_filter_duplicate_text_value(document_store):
+def test_get_all_documents_large_quantities(document_store: BaseDocumentStore):
+    # Test to exclude situations like Weaviate not returning more than 100 docs by default
+    #   https://github.com/deepset-ai/haystack/issues/1893
+    docs_to_write = [
+        {"meta": {"name": f"name_{i}"}, "content": f"text_{i}", "embedding": np.random.rand(768).astype(np.float32)}
+        for i in range(1000)
+    ]
+    document_store.write_documents(docs_to_write)
+    documents = document_store.get_all_documents()
+    assert all(isinstance(d, Document) for d in documents)
+    assert len(documents) == len(docs_to_write)
+
+
+def test_get_all_document_filter_duplicate_text_value(document_store: BaseDocumentStore):
    documents = [
        Document(content="Doc1", meta={"f1": "0"}, id_hash_keys=["meta"]),
        Document(content="Doc1", meta={"f1": "1", "meta_id": "0"}, id_hash_keys=["meta"]),
@ -355,7 +368,7 @@ def test_get_document_by_id(document_store_with_docs):
    assert doc.content == documents[0].content


-def test_get_documents_by_id(document_store):
+def test_get_documents_by_id(document_store: BaseDocumentStore):
    # generate more documents than the elasticsearch default query size limit of 10
    docs_to_generate = 15
    documents = [{"content": "doc-" + str(i)} for i in range(docs_to_generate)]
@ -372,7 +385,7 @@ def test_get_documents_by_id(document_store):
    assert set(retrieved_ids) == set(all_ids)


-def test_get_document_count(document_store):
+def test_get_document_count(document_store: BaseDocumentStore):
    documents = [
        {"content": "text1", "id": "1", "meta_field_for_count": "a"},
        {"content": "text2", "id": "2", "meta_field_for_count": "b"},
@ -385,7 +398,7 @@ def test_get_document_count(document_store):
    assert document_store.get_document_count(filters={"meta_field_for_count": ["b"]}) == 3


-def test_get_all_documents_generator(document_store):
+def test_get_all_documents_generator(document_store: BaseDocumentStore):
    documents = [
        {"content": "text1", "id": "1", "meta_field_for_count": "a"},
        {"content": "text2", "id": "2", "meta_field_for_count": "b"},
@ -421,7 +434,7 @@ def test_update_existing_documents(document_store, update_existing_documents):
        assert stored_docs[0].content == original_docs[0]["content"]


-def test_write_document_meta(document_store):
+def test_write_document_meta(document_store: BaseDocumentStore):
    documents = [
        {"content": "dict_without_meta", "id": "1"},
        {"content": "dict_with_meta", "meta_field": "test2", "name": "filename2", "id": "2"},
@ -438,7 +451,7 @@ def test_write_document_meta(document_store):
    assert document_store.get_document_by_id("4").meta["meta_field"] == "test4"


-def test_write_document_index(document_store):
+def test_write_document_index(document_store: BaseDocumentStore):
    documents = [{"content": "text1", "id": "1"}, {"content": "text2", "id": "2"}]
    document_store.write_documents([documents[0]], index="haystack_test_one")
    assert len(document_store.get_all_documents(index="haystack_test_one")) == 1
@ -453,7 +466,7 @@ def test_write_document_index(document_store):
@pytest.mark.parametrize(
    "document_store", ["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate"], indirect=True
 )
-def test_document_with_embeddings(document_store):
+def test_document_with_embeddings(document_store: BaseDocumentStore):
    documents = [
        {"content": "text1", "id": "1", "embedding": np.random.rand(768).astype(np.float32)},
        {"content": "text2", "id": "2", "embedding": np.random.rand(768).astype(np.float64)},
@ -720,7 +733,7 @@ def test_delete_documents_by_id_with_filters(document_store_with_docs):

 # exclude weaviate because it does not support storing labels
@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_labels(document_store):
+def test_labels(document_store: BaseDocumentStore):
    label = Label(
        query="question1",
        answer=Answer(
@ -808,7 +821,7 @@ def test_labels(document_store):

 # exclude weaviate because it does not support storing labels
@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_multilabel(document_store):
+def test_multilabel(document_store: BaseDocumentStore):
    labels = [
        Label(
            id="standard",
@ -924,7 +937,7 @@ def test_multilabel(document_store):

 # exclude weaviate because it does not support storing labels
@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_multilabel_no_answer(document_store):
+def test_multilabel_no_answer(document_store: BaseDocumentStore):
    labels = [
        Label(
            query="question",
@ -993,7 +1006,7 @@ def test_multilabel_no_answer(document_store):
 # exclude weaviate because it does not support storing labels
 # exclude faiss and milvus as label metadata is not implemented
@pytest.mark.parametrize("document_store", ["elasticsearch", "memory"], indirect=True)
-def test_multilabel_filter_aggregations(document_store):
+def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
    labels = [
        Label(
            id="standard",
@ -1089,7 +1102,7 @@ def test_multilabel_filter_aggregations(document_store):
 # exclude weaviate because it does not support storing labels
 # exclude faiss and milvus as label metadata is not implemented
@pytest.mark.parametrize("document_store", ["elasticsearch", "memory"], indirect=True)
-def test_multilabel_meta_aggregations(document_store):
+def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
    labels = [
        Label(
            id="standard",
@ -1180,7 +1193,7 @@ def test_multilabel_meta_aggregations(document_store):

@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "milvus1", "weaviate", "pinecone"], indirect=True)
 # Currently update_document_meta() is not implemented for Memory doc store
-def test_update_meta(document_store):
+def test_update_meta(document_store: BaseDocumentStore):
    documents = [
        Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "1"}),
        Document(content="Doc2", meta={"meta_key_1": "2", "meta_key_2": "2"}),
@ -1209,7 +1222,7 @@ def test_custom_embedding_field(document_store_type, tmp_path):


@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
-def test_get_meta_values_by_key(document_store):
+def test_get_meta_values_by_key(document_store: BaseDocumentStore):
    documents = [
        Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "11"}),
        Document(content="Doc2", meta={"meta_key_1": "2", "meta_key_2": "22"}),
@ -1271,7 +1284,7 @@ def test_elasticsearch_delete_index():


@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
-def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store):
+def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store: BaseDocumentStore):
    document_store.write_documents(DOCUMENTS)
    document_without_embedding = Document(
        content="Doc without embedding", meta={"name": "name_7", "year": "2021", "month": "04"}