From ae712fe6bf087c717f3e38e4e87d2347165fc12b Mon Sep 17 00:00:00 2001
From: Sara Zan <sara.zanzottera@deepset.ai>
Date: Fri, 1 Apr 2022 14:37:34 +0200
Subject: [PATCH] Upgrade `weaviate-client` to `3.3.3` and fix
 `get_all_documents` (#1895)

* Fix 'bug' on Weaviate only returning max. 100 docs on get_all_documents

* Add type

* Update Weaviate version on the CI

* Fix bug in get_document_count when there are no documents

* Add more info in the docstrings of get_all_documents and get_all_documents_generator

* Add latest docstring and tutorial changes

* Apply Black

* Update Documentation & Code Style

* Trigger pipeline

* Update Documentation & Code Style

* Include StefanBogdan feedback

* Fix mypy issues and LogicalFilterClause

* Add more types

* Update Documentation & Code Style

* update setup.cfg

* Upgrade weaviate containers too

* Allow filtering on the content field in Weaviate

* Use convert_to_weaviate instead of convert_to_pinecone

* Fix _get_all_documents_in_index

* Update docstrings and docs

* Catching an exception in get_document(s)_by_id

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
---
 .github/workflows/linux_ci.yml                |   2 +-
 CONTRIBUTING.md                               |   2 +-
 docs/_src/api/api/document_store.md           |  46 ++++++-
 docs/_src/usage/usage/document_store.md       |   4 +-
 docs/v0.10.0/_src/api/api/document_store.md   |  14 +-
 docs/v0.9.0/_src/api/api/document_store.md    |  14 +-
 docs/v1.0.0/_src/api/api/document_store.md    |  14 +-
 docs/v1.1.0/_src/api/api/document_store.md    |  14 +-
 docs/v1.2.0/_src/api/api/document_store.md    |  14 +-
 docs/v1.3.0/_src/api/api/document_store.md    |  14 +-
 .../v1.3.0/_src/usage/usage/document_store.md |   2 +-
 haystack/document_stores/filter_utils.py      |   5 +-
 haystack/document_stores/weaviate.py          | 128 +++++++++++++-----
 haystack/utils/doc_store.py                   |   2 +-
 setup.cfg                                     |   2 +-
 test/conftest.py                              |   2 +-
 test/test_document_store.py                   |  47 ++++---
 17 files changed, 216 insertions(+), 110 deletions(-)

diff --git a/.github/workflows/linux_ci.yml b/.github/workflows/linux_ci.yml
index c4544dd15..1a5581019 100644
--- a/.github/workflows/linux_ci.yml
+++ b/.github/workflows/linux_ci.yml
@@ -261,7 +261,7 @@ jobs:
         sudo docker-compose ps
 
     - name: Run Weaviate
-      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0
 
     - name: Run GraphDB
       run: docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bb31128b4..37cf48455 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -54,7 +54,7 @@ wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standal
 docker-compose up -d
 
 # Weaviate
-docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0
 
 # GraphDB
 docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11
diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index 0f8c4ee1c..697657fb8 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -3144,7 +3144,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -3157,7 +3157,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 Limitations:
@@ -3174,7 +3174,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -3188,11 +3188,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -3295,6 +3295,22 @@ def get_all_documents(index: Optional[str] = None, filters: Optional[Dict[str, U
 
 Get documents from the document store.
 
+Note this limitation from the changelog of Weaviate 1.8.0:
+
+.. quote::
+    Due to the increasing cost of each page outlined above, there is a limit to
+    how many objects can be retrieved using pagination. By default setting the sum
+    of offset and limit to higher than 10,000 objects, will lead to an error.
+    If you must retrieve more than 10,000 objects, you can increase this limit by
+    setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+    Warning: Setting this to arbitrarily high values can make the memory consumption
+    of a single query explode and single queries can slow down the entire cluster.
+    We recommend setting this value to the lowest possible value that does not
+    interfere with your users' expectations.
+
+(https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
 **Arguments**:
 
 - `index`: Name of the index to get the documents from. If None, the
@@ -3341,6 +3357,22 @@ Get documents from the document store. Under-the-hood, documents are fetched in
 document store and yielded as individual documents. This method can be used to iteratively process
 a large number of documents without having to load all documents in memory.
 
+Note this limitation from the changelog of Weaviate 1.8.0:
+
+.. quote::
+    Due to the increasing cost of each page outlined above, there is a limit to
+    how many objects can be retrieved using pagination. By default setting the sum
+    of offset and limit to higher than 10,000 objects, will lead to an error.
+    If you must retrieve more than 10,000 objects, you can increase this limit by
+    setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+    Warning: Setting this to arbitrarily high values can make the memory consumption
+    of a single query explode and single queries can slow down the entire cluster.
+    We recommend setting this value to the lowest possible value that does not
+    interfere with your users' expectations.
+
+(https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
 **Arguments**:
 
 - `index`: Name of the index to get the documents from. If None, the
@@ -3454,7 +3486,7 @@ operation.
     ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/_src/usage/usage/document_store.md b/docs/_src/usage/usage/document_store.md
index bca65eac2..ab18f9bb1 100644
--- a/docs/_src/usage/usage/document_store.md
+++ b/docs/_src/usage/usage/document_store.md
@@ -128,9 +128,9 @@ document_store = SQLDocumentStore()
 <div class="tabcontent">
 
 The `WeaviateDocumentStore` requires a running Weaviate Server. 
-You can start a basic instance like this (see the [Weaviate docs](https://www.semi.technology/developers/weaviate/current/) for details):
+You can start a basic instance like this (see the [Weaviate docs](https://weaviate.io/developers/weaviate/current/) for details):
 ```
-    docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
+    docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0
 ```
   
 Afterwards, you can use it in Haystack:
diff --git a/docs/v0.10.0/_src/api/api/document_store.md b/docs/v0.10.0/_src/api/api/document_store.md
index 13d2e7025..3e12d5f05 100644
--- a/docs/v0.10.0/_src/api/api/document_store.md
+++ b/docs/v0.10.0/_src/api/api/document_store.md
@@ -1522,7 +1522,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -1533,7 +1533,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 <a name="weaviate.WeaviateDocumentStore.__init__"></a>
@@ -1546,7 +1546,7 @@ Usage:
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -1560,11 +1560,11 @@ Usage:
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                    Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -1695,7 +1695,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v0.9.0/_src/api/api/document_store.md b/docs/v0.9.0/_src/api/api/document_store.md
index 52a1aa018..ffde1fc5a 100644
--- a/docs/v0.9.0/_src/api/api/document_store.md
+++ b/docs/v0.9.0/_src/api/api/document_store.md
@@ -1477,7 +1477,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -1488,7 +1488,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 <a name="weaviate.WeaviateDocumentStore.__init__"></a>
@@ -1501,7 +1501,7 @@ Usage:
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -1515,11 +1515,11 @@ Usage:
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                    Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -1650,7 +1650,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v1.0.0/_src/api/api/document_store.md b/docs/v1.0.0/_src/api/api/document_store.md
index 8942d0ea0..b3fdfc62e 100644
--- a/docs/v1.0.0/_src/api/api/document_store.md
+++ b/docs/v1.0.0/_src/api/api/document_store.md
@@ -1678,7 +1678,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -1690,7 +1690,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 Limitations:
@@ -1706,7 +1706,7 @@ The current implementation is not supporting the storage of labels, so you canno
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -1720,11 +1720,11 @@ The current implementation is not supporting the storage of labels, so you canno
                    'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                    Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -1863,7 +1863,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v1.1.0/_src/api/api/document_store.md b/docs/v1.1.0/_src/api/api/document_store.md
index b71419c56..c0dbf7979 100644
--- a/docs/v1.1.0/_src/api/api/document_store.md
+++ b/docs/v1.1.0/_src/api/api/document_store.md
@@ -1757,7 +1757,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -1770,7 +1770,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 Limitations:
@@ -1786,7 +1786,7 @@ The current implementation is not supporting the storage of labels, so you canno
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-                     For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                     For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -1800,11 +1800,11 @@ The current implementation is not supporting the storage of labels, so you canno
                    'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                    Currently, HSNW is only supported.
-                   See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                   See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-                   See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                   See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                    For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                    For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -1943,7 +1943,7 @@ that are most relevant to the query as defined by Weaviate semantic search.
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-                    https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                    https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a name="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v1.2.0/_src/api/api/document_store.md b/docs/v1.2.0/_src/api/api/document_store.md
index d8b8d17c5..97d6c7167 100644
--- a/docs/v1.2.0/_src/api/api/document_store.md
+++ b/docs/v1.2.0/_src/api/api/document_store.md
@@ -2755,7 +2755,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -2768,7 +2768,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 Limitations:
@@ -2785,7 +2785,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -2799,11 +2799,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -3065,7 +3065,7 @@ operation.
     ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v1.3.0/_src/api/api/document_store.md b/docs/v1.3.0/_src/api/api/document_store.md
index 61b69bf7d..ba04facdd 100644
--- a/docs/v1.3.0/_src/api/api/document_store.md
+++ b/docs/v1.3.0/_src/api/api/document_store.md
@@ -3136,7 +3136,7 @@ class WeaviateDocumentStore(BaseDocumentStore)
 ```
 
 Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-(See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+(See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
 Some of the key differences in contrast to FAISS & Milvus:
 1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -3149,7 +3149,7 @@ Weaviate python client is used to connect to the server, more details are here
 https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
 Usage:
-1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
 2. Init a WeaviateDocumentStore in Haystack
 
 Limitations:
@@ -3166,7 +3166,7 @@ def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int,
 **Arguments**:
 
 - `host`: Weaviate server connection URL for storing and processing documents and vectors.
-For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+For more details, refer to "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
 - `port`: port of Weaviate instance
 - `timeout_config`: Weaviate Timeout config as a tuple of (retries, time out seconds).
 - `username`: username (standard authentication via http_auth)
@@ -3180,11 +3180,11 @@ If no Reader is used (e.g. in FAQ-Style QA) the plain content of this field will
 'cosine' is recommended for Sentence Transformers.
 - `index_type`: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
 Currently, HSNW is only supported.
-See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
 - `custom_schema`: Allows to create custom schema in Weaviate, for more details
-See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
 - `module_name`: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+For more details, See https://weaviate.io/developers/weaviate/current/modules/
 - `return_embedding`: To return document embedding.
 - `embedding_field`: Name of field containing an embedding vector.
 - `progress_bar`: Whether to show a tqdm progress bar or not.
@@ -3446,7 +3446,7 @@ operation.
     ```
 - `top_k`: How many documents to return per query.
 - `custom_query`: Custom query that will executed using query.raw method, for more details refer
-https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 
 <a id="weaviate.WeaviateDocumentStore.query_by_embedding"></a>
diff --git a/docs/v1.3.0/_src/usage/usage/document_store.md b/docs/v1.3.0/_src/usage/usage/document_store.md
index bca65eac2..c4a12c291 100644
--- a/docs/v1.3.0/_src/usage/usage/document_store.md
+++ b/docs/v1.3.0/_src/usage/usage/document_store.md
@@ -128,7 +128,7 @@ document_store = SQLDocumentStore()
 <div class="tabcontent">
 
 The `WeaviateDocumentStore` requires a running Weaviate Server. 
-You can start a basic instance like this (see the [Weaviate docs](https://www.semi.technology/developers/weaviate/current/) for details):
+You can start a basic instance like this (see the [Weaviate docs](https://weaviate.io/developers/weaviate/current/) for details):
 ```
     docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2
 ```
diff --git a/haystack/document_stores/filter_utils.py b/haystack/document_stores/filter_utils.py
index 9b7bc8c5f..a758ece90 100644
--- a/haystack/document_stores/filter_utils.py
+++ b/haystack/document_stores/filter_utils.py
@@ -277,7 +277,10 @@ class ComparisonOperation(ABC):
                 data_type = "valueDate"
             # Comparison value is a plain string
             except ValueError:
-                data_type = "valueString"
+                if self.field_name == "content":
+                    data_type = "valueText"
+                else:
+                    data_type = "valueString"
         elif isinstance(value, int):
             data_type = "valueInt"
         elif isinstance(value, float):
diff --git a/haystack/document_stores/weaviate.py b/haystack/document_stores/weaviate.py
index cfdb91c00..1077afd5b 100644
--- a/haystack/document_stores/weaviate.py
+++ b/haystack/document_stores/weaviate.py
@@ -1,13 +1,21 @@
-import hashlib
+from typing import Any, Dict, Generator, List, Optional, Union
 import re
 import uuid
-from typing import Dict, Generator, List, Optional, Union
+import json
+import hashlib
+import logging
 from datetime import datetime
 
-import logging
-import json
 import numpy as np
 from tqdm import tqdm
+import weaviate
+
+try:
+    from weaviate import client, AuthClientPassword
+except (ImportError, ModuleNotFoundError) as ie:
+    from haystack.utils.import_utils import _optional_component_not_installed
+
+    _optional_component_not_installed(__name__, "weaviate", ie)
 
 from haystack.schema import Document
 from haystack.document_stores import BaseDocumentStore
@@ -15,14 +23,6 @@ from haystack.document_stores.base import get_batches_from_generator
 from haystack.document_stores.filter_utils import LogicalFilterClause
 from haystack.document_stores.utils import convert_date_to_rfc3339
 
-try:
-    from weaviate import client, AuthClientPassword
-    from weaviate import ObjectsBatchRequest
-except (ImportError, ModuleNotFoundError) as ie:
-    from haystack.utils.import_utils import _optional_component_not_installed
-
-    _optional_component_not_installed(__name__, "weaviate", ie)
-
 
 logger = logging.getLogger(__name__)
 UUID_PATTERN = re.compile(r"^[\da-f]{8}-([\da-f]{4}-){3}[\da-f]{12}$", re.IGNORECASE)
@@ -32,7 +32,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
     """
 
     Weaviate is a cloud-native, modular, real-time vector search engine built to scale your machine learning models.
-    (See https://www.semi.technology/developers/weaviate/current/index.html#what-is-weaviate)
+    (See https://weaviate.io/developers/weaviate/current/index.html#what-is-weaviate)
 
     Some of the key differences in contrast to FAISS & Milvus:
     1. Stores everything in one place: documents, meta data and vectors - so less network overhead when scaling this up
@@ -45,7 +45,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
     https://weaviate-python-client.readthedocs.io/en/docs/weaviate.html
 
     Usage:
-    1. Start a Weaviate server (see https://www.semi.technology/developers/weaviate/current/getting-started/installation.html)
+    1. Start a Weaviate server (see https://weaviate.io/developers/weaviate/current/getting-started/installation.html)
     2. Init a WeaviateDocumentStore in Haystack
 
     Limitations:
@@ -74,7 +74,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
     ):
         """
         :param host: Weaviate server connection URL for storing and processing documents and vectors.
-                             For more details, refer "https://www.semi.technology/developers/weaviate/current/getting-started/installation.html"
+                             For more details, refer "https://weaviate.io/developers/weaviate/current/getting-started/installation.html"
         :param port: port of Weaviate instance
         :param timeout_config: Weaviate Timeout config as a tuple of (retries, time out seconds).
         :param username: username (standard authentication via http_auth)
@@ -88,11 +88,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
                            'cosine' is recommended for Sentence Transformers.
         :param index_type: Index type of any vector object defined in weaviate schema. The vector index type is pluggable.
                            Currently, HSNW is only supported.
-                           See: https://www.semi.technology/developers/weaviate/current/more-resources/performance.html
+                           See: https://weaviate.io/developers/weaviate/current/more-resources/performance.html
         :param custom_schema: Allows to create custom schema in Weaviate, for more details
-                           See https://www.semi.technology/developers/weaviate/current/data-schema/schema-configuration.html
+                           See https://weaviate.io/developers/weaviate/current/data-schema/schema-configuration.html
         :param module_name: Vectorization module to convert data into vectors. Default is "text2vec-trasnformers"
-                            For more details, See https://www.semi.technology/developers/weaviate/current/modules/
+                            For more details, See https://weaviate.io/developers/weaviate/current/modules/
         :param return_embedding: To return document embedding.
         :param embedding_field: Name of field containing an embedding vector.
         :param progress_bar: Whether to show a tqdm progress bar or not.
@@ -265,8 +265,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
         document = None
 
         id = self._sanitize_id(id=id, index=index)
-
-        result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+        result = None
+        try:
+            result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+        except weaviate.exceptions.UnexpectedStatusCodeException as usce:
+            logging.debug(f"Weaviate could not get the document requested: {usce}")
         if result:
             document = self._convert_weaviate_result_to_document(result, return_embedding=True)
         return document
@@ -289,7 +292,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
         # TODO: better implementation with multiple where filters instead of chatty call below?
         for id in ids:
             id = self._sanitize_id(id=id, index=index)
-            result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+            result = None
+            try:
+                result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
+            except weaviate.exceptions.UnexpectedStatusCodeException as usce:
+                logging.debug(f"Weaviate could not get the document requested: {usce}")
             if result:
                 document = self._convert_weaviate_result_to_document(result, return_embedding=True)
                 documents.append(document)
@@ -458,7 +465,6 @@ class WeaviateDocumentStore(BaseDocumentStore):
         batched_documents = get_batches_from_generator(document_objects, batch_size)
         with tqdm(total=len(document_objects), disable=not self.progress_bar) as progress_bar:
             for document_batch in batched_documents:
-                docs_batch = ObjectsBatchRequest()
                 for idx, doc in enumerate(document_batch):
                     _doc = {**doc.to_dict(field_map=self._create_document_field_map())}
                     _ = _doc.pop("score", None)
@@ -492,10 +498,11 @@ class WeaviateDocumentStore(BaseDocumentStore):
                     for date_field in date_fields:
                         _doc[date_field] = convert_date_to_rfc3339(_doc[date_field])
 
-                    docs_batch.add(_doc, class_name=index, uuid=doc_id, vector=vector)
-
+                    self.weaviate_client.batch.add_data_object(
+                        data_object=_doc, class_name=index, uuid=doc_id, vector=vector
+                    )
                 # Ingest a batch of documents
-                results = self.weaviate_client.batch.create(docs_batch)
+                results = self.weaviate_client.batch.create_objects()
                 # Weaviate returns errors for every failed document in the batch
                 if results is not None:
                     for result in results:
@@ -563,15 +570,14 @@ class WeaviateDocumentStore(BaseDocumentStore):
         doc_count = 0
         if filters:
             filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate()
-            result = (
-                self.weaviate_client.query.aggregate(index).with_fields("meta { count }").with_where(filter_dict).do()
-            )
+            result = self.weaviate_client.query.aggregate(index).with_meta_count().with_where(filter_dict).do()
         else:
-            result = self.weaviate_client.query.aggregate(index).with_fields("meta { count }").do()
+            result = self.weaviate_client.query.aggregate(index).with_meta_count().do()
 
         if "data" in result:
             if "Aggregate" in result.get("data"):
-                doc_count = result.get("data").get("Aggregate").get(index)[0]["meta"]["count"]
+                if result.get("data").get("Aggregate").get(index):
+                    doc_count = result.get("data").get("Aggregate").get(index)[0]["meta"]["count"]
 
         return doc_count
 
@@ -586,6 +592,22 @@ class WeaviateDocumentStore(BaseDocumentStore):
         """
         Get documents from the document store.
 
+        Note this limitation from the changelog of Weaviate 1.8.0:
+
+        .. quote::
+            Due to the increasing cost of each page outlined above, there is a limit to
+            how many objects can be retrieved using pagination. By default setting the sum
+            of offset and limit to higher than 10,000 objects, will lead to an error.
+            If you must retrieve more than 10,000 objects, you can increase this limit by
+            setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+            Warning: Setting this to arbitrarily high values can make the memory consumption
+            of a single query explode and single queries can slow down the entire cluster.
+            We recommend setting this value to the lowest possible value that does not
+            interfere with your users' expectations.
+
+        (https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
         :param index: Name of the index to get the documents from. If None, the
                       DocumentStore's default index (self.index) will be used.
         :param filters: Optional filters to narrow down the search space to documents whose metadata fulfill certain
@@ -651,10 +673,30 @@ class WeaviateDocumentStore(BaseDocumentStore):
         else:
             result = self.weaviate_client.query.get(class_name=index, properties=properties).do()
 
-        all_docs = {}
-        if result and "data" in result and "Get" in result.get("data"):
-            if result.get("data").get("Get").get(index):
-                all_docs = result.get("data").get("Get").get(index)
+        # Inherent Weaviate limitation to 100 elements forces us to loop here:
+        #   https://weaviate-python-client.readthedocs.io/en/latest/weaviate.data.html?highlight=100#weaviate.data.DataObject.get
+        base_query = self.weaviate_client.query.get(class_name=index, properties=properties)
+        all_docs: List[Any] = []
+        num_of_documents = self.get_document_count(index=index, filters=filters)
+
+        while len(all_docs) < num_of_documents:
+            query = base_query
+            if filters:
+                filter_dict = LogicalFilterClause.parse(filters).convert_to_weaviate()
+                query = query.with_where(filter_dict)
+
+            if all_docs:
+                # .with_limit() must be used with .with_offset, or the latter won't work properly
+                #   https://weaviate-python-client.readthedocs.io/en/latest/weaviate.gql.html?highlight=offset#weaviate.gql.get.GetBuilder.with_offset
+                query = query.with_limit(100).with_offset(offset=len(all_docs))
+
+            result = query.do()
+
+            if result and "data" in result and "Get" in result.get("data"):
+                if result.get("data").get("Get").get(index):
+                    all_docs += result.get("data").get("Get").get(index)
+            else:
+                raise ValueError(f"Weaviate returned ad exception: {result}")
 
         yield from all_docs
 
@@ -671,6 +713,22 @@ class WeaviateDocumentStore(BaseDocumentStore):
         document store and yielded as individual documents. This method can be used to iteratively process
         a large number of documents without having to load all documents in memory.
 
+        Note this limitation from the changelog of Weaviate 1.8.0:
+
+        .. quote::
+            Due to the increasing cost of each page outlined above, there is a limit to
+            how many objects can be retrieved using pagination. By default setting the sum
+            of offset and limit to higher than 10,000 objects, will lead to an error.
+            If you must retrieve more than 10,000 objects, you can increase this limit by
+            setting the environment variable `QUERY_MAXIMUM_RESULTS=<desired-value>`.
+
+            Warning: Setting this to arbitrarily high values can make the memory consumption
+            of a single query explode and single queries can slow down the entire cluster.
+            We recommend setting this value to the lowest possible value that does not
+            interfere with your users' expectations.
+
+        (https://github.com/semi-technologies/weaviate/releases/tag/v1.8.0)
+
         :param index: Name of the index to get the documents from. If None, the
                       DocumentStore's default index (self.index) will be used.
         :param filters: Optional filters to narrow down the search space to documents whose metadata fulfill certain
@@ -793,7 +851,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
                             ```
         :param top_k: How many documents to return per query.
         :param custom_query: Custom query that will executed using query.raw method, for more details refer
-                            https://www.semi.technology/developers/weaviate/current/graphql-references/filters.html
+                            https://weaviate.io/developers/weaviate/current/graphql-references/filters.html
         :param index: The name of the index in the DocumentStore from which to retrieve documents
         """
         index = self._sanitize_index_name(index) or self.index
diff --git a/haystack/utils/doc_store.py b/haystack/utils/doc_store.py
index b7605ccb2..d13e22c8c 100644
--- a/haystack/utils/doc_store.py
+++ b/haystack/utils/doc_store.py
@@ -63,7 +63,7 @@ def launch_weaviate(sleep=15):
     logger.debug("Starting Weaviate ...")
     status = subprocess.run(
         [
-            f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:1.7.2"
+            f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:1.11.0"
         ],
         shell=True,
     )
diff --git a/setup.cfg b/setup.cfg
index ae24b883c..f2e897c07 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -142,7 +142,7 @@ only-milvus =
 milvus = 
     farm-haystack[sql,only-milvus]
 weaviate =
-    weaviate-client==2.5.0
+    weaviate-client==3.3.3
 only-pinecone = 
     pinecone-client
 pinecone =
diff --git a/test/conftest.py b/test/conftest.py
index e6adb5a08..b598d8e65 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -302,7 +302,7 @@ def weaviate_fixture():
         print("Starting Weaviate servers ...")
         status = subprocess.run(["docker rm haystack_test_weaviate"], shell=True)
         status = subprocess.run(
-            ["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:1.7.2"], shell=True
+            ["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:1.11.0"], shell=True
         )
         if status.returncode:
             raise Exception("Failed to launch Weaviate. Please check docker container logs.")
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 016d5efcd..e42326b37 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -117,7 +117,7 @@ def test_init_elastic_doc_store_with_index_recreation():
     assert len(labels) == 0
 
 
-def test_write_with_duplicate_doc_ids(document_store):
+def test_write_with_duplicate_doc_ids(document_store: BaseDocumentStore):
     duplicate_documents = [
         Document(content="Doc1", id_hash_keys=["content"]),
         Document(content="Doc1", id_hash_keys=["content"]),
@@ -131,7 +131,7 @@ def test_write_with_duplicate_doc_ids(document_store):
 @pytest.mark.parametrize(
     "document_store", ["elasticsearch", "faiss", "memory", "milvus1", "weaviate", "pinecone"], indirect=True
 )
-def test_write_with_duplicate_doc_ids_custom_index(document_store):
+def test_write_with_duplicate_doc_ids_custom_index(document_store: BaseDocumentStore):
     duplicate_documents = [
         Document(content="Doc1", id_hash_keys=["content"]),
         Document(content="Doc1", id_hash_keys=["content"]),
@@ -164,7 +164,20 @@ def test_get_all_documents_without_filters(document_store_with_docs):
     assert {d.meta["meta_field"] for d in documents} == {"test1", "test2", "test3", "test4", "test5"}
 
 
-def test_get_all_document_filter_duplicate_text_value(document_store):
+def test_get_all_documents_large_quantities(document_store: BaseDocumentStore):
+    # Test to exclude situations like Weaviate not returning more than 100 docs by default
+    #   https://github.com/deepset-ai/haystack/issues/1893
+    docs_to_write = [
+        {"meta": {"name": f"name_{i}"}, "content": f"text_{i}", "embedding": np.random.rand(768).astype(np.float32)}
+        for i in range(1000)
+    ]
+    document_store.write_documents(docs_to_write)
+    documents = document_store.get_all_documents()
+    assert all(isinstance(d, Document) for d in documents)
+    assert len(documents) == len(docs_to_write)
+
+
+def test_get_all_document_filter_duplicate_text_value(document_store: BaseDocumentStore):
     documents = [
         Document(content="Doc1", meta={"f1": "0"}, id_hash_keys=["meta"]),
         Document(content="Doc1", meta={"f1": "1", "meta_id": "0"}, id_hash_keys=["meta"]),
@@ -355,7 +368,7 @@ def test_get_document_by_id(document_store_with_docs):
     assert doc.content == documents[0].content
 
 
-def test_get_documents_by_id(document_store):
+def test_get_documents_by_id(document_store: BaseDocumentStore):
     # generate more documents than the elasticsearch default query size limit of 10
     docs_to_generate = 15
     documents = [{"content": "doc-" + str(i)} for i in range(docs_to_generate)]
@@ -372,7 +385,7 @@ def test_get_documents_by_id(document_store):
     assert set(retrieved_ids) == set(all_ids)
 
 
-def test_get_document_count(document_store):
+def test_get_document_count(document_store: BaseDocumentStore):
     documents = [
         {"content": "text1", "id": "1", "meta_field_for_count": "a"},
         {"content": "text2", "id": "2", "meta_field_for_count": "b"},
@@ -385,7 +398,7 @@ def test_get_document_count(document_store):
     assert document_store.get_document_count(filters={"meta_field_for_count": ["b"]}) == 3
 
 
-def test_get_all_documents_generator(document_store):
+def test_get_all_documents_generator(document_store: BaseDocumentStore):
     documents = [
         {"content": "text1", "id": "1", "meta_field_for_count": "a"},
         {"content": "text2", "id": "2", "meta_field_for_count": "b"},
@@ -421,7 +434,7 @@ def test_update_existing_documents(document_store, update_existing_documents):
         assert stored_docs[0].content == original_docs[0]["content"]
 
 
-def test_write_document_meta(document_store):
+def test_write_document_meta(document_store: BaseDocumentStore):
     documents = [
         {"content": "dict_without_meta", "id": "1"},
         {"content": "dict_with_meta", "meta_field": "test2", "name": "filename2", "id": "2"},
@@ -438,7 +451,7 @@ def test_write_document_meta(document_store):
     assert document_store.get_document_by_id("4").meta["meta_field"] == "test4"
 
 
-def test_write_document_index(document_store):
+def test_write_document_index(document_store: BaseDocumentStore):
     documents = [{"content": "text1", "id": "1"}, {"content": "text2", "id": "2"}]
     document_store.write_documents([documents[0]], index="haystack_test_one")
     assert len(document_store.get_all_documents(index="haystack_test_one")) == 1
@@ -453,7 +466,7 @@ def test_write_document_index(document_store):
 @pytest.mark.parametrize(
     "document_store", ["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate"], indirect=True
 )
-def test_document_with_embeddings(document_store):
+def test_document_with_embeddings(document_store: BaseDocumentStore):
     documents = [
         {"content": "text1", "id": "1", "embedding": np.random.rand(768).astype(np.float32)},
         {"content": "text2", "id": "2", "embedding": np.random.rand(768).astype(np.float64)},
@@ -720,7 +733,7 @@ def test_delete_documents_by_id_with_filters(document_store_with_docs):
 
 # exclude weaviate because it does not support storing labels
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_labels(document_store):
+def test_labels(document_store: BaseDocumentStore):
     label = Label(
         query="question1",
         answer=Answer(
@@ -808,7 +821,7 @@ def test_labels(document_store):
 
 # exclude weaviate because it does not support storing labels
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_multilabel(document_store):
+def test_multilabel(document_store: BaseDocumentStore):
     labels = [
         Label(
             id="standard",
@@ -924,7 +937,7 @@ def test_multilabel(document_store):
 
 # exclude weaviate because it does not support storing labels
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
-def test_multilabel_no_answer(document_store):
+def test_multilabel_no_answer(document_store: BaseDocumentStore):
     labels = [
         Label(
             query="question",
@@ -993,7 +1006,7 @@ def test_multilabel_no_answer(document_store):
 # exclude weaviate because it does not support storing labels
 # exclude faiss and milvus as label metadata is not implemented
 @pytest.mark.parametrize("document_store", ["elasticsearch", "memory"], indirect=True)
-def test_multilabel_filter_aggregations(document_store):
+def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
     labels = [
         Label(
             id="standard",
@@ -1089,7 +1102,7 @@ def test_multilabel_filter_aggregations(document_store):
 # exclude weaviate because it does not support storing labels
 # exclude faiss and milvus as label metadata is not implemented
 @pytest.mark.parametrize("document_store", ["elasticsearch", "memory"], indirect=True)
-def test_multilabel_meta_aggregations(document_store):
+def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
     labels = [
         Label(
             id="standard",
@@ -1180,7 +1193,7 @@ def test_multilabel_meta_aggregations(document_store):
 
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "milvus1", "weaviate", "pinecone"], indirect=True)
 # Currently update_document_meta() is not implemented for Memory doc store
-def test_update_meta(document_store):
+def test_update_meta(document_store: BaseDocumentStore):
     documents = [
         Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "1"}),
         Document(content="Doc2", meta={"meta_key_1": "2", "meta_key_2": "2"}),
@@ -1209,7 +1222,7 @@ def test_custom_embedding_field(document_store_type, tmp_path):
 
 
 @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
-def test_get_meta_values_by_key(document_store):
+def test_get_meta_values_by_key(document_store: BaseDocumentStore):
     documents = [
         Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "11"}),
         Document(content="Doc2", meta={"meta_key_1": "2", "meta_key_2": "22"}),
@@ -1271,7 +1284,7 @@ def test_elasticsearch_delete_index():
 
 
 @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
-def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store):
+def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store: BaseDocumentStore):
     document_store.write_documents(DOCUMENTS)
     document_without_embedding = Document(
         content="Doc without embedding", meta={"name": "name_7", "year": "2021", "month": "04"}