update to PineconeDocumentStore to remove dependency on SQL db (#2749)

* update to PineconeDocumentStore to remove dependency on SQL db * Update Documentation & Code Style * typing fixes * Update Documentation & Code Style * fixed embedding generator to yield Documents * Update Documentation & Code Style * fixes for final typing issues * fixes for pylint * Update Documentation & Code Style * uncomment pinecone tests * added new params to docstrings * Update Documentation & Code Style * Update Documentation & Code Style * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * Update Documentation & Code Style * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * Update haystack/document_stores/pinecone.py Co-authored-by: Sara Zan <sarazanzo94@gmail.com> * changes based on comments, updated errors and install * Update Documentation & Code Style * mypy * implement simple filtering in pinecone mock * typo * typo in reverse * account for missing meta key in filtering * typo * added metadata filtering to describe index * added handling for users switching indexes in same doc store, and handling duplicate docs in write * syntax tweaks * added index option to document/embedding count calls * labels implementation in progress * added metadata fields to be indexed for pinecone tests * further changes to mock * WIP implementation of labels+multilabels * switched to rely on labels namespace rather than filter * simpler delete_labels * label fixes, remove debug code * Apply dostring fixes Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com> * mypy * pylint * docs * temporarily un-mock Pinecone * Small Pinecone test suite * pylint * Add fake test key to pass 
the None check * Add again fake test key to pass the None check * Add Pinecone to default docstores and fix filters * Fix field name * Change field name * Change field value * Remove comments * forgot to upgrade pyproject.toml Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai> Co-authored-by: Sara Zan <sarazanzo94@gmail.com> Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>
2026-01-05 11:38:20 +00:00 · 2022-08-24 12:27:15 +01:00 · 2022-08-24 12:27:15 +01:00 · 9b1b03002f
commit 9b1b03002f
parent 891707ecaa
11 changed files with 1549 additions and 196 deletions
--- a/conftest.py
+++ b/conftest.py
@ -1,6 +1,8 @@
 def pytest_addoption(parser):
    parser.addoption(
-        "--document_store_type", action="store", default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate"
+        "--document_store_type",
+        action="store",
+        default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
    )


--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@ -4689,7 +4689,7 @@ number of labels for the given index
 ## PineconeDocumentStore

 ```python
-class PineconeDocumentStore(SQLDocumentStore)
+class PineconeDocumentStore(BaseDocumentStore)
 ```

 Document store for very large scale embedding based dense retrievers like the DPR. This is a hosted document store,
@ -4708,7 +4708,7 @@ the vector embeddings and metadata (for filtering) are indexed in a Pinecone Ind
 #### PineconeDocumentStore.\_\_init\_\_

 ```python
-def __init__(api_key: str, environment: str = "us-west1-gcp", sql_url: str = "sqlite:///pinecone_document_store.db", pinecone_index: Optional[pinecone.Index] = None, embedding_dim: int = 768, return_embedding: bool = False, index: str = "document", similarity: str = "cosine", replicas: int = 1, shards: int = 1, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", recreate_index: bool = False, metadata_config: dict = {"indexed": []}, validate_index_sync: bool = True)
+def __init__(api_key: str, environment: str = "us-west1-gcp", pinecone_index: Optional[pinecone.Index] = None, embedding_dim: int = 768, return_embedding: bool = False, index: str = "document", similarity: str = "cosine", replicas: int = 1, shards: int = 1, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", recreate_index: bool = False, metadata_config: dict = {"indexed": []}, validate_index_sync: bool = True)
 ```

 **Arguments**:
@ -4716,8 +4716,6 @@ def __init__(api_key: str, environment: str = "us-west1-gcp", sql_url: str = "sq
 - `api_key`: Pinecone vector database API key ([https://app.pinecone.io](https://app.pinecone.io)).
 - `environment`: Pinecone cloud environment uses `"us-west1-gcp"` by default. Other GCP and AWS regions are
 supported, contact Pinecone [here](https://www.pinecone.io/contact/) if required.
- `sql_url`: SQL connection URL for database. It defaults to local file based SQLite DB. For large scale
-deployment, Postgres is recommended.
 - `pinecone_index`: pinecone-client Index object, an index will be initialized or loaded if not specified.
 - `embedding_dim`: The embedding vector size.
 - `return_embedding`: Whether to return document embeddings.
@ -4743,17 +4741,57 @@ Parameter options:
 created using the config you are using for initialization. Be aware that all data in the old index will be
 lost if you choose to recreate the index. Be aware that both the document_index and the label_index will
 be recreated.
- `metadata_config`: Which metadata fields should be indexed. Should be in the format
-`{"indexed": ["metadata-field-1", "metadata-field-2", "metadata-field-n"]}`.
-Indexing metadata fields is a prerequisite to allow filtering of documents by metadata values.
- `validate_index_sync`: Whether to check that the document count equals the embedding count at initialization time
+- `metadata_config`: Which metadata fields should be indexed, part of the
+[selective metadata filtering](https://www.pinecone.io/docs/manage-indexes/#selective-metadata-indexing) feature.
+Should be in the format `{"indexed": ["metadata-field-1", "metadata-field-2", "metadata-field-n"]}`. By default,
+no fields are indexed.
+
+<a id="pinecone.PineconeDocumentStore.get_document_count"></a>
+
+#### PineconeDocumentStore.get\_document\_count
+
+```python
+def get_document_count(filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, index: Optional[str] = None, only_documents_without_embedding: bool = False, headers: Optional[Dict[str, str]] = None) -> int
+```
+
+Return the count of documents in the document store.
+
+**Arguments**:
+
+- `filters`: Optional filters to narrow down the documents to be counted.
+Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical
+operator (`"$and"`, `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `"$in"`, `"$gt"`,
+`"$gte"`, `"$lt"`, `"$lte"`), or a metadata field name.
+Logical operator keys take a dictionary of metadata field names or logical operators as
+value. Metadata field names take a dictionary of comparison operators as value. Comparison
+operator keys take a single value or (in case of `"$in"`) a list of values as value.
+If no logical operator is provided, `"$and"` is used as default operation. If no comparison
+operator is provided, `"$eq"` (or `"$in"` if the comparison value is a list) is used as default
+operation.
+    __Example__:
+    ```python
+    filters = {
+        "$and": {
+            "type": {"$eq": "article"},
+            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
+            "rating": {"$gte": 3},
+            "$or": {
+                "genre": {"$in": ["economy", "politics"]},
+                "publisher": {"$eq": "nytimes"}
+            }
+        }
+    }
+    ```
+- `index`: Optional index to use for the query. If not provided, the default index is used.
+- `only_documents_without_embedding`: If set to `True`, only documents without embeddings are counted.
+- `headers`: PineconeDocumentStore does not support headers.

 <a id="pinecone.PineconeDocumentStore.write_documents"></a>

 #### PineconeDocumentStore.write\_documents

 ```python
-def write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None, batch_size: int = 32, duplicate_documents: Optional[str] = None, headers: Optional[Dict[str, str]] = None)
+def write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None, batch_size: int = 32, duplicate_documents: Optional[str] = None, headers: Optional[Dict[str, str]] = None, labels: Optional[bool] = False)
 ```

 Add new documents to the DocumentStore.
@ -4771,6 +4809,7 @@ Parameter options:
    - `"overwrite"`: Update any existing documents with the same ID when adding documents.
    - `"fail"`: An error is raised if the document ID of the document being added already exists.
 - `headers`: PineconeDocumentStore does not support headers.
+- `labels`: Whether the records being written are labels rather than documents. Defaults to `False`.

 **Raises**:

@ -4824,12 +4863,55 @@ operation.
 - `batch_size`: Number of documents to process at a time. When working with large number of documents,
 batching can help reduce memory footprint.

+<a id="pinecone.PineconeDocumentStore.get_all_documents"></a>
+
+#### PineconeDocumentStore.get\_all\_documents
+
+```python
+def get_all_documents(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, return_embedding: Optional[bool] = None, batch_size: int = 32, headers: Optional[Dict[str, str]] = None, namespace: Optional[str] = None) -> List[Document]
+```
+
+Retrieves all documents in the index.
+
+**Arguments**:
+
+- `index`: Optional index name to retrieve all documents from.
+- `filters`: Optional filters to narrow down the documents that will be retrieved.
+Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical
+operator (`"$and"`, `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `"$in"`, `"$gt"`,
+`"$gte"`, `"$lt"`, `"$lte"`) or a metadata field name.
+Logical operator keys take a dictionary of metadata field names and/or logical operators as
+value. Metadata field names take a dictionary of comparison operators as value. Comparison
+operator keys take a single value or (in case of `"$in"`) a list of values as value.
+If no logical operator is provided, `"$and"` is used as default operation. If no comparison
+operator is provided, `"$eq"` (or `"$in"` if the comparison value is a list) is used as default
+operation.
+    __Example__:
+    ```python
+    filters = {
+        "$and": {
+            "type": {"$eq": "article"},
+            "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"},
+            "rating": {"$gte": 3},
+            "$or": {
+                "genre": {"$in": ["economy", "politics"]},
+                "publisher": {"$eq": "nytimes"}
+            }
+        }
+    }
+    ```
+- `return_embedding`: Optional flag to return the embedding of the document.
+- `batch_size`: Number of documents to process at a time. When working with large number of documents,
+batching can help reduce memory footprint.
+- `headers`: Pinecone does not support headers.
+- `namespace`: Optional namespace to retrieve documents from.
+
 <a id="pinecone.PineconeDocumentStore.get_all_documents_generator"></a>

 #### PineconeDocumentStore.get\_all\_documents\_generator

 ```python
-def get_all_documents_generator(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, return_embedding: Optional[bool] = None, batch_size: int = 32, headers: Optional[Dict[str, str]] = None) -> Generator[Document, None, None]
+def get_all_documents_generator(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, return_embedding: Optional[bool] = None, batch_size: int = 32, headers: Optional[Dict[str, str]] = None, namespace: Optional[str] = None) -> Generator[Document, None, None]
 ```

 Get all documents from the document store. Under-the-hood, documents are fetched in batches from the
@ -4868,6 +4950,45 @@ operation.
 - `return_embedding`: Whether to return the document embeddings.
 - `batch_size`: When working with large number of documents, batching can help reduce memory footprint.
 - `headers`: PineconeDocumentStore does not support headers.
+- `namespace`: Optional namespace to retrieve documents from.
+
+<a id="pinecone.PineconeDocumentStore.get_documents_by_id"></a>
+
+#### PineconeDocumentStore.get\_documents\_by\_id
+
+```python
+def get_documents_by_id(ids: List[str], index: Optional[str] = None, batch_size: int = 32, headers: Optional[Dict[str, str]] = None, return_embedding: Optional[bool] = None, namespace: str = None) -> List[Document]
+```
+
+Retrieves all documents in the index using their IDs.
+
+**Arguments**:
+
+- `ids`: List of IDs to retrieve.
+- `index`: Optional index name to retrieve all documents from.
+- `batch_size`: Number of documents to retrieve at a time. When working with large number of documents,
+batching can help reduce memory footprint.
+- `headers`: Pinecone does not support headers.
+- `return_embedding`: Optional flag to return the embedding of the document.
+- `namespace`: Optional namespace to retrieve documents from.
+
+<a id="pinecone.PineconeDocumentStore.get_document_by_id"></a>
+
+#### PineconeDocumentStore.get\_document\_by\_id
+
+```python
+def get_document_by_id(id: str, index: Optional[str] = None, headers: Optional[Dict[str, str]] = None, return_embedding: Optional[bool] = None, namespace: str = None) -> Document
+```
+
+Returns a single Document retrieved using an ID.
+
+**Arguments**:
+
+- `id`: ID string to retrieve.
+- `index`: Optional index name to retrieve all documents from.
+- `headers`: Pinecone does not support headers.
+- `return_embedding`: Optional flag to return the embedding of the document.
+- `namespace`: Optional namespace to retrieve documents from.

 <a id="pinecone.PineconeDocumentStore.get_embedding_count"></a>

@ -4879,22 +5000,35 @@ def get_embedding_count(index: Optional[str] = None, filters: Optional[Dict[str,

 Return the count of embeddings in the document store.

+**Arguments**:
+
+- `index`: Optional name of the index whose embeddings are counted.
+- `filters`: Filters are not supported for `get_embedding_count` in Pinecone.
+
 <a id="pinecone.PineconeDocumentStore.update_document_meta"></a>

 #### PineconeDocumentStore.update\_document\_meta

 ```python
-def update_document_meta(id: str, meta: Dict[str, str], index: str = None)
+def update_document_meta(id: str, meta: Dict[str, str], namespace: str = None, index: str = None)
 ```

-Update the metadata dictionary of a document by specifying its string id
+Update the metadata dictionary of a document by specifying its string ID.
+
+**Arguments**:
+
+- `id`: ID of the Document to update.
+- `meta`: Dictionary of new metadata.
+- `namespace`: Optional namespace to update documents from. If not specified, defaults to the embedding
+namespace (vectors) if it exists, otherwise the document namespace (no-vectors).
+- `index`: Optional index name to update documents from.

 <a id="pinecone.PineconeDocumentStore.delete_documents"></a>

 #### PineconeDocumentStore.delete\_documents

 ```python
-def delete_documents(index: Optional[str] = None, ids: Optional[List[str]] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None)
+def delete_documents(index: Optional[str] = None, ids: Optional[List[str]] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None, drop_ids: Optional[bool] = True, namespace: Optional[str] = None)
 ```

 Delete documents from the document store.
@ -4904,6 +5038,8 @@ Delete documents from the document store.
 - `index`: Index name to delete the documents from. If `None`, the DocumentStore's default index
 (`self.index`) will be used.
 - `ids`: Optional list of IDs to narrow down the documents to be deleted.
+- `namespace`: Optional namespace string. By default, it deletes vectors from the embeddings namespace
+unless the namespace is empty, in which case it deletes from the documents namespace.
 - `filters`: Optional filters to narrow down the documents for which embeddings are to be updated.
 Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical
 operator (`"$and"`, `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `"$in"`, `"$gt"`,
@ -4929,6 +5065,14 @@ operation.
    }
    ```
 - `headers`: PineconeDocumentStore does not support headers.
+- `drop_ids`: Specifies if the locally stored IDs should be deleted. The default
+is True.
+- `namespace`: Optional namespace to delete documents from. If not specified, defaults to the embedding
+namespace (vectors) if it exists, otherwise the document namespace (no-vectors).
+
+**Returns**:
+
+`None`: 

 <a id="pinecone.PineconeDocumentStore.delete_index"></a>

@ -4953,7 +5097,7 @@ None
 #### PineconeDocumentStore.query\_by\_embedding

 ```python
-def query_by_embedding(query_emb: np.ndarray, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: int = 10, index: Optional[str] = None, return_embedding: Optional[bool] = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = True) -> List[Document]
+def query_by_embedding(query_emb: np.ndarray, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: int = 10, index: Optional[str] = None, return_embedding: Optional[bool] = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = True, namespace: Optional[str] = None) -> List[Document]
 ```

 Find the document that is most similar to the provided `query_emb` by using a vector similarity metric.
@ -5038,7 +5182,47 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 def load(cls)
 ```

-Default class method used for loading indexes. Not applicable to the PineconeDocumentStore.
+Default class method used for loading indexes. Not applicable to PineconeDocumentStore.
+
+<a id="pinecone.PineconeDocumentStore.delete_labels"></a>
+
+#### PineconeDocumentStore.delete\_labels
+
+```python
+def delete_labels(index: Optional[str] = None, ids: Optional[List[str]] = None, filters: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, batch_size: int = 32)
+```
+
+Default class method used for deleting labels. Not supported by PineconeDocumentStore.
+
+<a id="pinecone.PineconeDocumentStore.get_all_labels"></a>
+
+#### PineconeDocumentStore.get\_all\_labels
+
+```python
+def get_all_labels(index=None, filters: Optional[dict] = None, headers: Optional[Dict[str, str]] = None)
+```
+
+Default class method used for getting all labels.
+
+<a id="pinecone.PineconeDocumentStore.get_label_count"></a>
+
+#### PineconeDocumentStore.get\_label\_count
+
+```python
+def get_label_count(index: Optional[str] = None, headers: Optional[Dict[str, str]] = None)
+```
+
+Default class method used for counting labels. Not supported by PineconeDocumentStore.
+
+<a id="pinecone.PineconeDocumentStore.write_labels"></a>
+
+#### PineconeDocumentStore.write\_labels
+
+```python
+def write_labels(labels, index=None, headers: Optional[Dict[str, str]] = None)
+```
+
+Default class method used for writing labels.

 <a id="utils"></a>

--- a/haystack/document_stores/filter_utils.py
+++ b/haystack/document_stores/filter_utils.py
@ -6,6 +6,7 @@ from sqlalchemy.sql import select
 from sqlalchemy import and_, or_

 from haystack.document_stores.utils import convert_date_to_rfc3339
+from haystack.errors import FilterError


 def nested_defaultdict() -> defaultdict:
@ -460,7 +461,8 @@ class InOperation(ComparisonOperation):
        # is only initialized with lists, but changing the type annotation would mean duplicating __init__

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, List]]:
-        assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$in' operation requires comparison value to be a list.")
        return {"terms": {self.field_name: self.comparison_value}}

    def convert_to_sql(self, meta_document_orm):
@ -470,7 +472,8 @@ class InOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
        filter_dict: Dict[str, Union[str, List[Dict]]] = {"operator": "Or", "operands": []}
-        assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$in' operation requires comparison value to be a list.")
        for value in self.comparison_value:
            comp_value_type, comp_value = self._get_weaviate_datatype(value)
            assert isinstance(filter_dict["operands"], list)  # Necessary for mypy
@ -481,7 +484,8 @@ class InOperation(ComparisonOperation):
        return filter_dict

    def convert_to_pinecone(self) -> Dict[str, Dict[str, List]]:
-        assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$in' operation requires comparison value to be a list.")
        return {self.field_name: {"$in": self.comparison_value}}

    def invert(self) -> "NinOperation":
@ -499,7 +503,8 @@ class NeOperation(ComparisonOperation):
        return fields[self.field_name] != self.comparison_value

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Dict[str, Union[str, int, float, bool]]]]]:
-        assert not isinstance(self.comparison_value, list), "Use '$nin' operation for lists as comparison values."
+        if isinstance(self.comparison_value, list):
+            raise FilterError("Use '$nin' operation for lists as comparison values.")
        return {"bool": {"must_not": {"term": {self.field_name: self.comparison_value}}}}

    def convert_to_sql(self, meta_document_orm):
@ -530,7 +535,8 @@ class NinOperation(ComparisonOperation):
        # is only initialized with lists, but changing the type annotation would mean duplicating __init__

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Dict[str, List]]]]:
-        assert isinstance(self.comparison_value, list), "'$nin' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$nin' operation requires comparison value to be a list.")
        return {"bool": {"must_not": {"terms": {self.field_name: self.comparison_value}}}}

    def convert_to_sql(self, meta_document_orm):
@ -540,7 +546,8 @@ class NinOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
        filter_dict: Dict[str, Union[str, List[Dict]]] = {"operator": "And", "operands": []}
-        assert isinstance(self.comparison_value, list), "'$nin' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$nin' operation requires comparison value to be a list.")
        for value in self.comparison_value:
            comp_value_type, comp_value = self._get_weaviate_datatype(value)
            assert isinstance(filter_dict["operands"], list)  # Necessary for mypy
@ -551,7 +558,8 @@ class NinOperation(ComparisonOperation):
        return filter_dict

    def convert_to_pinecone(self) -> Dict[str, Dict[str, List]]:
-        assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
+        if not isinstance(self.comparison_value, list):
+            raise FilterError("'$nin' operation requires comparison value to be a list.")
        return {self.field_name: {"$nin": self.comparison_value}}

    def invert(self) -> "InOperation":
@ -569,7 +577,8 @@ class GtOperation(ComparisonOperation):
        return fields[self.field_name] > self.comparison_value

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
-        assert not isinstance(self.comparison_value, list), "Comparison value for '$gt' operation must not be a list."
+        if isinstance(self.comparison_value, list):
+            raise FilterError("Comparison value for '$gt' operation must not be a list.")
        return {"range": {self.field_name: {"gt": self.comparison_value}}}

    def convert_to_sql(self, meta_document_orm):
@ -579,13 +588,13 @@ class GtOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
        comp_value_type, comp_value = self._get_weaviate_datatype()
-        assert not isinstance(comp_value, list), "Comparison value for '$gt' operation must not be a list."
+        if isinstance(comp_value, list):
+            raise FilterError("Comparison value for '$gt' operation must not be a list.")
        return {"path": [self.field_name], "operator": "GreaterThan", comp_value_type: comp_value}

    def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
-        assert not isinstance(
-            self.comparison_value, (list, str)
-        ), "Comparison value for '$gt' operation must be a float or int."
+        if not isinstance(self.comparison_value, (float, int)):
+            raise FilterError("Comparison value for '$gt' operation must be a float or int.")
        return {self.field_name: {"$gt": self.comparison_value}}

    def invert(self) -> "LteOperation":
@ -603,7 +612,8 @@ class GteOperation(ComparisonOperation):
        return fields[self.field_name] >= self.comparison_value

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
-        assert not isinstance(self.comparison_value, list), "Comparison value for '$gte' operation must not be a list."
+        if isinstance(self.comparison_value, list):
+            raise FilterError("Comparison value for '$gte' operation must not be a list.")
        return {"range": {self.field_name: {"gte": self.comparison_value}}}

    def convert_to_sql(self, meta_document_orm):
@ -613,13 +623,13 @@ class GteOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
        comp_value_type, comp_value = self._get_weaviate_datatype()
-        assert not isinstance(comp_value, list), "Comparison value for '$gte' operation must not be a list."
+        if isinstance(comp_value, list):
+            raise FilterError("Comparison value for '$gte' operation must not be a list.")
        return {"path": [self.field_name], "operator": "GreaterThanEqual", comp_value_type: comp_value}

    def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
-        assert not isinstance(
-            self.comparison_value, (list, str)
-        ), "Comparison value for '$gte' operation must be a float or int."
+        if not isinstance(self.comparison_value, (float, int)):
+            raise FilterError("Comparison value for '$gte' operation must be a float or int.")
        return {self.field_name: {"$gte": self.comparison_value}}

    def invert(self) -> "LtOperation":
@ -637,7 +647,8 @@ class LtOperation(ComparisonOperation):
        return fields[self.field_name] < self.comparison_value

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
-        assert not isinstance(self.comparison_value, list), "Comparison value for '$lt' operation must not be a list."
+        if isinstance(self.comparison_value, list):
+            raise FilterError("Comparison value for '$lt' operation must not be a list.")
        return {"range": {self.field_name: {"lt": self.comparison_value}}}

    def convert_to_sql(self, meta_document_orm):
@ -647,13 +658,13 @@ class LtOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
        comp_value_type, comp_value = self._get_weaviate_datatype()
-        assert not isinstance(comp_value, list), "Comparison value for '$lt' operation must not be a list."
+        if isinstance(comp_value, list):
+            raise FilterError("Comparison value for '$lt' operation must not be a list.")
        return {"path": [self.field_name], "operator": "LessThan", comp_value_type: comp_value}

    def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
-        assert not isinstance(
-            self.comparison_value, (list, str)
-        ), "Comparison value for '$lt' operation must be a float or int."
+        if not isinstance(self.comparison_value, (float, int)):
+            raise FilterError("Comparison value for '$lt' operation must be a float or int.")
        return {self.field_name: {"$lt": self.comparison_value}}

    def invert(self) -> "GteOperation":
@ -671,7 +682,8 @@ class LteOperation(ComparisonOperation):
        return fields[self.field_name] <= self.comparison_value

    def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
-        assert not isinstance(self.comparison_value, list), "Comparison value for '$lte' operation must not be a list."
+        if isinstance(self.comparison_value, list):
+            raise FilterError("Comparison value for '$lte' operation must not be a list.")
        return {"range": {self.field_name: {"lte": self.comparison_value}}}

    def convert_to_sql(self, meta_document_orm):
@ -681,13 +693,13 @@ class LteOperation(ComparisonOperation):

    def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
        comp_value_type, comp_value = self._get_weaviate_datatype()
-        assert not isinstance(comp_value, list), "Comparison value for '$lte' operation must not be a list."
+        if isinstance(comp_value, list):
+            raise FilterError("Comparison value for '$lte' operation must not be a list.")
        return {"path": [self.field_name], "operator": "LessThanEqual", comp_value_type: comp_value}

    def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
-        assert not isinstance(
-            self.comparison_value, (list, str)
-        ), "Comparison value for '$lte' operation must be a float or int."
+        if not isinstance(self.comparison_value, (float, int)):
+            raise FilterError("Comparison value for '$lte' operation must be a float or int.")
        return {self.field_name: {"$lte": self.comparison_value}}

    def invert(self) -> "GtOperation":
--- a/haystack/document_stores/pinecone.py
+++ b/haystack/document_stores/pinecone.py
--- a/haystack/errors.py
+++ b/haystack/errors.py
@ -76,6 +76,20 @@ class DocumentStoreError(HaystackError):
        super().__init__(message=message)


+class FilterError(DocumentStoreError):
+    """Exception for issues that occur while building complex filters"""
+
+    def __init__(self, message: Optional[str] = None):
+        super().__init__(message=message)
+
+
+class PineconeDocumentStoreError(DocumentStoreError):
+    """Exception for issues that occur in a Pinecone document store"""
+
+    def __init__(self, message: Optional[str] = None):
+        super().__init__(message=message)
+
+
 class DuplicateDocumentError(DocumentStoreError, ValueError):
    """Exception for Duplicate document"""

--- a/haystack/json-schemas/haystack-pipeline-master.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-master.schema.json
@ -1700,11 +1700,6 @@
              "default": "us-west1-gcp",
              "type": "string"
            },
-            "sql_url": {
-              "title": "Sql Url",
-              "default": "sqlite:///pinecone_document_store.db",
-              "type": "string"
-            },
            "pinecone_index": {
              "title": "Pinecone Index",
              "default": null,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -128,7 +128,7 @@ weaviate = [
  "weaviate-client==3.6.0",
 ]
 only-pinecone = [
-  "pinecone-client",
+  "pinecone-client>=2.0.11,<3",
 ]
 pinecone = [
  "farm-haystack[sql,only-pinecone]",
--- a/test/conftest.py
+++ b/test/conftest.py
@ -171,7 +171,7 @@ def pytest_collection_modifyitems(config, items):
            "pinecone",
            "opensearch",
        ]:
-            if cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
+            if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
                skip_docstore = pytest.mark.skip(
                    reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
                )
@ -180,15 +180,11 @@ def pytest_collection_modifyitems(config, items):
        if "milvus1" in keywords and not milvus1:
            skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
            item.add_marker(skip_milvus1)
+
        elif "milvus" in keywords and milvus1:
            skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
            item.add_marker(skip_milvus)

-        # Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables
-        # if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
-        #     skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
-        #     item.add_marker(skip_pinecone)
-

 #
 # Empty mocks, as a base for unit tests.
@ -987,7 +983,7 @@ def get_document_store(

    elif document_store_type == "pinecone":
        document_store = PineconeDocumentStore(
-            api_key=os.environ.get("PINECONE_API_KEY"),
+            api_key=os.environ.get("PINECONE_API_KEY") or "fake-haystack-test-key",
            embedding_dim=embedding_dim,
            embedding_field=embedding_field,
            index=index,
--- a/test/document_stores/test_document_store.py
+++ b/test/document_stores/test_document_store.py
@ -209,25 +209,25 @@ def test_get_all_documents_large_quantities(document_store: BaseDocumentStore):

 def test_get_all_document_filter_duplicate_text_value(document_store: BaseDocumentStore):
    documents = [
-        Document(content="Doc1", meta={"f1": "0"}, id_hash_keys=["meta"]),
-        Document(content="Doc1", meta={"f1": "1", "meta_id": "0"}, id_hash_keys=["meta"]),
-        Document(content="Doc2", meta={"f3": "0"}, id_hash_keys=["meta"]),
+        Document(content="Doc1", meta={"meta_field": "0"}, id_hash_keys=["meta"]),
+        Document(content="Doc1", meta={"meta_field": "1", "name": "file.txt"}, id_hash_keys=["meta"]),
+        Document(content="Doc2", meta={"name": "file_2.txt"}, id_hash_keys=["meta"]),
    ]
    document_store.write_documents(documents)
-    documents = document_store.get_all_documents(filters={"f1": ["1"]})
+    documents = document_store.get_all_documents(filters={"meta_field": ["1"]})
    assert documents[0].content == "Doc1"
    assert len(documents) == 1
-    assert {d.meta["meta_id"] for d in documents} == {"0"}
+    assert {d.meta["name"] for d in documents} == {"file.txt"}

-    documents = document_store.get_all_documents(filters={"f1": ["0"]})
+    documents = document_store.get_all_documents(filters={"meta_field": ["0"]})
    assert documents[0].content == "Doc1"
    assert len(documents) == 1
-    assert documents[0].meta.get("meta_id") is None
+    assert documents[0].meta.get("name") is None

-    documents = document_store.get_all_documents(filters={"f3": ["0"]})
+    documents = document_store.get_all_documents(filters={"name": ["file_2.txt"]})
    assert documents[0].content == "Doc2"
    assert len(documents) == 1
-    assert documents[0].meta.get("meta_id") is None
+    assert documents[0].meta.get("meta_field") is None


 def test_get_all_documents_with_correct_filters(document_store_with_docs):
@ -266,9 +266,8 @@ def test_get_all_documents_with_incorrect_filter_value(document_store_with_docs)
    assert len(documents) == 0


-@pytest.mark.parametrize(
-    "document_store_with_docs", ["elasticsearch", "sql", "weaviate", "memory", "pinecone"], indirect=True
-)
+# See test_pinecone.py
+@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch", "sql", "weaviate", "memory"], indirect=True)
 def test_extended_filter(document_store_with_docs):
    # Test comparison operators individually
    documents = document_store_with_docs.get_all_documents(filters={"meta_field": {"$eq": "test1"}})
--- a/test/document_stores/test_pinecone.py
+++ b/test/document_stores/test_pinecone.py
@ -0,0 +1,337 @@
+from typing import List, Union, Dict, Any
+
+import os
+from datetime import datetime
+from inspect import getmembers, isclass, isfunction
+
+import pytest
+
+from haystack.document_stores.pinecone import PineconeDocumentStore
+from haystack.schema import Document
+from haystack.errors import FilterError
+
+
+from ..mocks import pinecone as pinecone_mock
+from ..conftest import SAMPLES_PATH
+
+
+# Set metadata fields used during testing for PineconeDocumentStore meta_config
+META_FIELDS = ["meta_field", "name", "date", "numeric_field", "odd_document"]
+
+
+#
+# FIXME This class should extend the base Document Store test class once it exists.
+# At that point some of the fixtures will be duplicate, so review them.
+#
+class TestPineconeDocumentStore:
+
+    # Fixtures
+
+    @pytest.fixture
+    def doc_store(self, monkeypatch, request) -> PineconeDocumentStore:
+        """
+        Provides an empty document store. Pinecone is mocked unless "integration" is among the test keywords.
+        """
+        # If it's a unit test, replace the pinecone module's functions and classes with the mock ones
+        if "integration" not in request.keywords:
+            for fname, function in getmembers(pinecone_mock, isfunction):
+                monkeypatch.setattr(f"pinecone.{fname}", function, raising=False)
+            for cname, class_ in getmembers(pinecone_mock, isclass):
+                monkeypatch.setattr(f"pinecone.{cname}", class_, raising=False)
+
+        return PineconeDocumentStore(
+            api_key=os.environ.get("PINECONE_API_KEY") or "fake-pinecone-test-key",
+            embedding_dim=768,
+            embedding_field="embedding",
+            index="haystack_tests",
+            similarity="cosine",
+            recreate_index=True,
+            metadata_config={"indexed": META_FIELDS},
+        )
+
+    @pytest.fixture
+    def doc_store_with_docs(self, doc_store: PineconeDocumentStore, docs: List[Document]) -> PineconeDocumentStore:
+        """
+        This fixture provides a document store pre-populated with the documents from the `docs` fixture
+        """
+        doc_store.write_documents(docs)
+        return doc_store
+
+    @pytest.fixture
+    def docs_all_formats(self) -> List[Union[Document, Dict[str, Any]]]:
+        return [
+            # metafield at the top level for backward compatibility
+            {
+                "content": "My name is Paul and I live in New York",
+                "meta_field": "test-1",
+                "name": "file_1.txt",
+                "date": "2019-10-01",
+                "numeric_field": 5.0,
+                "odd_document": True,
+            },
+            # "dict" format
+            {
+                "content": "My name is Carla and I live in Berlin",
+                "meta": {
+                    "meta_field": "test-2",
+                    "name": "file_2.txt",
+                    "date": "2020-03-01",
+                    "numeric_field": 5.5,
+                    "odd_document": False,
+                },
+            },
+            # Document object
+            Document(
+                content="My name is Christelle and I live in Paris",
+                meta={
+                    "meta_field": "test-3",
+                    "name": "file_3.txt",
+                    "date": "2018-10-01",
+                    "numeric_field": 4.5,
+                    "odd_document": True,
+                },
+            ),
+            Document(
+                content="My name is Camila and I live in Madrid",
+                meta={
+                    "meta_field": "test-4",
+                    "name": "file_4.txt",
+                    "date": "2021-02-01",
+                    "numeric_field": 3.0,
+                    "odd_document": False,
+                },
+            ),
+            Document(
+                content="My name is Matteo and I live in Rome",
+                meta={
+                    "meta_field": "test-5",
+                    "name": "file_5.txt",
+                    "date": "2019-01-01",
+                    "numeric_field": 0.0,
+                    "odd_document": True,
+                },
+            ),
+            # Without meta
+            Document(content="My name is Ahmed and I live in Cairo"),
+        ]
+
+    @pytest.fixture
+    def docs(self, docs_all_formats: List[Union[Document, Dict[str, Any]]]) -> List[Document]:
+        return [Document.from_dict(doc) if isinstance(doc, dict) else doc for doc in docs_all_formats]
+
+    #
+    #  Tests
+    #
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_eq(self, doc_store_with_docs: PineconeDocumentStore):
+        eq_docs = doc_store_with_docs.get_all_documents(filters={"meta_field": {"$eq": "test-1"}})
+        normal_docs = doc_store_with_docs.get_all_documents(filters={"meta_field": "test-1"})
+        assert eq_docs == normal_docs
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_in(self, doc_store_with_docs: PineconeDocumentStore):
+        in_docs = doc_store_with_docs.get_all_documents(filters={"meta_field": {"$in": ["test-1", "test-2", "n.a."]}})
+        normal_docs = doc_store_with_docs.get_all_documents(filters={"meta_field": ["test-1", "test-2", "n.a."]})
+        assert in_docs == normal_docs
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_ne(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(filters={"meta_field": {"$ne": "test-1"}})
+        assert all("test-1" != d.meta.get("meta_field", None) for d in retrieved_docs)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_nin(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(
+            filters={"meta_field": {"$nin": ["test-1", "test-2", "n.a."]}}
+        )
+        assert {"test-1", "test-2"}.isdisjoint({d.meta.get("meta_field", None) for d in retrieved_docs})
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_gt(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(filters={"numeric_field": {"$gt": 3.0}})
+        assert all(d.meta["numeric_field"] > 3.0 for d in retrieved_docs)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_gte(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(filters={"numeric_field": {"$gte": 3.0}})
+        assert all(d.meta["numeric_field"] >= 3.0 for d in retrieved_docs)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_lt(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(filters={"numeric_field": {"$lt": 3.0}})
+        assert all(d.meta["numeric_field"] < 3.0 for d in retrieved_docs)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_lte(self, doc_store_with_docs: PineconeDocumentStore):
+        retrieved_docs = doc_store_with_docs.get_all_documents(filters={"numeric_field": {"$lte": 3.0}})
+        assert all(d.meta["numeric_field"] <= 3.0 for d in retrieved_docs)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates(self, doc_store_with_docs: PineconeDocumentStore):
+        filters = {"date": {"$lte": "2020-12-31", "$gte": "2019-01-01"}}
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_other_field_explicit(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters = {
+            "$and": {
+                "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+                "name": {"$in": ["file_5.txt", "file_3.txt"]},
+            }
+        }
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_other_field_simplified(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters_simplified = {
+            "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+            "name": ["file_5.txt", "file_3.txt"],
+        }
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters_simplified)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_or_explicit(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters = {
+            "$and": {
+                "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+                "$or": {"name": {"$in": ["file_5.txt", "file_3.txt"]}, "numeric_field": {"$lte": 5.0}},
+            }
+        }
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_or_simplified(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters_simplified = {
+            "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+            "$or": {"name": ["file_5.txt", "file_3.txt"], "numeric_field": {"$lte": 5.0}},
+        }
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters_simplified)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_or_and_not_explicit(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters = {
+            "$and": {
+                "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+                "$or": {
+                    "name": {"$in": ["file_5.txt", "file_3.txt"]},
+                    "$and": {"numeric_field": {"$lte": 5.0}, "$not": {"meta_field": {"$eq": "test-2"}}},
+                },
+            }
+        }
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_dates_and_or_and_not_simplified(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        filters_simplified = {
+            "date": {"$lte": "2020-12-31", "$gte": "2019-01-01"},
+            "$or": {
+                "name": ["file_5.txt", "file_3.txt"],
+                "$and": {"numeric_field": {"$lte": 5.0}, "$not": {"meta_field": "test-2"}},
+            },
+        }
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters_simplified)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_nested_not(self, doc_store_with_docs: PineconeDocumentStore):
+        # Test nested logical operations within "$not"; De Morgan's laws are applied in the Weaviate document store
+        filters = {
+            "$not": {
+                "$or": {
+                    "$and": {"numeric_field": {"$gt": 3.0}, "meta_field": {"$ne": "test-3"}},
+                    "$not": {"date": {"$lt": "2020-01-01"}},
+                }
+            }
+        }
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]t' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
+
+    @pytest.mark.pinecone
+    # NOTE: Pinecone does not support dates, so it can't do lte or gte on date fields. When a new release introduces this feature,
+    # the entire family of test_get_all_documents_extended_filter_* tests will become identical to the one present in the
+    # base document store suite, and can be removed from here.
+    def test_get_all_documents_extended_filter_compound_same_level_not(
+        self, doc_store_with_docs: PineconeDocumentStore
+    ):
+        # Test same logical operator twice on same level; De Morgan's laws are applied in the Weaviate document store
+        filters = {
+            "$or": [
+                {"$and": {"meta_field": {"$in": ["test-1", "test-2"]}, "date": {"$gte": "2020-01-01"}}},
+                {"$and": {"meta_field": {"$in": ["test-3", "test-4"]}, "date": {"$lt": "2020-01-01"}}},
+            ]
+        }
+
+        with pytest.raises(FilterError, match=r"Comparison value for '\$[lg]te' operation must be a float or int."):
+            doc_store_with_docs.get_all_documents(filters=filters)
--- a/test/mocks/pinecone.py
+++ b/test/mocks/pinecone.py
@ -2,8 +2,10 @@ from typing import Optional, List, Dict, Union

 import logging

+
 logger = logging.getLogger(__name__)

+
 # Mock Pinecone instance
 CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}}

@ -84,6 +86,24 @@ class Index:
        include_values: bool = False,
        include_metadata: bool = False,
        filter: Optional[dict] = None,
+    ):
+        return self.query_filter(
+            vector=vector,
+            top_k=top_k,
+            namespace=namespace,
+            include_values=include_values,
+            include_metadata=include_metadata,
+            filter=filter,
+        )
+
+    def query_filter(
+        self,
+        vector: List[float],
+        top_k: int,
+        namespace: str = "",
+        include_values: bool = False,
+        include_metadata: bool = False,
+        filter: Optional[dict] = None,
    ):
        assert len(vector) == self.index_config.dimension
        response: dict = {"matches": []}
@ -92,6 +112,7 @@ class Index:
        else:
            records = self.index_config.namespaces[namespace]
            namespace_ids = list(records.keys())[:top_k]
+
            for _id in namespace_ids:
                match = {"id": _id}
                if include_values:
@ -99,6 +120,7 @@ class Index:
                if include_metadata:
                    match["metadata"] = records[_id]["metadata"].copy()
                match["score"] = 0.0
+
                if filter is None or (
                    filter is not None and self._filter(records[_id]["metadata"], filter, top_level=True)
                ):
@ -258,7 +280,7 @@ class Index:
                # We find the intersect between the IDs and filtered IDs
                id_list = set(id_list).intersection(filter_ids)
            records = self.index_config.namespaces[namespace]
-            for _id in records.keys():
+            for _id in list(records.keys()):  # list() is needed to be able to del below
                if _id in id_list:
                    del records[_id]
        else: