diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md index 1f6d58616..3fdde40db 100644 --- a/docs/_src/api/api/document_store.md +++ b/docs/_src/api/api/document_store.md @@ -4651,7 +4651,7 @@ the vector embeddings and metadata (for filtering) are indexed in a Pinecone Ind #### PineconeDocumentStore.\_\_init\_\_ ```python -def __init__(api_key: str, environment: str = "us-west1-gcp", sql_url: str = "sqlite:///pinecone_document_store.db", pinecone_index: Optional[pinecone.Index] = None, embedding_dim: int = 768, return_embedding: bool = False, index: str = "document", similarity: str = "cosine", replicas: int = 1, shards: int = 1, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", recreate_index: bool = False) +def __init__(api_key: str, environment: str = "us-west1-gcp", sql_url: str = "sqlite:///pinecone_document_store.db", pinecone_index: Optional[pinecone.Index] = None, embedding_dim: int = 768, return_embedding: bool = False, index: str = "document", similarity: str = "cosine", replicas: int = 1, shards: int = 1, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", recreate_index: bool = False, metadata_config: dict = {"indexed": []}) ``` **Arguments**: @@ -4686,6 +4686,8 @@ Parameter options: created using the config you are using for initialization. Be aware that all data in the old index will be lost if you choose to recreate the index. Be aware that both the document_index and the label_index will be recreated. +- `metadata_config`: Which metadata fields should be indexed. Should be in the format +`{"indexed": ["metadata-field-1", "metadata-field-2", "metadata-field-n"]}`. diff --git a/haystack/document_stores/pinecone.py b/haystack/document_stores/pinecone.py index b977ecbc0..2f26d8e74 100644 --- a/haystack/document_stores/pinecone.py +++ b/haystack/document_stores/pinecone.py @@ -52,6 +52,7 @@ class PineconeDocumentStore(SQLDocumentStore): progress_bar: bool = True, duplicate_documents: str = "overwrite", recreate_index: bool = False, + metadata_config: dict = {"indexed": []}, ): """ :param api_key: Pinecone vector database API key ([https://app.pinecone.io](https://app.pinecone.io)). @@ -85,6 +86,8 @@ class PineconeDocumentStore(SQLDocumentStore): created using the config you are using for initialization. Be aware that all data in the old index will be lost if you choose to recreate the index. Be aware that both the document_index and the label_index will be recreated. + :param metadata_config: Which metadata fields should be indexed. Should be in the format + `{"indexed": ["metadata-field-1", "metadata-field-2", "metadata-field-n"]}`. """ # Connect to Pinecone server using python client binding pinecone.init(api_key=api_key, environment=environment) @@ -113,6 +116,7 @@ class PineconeDocumentStore(SQLDocumentStore): # Pinecone index params self.replicas = replicas self.shards = shards + self.metadata_config = metadata_config # Initialize dictionary of index connections self.pinecone_indexes: Dict[str, pinecone.Index] = {} @@ -134,6 +138,7 @@ class PineconeDocumentStore(SQLDocumentStore): replicas=self.replicas, shards=self.shards, recreate_index=recreate_index, + metadata_config=self.metadata_config, ) def _sanitize_index_name(self, index: str) -> str: @@ -147,6 +152,7 @@ class PineconeDocumentStore(SQLDocumentStore): replicas: Optional[int] = 1, shards: Optional[int] = 1, recreate_index: bool = False, + metadata_config: dict = {}, ): """ Create a new index for storing documents in case an @@ -166,7 +172,12 @@ class PineconeDocumentStore(SQLDocumentStore): # Search pinecone hosted indexes and create an index if it does not exist if index not in pinecone.list_indexes(): pinecone.create_index( - name=index, dimension=embedding_dim, metric=metric_type, replicas=replicas, shards=shards + name=index, + dimension=embedding_dim, + metric=metric_type, + replicas=replicas, + shards=shards, + metadata_config=metadata_config, ) index_connection = pinecone.Index(index) @@ -670,8 +681,8 @@ class PineconeDocumentStore(SQLDocumentStore): f"Index named '{index}' does not exist. Try reinitializing PineconeDocumentStore() and running " f"'update_embeddings()' to create and populate an index." ) + query_emb = query_emb.astype(np.float32) - query_emb = query_emb.reshape(1, -1).astype(np.float32) if self.similarity == "cosine": self.normalize_embedding(query_emb) @@ -679,7 +690,7 @@ class PineconeDocumentStore(SQLDocumentStore): score_matrix = [] vector_id_matrix = [] - for match in res["results"][0]["matches"]: + for match in res["matches"]: score_matrix.append(match["score"]) vector_id_matrix.append(match["id"]) documents = self.get_documents_by_id(vector_id_matrix, index=index, return_embedding=return_embedding) diff --git a/haystack/json-schemas/haystack-pipeline-master.schema.json b/haystack/json-schemas/haystack-pipeline-master.schema.json index 33a713936..29ef197e1 100644 --- a/haystack/json-schemas/haystack-pipeline-master.schema.json +++ b/haystack/json-schemas/haystack-pipeline-master.schema.json @@ -1487,6 +1487,13 @@ "title": "Recreate Index", "default": false, "type": "boolean" + }, + "metadata_config": { + "title": "Metadata Config", + "default": { + "indexed": [] + }, + "type": "object" } }, "required": [