From a06e4450d1f039a48d06ca6b9345d50fbc0fbf79 Mon Sep 17 00:00:00 2001 From: Ikram Ali Date: Mon, 10 May 2021 16:37:08 +0500 Subject: [PATCH] Rename delete_all_documents() method to delete_documents() (#1047) --- haystack/document_store/base.py | 7 +++++-- haystack/document_store/elasticsearch.py | 22 +++++++++++++++++++--- haystack/document_store/faiss.py | 14 +++++++++++++- haystack/document_store/memory.py | 17 ++++++++++++++++- haystack/document_store/milvus.py | 18 +++++++++++++++++- haystack/document_store/sql.py | 24 ++++++++++++++++++++---- test/test_document_store.py | 15 ++++++++++++--- 7 files changed, 102 insertions(+), 15 deletions(-) diff --git a/haystack/document_store/base.py b/haystack/document_store/base.py index 401e0f540..4658de2be 100644 --- a/haystack/document_store/base.py +++ b/haystack/document_store/base.py @@ -204,10 +204,13 @@ class BaseDocumentStore(BaseComponent): else: logger.error("File needs to be in json or jsonl format.") - @abstractmethod def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): pass - def run(self, documents: List[dict], index: Optional[str] = None, **kwargs): # type: ignore + @abstractmethod + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + pass + + def run(self, documents: List[dict], index: Optional[str] = None, **kwargs): # type: ignore self.write_documents(documents=documents, index=index) return kwargs, "output_1" diff --git a/haystack/document_store/elasticsearch.py b/haystack/document_store/elasticsearch.py index 8b93584ea..8725a6b99 100644 --- a/haystack/document_store/elasticsearch.py +++ b/haystack/document_store/elasticsearch.py @@ -929,6 +929,22 @@ class ElasticsearchDocumentStore(BaseDocumentStore): """ Delete documents in an index. All documents are deleted if no filters are passed. + :param index: Index name to delete the document from. + :param filters: Optional filters to narrow down the documents to be deleted. + :return: None + """ + logger.warning( + """DEPRECATION WARNINGS: + 1. delete_all_documents() method is deprecated, please use delete_documents method + For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045 + """ + ) + self.delete_documents(index, filters) + + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + """ + Delete documents in an index. All documents are deleted if no filters are passed. + :param index: Index name to delete the document from. :param filters: Optional filters to narrow down the documents to be deleted. :return: None @@ -939,9 +955,9 @@ class ElasticsearchDocumentStore(BaseDocumentStore): filter_clause = [] for key, values in filters.items(): filter_clause.append( - { - "terms": {key: values} - } + { + "terms": {key: values} + } ) query["query"]["bool"] = {"filter": filter_clause} else: diff --git a/haystack/document_store/faiss.py b/haystack/document_store/faiss.py index 25979ebcd..98256463b 100644 --- a/haystack/document_store/faiss.py +++ b/haystack/document_store/faiss.py @@ -345,6 +345,18 @@ class FAISSDocumentStore(SQLDocumentStore): self.faiss_indexes[index].train(embeddings) def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + """ + Delete all documents from the document store. + """ + logger.warning( + """DEPRECATION WARNINGS: + 1. delete_all_documents() method is deprecated, please use delete_documents method + For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045 + """ + ) + self.delete_documents(index, filters) + + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): """ Delete all documents from the document store. """ @@ -353,7 +365,7 @@ class FAISSDocumentStore(SQLDocumentStore): index = index or self.index if index in self.faiss_indexes.keys(): self.faiss_indexes[index].reset() - super().delete_all_documents(index=index) + super().delete_documents(index=index) def query_by_embedding( self, diff --git a/haystack/document_store/memory.py b/haystack/document_store/memory.py index 3ae9659d5..e8b57cbd9 100644 --- a/haystack/document_store/memory.py +++ b/haystack/document_store/memory.py @@ -289,7 +289,7 @@ class InMemoryDocumentStore(BaseDocumentStore): result = self.get_all_documents_generator(index=index, filters=filters, return_embedding=return_embedding) documents = list(result) return documents - + def get_all_documents_generator( self, index: Optional[str] = None, @@ -345,7 +345,22 @@ class InMemoryDocumentStore(BaseDocumentStore): :param filters: Optional filters to narrow down the documents to be deleted. :return: None """ + logger.warning( + """DEPRECATION WARNINGS: + 1. delete_all_documents() method is deprecated, please use delete_documents method + For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045 + """ + ) + self.delete_documents(index, filters) + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + """ + Delete documents in an index. All documents are deleted if no filters are passed. + + :param index: Index name to delete the document from. + :param filters: Optional filters to narrow down the documents to be deleted. + :return: None + """ if filters: raise NotImplementedError("Delete by filters is not implemented for InMemoryDocumentStore.") index = index or self.index diff --git a/haystack/document_store/milvus.py b/haystack/document_store/milvus.py index b3ecc89e8..606e50c99 100644 --- a/haystack/document_store/milvus.py +++ b/haystack/document_store/milvus.py @@ -350,6 +350,22 @@ class MilvusDocumentStore(SQLDocumentStore): return documents def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + """ + Delete all documents (from SQL AND Milvus). + :param index: (SQL) index name for storing the docs and metadata + :param filters: Optional filters to narrow down the search space. + Example: {"name": ["some", "more"], "category": ["only_one"]} + :return: None + """ + logger.warning( + """DEPRECATION WARNINGS: + 1. delete_all_documents() method is deprecated, please use delete_documents method + For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045 + """ + ) + self.delete_documents(index, filters) + + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): """ Delete all documents (from SQL AND Milvus). :param index: (SQL) index name for storing the docs and metadata @@ -358,7 +374,7 @@ class MilvusDocumentStore(SQLDocumentStore): :return: None """ index = index or self.index - super().delete_all_documents(index=index, filters=filters) + super().delete_documents(index=index, filters=filters) status, ok = self.milvus_server.has_collection(collection_name=index) if status.code != Status.SUCCESS: raise RuntimeError(f'Milvus has collection check failed: {status}') diff --git a/haystack/document_store/sql.py b/haystack/document_store/sql.py index 58fdf5c65..27dc1caaf 100644 --- a/haystack/document_store/sql.py +++ b/haystack/document_store/sql.py @@ -446,19 +446,35 @@ class SQLDocumentStore(BaseDocumentStore): :param filters: Optional filters to narrow down the documents to be deleted. :return: None """ + logger.warning( + """DEPRECATION WARNINGS: + 1. delete_all_documents() method is deprecated, please use delete_documents method + For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045 + """ + ) + self.delete_documents(index, filters) + def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None): + """ + Delete documents in an index. All documents are deleted if no filters are passed. + + :param index: Index name to delete the document from. + :param filters: Optional filters to narrow down the documents to be deleted. + :return: None + """ index = index or self.index document_ids_to_delete = self.session.query(DocumentORM.id).filter_by(index=index) if filters: # documents_query = documents_query.join(MetaORM) for key, values in filters.items(): document_ids_to_delete = document_ids_to_delete.filter( - MetaORM.name == key, - MetaORM.value.in_(values), - DocumentORM.id == MetaORM.document_id + MetaORM.name == key, + MetaORM.value.in_(values), + DocumentORM.id == MetaORM.document_id ) - self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete(synchronize_session=False) + self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete( + synchronize_session=False) self.session.commit() def _get_or_create(self, session, model, **kwargs): diff --git a/test/test_document_store.py b/test/test_document_store.py index d42618927..b8950c79e 100644 --- a/test/test_document_store.py +++ b/test/test_document_store.py @@ -313,10 +313,19 @@ def test_delete_all_documents(document_store_with_docs): assert len(documents) == 0 +@pytest.mark.elasticsearch +def test_delete_documents(document_store_with_docs): + assert len(document_store_with_docs.get_all_documents()) == 3 + + document_store_with_docs.delete_documents() + documents = document_store_with_docs.get_all_documents() + assert len(documents) == 0 + + @pytest.mark.elasticsearch @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True) def test_delete_documents_with_filters(document_store_with_docs): - document_store_with_docs.delete_all_documents(filters={"meta_field": ["test1", "test2"]}) + document_store_with_docs.delete_documents(filters={"meta_field": ["test1", "test2"]}) documents = document_store_with_docs.get_all_documents() assert len(documents) == 1 assert documents[0].meta["meta_field"] == "test3" @@ -416,7 +425,7 @@ def test_multilabel(document_store): assert len(multi_labels) == 0 # clean up - document_store.delete_all_documents(index="haystack_test_multilabel") + document_store.delete_documents(index="haystack_test_multilabel") @pytest.mark.elasticsearch @@ -480,7 +489,7 @@ def test_multilabel_no_answer(document_store): == len(multi_labels[0].multiple_offset_start_in_docs) # clean up - document_store.delete_all_documents(index="haystack_test_multilabel_no_answer") + document_store.delete_documents(index="haystack_test_multilabel_no_answer") @pytest.mark.elasticsearch