Rename delete_all_documents() method to delete_documents() (#1047)

This commit is contained in:
Ikram Ali 2021-05-10 16:37:08 +05:00 committed by GitHub
parent 5d31e633ce
commit a06e4450d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 15 deletions

View File

@ -204,10 +204,13 @@ class BaseDocumentStore(BaseComponent):
else:
logger.error("File needs to be in json or jsonl format.")
@abstractmethod
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
    """
    Abstract hook for deleting documents under the legacy method name.

    Concrete document stores must provide their own implementation; this
    declaration performs no work itself.

    :param index: Optional name of the index to delete documents from.
    :param filters: Optional mapping of field name to accepted values, used to
                    narrow down the documents to be deleted.
    :return: None
    """
@abstractmethod
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
    """
    Abstract hook for deleting documents from a document store.

    Concrete document stores must provide their own implementation; this
    declaration performs no work itself.

    :param index: Optional name of the index to delete documents from.
    :param filters: Optional mapping of field name to accepted values, used to
                    narrow down the documents to be deleted.
    :return: None
    """
def run(self, documents: List[dict], index: Optional[str] = None, **kwargs):  # type: ignore
    """
    Write the given documents to the store and pass any extra arguments through.

    :param documents: Documents handed over to `write_documents`.
    :param index: Optional index name handed over to `write_documents`.
    :param kwargs: Additional keyword arguments; returned untouched.
    :return: Tuple of the untouched `kwargs` dict and the fixed output name "output_1".
    """
    self.write_documents(index=index, documents=documents)
    result = (kwargs, "output_1")
    return result

View File

@ -929,6 +929,22 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
"""
Delete documents in an index. All documents are deleted if no filters are passed.
:param index: Index name to delete the document from.
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None
"""
logger.warning(
"""DEPRECATION WARNINGS:
1. delete_all_documents() method is deprecated, please use delete_documents method
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
"""
)
self.delete_documents(index, filters)
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
"""
Delete documents in an index. All documents are deleted if no filters are passed.
:param index: Index name to delete the document from.
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None

View File

@ -345,6 +345,18 @@ class FAISSDocumentStore(SQLDocumentStore):
self.faiss_indexes[index].train(embeddings)
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
    """
    Deprecated entry point for deleting documents; use `delete_documents` instead.

    Logs a deprecation warning and then forwards both arguments unchanged to
    `delete_documents`.

    :param index: Passed through positionally to `delete_documents`.
    :param filters: Passed through positionally to `delete_documents`.
    :return: None
    """
    # The notice is kept as one literal so the logged text stays exactly as before.
    deprecation_notice = """DEPRECATION WARNINGS:
1. delete_all_documents() method is deprecated, please use delete_documents method
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
"""
    logger.warning(deprecation_notice)
    self.delete_documents(index, filters)
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
"""
Delete all documents from the document store.
"""
@ -353,7 +365,7 @@ class FAISSDocumentStore(SQLDocumentStore):
index = index or self.index
if index in self.faiss_indexes.keys():
self.faiss_indexes[index].reset()
super().delete_all_documents(index=index)
super().delete_documents(index=index)
def query_by_embedding(
self,

View File

@ -345,7 +345,22 @@ class InMemoryDocumentStore(BaseDocumentStore):
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None
"""
logger.warning(
"""DEPRECATION WARNINGS:
1. delete_all_documents() method is deprecated, please use delete_documents method
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
"""
)
self.delete_documents(index, filters)
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
"""
Delete documents in an index. All documents are deleted if no filters are passed.
:param index: Index name to delete the document from.
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None
"""
if filters:
raise NotImplementedError("Delete by filters is not implemented for InMemoryDocumentStore.")
index = index or self.index

View File

@ -350,6 +350,22 @@ class MilvusDocumentStore(SQLDocumentStore):
return documents
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
    """
    Deprecated entry point for deleting documents (from SQL AND Milvus); use
    `delete_documents` instead.

    Logs a deprecation warning and then forwards both arguments unchanged to
    `delete_documents`.

    :param index: (SQL) index name for storing the docs and metadata
    :param filters: Optional filters to narrow down the search space.
                    Example: {"name": ["some", "more"], "category": ["only_one"]}
    :return: None
    """
    # The notice is kept as one literal so the logged text stays exactly as before.
    deprecation_notice = """DEPRECATION WARNINGS:
1. delete_all_documents() method is deprecated, please use delete_documents method
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
"""
    logger.warning(deprecation_notice)
    self.delete_documents(index, filters)
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
"""
Delete all documents (from SQL AND Milvus).
:param index: (SQL) index name for storing the docs and metadata
@ -358,7 +374,7 @@ class MilvusDocumentStore(SQLDocumentStore):
:return: None
"""
index = index or self.index
super().delete_all_documents(index=index, filters=filters)
super().delete_documents(index=index, filters=filters)
status, ok = self.milvus_server.has_collection(collection_name=index)
if status.code != Status.SUCCESS:
raise RuntimeError(f'Milvus has collection check failed: {status}')

View File

@ -446,7 +446,22 @@ class SQLDocumentStore(BaseDocumentStore):
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None
"""
logger.warning(
"""DEPRECATION WARNINGS:
1. delete_all_documents() method is deprecated, please use delete_documents method
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
"""
)
self.delete_documents(index, filters)
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
"""
Delete documents in an index. All documents are deleted if no filters are passed.
:param index: Index name to delete the document from.
:param filters: Optional filters to narrow down the documents to be deleted.
:return: None
"""
index = index or self.index
document_ids_to_delete = self.session.query(DocumentORM.id).filter_by(index=index)
if filters:
@ -458,7 +473,8 @@ class SQLDocumentStore(BaseDocumentStore):
DocumentORM.id == MetaORM.document_id
)
self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete(synchronize_session=False)
self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete(
synchronize_session=False)
self.session.commit()
def _get_or_create(self, session, model, **kwargs):

View File

@ -313,10 +313,19 @@ def test_delete_all_documents(document_store_with_docs):
assert len(documents) == 0
@pytest.mark.elasticsearch
def test_delete_documents(document_store_with_docs):
    """Calling delete_documents() without arguments empties a store that held three documents."""
    docs_before = document_store_with_docs.get_all_documents()
    assert len(docs_before) == 3

    document_store_with_docs.delete_documents()

    docs_after = document_store_with_docs.get_all_documents()
    assert len(docs_after) == 0
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_delete_documents_with_filters(document_store_with_docs):
    """delete_documents() with filters removes only the matching documents."""
    # Only the new API is called here: running the deprecated delete_all_documents()
    # with the same filters first would already remove the matching documents and
    # leave delete_documents() with nothing to delete, so its filter handling
    # would go untested.
    document_store_with_docs.delete_documents(filters={"meta_field": ["test1", "test2"]})

    remaining = document_store_with_docs.get_all_documents()
    assert len(remaining) == 1
    assert remaining[0].meta["meta_field"] == "test3"
@ -416,7 +425,7 @@ def test_multilabel(document_store):
assert len(multi_labels) == 0
# clean up
document_store.delete_all_documents(index="haystack_test_multilabel")
document_store.delete_documents(index="haystack_test_multilabel")
@pytest.mark.elasticsearch
@ -480,7 +489,7 @@ def test_multilabel_no_answer(document_store):
== len(multi_labels[0].multiple_offset_start_in_docs)
# clean up
document_store.delete_all_documents(index="haystack_test_multilabel_no_answer")
document_store.delete_documents(index="haystack_test_multilabel_no_answer")
@pytest.mark.elasticsearch