mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-19 11:58:44 +00:00
Rename delete_all_documents() method to delete_documents() (#1047)
This commit is contained in:
parent
5d31e633ce
commit
a06e4450d1
@ -204,10 +204,13 @@ class BaseDocumentStore(BaseComponent):
|
|||||||
else:
|
else:
|
||||||
logger.error("File needs to be in json or jsonl format.")
|
logger.error("File needs to be in json or jsonl format.")
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def run(self, documents: List[dict], index: Optional[str] = None, **kwargs): # type: ignore
|
@abstractmethod
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self, documents: List[dict], index: Optional[str] = None, **kwargs): # type: ignore
|
||||||
self.write_documents(documents=documents, index=index)
|
self.write_documents(documents=documents, index=index)
|
||||||
return kwargs, "output_1"
|
return kwargs, "output_1"
|
||||||
|
@ -929,6 +929,22 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
|||||||
"""
|
"""
|
||||||
Delete documents in an index. All documents are deleted if no filters are passed.
|
Delete documents in an index. All documents are deleted if no filters are passed.
|
||||||
|
|
||||||
|
:param index: Index name to delete the document from.
|
||||||
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"""DEPRECATION WARNINGS:
|
||||||
|
1. delete_all_documents() method is deprecated, please use delete_documents method
|
||||||
|
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
self.delete_documents(index, filters)
|
||||||
|
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
"""
|
||||||
|
Delete documents in an index. All documents are deleted if no filters are passed.
|
||||||
|
|
||||||
:param index: Index name to delete the document from.
|
:param index: Index name to delete the document from.
|
||||||
:param filters: Optional filters to narrow down the documents to be deleted.
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
:return: None
|
:return: None
|
||||||
@ -939,9 +955,9 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
|||||||
filter_clause = []
|
filter_clause = []
|
||||||
for key, values in filters.items():
|
for key, values in filters.items():
|
||||||
filter_clause.append(
|
filter_clause.append(
|
||||||
{
|
{
|
||||||
"terms": {key: values}
|
"terms": {key: values}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
query["query"]["bool"] = {"filter": filter_clause}
|
query["query"]["bool"] = {"filter": filter_clause}
|
||||||
else:
|
else:
|
||||||
|
@ -345,6 +345,18 @@ class FAISSDocumentStore(SQLDocumentStore):
|
|||||||
self.faiss_indexes[index].train(embeddings)
|
self.faiss_indexes[index].train(embeddings)
|
||||||
|
|
||||||
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
"""
|
||||||
|
Delete all documents from the document store.
|
||||||
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"""DEPRECATION WARNINGS:
|
||||||
|
1. delete_all_documents() method is deprecated, please use delete_documents method
|
||||||
|
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
self.delete_documents(index, filters)
|
||||||
|
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
"""
|
"""
|
||||||
Delete all documents from the document store.
|
Delete all documents from the document store.
|
||||||
"""
|
"""
|
||||||
@ -353,7 +365,7 @@ class FAISSDocumentStore(SQLDocumentStore):
|
|||||||
index = index or self.index
|
index = index or self.index
|
||||||
if index in self.faiss_indexes.keys():
|
if index in self.faiss_indexes.keys():
|
||||||
self.faiss_indexes[index].reset()
|
self.faiss_indexes[index].reset()
|
||||||
super().delete_all_documents(index=index)
|
super().delete_documents(index=index)
|
||||||
|
|
||||||
def query_by_embedding(
|
def query_by_embedding(
|
||||||
self,
|
self,
|
||||||
|
@ -345,7 +345,22 @@ class InMemoryDocumentStore(BaseDocumentStore):
|
|||||||
:param filters: Optional filters to narrow down the documents to be deleted.
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"""DEPRECATION WARNINGS:
|
||||||
|
1. delete_all_documents() method is deprecated, please use delete_documents method
|
||||||
|
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
self.delete_documents(index, filters)
|
||||||
|
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
"""
|
||||||
|
Delete documents in an index. All documents are deleted if no filters are passed.
|
||||||
|
|
||||||
|
:param index: Index name to delete the document from.
|
||||||
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
if filters:
|
if filters:
|
||||||
raise NotImplementedError("Delete by filters is not implemented for InMemoryDocumentStore.")
|
raise NotImplementedError("Delete by filters is not implemented for InMemoryDocumentStore.")
|
||||||
index = index or self.index
|
index = index or self.index
|
||||||
|
@ -350,6 +350,22 @@ class MilvusDocumentStore(SQLDocumentStore):
|
|||||||
return documents
|
return documents
|
||||||
|
|
||||||
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
"""
|
||||||
|
Delete all documents (from SQL AND Milvus).
|
||||||
|
:param index: (SQL) index name for storing the docs and metadata
|
||||||
|
:param filters: Optional filters to narrow down the search space.
|
||||||
|
Example: {"name": ["some", "more"], "category": ["only_one"]}
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"""DEPRECATION WARNINGS:
|
||||||
|
1. delete_all_documents() method is deprecated, please use delete_documents method
|
||||||
|
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
self.delete_documents(index, filters)
|
||||||
|
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
"""
|
"""
|
||||||
Delete all documents (from SQL AND Milvus).
|
Delete all documents (from SQL AND Milvus).
|
||||||
:param index: (SQL) index name for storing the docs and metadata
|
:param index: (SQL) index name for storing the docs and metadata
|
||||||
@ -358,7 +374,7 @@ class MilvusDocumentStore(SQLDocumentStore):
|
|||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
index = index or self.index
|
index = index or self.index
|
||||||
super().delete_all_documents(index=index, filters=filters)
|
super().delete_documents(index=index, filters=filters)
|
||||||
status, ok = self.milvus_server.has_collection(collection_name=index)
|
status, ok = self.milvus_server.has_collection(collection_name=index)
|
||||||
if status.code != Status.SUCCESS:
|
if status.code != Status.SUCCESS:
|
||||||
raise RuntimeError(f'Milvus has collection check failed: {status}')
|
raise RuntimeError(f'Milvus has collection check failed: {status}')
|
||||||
|
@ -446,19 +446,35 @@ class SQLDocumentStore(BaseDocumentStore):
|
|||||||
:param filters: Optional filters to narrow down the documents to be deleted.
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"""DEPRECATION WARNINGS:
|
||||||
|
1. delete_all_documents() method is deprecated, please use delete_documents method
|
||||||
|
For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
self.delete_documents(index, filters)
|
||||||
|
|
||||||
|
def delete_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
|
||||||
|
"""
|
||||||
|
Delete documents in an index. All documents are deleted if no filters are passed.
|
||||||
|
|
||||||
|
:param index: Index name to delete the document from.
|
||||||
|
:param filters: Optional filters to narrow down the documents to be deleted.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
index = index or self.index
|
index = index or self.index
|
||||||
document_ids_to_delete = self.session.query(DocumentORM.id).filter_by(index=index)
|
document_ids_to_delete = self.session.query(DocumentORM.id).filter_by(index=index)
|
||||||
if filters:
|
if filters:
|
||||||
# documents_query = documents_query.join(MetaORM)
|
# documents_query = documents_query.join(MetaORM)
|
||||||
for key, values in filters.items():
|
for key, values in filters.items():
|
||||||
document_ids_to_delete = document_ids_to_delete.filter(
|
document_ids_to_delete = document_ids_to_delete.filter(
|
||||||
MetaORM.name == key,
|
MetaORM.name == key,
|
||||||
MetaORM.value.in_(values),
|
MetaORM.value.in_(values),
|
||||||
DocumentORM.id == MetaORM.document_id
|
DocumentORM.id == MetaORM.document_id
|
||||||
)
|
)
|
||||||
|
|
||||||
self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete(synchronize_session=False)
|
self.session.query(DocumentORM).filter(DocumentORM.id.in_(document_ids_to_delete)).delete(
|
||||||
|
synchronize_session=False)
|
||||||
self.session.commit()
|
self.session.commit()
|
||||||
|
|
||||||
def _get_or_create(self, session, model, **kwargs):
|
def _get_or_create(self, session, model, **kwargs):
|
||||||
|
@ -313,10 +313,19 @@ def test_delete_all_documents(document_store_with_docs):
|
|||||||
assert len(documents) == 0
|
assert len(documents) == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.elasticsearch
|
||||||
|
def test_delete_documents(document_store_with_docs):
|
||||||
|
assert len(document_store_with_docs.get_all_documents()) == 3
|
||||||
|
|
||||||
|
document_store_with_docs.delete_documents()
|
||||||
|
documents = document_store_with_docs.get_all_documents()
|
||||||
|
assert len(documents) == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.elasticsearch
|
@pytest.mark.elasticsearch
|
||||||
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
|
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
|
||||||
def test_delete_documents_with_filters(document_store_with_docs):
|
def test_delete_documents_with_filters(document_store_with_docs):
|
||||||
document_store_with_docs.delete_all_documents(filters={"meta_field": ["test1", "test2"]})
|
document_store_with_docs.delete_documents(filters={"meta_field": ["test1", "test2"]})
|
||||||
documents = document_store_with_docs.get_all_documents()
|
documents = document_store_with_docs.get_all_documents()
|
||||||
assert len(documents) == 1
|
assert len(documents) == 1
|
||||||
assert documents[0].meta["meta_field"] == "test3"
|
assert documents[0].meta["meta_field"] == "test3"
|
||||||
@ -416,7 +425,7 @@ def test_multilabel(document_store):
|
|||||||
assert len(multi_labels) == 0
|
assert len(multi_labels) == 0
|
||||||
|
|
||||||
# clean up
|
# clean up
|
||||||
document_store.delete_all_documents(index="haystack_test_multilabel")
|
document_store.delete_documents(index="haystack_test_multilabel")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.elasticsearch
|
@pytest.mark.elasticsearch
|
||||||
@ -480,7 +489,7 @@ def test_multilabel_no_answer(document_store):
|
|||||||
== len(multi_labels[0].multiple_offset_start_in_docs)
|
== len(multi_labels[0].multiple_offset_start_in_docs)
|
||||||
|
|
||||||
# clean up
|
# clean up
|
||||||
document_store.delete_all_documents(index="haystack_test_multilabel_no_answer")
|
document_store.delete_documents(index="haystack_test_multilabel_no_answer")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.elasticsearch
|
@pytest.mark.elasticsearch
|
||||||
|
Loading…
x
Reference in New Issue
Block a user