From a73fbc61ff17d13b0f5a2e3e862736ecef019ee5 Mon Sep 17 00:00:00 2001 From: liwenju0 Date: Wed, 2 Apr 2025 19:20:17 +0800 Subject: [PATCH] Fix: Handle the case of deleting empty blocks. Update the relevant message (#6643) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …logic to return the correct deletion message. Add handling for empty arrays to ensure no errors occur during the deletion operation. Update the test cases to verify the new logic. ### What problem does this PR solve? Fixes this bug: https://github.com/infiniflow/ragflow/issues/6607 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: wenju.li --- api/apps/sdk/doc.py | 2 ++ rag/utils/es_conn.py | 5 ++++- .../test_delete_chunks.py | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index a10bfb32f..4f515e73c 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1170,6 +1170,8 @@ def rm_chunk(tenant_id, dataset_id, document_id): if chunk_number != 0: DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0) if "chunk_ids" in req and chunk_number != len(unique_chunk_ids): + if len(unique_chunk_ids) == 0: + return get_result(message=f"deleted {chunk_number} chunks") return get_error_data_result(message=f"rm_chunk deleted chunks {chunk_number}, expect {len(unique_chunk_ids)}") if duplicate_messages: return get_result(message=f"Partially deleted {chunk_number} chunks with {len(duplicate_messages)} errors", data={"success_count": chunk_number, "errors": duplicate_messages},) diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py index 62c6cf0da..f761a54e7 100644 --- a/rag/utils/es_conn.py +++ b/rag/utils/es_conn.py @@ -413,7 +413,10 @@ class ESConnection(DocStoreConnection): chunk_ids = condition["id"] if not isinstance(chunk_ids, list): chunk_ids = [chunk_ids] - qry = Q("ids", values=chunk_ids) + if not chunk_ids: # when
chunk_ids is empty, delete all + qry = Q("match_all") + else: + qry = Q("ids", values=chunk_ids) else: qry = Q("bool") for k, v in condition.items(): diff --git a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_delete_chunks.py b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_delete_chunks.py index 0b123d29a..1042c93d8 100644 --- a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_delete_chunks.py +++ b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_delete_chunks.py @@ -168,7 +168,8 @@ class TestChunksDeletion: ), (lambda r: {"chunk_ids": r[:1]}, 0, "", 4), (lambda r: {"chunk_ids": r}, 0, "", 1), - pytest.param({"chunk_ids": []}, 0, "", 0, marks=pytest.mark.skip(reason="issues/6607")), + pytest.param({"chunk_ids": []}, 0, "deleted 5 chunks", 0) + ], ) def test_basic_scenarios(