mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-11-03 03:09:49 +00:00
API: completed delete_doc api (#1290)
### What problem does this PR solve? Adds the functionality of deleting documentation ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
6a7c2112f7
commit
8c9b54db31
@ -24,6 +24,7 @@ from flask_login import login_required, current_user
|
||||
from api.db import FileType, ParserType
|
||||
from api.db.services import duplicate_name
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.settings import RetCode
|
||||
@ -31,6 +32,8 @@ from api.utils import get_uuid
|
||||
from api.utils.api_utils import construct_json_result
|
||||
from api.utils.file_utils import filename_type, thumbnail
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from api.db.db_models import Task, File
|
||||
from api.db import FileType, TaskStatus, ParserType, FileSource
|
||||
|
||||
|
||||
MAXIMUM_OF_UPLOADING_FILES = 256
|
||||
@ -89,6 +92,7 @@ def upload(dataset_id):
|
||||
# grab all the errs
|
||||
err = []
|
||||
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
||||
uploaded_docs_json = []
|
||||
for file in file_objs:
|
||||
try:
|
||||
# TODO: get this value from the database as some tenants have this limit while others don't
|
||||
@ -132,6 +136,7 @@ def upload(dataset_id):
|
||||
DocumentService.insert(doc)
|
||||
|
||||
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
||||
uploaded_docs_json.append(doc)
|
||||
except Exception as e:
|
||||
err.append(file.filename + ": " + str(e))
|
||||
|
||||
@ -139,14 +144,65 @@ def upload(dataset_id):
|
||||
# return all the errors
|
||||
return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
|
||||
# success
|
||||
return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
|
||||
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
|
||||
@login_required
|
||||
def delete(document_id, dataset_id): # string
|
||||
# get the root folder
|
||||
root_folder = FileService.get_root_folder(current_user.id)
|
||||
# parent file's id
|
||||
parent_file_id = root_folder["id"]
|
||||
# consider the new user
|
||||
FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
|
||||
# store all the errors that may have
|
||||
errors = ""
|
||||
try:
|
||||
# whether there is this document
|
||||
exist, doc = DocumentService.get_by_id(document_id)
|
||||
if not exist:
|
||||
return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
|
||||
# whether this doc is authorized by this tenant
|
||||
tenant_id = DocumentService.get_tenant_id(document_id)
|
||||
if not tenant_id:
|
||||
return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
|
||||
f" reason!", code=RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
# get the doc's id and location
|
||||
real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
|
||||
|
||||
if real_dataset_id != dataset_id:
|
||||
return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
|
||||
f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
# there is an issue when removing
|
||||
if not DocumentService.remove_document(doc, tenant_id):
|
||||
return construct_json_result(
|
||||
message="There was an error during the document removal process. Please check the status of the "
|
||||
"RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
|
||||
|
||||
# fetch the File2Document record associated with the provided document ID.
|
||||
file_to_doc = File2DocumentService.get_by_document_id(document_id)
|
||||
# delete the associated File record.
|
||||
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
|
||||
# delete the File2Document record itself using the document ID. This removes the
|
||||
# association between the document and the file after the File record has been deleted.
|
||||
File2DocumentService.delete_by_document_id(document_id)
|
||||
|
||||
# delete it from minio
|
||||
MINIO.rm(dataset_id, location)
|
||||
except Exception as e:
|
||||
errors += str(e)
|
||||
if errors:
|
||||
return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
|
||||
|
||||
return construct_json_result(data=True, code=RetCode.SUCCESS)
|
||||
|
||||
# ----------------------------upload online files------------------------------------------------
|
||||
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------enable rename-----------------------------------------------------
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
|
||||
@ -101,10 +101,13 @@ class RAGFlow:
|
||||
result_dict = json.loads(res.text)
|
||||
return result_dict
|
||||
|
||||
# ----------------------------upload remote files-----------------------------------------------------
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
def delete_files(self, document_id, dataset_id):
|
||||
endpoint = f"{self.document_url}/{dataset_id}/{document_id}"
|
||||
res = requests.delete(endpoint, headers=self.authorization_header)
|
||||
return res.json()
|
||||
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------enable rename-----------------------------------------------------
|
||||
|
||||
|
||||
@ -149,12 +149,96 @@ class TestFile(TestSdk):
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
assert res['code'] == RetCode.ARGUMENT_ERROR and res['message'] == 'Remote files have not unsupported.'
|
||||
|
||||
# ----------------------------upload remote files-----------------------------------------------------
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
def test_delete_one_file(self):
|
||||
"""
|
||||
Test deleting one file with success.
|
||||
"""
|
||||
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
created_res = ragflow.create_dataset("test_delete_one_file")
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt"]
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
# get the doc_id
|
||||
data = res['data'][0]
|
||||
doc_id = data['id']
|
||||
# delete the files
|
||||
deleted_res = ragflow.delete_files(doc_id, dataset_id)
|
||||
# assert value
|
||||
assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
|
||||
|
||||
def test_delete_document_with_not_existing_document(self):
|
||||
"""
|
||||
Test deleting a document that does not exist with failure.
|
||||
"""
|
||||
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
res = ragflow.delete_files("111", dataset_id)
|
||||
assert res['code'] == RetCode.DATA_ERROR and res['message'] == 'Document 111 not found!'
|
||||
|
||||
def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
|
||||
"""
|
||||
Test deleting documents when uploading 100 docs and deleting 100 docs.
|
||||
"""
|
||||
# upload 100 docs
|
||||
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
created_res = ragflow.create_dataset("test_delete_one_file")
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt"] * 100
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
|
||||
# get the doc_id
|
||||
data = res['data']
|
||||
for d in data:
|
||||
doc_id = d['id']
|
||||
# delete the files
|
||||
deleted_res = ragflow.delete_files(doc_id, dataset_id)
|
||||
# assert value
|
||||
assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
|
||||
|
||||
def test_delete_document_from_nonexistent_dataset(self):
|
||||
"""
|
||||
Test deleting documents from a non-existent dataset
|
||||
"""
|
||||
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
created_res = ragflow.create_dataset("test_delete_one_file")
|
||||
dataset_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt"]
|
||||
res = ragflow.upload_local_file(dataset_id, file_paths)
|
||||
# get the doc_id
|
||||
data = res['data'][0]
|
||||
doc_id = data['id']
|
||||
# delete the files
|
||||
deleted_res = ragflow.delete_files(doc_id, "000")
|
||||
# assert value
|
||||
assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
|
||||
f'The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.')
|
||||
|
||||
def test_delete_document_which_is_located_in_other_dataset(self):
|
||||
"""
|
||||
Test deleting a document which is located in other dataset.
|
||||
"""
|
||||
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
# upload a document
|
||||
created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
|
||||
created_res_id = created_res['data']['dataset_id']
|
||||
file_paths = ["test_data/test.txt"]
|
||||
res = ragflow.upload_local_file(created_res_id, file_paths)
|
||||
# other dataset
|
||||
other_res = ragflow.create_dataset("other_dataset")
|
||||
other_dataset_id = other_res['data']['dataset_id']
|
||||
# get the doc_id
|
||||
data = res['data'][0]
|
||||
doc_id = data['id']
|
||||
# delete the files from the other dataset
|
||||
deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
|
||||
# assert value
|
||||
assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
|
||||
f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')
|
||||
|
||||
# ----------------------------download a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------delete a file-----------------------------------------------------
|
||||
|
||||
# ----------------------------enable rename-----------------------------------------------------
|
||||
|
||||
# ----------------------------list files-----------------------------------------------------
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user