| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | # | 
					
						
							| 
									
										
										
										
											2024-01-19 19:51:57 +08:00
										 |  |  | #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved. | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | # | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #      http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							| 
									
										
										
										
											2024-01-22 19:51:38 +08:00
										 |  |  | #  limitations under the License. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | import base64 | 
					
						
							| 
									
										
										
										
											2024-04-16 16:42:19 +08:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | import pathlib | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | import flask | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | from elasticsearch_dsl import Q | 
					
						
							|  |  |  | from flask import request | 
					
						
							|  |  |  | from flask_login import login_required, current_user | 
					
						
							|  |  |  | from rag.nlp import search | 
					
						
							|  |  |  | from rag.utils import ELASTICSEARCH | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | from api.db.services import duplicate_name | 
					
						
							| 
									
										
										
										
											2024-02-01 18:53:56 +08:00
										 |  |  | from api.db.services.knowledgebase_service import KnowledgebaseService | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | from api.utils.api_utils import server_error_response, get_data_error_result, validate_request | 
					
						
							|  |  |  | from api.utils import get_uuid | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  | from api.db import FileType, TaskStatus, ParserType | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | from api.db.services.document_service import DocumentService | 
					
						
							|  |  |  | from api.settings import RetCode | 
					
						
							|  |  |  | from api.utils.api_utils import get_json_result | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | from rag.utils.minio_conn import MINIO | 
					
						
							| 
									
										
										
										
											2024-02-07 19:27:23 +08:00
										 |  |  | from api.utils.file_utils import filename_type, thumbnail | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/upload', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("kb_id") | 
					
						
							|  |  |  | def upload(): | 
					
						
							|  |  |  |     kb_id = request.form.get("kb_id") | 
					
						
							|  |  |  |     if not kb_id: | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  |     if 'file' not in request.files: | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  |     file = request.files['file'] | 
					
						
							|  |  |  |     if file.filename == '': | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, kb = KnowledgebaseService.get_by_id(kb_id) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Can't find this knowledgebase!") | 
					
						
							| 
									
										
										
										
											2024-04-16 19:12:12 +08:00
										 |  |  |         if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)): | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Exceed the maximum file number of a free user!") | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         filename = duplicate_name( | 
					
						
							|  |  |  |             DocumentService.query, | 
					
						
							|  |  |  |             name=file.filename, | 
					
						
							|  |  |  |             kb_id=kb.id) | 
					
						
							| 
									
										
										
										
											2024-04-07 09:04:32 +08:00
										 |  |  |         filetype = filename_type(filename) | 
					
						
							|  |  |  |         if not filetype: | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="This type of file has not been supported yet!") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |         location = filename | 
					
						
							|  |  |  |         while MINIO.obj_exist(kb_id, location): | 
					
						
							|  |  |  |             location += "_" | 
					
						
							|  |  |  |         blob = request.files['file'].read() | 
					
						
							| 
									
										
										
										
											2024-01-22 19:51:38 +08:00
										 |  |  |         MINIO.put(kb_id, location, blob) | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  |         doc = { | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             "id": get_uuid(), | 
					
						
							|  |  |  |             "kb_id": kb.id, | 
					
						
							|  |  |  |             "parser_id": kb.parser_id, | 
					
						
							| 
									
										
										
										
											2024-02-05 18:08:17 +08:00
										 |  |  |             "parser_config": kb.parser_config, | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             "created_by": current_user.id, | 
					
						
							| 
									
										
										
										
											2024-04-08 19:20:57 +08:00
										 |  |  |             "type": filetype, | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             "name": filename, | 
					
						
							|  |  |  |             "location": location, | 
					
						
							| 
									
										
										
										
											2024-02-07 19:27:23 +08:00
										 |  |  |             "size": len(blob), | 
					
						
							|  |  |  |             "thumbnail": thumbnail(filename, blob) | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |         if doc["type"] == FileType.VISUAL: | 
					
						
							|  |  |  |             doc["parser_id"] = ParserType.PICTURE.value | 
					
						
							|  |  |  |         if re.search(r"\.(ppt|pptx|pages)$", filename): | 
					
						
							|  |  |  |             doc["parser_id"] = ParserType.PRESENTATION.value | 
					
						
							|  |  |  |         doc = DocumentService.insert(doc) | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |         return get_json_result(data=doc.to_json()) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/create', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("name", "kb_id") | 
					
						
							|  |  |  | def create(): | 
					
						
							|  |  |  |     req = request.json | 
					
						
							|  |  |  |     kb_id = req["kb_id"] | 
					
						
							|  |  |  |     if not kb_id: | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, kb = KnowledgebaseService.get_by_id(kb_id) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Can't find this knowledgebase!") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if DocumentService.query(name=req["name"], kb_id=kb_id): | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Duplicated document name in the same knowledgebase.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         doc = DocumentService.insert({ | 
					
						
							|  |  |  |             "id": get_uuid(), | 
					
						
							|  |  |  |             "kb_id": kb.id, | 
					
						
							|  |  |  |             "parser_id": kb.parser_id, | 
					
						
							| 
									
										
										
										
											2024-02-05 18:08:17 +08:00
										 |  |  |             "parser_config": kb.parser_config, | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             "created_by": current_user.id, | 
					
						
							|  |  |  |             "type": FileType.VIRTUAL, | 
					
						
							|  |  |  |             "name": req["name"], | 
					
						
							|  |  |  |             "location": "", | 
					
						
							|  |  |  |             "size": 0 | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         return get_json_result(data=doc.to_json()) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/list', methods=['GET']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | def list(): | 
					
						
							|  |  |  |     kb_id = request.args.get("kb_id") | 
					
						
							|  |  |  |     if not kb_id: | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  |     keywords = request.args.get("keywords", "") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-05 18:08:17 +08:00
										 |  |  |     page_number = int(request.args.get("page", 1)) | 
					
						
							|  |  |  |     items_per_page = int(request.args.get("page_size", 15)) | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |     orderby = request.args.get("orderby", "create_time") | 
					
						
							|  |  |  |     desc = request.args.get("desc", True) | 
					
						
							|  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2024-02-02 19:21:37 +08:00
										 |  |  |         docs, tol = DocumentService.get_by_kb_id( | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             kb_id, page_number, items_per_page, orderby, desc, keywords) | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  |         return get_json_result(data={"total": tol, "docs": docs}) | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-28 15:01:12 +08:00
										 |  |  | @manager.route('/thumbnails', methods=['GET']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | def thumbnails(): | 
					
						
							|  |  |  |     doc_ids = request.args.get("doc_ids").split(",") | 
					
						
							|  |  |  |     if not doc_ids: | 
					
						
							|  |  |  |         return get_json_result( | 
					
						
							|  |  |  |             data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         docs = DocumentService.get_thumbnails(doc_ids) | 
					
						
							|  |  |  |         return get_json_result(data={d["id"]: d["thumbnail"] for d in docs}) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | @manager.route('/change_status', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("doc_id", "status") | 
					
						
							|  |  |  | def change_status(): | 
					
						
							|  |  |  |     req = request.json | 
					
						
							|  |  |  |     if str(req["status"]) not in ["0", "1"]: | 
					
						
							|  |  |  |         get_json_result( | 
					
						
							|  |  |  |             data=False, | 
					
						
							|  |  |  |             retmsg='"Status" must be either 0 or 1!', | 
					
						
							|  |  |  |             retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, doc = DocumentService.get_by_id(req["doc_id"]) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							|  |  |  |         e, kb = KnowledgebaseService.get_by_id(doc.kb_id) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Can't find this knowledgebase!") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not DocumentService.update_by_id( | 
					
						
							|  |  |  |                 req["doc_id"], {"status": str(req["status"])}): | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Database error (Document update)!") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if str(req["status"]) == "0": | 
					
						
							|  |  |  |             ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  |                                               scripts="ctx._source.available_int=0;", | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |                                               idxnm=search.index_name( | 
					
						
							|  |  |  |                                                   kb.tenant_id) | 
					
						
							|  |  |  |                                               ) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  |                                               scripts="ctx._source.available_int=1;", | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |                                               idxnm=search.index_name( | 
					
						
							|  |  |  |                                                   kb.tenant_id) | 
					
						
							|  |  |  |                                               ) | 
					
						
							|  |  |  |         return get_json_result(data=True) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/rm', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("doc_id") | 
					
						
							|  |  |  | def rm(): | 
					
						
							|  |  |  |     req = request.json | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, doc = DocumentService.get_by_id(req["doc_id"]) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							| 
									
										
										
										
											2024-01-22 19:51:38 +08:00
										 |  |  |         tenant_id = DocumentService.get_tenant_id(req["doc_id"]) | 
					
						
							|  |  |  |         if not tenant_id: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Tenant not found!") | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |         ELASTICSEARCH.deleteByQuery( | 
					
						
							|  |  |  |             Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |         DocumentService.increment_chunk_num( | 
					
						
							|  |  |  |             doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0) | 
					
						
							| 
									
										
										
										
											2024-03-21 18:17:32 +08:00
										 |  |  |         if not DocumentService.delete(doc): | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Database error (Document removal)!") | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         MINIO.rm(doc.kb_id, doc.location) | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |         return get_json_result(data=True) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-31 19:57:45 +08:00
										 |  |  | @manager.route('/run', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("doc_ids", "run") | 
					
						
							| 
									
										
										
										
											2024-02-01 18:53:56 +08:00
										 |  |  | def run(): | 
					
						
							| 
									
										
										
										
											2024-01-31 19:57:45 +08:00
										 |  |  |     req = request.json | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         for id in req["doc_ids"]: | 
					
						
							| 
									
										
										
										
											2024-02-05 18:08:17 +08:00
										 |  |  |             info = {"run": str(req["run"]), "progress": 0} | 
					
						
							| 
									
										
										
										
											2024-03-01 19:48:01 +08:00
										 |  |  |             if str(req["run"]) == TaskStatus.RUNNING.value: | 
					
						
							|  |  |  |                 info["progress_msg"] = "" | 
					
						
							|  |  |  |                 info["chunk_num"] = 0 | 
					
						
							|  |  |  |                 info["token_num"] = 0 | 
					
						
							| 
									
										
										
										
											2024-02-05 18:08:17 +08:00
										 |  |  |             DocumentService.update_by_id(id, info) | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  |             # if str(req["run"]) == TaskStatus.CANCEL.value: | 
					
						
							| 
									
										
										
										
											2024-03-01 19:48:01 +08:00
										 |  |  |             tenant_id = DocumentService.get_tenant_id(id) | 
					
						
							|  |  |  |             if not tenant_id: | 
					
						
							|  |  |  |                 return get_data_error_result(retmsg="Tenant not found!") | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |             ELASTICSEARCH.deleteByQuery( | 
					
						
							|  |  |  |                 Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) | 
					
						
							| 
									
										
										
										
											2024-01-31 19:57:45 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return get_json_result(data=True) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | @manager.route('/rename', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							| 
									
										
										
										
											2024-02-02 19:21:37 +08:00
										 |  |  | @validate_request("doc_id", "name") | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  | def rename(): | 
					
						
							|  |  |  |     req = request.json | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, doc = DocumentService.get_by_id(req["doc_id"]) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |         if pathlib.Path(req["name"].lower()).suffix != pathlib.Path( | 
					
						
							|  |  |  |                 doc.name.lower()).suffix: | 
					
						
							| 
									
										
										
										
											2024-02-02 19:21:37 +08:00
										 |  |  |             return get_json_result( | 
					
						
							|  |  |  |                 data=False, | 
					
						
							|  |  |  |                 retmsg="The extension of file can't be changed", | 
					
						
							|  |  |  |                 retcode=RetCode.ARGUMENT_ERROR) | 
					
						
							| 
									
										
										
										
											2024-01-15 08:46:22 +08:00
										 |  |  |         if DocumentService.query(name=req["name"], kb_id=doc.kb_id): | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Duplicated document name in the same knowledgebase.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not DocumentService.update_by_id( | 
					
						
							|  |  |  |                 req["doc_id"], {"name": req["name"]}): | 
					
						
							|  |  |  |             return get_data_error_result( | 
					
						
							|  |  |  |                 retmsg="Database error (Document rename)!") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return get_json_result(data=True) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-19 19:22:17 +08:00
										 |  |  | @manager.route('/get/<doc_id>', methods=['GET']) | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  | # @login_required | 
					
						
							| 
									
										
										
										
											2024-02-19 19:22:17 +08:00
										 |  |  | def get(doc_id): | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  |     try: | 
					
						
							|  |  |  |         e, doc = DocumentService.get_by_id(doc_id) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-19 19:22:17 +08:00
										 |  |  |         response = flask.make_response(MINIO.get(doc.kb_id, doc.location)) | 
					
						
							|  |  |  |         ext = re.search(r"\.([^.]+)$", doc.name) | 
					
						
							|  |  |  |         if ext: | 
					
						
							| 
									
										
										
										
											2024-02-23 18:28:12 +08:00
										 |  |  |             if doc.type == FileType.VISUAL.value: | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  |                 response.headers.set('Content-Type', 'image/%s' % ext.group(1)) | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |                 response.headers.set( | 
					
						
							|  |  |  |                     'Content-Type', | 
					
						
							|  |  |  |                     'application/%s' % | 
					
						
							|  |  |  |                     ext.group(1)) | 
					
						
							| 
									
										
										
										
											2024-02-19 19:22:17 +08:00
										 |  |  |         return response | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/change_parser', methods=['POST']) | 
					
						
							|  |  |  | @login_required | 
					
						
							|  |  |  | @validate_request("doc_id", "parser_id") | 
					
						
							|  |  |  | def change_parser(): | 
					
						
							|  |  |  |     req = request.json | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         e, doc = DocumentService.get_by_id(req["doc_id"]) | 
					
						
							|  |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							|  |  |  |         if doc.parser_id.lower() == req["parser_id"].lower(): | 
					
						
							| 
									
										
										
										
											2024-03-22 19:21:09 +08:00
										 |  |  |             if "parser_config" in req: | 
					
						
							|  |  |  |                 if req["parser_config"] == doc.parser_config: | 
					
						
							|  |  |  |                     return get_json_result(data=True) | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 return get_json_result(data=True) | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |         if doc.type == FileType.VISUAL or re.search( | 
					
						
							|  |  |  |                 r"\.(ppt|pptx|pages)$", doc.name): | 
					
						
							| 
									
										
										
										
											2024-02-08 17:01:01 +08:00
										 |  |  |             return get_data_error_result(retmsg="Not supported yet!") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  |         e = DocumentService.update_by_id(doc.id, | 
					
						
							| 
									
										
										
										
											2024-03-05 16:33:47 +08:00
										 |  |  |                                          {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": "0"}) | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  |         if not e: | 
					
						
							|  |  |  |             return get_data_error_result(retmsg="Document not found!") | 
					
						
							| 
									
										
										
										
											2024-03-22 19:21:09 +08:00
										 |  |  |         if "parser_config" in req: | 
					
						
							|  |  |  |             DocumentService.update_parser_config(doc.id, req["parser_config"]) | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  |         if doc.token_num > 0: | 
					
						
							|  |  |  |             e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, | 
					
						
							|  |  |  |                                                     doc.process_duation * -1) | 
					
						
							| 
									
										
										
										
											2024-02-01 18:53:56 +08:00
										 |  |  |             if not e: | 
					
						
							|  |  |  |                 return get_data_error_result(retmsg="Document not found!") | 
					
						
							| 
									
										
										
										
											2024-03-01 19:48:01 +08:00
										 |  |  |             tenant_id = DocumentService.get_tenant_id(req["doc_id"]) | 
					
						
							|  |  |  |             if not tenant_id: | 
					
						
							|  |  |  |                 return get_data_error_result(retmsg="Tenant not found!") | 
					
						
							| 
									
										
										
										
											2024-03-27 11:33:46 +08:00
										 |  |  |             ELASTICSEARCH.deleteByQuery( | 
					
						
							|  |  |  |                 Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) | 
					
						
							| 
									
										
										
										
											2024-01-15 19:47:25 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return get_json_result(data=True) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | @manager.route('/image/<image_id>', methods=['GET']) | 
					
						
							| 
									
										
										
										
											2024-03-05 12:08:41 +08:00
										 |  |  | # @login_required | 
					
						
							| 
									
										
										
										
											2024-01-17 20:20:42 +08:00
										 |  |  | def get_image(image_id): | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         bkt, nm = image_id.split("-") | 
					
						
							|  |  |  |         response = flask.make_response(MINIO.get(bkt, nm)) | 
					
						
							|  |  |  |         response.headers.set('Content-Type', 'image/JPEG') | 
					
						
							|  |  |  |         return response | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         return server_error_response(e) |