2024-08-15 09:17:36 +08:00
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
2025-01-22 19:43:14 +08:00
import json
2025-10-09 12:36:19 +08:00
import logging
2025-01-22 19:43:14 +08:00
2024-08-15 09:17:36 +08:00
from flask import request
from flask_login import login_required , current_user
from api . db . services import duplicate_name
2025-10-09 12:36:19 +08:00
from api . db . services . document_service import DocumentService , queue_raptor_o_graphrag_tasks
2024-08-15 09:17:36 +08:00
from api . db . services . file2document_service import File2DocumentService
from api . db . services . file_service import FileService
2025-10-09 12:36:19 +08:00
from api . db . services . pipeline_operation_log_service import PipelineOperationLogService
from api . db . services . task_service import TaskService , GRAPH_RAPTOR_FAKE_DOC_ID
2024-08-15 09:17:36 +08:00
from api . db . services . user_service import TenantService , UserTenantService
2025-10-09 12:36:19 +08:00
from api . utils . api_utils import get_error_data_result , server_error_response , get_data_error_result , validate_request , not_allowed_parameters
2024-10-18 13:48:57 +08:00
from api . utils import get_uuid
2025-10-09 12:36:19 +08:00
from api . db import PipelineTaskType , StatusEnum , FileSource , VALID_FILE_TYPES , VALID_TASK_STATUS
2024-08-15 09:17:36 +08:00
from api . db . services . knowledgebase_service import KnowledgebaseService
2024-10-18 13:48:57 +08:00
from api . db . db_models import File
2024-08-15 09:17:36 +08:00
from api . utils . api_utils import get_json_result
2024-11-15 17:30:56 +08:00
from api import settings
2024-11-12 14:59:41 +08:00
from rag . nlp import search
2024-11-28 15:46:35 +08:00
from api . constants import DATASET_NAME_LIMIT
2025-01-09 17:07:21 +08:00
from rag . settings import PAGERANK_FLD
2025-10-13 11:53:48 +08:00
from rag . utils . redis_conn import REDIS_CONN
2025-05-25 21:02:51 -05:00
from rag . utils . storage_factory import STORAGE_IMPL
2024-08-15 09:17:36 +08:00
2024-12-08 21:23:51 +08:00
@manager.route('/create', methods=['post'])  # noqa: F821
@login_required
@validate_request("name")
def create():
    """Create a dataset (knowledge base) for the current user.

    The JSON body must contain ``name``. The name is stripped, validated
    (string, non-empty, at most ``DATASET_NAME_LIMIT`` UTF-8 bytes) and then
    passed through ``duplicate_name`` before the record is saved.

    Returns:
        A JSON result carrying ``{"kb_id": <new id>}`` on success, or an
        error result when validation, the tenant lookup or the save fails.
    """
    req = request.json
    dataset_name = req["name"]
    if not isinstance(dataset_name, str):
        return get_data_error_result(message="Dataset name must be string.")

    # Validate the value that is actually stored (the stripped name), so
    # surrounding whitespace cannot push an otherwise valid name over the limit.
    dataset_name = dataset_name.strip()
    if dataset_name == "":
        return get_data_error_result(message="Dataset name can't be empty.")

    # The limit is enforced on the UTF-8 byte length, so report that same
    # number; the character count can be far smaller for multi-byte names.
    name_byte_len = len(dataset_name.encode("utf-8"))
    if name_byte_len > DATASET_NAME_LIMIT:
        return get_data_error_result(
            message=f"Dataset name length is {name_byte_len} which is larger than {DATASET_NAME_LIMIT}")

    dataset_name = duplicate_name(
        KnowledgebaseService.query,
        name=dataset_name,
        tenant_id=current_user.id,
        status=StatusEnum.VALID.value)
    try:
        req["id"] = get_uuid()
        req["name"] = dataset_name
        req["tenant_id"] = current_user.id
        req["created_by"] = current_user.id
        if not req.get("parser_id"):
            req["parser_id"] = "naive"
        found, _tenant = TenantService.get_by_id(current_user.id)
        if not found:
            return get_data_error_result(message="Tenant not found.")

        # The parser configuration is always set to these defaults; any
        # "parser_config" sent by the client is overwritten here.
        req["parser_config"] = {
            "layout_recognize": "DeepDOC",
            "chunk_token_num": 512,
            "delimiter": "\n",
            "auto_keywords": 0,
            "auto_questions": 0,
            "html4excel": False,
            "topn_tags": 3,
            "raptor": {
                "use_raptor": True,
                "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n{cluster_content}\nThe above is the content you need to summarize.",
                "max_token": 256,
                "threshold": 0.1,
                "max_cluster": 64,
                "random_seed": 0
            },
            "graphrag": {
                "use_graphrag": True,
                "entity_types": [
                    "organization",
                    "person",
                    "geo",
                    "event",
                    "category"
                ],
                "method": "light"
            }
        }
        if not KnowledgebaseService.save(**req):
            return get_data_error_result()
        return get_json_result(data={"kb_id": req["id"]})
    except Exception as e:
        return server_error_response(e)
2024-12-08 21:23:51 +08:00
@manager.route('/update', methods=['post'])  # noqa: F821
@login_required
@validate_request("kb_id", "name", "description", "parser_id")
@not_allowed_parameters("id", "tenant_id", "created_by", "create_time", "update_time", "create_date", "update_date", "created_by")
def update():
    """Update a dataset's fields from the JSON request body.

    The name is stripped and validated (string, non-empty, at most
    ``DATASET_NAME_LIMIT`` UTF-8 bytes). The caller must pass
    ``KnowledgebaseService.accessible4deletion`` and be the creator of the
    dataset. A name that differs (case-insensitively) from the current one
    must not collide with another valid dataset of the current user.

    Returns:
        A JSON result with the refreshed dataset dict merged with the
        submitted fields, or an error result.
    """
    req = request.json
    if not isinstance(req["name"], str):
        return get_data_error_result(message="Dataset name must be string.")

    # Validate the stripped name, which is the value that gets persisted.
    name = req["name"].strip()
    if name == "":
        return get_data_error_result(message="Dataset name can't be empty.")

    # Report the same quantity that is compared against the limit (UTF-8 bytes).
    name_byte_len = len(name.encode("utf-8"))
    if name_byte_len > DATASET_NAME_LIMIT:
        return get_data_error_result(
            message=f"Dataset name length is {name_byte_len} which is larger than {DATASET_NAME_LIMIT}")
    req["name"] = name

    if not KnowledgebaseService.accessible4deletion(req["kb_id"], current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )
    try:
        if not KnowledgebaseService.query(
                created_by=current_user.id, id=req["kb_id"]):
            return get_json_result(
                data=False, message='Only owner of knowledgebase authorized for this operation.',
                code=settings.RetCode.OPERATING_ERROR)

        e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
        if not e:
            return get_data_error_result(
                message="Can't find this knowledgebase!")

        if req["name"].lower() != kb.name.lower() \
                and len(
            KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) >= 1:
            return get_data_error_result(
                message="Duplicated knowledgebase name.")

        # "kb_id" is the lookup key, not a field to write back.
        del req["kb_id"]
        if not KnowledgebaseService.update_by_id(kb.id, req):
            return get_data_error_result()

        # Mirror a pagerank change into the document store.
        # NOTE(review): a request without "pagerank" is treated as 0 here, which
        # removes the field from the store while the DB value is left untouched
        # - confirm that clients always send it.
        if kb.pagerank != req.get("pagerank", 0):
            if req.get("pagerank", 0) > 0:
                settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
                                             search.index_name(kb.tenant_id), kb.id)
            else:
                # Elasticsearch requires PAGERANK_FLD be non-zero!
                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
                                             search.index_name(kb.tenant_id), kb.id)

        e, kb = KnowledgebaseService.get_by_id(kb.id)
        if not e:
            return get_data_error_result(
                message="Database error (Knowledgebase rename)!")
        kb = kb.to_dict()
        kb.update(req)

        return get_json_result(data=kb)
    except Exception as e:
        return server_error_response(e)
2024-12-08 21:23:51 +08:00
@manager.route('/detail', methods=['GET'])  # noqa: F821
@login_required
def detail():
    """Return the detail dict of the dataset given by the ``kb_id`` query argument.

    The dataset must be found under one of the tenants returned by
    ``UserTenantService.query`` for the current user. The response adds a
    ``size`` entry and renders the three ``*_task_finish_at`` values, when
    set, as ``YYYY-MM-DD HH:MM:SS`` strings.
    """
    kb_id = request.args["kb_id"]
    try:
        memberships = UserTenantService.query(user_id=current_user.id)
        # Stops at the first tenant under which the dataset is found.
        reachable = any(
            KnowledgebaseService.query(tenant_id=membership.tenant_id, id=kb_id)
            for membership in memberships
        )
        if not reachable:
            return get_json_result(
                data=False,
                message='Only owner of knowledgebase authorized for this operation.',
                code=settings.RetCode.OPERATING_ERROR)

        kb = KnowledgebaseService.get_detail(kb_id)
        if not kb:
            return get_data_error_result(message="Can't find this knowledgebase!")

        kb["size"] = DocumentService.get_total_size_by_kb_id(
            kb_id=kb["id"], keywords="", run_status=[], types=[])

        for field in ("graphrag_task_finish_at", "raptor_task_finish_at", "mindmap_task_finish_at"):
            finished = kb.get(field)
            if finished:
                kb[field] = finished.strftime("%Y-%m-%d %H:%M:%S")

        return get_json_result(data=kb)
    except Exception as e:
        return server_error_response(e)
2025-04-22 17:54:12 +08:00
@manager.route('/list', methods=['POST'])  # noqa: F821
@login_required
def list_kbs():
    """List datasets visible to the current user.

    Query arguments: ``keywords``, ``page``, ``page_size``, ``parser_id``,
    ``orderby`` (default ``create_time``) and ``desc`` (anything but
    ``"false"`` means descending). The JSON body may carry ``owner_ids``;
    when it does, the full result is fetched, filtered to those tenant ids
    and paginated here.

    Returns:
        A JSON result with ``{"kbs": [...], "total": <int>}``.
    """
    args = request.args
    keywords = args.get("keywords", "")
    page_number = int(args.get("page", 0))
    items_per_page = int(args.get("page_size", 0))
    parser_id = args.get("parser_id")
    orderby = args.get("orderby", "create_time")
    desc = args.get("desc", "true").lower() != "false"

    req = request.get_json()
    owner_ids = req.get("owner_ids", [])
    try:
        if owner_ids:
            tenants = owner_ids
            # Fetch everything (page 0 / size 0), then filter and page locally.
            kbs, total = KnowledgebaseService.get_by_tenant_ids(
                tenants, current_user.id, 0,
                0, orderby, desc, keywords, parser_id)
            kbs = [kb for kb in kbs if kb["tenant_id"] in tenants]
            total = len(kbs)
            if page_number and items_per_page:
                start = (page_number - 1) * items_per_page
                stop = page_number * items_per_page
                kbs = kbs[start:stop]
        else:
            joined = TenantService.get_joined_tenants_by_user_id(current_user.id)
            tenants = [member["tenant_id"] for member in joined]
            kbs, total = KnowledgebaseService.get_by_tenant_ids(
                tenants, current_user.id, page_number,
                items_per_page, orderby, desc, keywords, parser_id)
        return get_json_result(data={"kbs": kbs, "total": total})
    except Exception as e:
        return server_error_response(e)
2024-12-08 21:23:51 +08:00
@manager.route('/rm', methods=['post'])  # noqa: F821
@login_required
@validate_request("kb_id")
def rm():
    """Delete a dataset together with its documents, files and index data.

    The caller must pass ``KnowledgebaseService.accessible4deletion`` and be
    the creator of the dataset. The steps run in this order: remove each
    document and its file links, delete the dataset's folder entry, delete
    the dataset row, then drop document-store data, the index and (when the
    storage backend offers ``remove_bucket``) the storage bucket.
    """
    req = request.json
    kb_id = req["kb_id"]
    if not KnowledgebaseService.accessible4deletion(kb_id, current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )
    try:
        kbs = KnowledgebaseService.query(created_by=current_user.id, id=kb_id)
        if not kbs:
            return get_json_result(
                data=False,
                message='Only owner of knowledgebase authorized for this operation.',
                code=settings.RetCode.OPERATING_ERROR)

        for doc in DocumentService.query(kb_id=kb_id):
            removed = DocumentService.remove_document(doc, kbs[0].tenant_id)
            if not removed:
                return get_data_error_result(message="Database error (Document removal)!")
            links = File2DocumentService.get_by_document_id(doc.id)
            if links:
                FileService.filter_delete(
                    [File.source_type == FileSource.KNOWLEDGEBASE, File.id == links[0].file_id])
            File2DocumentService.delete_by_document_id(doc.id)

        # Remove the folder entry named after the dataset.
        FileService.filter_delete(
            [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])

        if not KnowledgebaseService.delete_by_id(kb_id):
            return get_data_error_result(message="Database error (Knowledgebase removal)!")

        for kb in kbs:
            settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
            settings.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id)
            # Not every storage backend exposes bucket removal.
            if hasattr(STORAGE_IMPL, 'remove_bucket'):
                STORAGE_IMPL.remove_bucket(kb.id)
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
2025-01-09 17:07:21 +08:00
@manager.route('/<kb_id>/tags', methods=['GET'])  # noqa: F821
@login_required
def list_tags(kb_id):
    """Return the tags of dataset ``kb_id``.

    After the access check, ``settings.retriever.all_tags`` is called once
    per tenant of the current user and the results are concatenated.
    """
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    collected = []
    for tenant in UserTenantService.get_tenants_by_user_id(current_user.id):
        collected.extend(settings.retriever.all_tags(tenant["tenant_id"], [kb_id]))
    return get_json_result(data=collected)
@manager.route('/tags', methods=['GET'])  # noqa: F821
@login_required
def list_tags_from_kbs():
    """Return the tags of the datasets listed in the ``kb_ids`` query argument.

    ``kb_ids`` is a comma-separated list. Every id must pass the access
    check; the first one that fails yields a "No authorization." result.
    """
    kb_ids = request.args.get("kb_ids", "").split(",")
    # A missing argument yields [""], so the empty id is access-checked too.
    for candidate in kb_ids:
        if KnowledgebaseService.accessible(candidate, current_user.id):
            continue
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    collected = []
    for tenant in UserTenantService.get_tenants_by_user_id(current_user.id):
        collected.extend(settings.retriever.all_tags(tenant["tenant_id"], kb_ids))
    return get_json_result(data=collected)
@manager.route('/<kb_id>/rm_tags', methods=['POST'])  # noqa: F821
@login_required
def rm_tags(kb_id):
    """Remove each tag listed in the JSON body's ``tags`` from dataset ``kb_id``.

    One document-store update is issued per tag.
    """
    req = request.json
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    # NOTE(review): the lookup's success flag is not checked before kb is used.
    _found, kb = KnowledgebaseService.get_by_id(kb_id)
    for tag in req["tags"]:
        condition = {"tag_kwd": tag, "kb_id": [kb_id]}
        change = {"remove": {"tag_kwd": tag}}
        settings.docStoreConn.update(condition, change, search.index_name(kb.tenant_id), kb_id)
    return get_json_result(data=True)
@manager.route('/<kb_id>/rename_tag', methods=['POST'])  # noqa: F821
@login_required
def rename_tags(kb_id):
    """Rename a tag in dataset ``kb_id``.

    The JSON body supplies ``from_tag`` and ``to_tag``. A single
    document-store update removes the old tag and adds the new one.
    """
    req = request.json
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    # NOTE(review): the lookup's success flag is not checked before kb is used.
    _found, kb = KnowledgebaseService.get_by_id(kb_id)
    # The match condition uses from_tag as sent; the removal uses its stripped form.
    condition = {"tag_kwd": req["from_tag"], "kb_id": [kb_id]}
    change = {
        "remove": {"tag_kwd": req["from_tag"].strip()},
        "add": {"tag_kwd": req["to_tag"]},
    }
    settings.docStoreConn.update(condition, change, search.index_name(kb.tenant_id), kb_id)
    return get_json_result(data=True)
@manager.route('/<kb_id>/knowledge_graph', methods=['GET'])  # noqa: F821
@login_required
def knowledge_graph(kb_id):
    """Return the stored knowledge graph of dataset ``kb_id``.

    The response data is ``{"graph": ..., "mind_map": ...}``. Both stay
    empty dicts when the index does not exist or the search finds nothing.
    Nodes are cut to the 256 highest ``pagerank`` values; edges are limited
    to non-self-loops between kept nodes and cut to the 128 highest
    ``weight`` values.
    """
    if not KnowledgebaseService.accessible(kb_id, current_user.id):
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )
    _, kb = KnowledgebaseService.get_by_id(kb_id)
    query = {
        "kb_id": [kb_id],
        "knowledge_graph_kwd": ["graph"]
    }

    obj = {"graph": {}, "mind_map": {}}
    if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), kb_id):
        return get_json_result(data=obj)

    sres = settings.retriever.search(query, search.index_name(kb.tenant_id), [kb_id])
    if len(sres.ids) == 0:
        return get_json_result(data=obj)

    # Only the first hit is considered.
    for chunk_id in sres.ids[:1]:
        fields = sres.field[chunk_id]
        ty = fields["knowledge_graph_kwd"]
        try:
            content_json = json.loads(fields["content_with_weight"])
        except Exception:
            continue
        obj[ty] = content_json

    graph = obj["graph"]
    if "nodes" in graph:
        graph["nodes"] = sorted(
            graph["nodes"], key=lambda node: node.get("pagerank", 0), reverse=True)[:256]
        if "edges" in graph:
            kept_ids = {node["id"] for node in graph["nodes"]}
            usable_edges = [
                edge for edge in graph["edges"]
                if edge["source"] != edge["target"]
                and edge["source"] in kept_ids
                and edge["target"] in kept_ids
            ]
            graph["edges"] = sorted(
                usable_edges, key=lambda edge: edge.get("weight", 0), reverse=True)[:128]
    return get_json_result(data=obj)
2025-08-12 14:12:56 +08:00
2025-03-27 17:16:48 +08:00
@manager.route('/<kb_id>/knowledge_graph', methods=['DELETE'])  # noqa: F821
@login_required
def delete_knowledge_graph(kb_id):
    """Delete the knowledge-graph entries of dataset ``kb_id`` from the document store.

    Removes entries whose ``knowledge_graph_kwd`` is one of ``graph``,
    ``subgraph``, ``entity`` or ``relation``.
    """
    allowed = KnowledgebaseService.accessible(kb_id, current_user.id)
    if not allowed:
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    _, kb = KnowledgebaseService.get_by_id(kb_id)
    graph_filter = {"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}
    settings.docStoreConn.delete(graph_filter, search.index_name(kb.tenant_id), kb_id)
    return get_json_result(data=True)
2025-08-12 14:12:56 +08:00
@manager.route("/get_meta", methods=["GET"])  # noqa: F821
@login_required
def get_meta():
    """Return ``DocumentService.get_meta_by_kbs`` for the comma-separated ``kb_ids`` query argument.

    Every id must pass the access check; the first failure yields a
    "No authorization." result.
    """
    kb_ids = request.args.get("kb_ids", "").split(",")
    for candidate in kb_ids:
        if KnowledgebaseService.accessible(candidate, current_user.id):
            continue
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )
    meta = DocumentService.get_meta_by_kbs(kb_ids)
    return get_json_result(data=meta)
2025-09-18 09:52:33 +08:00
@manager.route("/basic_info", methods=["GET"])  # noqa: F821
@login_required
def get_basic_info():
    """Return ``DocumentService.knowledgebase_basic_info`` for the ``kb_id`` query argument."""
    kb_id = request.args.get("kb_id", "")
    allowed = KnowledgebaseService.accessible(kb_id, current_user.id)
    if not allowed:
        return get_json_result(
            data=False,
            message='No authorization.',
            code=settings.RetCode.AUTHENTICATION_ERROR
        )

    return get_json_result(data=DocumentService.knowledgebase_basic_info(kb_id))
2025-10-09 12:36:19 +08:00
@manager.route("/list_pipeline_logs", methods=["POST"])  # noqa: F821
@login_required
def list_pipeline_logs():
    """List file-level pipeline operation logs of a dataset.

    Query arguments: ``kb_id`` (required), ``keywords``, ``page``,
    ``page_size``, ``orderby``, ``desc``, ``create_date_from`` and
    ``create_date_to``. The JSON body may carry ``operation_status``,
    ``types`` and ``suffix`` filters; the first two are validated against
    ``VALID_TASK_STATUS`` and ``VALID_FILE_TYPES``.

    Returns:
        A JSON result with ``{"total": <int>, "logs": [...]}``.
    """
    args = request.args
    kb_id = args.get("kb_id")
    if not kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    keywords = args.get("keywords", "")
    page_number = int(args.get("page", 0))
    items_per_page = int(args.get("page_size", 0))
    orderby = args.get("orderby", "create_time")
    desc = args.get("desc", "true").lower() != "false"

    create_date_from = args.get("create_date_from", "")
    create_date_to = args.get("create_date_to", "")
    # NOTE(review): plain string comparison that rejects the request when
    # create_date_to sorts after create_date_from - confirm this direction
    # matches how the service interprets the two bounds.
    if create_date_to > create_date_from:
        return get_data_error_result(message="Create data filter is abnormal.")

    req = request.get_json()

    operation_status = req.get("operation_status", [])
    if operation_status:
        invalid_status = {status for status in operation_status if status not in VALID_TASK_STATUS}
        if invalid_status:
            return get_data_error_result(message=f"Invalid filter operation_status status conditions: {', '.join(invalid_status)}")

    types = req.get("types", [])
    if types:
        invalid_types = {file_type for file_type in types if file_type not in VALID_FILE_TYPES}
        if invalid_types:
            return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")

    suffix = req.get("suffix", [])

    try:
        logs, tol = PipelineOperationLogService.get_file_logs_by_kb_id(
            kb_id, page_number, items_per_page, orderby, desc, keywords,
            operation_status, types, suffix, create_date_from, create_date_to)
        return get_json_result(data={"total": tol, "logs": logs})
    except Exception as e:
        return server_error_response(e)
@manager.route("/list_pipeline_dataset_logs", methods=["POST"])  # noqa: F821
@login_required
def list_pipeline_dataset_logs():
    """List dataset-level pipeline operation logs of a dataset.

    Query arguments: ``kb_id`` (required), ``page``, ``page_size``,
    ``orderby``, ``desc``, ``create_date_from`` and ``create_date_to``. The
    JSON body may carry an ``operation_status`` filter, validated against
    ``VALID_TASK_STATUS``.

    Returns:
        A JSON result with ``{"total": <int>, "logs": [...]}``.
    """
    args = request.args
    kb_id = args.get("kb_id")
    if not kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    page_number = int(args.get("page", 0))
    items_per_page = int(args.get("page_size", 0))
    orderby = args.get("orderby", "create_time")
    desc = args.get("desc", "true").lower() != "false"

    create_date_from = args.get("create_date_from", "")
    create_date_to = args.get("create_date_to", "")
    # NOTE(review): plain string comparison that rejects the request when
    # create_date_to sorts after create_date_from - confirm this direction
    # matches how the service interprets the two bounds.
    if create_date_to > create_date_from:
        return get_data_error_result(message="Create data filter is abnormal.")

    req = request.get_json()
    operation_status = req.get("operation_status", [])
    if operation_status:
        invalid_status = {status for status in operation_status if status not in VALID_TASK_STATUS}
        if invalid_status:
            return get_data_error_result(message=f"Invalid filter operation_status status conditions: {', '.join(invalid_status)}")

    try:
        logs, tol = PipelineOperationLogService.get_dataset_logs_by_kb_id(
            kb_id, page_number, items_per_page, orderby, desc,
            operation_status, create_date_from, create_date_to)
        return get_json_result(data={"total": tol, "logs": logs})
    except Exception as e:
        return server_error_response(e)
@manager.route("/delete_pipeline_logs", methods=["POST"])  # noqa: F821
@login_required
def delete_pipeline_logs():
    """Delete the pipeline logs whose ids are listed in the JSON body's ``log_ids``.

    The ``kb_id`` query argument is required.
    """
    # NOTE(review): kb_id is only checked for presence; it is neither
    # access-checked nor used to scope the deletion - confirm that is intended.
    if not request.args.get("kb_id"):
        return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)

    payload = request.get_json()
    PipelineOperationLogService.delete_by_ids(payload.get("log_ids", []))
    return get_json_result(data=True)
@manager.route("/pipeline_log_detail", methods=["GET"])  # noqa: F821
@login_required
def pipeline_log_detail():
    """Return one pipeline log, selected by the ``log_id`` query argument, as a dict."""
    log_id = request.args.get("log_id")
    if not log_id:
        return get_json_result(data=False, message='Lack of "Pipeline log ID"', code=settings.RetCode.ARGUMENT_ERROR)

    found, record = PipelineOperationLogService.get_by_id(log_id)
    if found:
        return get_json_result(data=record.to_dict())
    return get_data_error_result(message="Invalid pipeline log ID")
@manager.route("/run_graphrag", methods=["POST"])  # noqa: F821
@login_required
def run_graphrag():
    """Queue a GraphRAG task covering every document of a dataset.

    The JSON body supplies ``kb_id``. The request is refused while the
    dataset's recorded GraphRAG task has a progress other than ``-1`` or
    ``1``. The new task id is stored on the dataset and returned as
    ``{"graphrag_task_id": <id>}``.
    """
    req = request.json

    kb_id = req.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    ok, kb = KnowledgebaseService.get_by_id(kb_id)
    if not ok:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    previous_task_id = kb.graphrag_task_id
    if previous_task_id:
        found, task = TaskService.get_by_id(previous_task_id)
        if not found:
            logging.warning(f"A valid GraphRAG task id is expected for kb {kb_id}")

        if task and task.progress not in [-1, 1]:
            return get_error_data_result(message=f"Task {previous_task_id} in progress with status {task.progress}. A Graph Task is already running.")

    # Page 0 / size 0 together with empty filters requests the unfiltered listing.
    documents, _ = DocumentService.get_by_kb_id(
        kb_id=kb_id,
        page_number=0,
        items_per_page=0,
        orderby="create_time",
        desc=False,
        keywords="",
        run_status=[],
        types=[],
        suffix=[],
    )
    if not documents:
        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")

    # The first document is passed as `doc`; all ids go along as `doc_ids`.
    task_id = queue_raptor_o_graphrag_tasks(
        doc=documents[0],
        ty="graphrag",
        priority=0,
        fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID,
        doc_ids=[document["id"] for document in documents],
    )

    saved = KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id})
    if not saved:
        logging.warning(f"Cannot save graphrag_task_id for kb {kb_id}")

    return get_json_result(data={"graphrag_task_id": task_id})
@manager.route("/trace_graphrag", methods=["GET"])  # noqa: F821
@login_required
def trace_graphrag():
    """Return the dataset's recorded GraphRAG task as a dict.

    The dataset is selected by the ``kb_id`` query argument. An empty dict
    is returned when no task id is recorded on the dataset.
    """
    kb_id = request.args.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    kb_found, kb = KnowledgebaseService.get_by_id(kb_id)
    if not kb_found:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    task_id = kb.graphrag_task_id
    if not task_id:
        return get_json_result(data={})

    task_found, task = TaskService.get_by_id(task_id)
    if task_found:
        return get_json_result(data=task.to_dict())
    return get_error_data_result(message="GraphRAG Task Not Found or Error Occurred")
@manager.route("/run_raptor", methods=["POST"])  # noqa: F821
@login_required
def run_raptor():
    """Queue a RAPTOR task covering every document of a dataset.

    The JSON body supplies ``kb_id``. The request is refused while the
    dataset's recorded RAPTOR task has a progress other than ``-1`` or
    ``1``. The new task id is stored on the dataset and returned as
    ``{"raptor_task_id": <id>}``.
    """
    req = request.json

    kb_id = req.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    ok, kb = KnowledgebaseService.get_by_id(kb_id)
    if not ok:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    previous_task_id = kb.raptor_task_id
    if previous_task_id:
        found, task = TaskService.get_by_id(previous_task_id)
        if not found:
            logging.warning(f"A valid RAPTOR task id is expected for kb {kb_id}")

        if task and task.progress not in [-1, 1]:
            return get_error_data_result(message=f"Task {previous_task_id} in progress with status {task.progress}. A RAPTOR Task is already running.")

    # Page 0 / size 0 together with empty filters requests the unfiltered listing.
    documents, _ = DocumentService.get_by_kb_id(
        kb_id=kb_id,
        page_number=0,
        items_per_page=0,
        orderby="create_time",
        desc=False,
        keywords="",
        run_status=[],
        types=[],
        suffix=[],
    )
    if not documents:
        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")

    # The first document is passed as `doc`; all ids go along as `doc_ids`.
    task_id = queue_raptor_o_graphrag_tasks(
        doc=documents[0],
        ty="raptor",
        priority=0,
        fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID,
        doc_ids=[document["id"] for document in documents],
    )

    saved = KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id})
    if not saved:
        logging.warning(f"Cannot save raptor_task_id for kb {kb_id}")

    return get_json_result(data={"raptor_task_id": task_id})
@manager.route("/trace_raptor", methods=["GET"])  # noqa: F821
@login_required
def trace_raptor():
    """Return the dataset's recorded RAPTOR task as a dict.

    The dataset is selected by the ``kb_id`` query argument. An empty dict
    is returned when no task id is recorded on the dataset.
    """
    kb_id = request.args.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    kb_found, kb = KnowledgebaseService.get_by_id(kb_id)
    if not kb_found:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    task_id = kb.raptor_task_id
    if not task_id:
        return get_json_result(data={})

    task_found, task = TaskService.get_by_id(task_id)
    if task_found:
        return get_json_result(data=task.to_dict())
    return get_error_data_result(message="RAPTOR Task Not Found or Error Occurred")
@manager.route("/run_mindmap", methods=["POST"])  # noqa: F821
@login_required
def run_mindmap():
    """Queue a Mindmap task covering every document of a dataset.

    The JSON body supplies ``kb_id``. The request is refused while the
    dataset's recorded Mindmap task has a progress other than ``-1`` or
    ``1``. The new task id is stored on the dataset and returned as
    ``{"mindmap_task_id": <id>}``.
    """
    req = request.json

    kb_id = req.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    ok, kb = KnowledgebaseService.get_by_id(kb_id)
    if not ok:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    previous_task_id = kb.mindmap_task_id
    if previous_task_id:
        found, task = TaskService.get_by_id(previous_task_id)
        if not found:
            logging.warning(f"A valid Mindmap task id is expected for kb {kb_id}")

        if task and task.progress not in [-1, 1]:
            return get_error_data_result(message=f"Task {previous_task_id} in progress with status {task.progress}. A Mindmap Task is already running.")

    # Page 0 / size 0 together with empty filters requests the unfiltered listing.
    documents, _ = DocumentService.get_by_kb_id(
        kb_id=kb_id,
        page_number=0,
        items_per_page=0,
        orderby="create_time",
        desc=False,
        keywords="",
        run_status=[],
        types=[],
        suffix=[],
    )
    if not documents:
        return get_error_data_result(message=f"No documents in Knowledgebase {kb_id}")

    # The first document is passed as `doc`; all ids go along as `doc_ids`.
    task_id = queue_raptor_o_graphrag_tasks(
        doc=documents[0],
        ty="mindmap",
        priority=0,
        fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID,
        doc_ids=[document["id"] for document in documents],
    )

    saved = KnowledgebaseService.update_by_id(kb.id, {"mindmap_task_id": task_id})
    if not saved:
        logging.warning(f"Cannot save mindmap_task_id for kb {kb_id}")

    return get_json_result(data={"mindmap_task_id": task_id})
@manager.route("/trace_mindmap", methods=["GET"])  # noqa: F821
@login_required
def trace_mindmap():
    """Return the dataset's recorded Mindmap task as a dict.

    The dataset is selected by the ``kb_id`` query argument. An empty dict
    is returned when no task id is recorded on the dataset.
    """
    kb_id = request.args.get("kb_id", "")
    if not kb_id:
        return get_error_data_result(message='Lack of "KB ID"')

    kb_found, kb = KnowledgebaseService.get_by_id(kb_id)
    if not kb_found:
        return get_error_data_result(message="Invalid Knowledgebase ID")

    task_id = kb.mindmap_task_id
    if not task_id:
        return get_json_result(data={})

    task_found, task = TaskService.get_by_id(task_id)
    if task_found:
        return get_json_result(data=task.to_dict())
    return get_error_data_result(message="Mindmap Task Not Found or Error Occurred")
@manager.route ( " /unbind_task " , methods = [ " DELETE " ] ) # noqa: F821
@login_required
def delete_kb_task ( ) :
kb_id = request . args . get ( " kb_id " , " " )
if not kb_id :
return get_error_data_result ( message = ' Lack of " KB ID " ' )
ok , kb = KnowledgebaseService . get_by_id ( kb_id )
if not ok :
return get_json_result ( data = True )
pipeline_task_type = request . args . get ( " pipeline_task_type " , " " )
if not pipeline_task_type or pipeline_task_type not in [ PipelineTaskType . GRAPH_RAG , PipelineTaskType . RAPTOR , PipelineTaskType . MINDMAP ] :
return get_error_data_result ( message = " Invalid task type " )
match pipeline_task_type :
case PipelineTaskType . GRAPH_RAG :
settings . docStoreConn . delete ( { " knowledge_graph_kwd " : [ " graph " , " subgraph " , " entity " , " relation " ] } , search . index_name ( kb . tenant_id ) , kb_id )
2025-10-13 11:53:48 +08:00
kb_task_id_field = " graphrag_task_id "
task_id = kb . graphrag_task_id
2025-10-09 12:36:19 +08:00
kb_task_finish_at = " graphrag_task_finish_at "
case PipelineTaskType . RAPTOR :
2025-10-13 11:53:48 +08:00
kb_task_id_field = " raptor_task_id "
task_id = kb . raptor_task_id
2025-10-09 12:36:19 +08:00
kb_task_finish_at = " raptor_task_finish_at "
case PipelineTaskType . MINDMAP :
2025-10-13 11:53:48 +08:00
kb_task_id_field = " mindmap_task_id "
task_id = kb . mindmap_task_id
2025-10-09 12:36:19 +08:00
kb_task_finish_at = " mindmap_task_finish_at "
case _ :
return get_error_data_result ( message = " Internal Error: Invalid task type " )
2025-10-13 11:53:48 +08:00
def cancel_task ( task_id ) :
REDIS_CONN . set ( f " { task_id } -cancel " , " x " )
cancel_task ( task_id )
ok = KnowledgebaseService . update_by_id ( kb_id , { kb_task_id_field : " " , kb_task_finish_at : None } )
2025-10-09 12:36:19 +08:00
if not ok :
return server_error_response ( f " Internal error: cannot delete task { pipeline_task_type } " )
return get_json_result ( data = True )