2024-01-15 08:46:22 +08:00
#
2024-01-19 19:51:57 +08:00
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
2024-01-15 08:46:22 +08:00
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import uuid
2024-02-27 14:57:34 +08:00
from api . db import LLMType , UserTenantRole
2024-01-17 20:20:42 +08:00
from api . db . db_models import init_database_tables as init_web_db
from api . db . services import UserService
2024-02-27 14:57:34 +08:00
from api . db . services . llm_service import LLMFactoriesService , LLMService , TenantLLMService , LLMBundle
from api . db . services . user_service import TenantService , UserTenantService
from api . settings import CHAT_MDL , EMBEDDING_MDL , ASR_MDL , IMAGE2TEXT_MDL , PARSERS , LLM_FACTORY , API_KEY
2024-01-15 08:46:22 +08:00
def init_superuser ( ) :
user_info = {
" id " : uuid . uuid1 ( ) . hex ,
" password " : " admin " ,
" nickname " : " admin " ,
" is_superuser " : True ,
2024-02-29 14:03:07 +08:00
" email " : " admin@ragflow.io " ,
2024-01-15 08:46:22 +08:00
" creator " : " system " ,
" status " : " 1 " ,
}
2024-02-27 14:57:34 +08:00
tenant = {
" id " : user_info [ " id " ] ,
" name " : user_info [ " nickname " ] + " ‘ s Kingdom" ,
" llm_id " : CHAT_MDL ,
" embd_id " : EMBEDDING_MDL ,
" asr_id " : ASR_MDL ,
" parser_ids " : PARSERS ,
" img2txt_id " : IMAGE2TEXT_MDL
}
usr_tenant = {
" tenant_id " : user_info [ " id " ] ,
" user_id " : user_info [ " id " ] ,
" invited_by " : user_info [ " id " ] ,
" role " : UserTenantRole . OWNER
}
tenant_llm = [ ]
for llm in LLMService . query ( fid = LLM_FACTORY ) :
tenant_llm . append (
{ " tenant_id " : user_info [ " id " ] , " llm_factory " : LLM_FACTORY , " llm_name " : llm . llm_name , " model_type " : llm . model_type ,
" api_key " : API_KEY } )
if not UserService . save ( * * user_info ) :
2024-02-28 15:01:12 +08:00
print ( " \033 [93m【ERROR】 \033 [0mcan ' t init admin. " )
2024-02-27 14:57:34 +08:00
return
2024-02-27 17:51:54 +08:00
TenantService . insert ( * * tenant )
UserTenantService . insert ( * * usr_tenant )
2024-02-27 14:57:34 +08:00
TenantLLMService . insert_many ( tenant_llm )
2024-02-29 14:03:07 +08:00
print ( " 【INFO】Super user initialized. \033 [93memail: admin@ragflow.io, password: admin \033 [0m. Changing the password after logining is strongly recomanded. " )
2024-01-15 08:46:22 +08:00
2024-02-27 14:57:34 +08:00
chat_mdl = LLMBundle ( tenant [ " id " ] , LLMType . CHAT , tenant [ " llm_id " ] )
msg = chat_mdl . chat ( system = " " , history = [ { " role " : " user " , " content " : " Hello! " } ] , gen_conf = { } )
if msg . find ( " ERROR: " ) == 0 :
2024-02-28 18:57:43 +08:00
print ( " \33 [91m【ERROR】 \33 [0m: " , " ' {} ' dosen ' t work. {} " . format ( tenant [ " llm_id " ] , msg ) )
2024-02-27 17:51:54 +08:00
embd_mdl = LLMBundle ( tenant [ " id " ] , LLMType . EMBEDDING , tenant [ " embd_id " ] )
2024-02-28 15:01:12 +08:00
v , c = embd_mdl . encode ( [ " Hello! " ] )
2024-02-27 14:57:34 +08:00
if c == 0 :
2024-02-28 15:01:12 +08:00
print ( " \33 [91m【ERROR】 \33 [0m: " , " ' {} ' dosen ' t work! " . format ( tenant [ " embd_id " ] ) )
2024-02-27 14:57:34 +08:00
2024-01-15 08:46:22 +08:00
def init_llm_factory ( ) :
factory_infos = [ {
" name " : " OpenAI " ,
" logo " : " " ,
" tags " : " LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION " ,
" status " : " 1 " ,
} , {
" name " : " 通义千问 " ,
" logo " : " " ,
" tags " : " LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION " ,
" status " : " 1 " ,
} , {
2024-02-29 18:53:02 +08:00
" name " : " 智谱AI " ,
2024-01-15 08:46:22 +08:00
" logo " : " " ,
" tags " : " LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION " ,
" status " : " 1 " ,
} ,
2024-02-28 15:01:12 +08:00
# {
# "name": "文心一言",
# "logo": "",
# "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
# "status": "1",
# },
2024-01-15 08:46:22 +08:00
]
2024-02-08 17:01:01 +08:00
llm_infos = [
# ---------------------- OpenAI ------------------------
{
2024-01-15 08:46:22 +08:00
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " gpt-3.5-turbo " ,
" tags " : " LLM,CHAT,4K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 4096 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " gpt-3.5-turbo-16k-0613 " ,
" tags " : " LLM,CHAT,16k " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 16385 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " text-embedding-ada-002 " ,
" tags " : " TEXT EMBEDDING,8K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 8191 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . EMBEDDING . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " whisper-1 " ,
" tags " : " SPEECH2TEXT " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 25 * 1024 * 1024 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . SPEECH2TEXT . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " gpt-4 " ,
" tags " : " LLM,CHAT,8K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 8191 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " gpt-4-32k " ,
" tags " : " LLM,CHAT,32K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 32768 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 0 ] [ " name " ] ,
" llm_name " : " gpt-4-vision-preview " ,
" tags " : " LLM,CHAT,IMAGE2TEXT " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 765 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . IMAGE2TEXT . value
2024-02-08 17:01:01 +08:00
} ,
# ----------------------- Qwen -----------------------
{
2024-01-15 08:46:22 +08:00
" fid " : factory_infos [ 1 ] [ " name " ] ,
" llm_name " : " qwen-turbo " ,
" tags " : " LLM,CHAT,8K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 8191 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 1 ] [ " name " ] ,
" llm_name " : " qwen-plus " ,
" tags " : " LLM,CHAT,32K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 32768 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . CHAT . value
} , {
" fid " : factory_infos [ 1 ] [ " name " ] ,
" llm_name " : " text-embedding-v2 " ,
" tags " : " TEXT EMBEDDING,2K " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 2048 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . EMBEDDING . value
} , {
" fid " : factory_infos [ 1 ] [ " name " ] ,
" llm_name " : " paraformer-realtime-8k-v1 " ,
" tags " : " SPEECH2TEXT " ,
2024-01-18 19:28:37 +08:00
" max_tokens " : 25 * 1024 * 1024 ,
2024-01-15 08:46:22 +08:00
" model_type " : LLMType . SPEECH2TEXT . value
} , {
" fid " : factory_infos [ 1 ] [ " name " ] ,
2024-02-19 19:22:17 +08:00
" llm_name " : " qwen-vl-max " ,
2024-01-19 19:51:57 +08:00
" tags " : " LLM,CHAT,IMAGE2TEXT " ,
" max_tokens " : 765 ,
" model_type " : LLMType . IMAGE2TEXT . value
2024-01-15 08:46:22 +08:00
} ,
2024-02-08 17:01:01 +08:00
# ---------------------- ZhipuAI ----------------------
{
2024-02-19 19:22:17 +08:00
" fid " : factory_infos [ 2 ] [ " name " ] ,
2024-02-08 17:01:01 +08:00
" llm_name " : " glm-3-turbo " ,
" tags " : " LLM,CHAT, " ,
" max_tokens " : 128 * 1000 ,
" model_type " : LLMType . CHAT . value
} , {
2024-02-19 19:22:17 +08:00
" fid " : factory_infos [ 2 ] [ " name " ] ,
2024-02-08 17:01:01 +08:00
" llm_name " : " glm-4 " ,
" tags " : " LLM,CHAT, " ,
" max_tokens " : 128 * 1000 ,
" model_type " : LLMType . CHAT . value
} , {
2024-02-19 19:22:17 +08:00
" fid " : factory_infos [ 2 ] [ " name " ] ,
2024-02-08 17:01:01 +08:00
" llm_name " : " glm-4v " ,
" tags " : " LLM,CHAT,IMAGE2TEXT " ,
" max_tokens " : 2000 ,
" model_type " : LLMType . IMAGE2TEXT . value
} ,
{
2024-02-19 19:22:17 +08:00
" fid " : factory_infos [ 2 ] [ " name " ] ,
2024-02-08 17:01:01 +08:00
" llm_name " : " embedding-2 " ,
" tags " : " TEXT EMBEDDING " ,
" max_tokens " : 512 ,
2024-02-28 15:01:12 +08:00
" model_type " : LLMType . EMBEDDING . value
2024-02-08 17:01:01 +08:00
} ,
2024-01-15 08:46:22 +08:00
]
for info in factory_infos :
LLMFactoriesService . save ( * * info )
for info in llm_infos :
LLMService . save ( * * info )
def init_web_data ( ) :
start_time = time . time ( )
if not LLMService . get_all ( ) . count ( ) : init_llm_factory ( )
2024-02-27 14:57:34 +08:00
if not UserService . get_all ( ) . count ( ) :
init_superuser ( )
2024-01-15 08:46:22 +08:00
print ( " init web data success: {} " . format ( time . time ( ) - start_time ) )
if __name__ == ' __main__ ' :
init_web_db ( )
init_web_data ( )