mirror of
https://github.com/OpenSPG/KAG.git
synced 2025-07-17 05:56:56 +00:00

* add think cost * update csv scanner * add final rerank * add reasoner * add iterative planner * fix dpr search * fix dpr search * add reference data * move odps import * update requirement.txt * update 2wiki * add missing file * fix markdown reader * add iterative planning * update version * update runner * update 2wiki example * update bridge * merge solver and solver_new * add cur day * writer delete * update multi process * add missing files * fix report * add chunk retrieved executor * update try in stream runner result * add path * add math executor * update hotpotqa example * remove log * fix python coder solver * update hotpotqa example * fix python coder solver * update config * fix bad * add log * remove unused code * commit with task thought * move kag model to common * add default chat llm * fix * use static planner * support chunk graph node * add args * support naive rag * llm client support tool calls * add default async * add openai * fix result * fix markdown reader * fix thinker * update asyncio interface * feat(solver): add mcp support (#444) * 上传mcp client相关代码 * 1、完成一套mcp client的调用,从pipeline到planner、executor 2、允许json中传入多个mcp_server,通过大模型进行调用并选择 3、调通baidu_map_mcp的使用 * 1、schema * bugfix:删减冗余代码 --------- Co-authored-by: wanxingyu.wxy <wanxingyu.wxy@antgroup.com> * fix affairqa after solver refactor * fix affairqa after solver refactor * fix readme * add params * update version * update mcp executor * update mcp executor * solver add mcp executor * add missing file * add mpc executor * add executor * x * update * fix requirement * fix main llm config * fix solver * bugfix:修复invoke函数调用逻辑 * chg eva * update example * add kag layer * add step task * support dot refresh * support dot refresh * support dot refresh * support dot refresh * add retrieved num * add retrieved num * add pipelineconf * update ppr * update musique prompts * update * add to_dict for BuilderComponentData * async build * add deduce prompt * add deduce prompt * add deduce prompt * 
fix reader * add deduce prompt * add page thinker report * modify prmpt * add step status * add self cognition * add self cognition * add memory graph storage * add now time * update memory config * add now time * chg graph loader * 添加prqa数据集和代码 * bugfix:prqa调用逻辑修复 * optimize:优化代码逻辑,生成答案规范化 * add retry py code * update memory graph * update memory graph * fix * fix ner * add with_out_refer generator prompt * fix * close ckpt * fix query * fix query * update version * add llm checker * add llm checker * 1、上传evalutor.py以及修改gold_answer.json格式 2、优化代码逻辑 3、修改README.md文件 * update exp * update exp * rerank support * add static rewrite query * recall more chunks * fix graph load * add static rewrite query * fix bugs * add finish check * add finish check * add finish check * add finish check * 1、上传evalutor.py的结果 2、优化代码逻辑,优化readme文件 * add lf retry * add memory graph api * fix reader api * add ner * add metrics * fix bug * remove ner * add reraise fo retry * add edge prop to memory graph * add memory graph * 1、评测数据集结果修正 2、优化evaluator.py代码 3、删除结果不存在而gold_answer中有答案的问题 * 删除评测结果文件 * fix knext host addr * async eva * add lf prompt * add lf prompt * add config * add retry * add unknown check * add rc result * add rc result * add rc result * add rc result * 依据kag pipeline格式修改代码逻辑并通过测试 * bugfix:删除冗余代码 * fix report prompt * bugfix:触发重试机制 * bugfix:中文符号错误 * fix rethinker prompt * update version to 0.6.2b78 * update version * 1、修改evaluator.py,通过大模型计算准确率,符合最新调用逻辑 2、修改prompt,让没有回答的结果重复测试 * update affairqa for evaluate * update affairqa for evaluate * bugfix:修正数据集 * bugfix:修正数据集 * bugfix:修正数据集 * fix name conflict * bugfix:删除错误问题 * bugfix:文件名命名错误导致evaluator失败 * update for affairqa eval * bugfix:修改代码保持evaluate逻辑一致 * x * update for affairqa readme * remove temp eval scripts * bugfix for math deduce * merge 0.6.2_dev * merge 0.6.2_dev * fix * update client addr * updated version * update for affairqa eval * evaUtils 支持中文 * fix affairqa eval: * remove unused example * update kag config * fix 
default value * update readme * fix init * 注释信息修改,并添加部分class说明 * update example config * Tc 0.7.0 (#459) * 提交affairQA 代码 * fix affairqa eval --------- Co-authored-by: zhengke.gzk <zhengke.gzk@antgroup.com> * fix all examples * reformat --------- Co-authored-by: peilong <peilong.zpl@antgroup.com> Co-authored-by: 锦呈 <zhangxinhong.zxh@antgroup.com> Co-authored-by: wanxingyu.wxy <wanxingyu.wxy@antgroup.com> Co-authored-by: zhengke.gzk <zhengke.gzk@antgroup.com>
215 lines
6.8 KiB
Python
215 lines
6.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright 2023 OpenSPG Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
|
# in compliance with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
# or implied.
|
|
import os
|
|
import json
|
|
import argparse
|
|
import tempfile
|
|
import requests
|
|
from git import Repo
|
|
from kag.bin.base import Command
|
|
from kag.common.registry import Registrable
|
|
from kag.common.conf import KAG_PROJECT_CONF
|
|
from kag.common.utils import bold, green, reset
|
|
from openai import NotFoundError
|
|
|
|
|
|
@Command.register("submit_builder_job")
class BuilderJobSubmit(Command):
    """CLI command that submits a distributed builder job to an SPG server.

    The command assembles a single shell command line (clone the repository,
    check out a commit, install the project, optionally run an init script
    and a Python entry script) and POSTs it, together with the requested
    worker resources, to ``<host_addr>/public/v1/builder/kag/submit``.
    """

    def add_to_parser(self, subparsers: argparse._SubParsersAction):
        """Register the ``builder`` sub-command and all of its options.

        Args:
            subparsers: The sub-parser collection the ``builder`` parser is
                added to.
        """
        parser = subparsers.add_parser(
            "builder", help="Submit distributed builder jobs to cluster"
        )

        parser.add_argument(
            "--user_number",
            type=str,
            help="User number",
        )

        parser.add_argument(
            "--host_addr",
            default=None,
            help="Host address of SPG server.",
        )

        parser.add_argument(
            "--project_id",
            default=None,
            help="Project ID in SPG server.",
        )

        parser.add_argument(
            "--git_url",
            required=True,
            type=str,
            help="Git repository URL containing project source code (supports SSH/HTTP)",
        )

        parser.add_argument(
            "--commit_id",
            required=True,
            type=str,
            # The original help text carried a "(supports SSH/HTTP)" suffix
            # copied from --git_url, which is meaningless for a commit id.
            help="Git commit id of the project source code to check out",
        )

        parser.add_argument(
            "--init_script",
            default=None,
            help="Bash script path for worker container initialization.",
        )

        parser.add_argument(
            "--entry_script",
            type=str,
            default=None,
            help="Python entry script path. \n"
            "Will be executed as: python <entry_script>",
        )

        parser.add_argument("--image", type=str, help="Worker image.")
        parser.add_argument("--pool", type=str, help="Worker resource pool.")

        parser.add_argument(
            "--num_workers",
            type=int,
            default=1,
            help="Number of parallel worker instances. \n",
        )

        parser.add_argument(
            "--num_gpus",
            type=int,
            default=0,
            help="GPUs per worker. Requires NVIDIA CUDA-enabled cluster. \n",
        )

        parser.add_argument(
            "--num_cpus", type=int, default=8, help="CPU cores per worker."
        )

        # Storage resource configuration (memory and ephemeral disk).
        parser.add_argument(
            "--memory",
            type=int,
            default=8,
            help="Memory allocation per worker (GB).",
        )

        parser.add_argument(
            "--storage",
            type=int,
            default=50,
            help="Ephemeral disk space per worker (GB).",
        )

        parser.add_argument(
            "--env",
            type=str,
            default="",
            help="Environment variables, with each variable formatted as key=value and separated by commas: k1=v1, k2=v2",
        )

        parser.add_argument(
            "--validity_check",
            action="store_true",
            help="Perform validity check.",
        )

        # get_handler() is not defined in this class; presumably it is
        # provided by the Command base class and resolves to `handler`.
        parser.set_defaults(func=self.get_handler())

    @staticmethod
    def get_cls(cls_name):
        """Return the registered interface class whose ``__name__`` is *cls_name*.

        Args:
            cls_name: Class name to look up among the classes returned by
                ``Registrable.list_all_registered(with_leaf_classes=False)``.

        Raises:
            ValueError: If no registered class has that name.
        """
        interface_classes = Registrable.list_all_registered(with_leaf_classes=False)
        for item in interface_classes:
            if item.__name__ == cls_name:
                return item
        raise ValueError(f"class {cls_name} is not a valid kag configurable class")

    @staticmethod
    def validity_check(args: argparse.Namespace):
        """Clone the repository locally and verify the referenced scripts exist.

        The repository at ``args.git_url`` is cloned into a temporary
        directory, ``args.commit_id`` is checked out, and the init / entry
        script paths (when provided) are checked relative to the repo root.

        Args:
            args: Parsed command-line arguments.

        Raises:
            FileNotFoundError: If ``args.init_script`` is given but missing.
            ValueError: If ``args.entry_script`` is given but missing.
        """
        with tempfile.TemporaryDirectory() as local_dir:
            repo = Repo.clone_from(args.git_url, local_dir)
            repo.git.checkout(args.commit_id)

            if args.init_script is not None:
                if not os.path.exists(os.path.join(local_dir, args.init_script)):
                    # openai.NotFoundError cannot be built from a message
                    # alone (it requires `response` and `body`), so raising
                    # it here produced a TypeError instead of this message.
                    raise FileNotFoundError(
                        f"init script {args.init_script} not found in git repo"
                    )

            # --entry_script is optional (default None); os.path.join would
            # raise TypeError on None, so only check it when it was provided.
            if args.entry_script is not None:
                if not os.path.exists(os.path.join(local_dir, args.entry_script)):
                    raise ValueError(
                        f"entry script {args.entry_script} not found in git repo"
                    )

    @staticmethod
    def _build_command(args: argparse.Namespace) -> str:
        """Build the ``&&``-joined shell command line executed by each worker."""
        work_dir = "src"
        # NOTE(review): CLI values are interpolated into the shell command
        # unquoted, as before; paths containing spaces or shell
        # metacharacters are therefore interpreted by the worker's shell.
        cmds = [
            f"git clone {args.git_url} {work_dir}",
            f"cd {work_dir}",
            f"git checkout {args.commit_id}",
            "/openspg_venv/bin/pip3.8 install -e . -i https://artifacts.antgroup-inc.cn/artifact/repositories/simple-dev/",
        ]

        if args.init_script is not None:
            cmds.append(f"sh {args.init_script}")

        if args.entry_script is not None:
            entry_script_dir = os.path.dirname(args.entry_script)
            entry_script_name = os.path.basename(args.entry_script)
            entry_cmd = f"python {entry_script_name}"
            # A script at the repo root has an empty dirname; a bare `cd`
            # would jump to $HOME and the script would no longer be found.
            if entry_script_dir:
                entry_cmd = f"cd {entry_script_dir} && {entry_cmd}"
            cmds.append(entry_cmd)

        return " && ".join(cmds)

    @staticmethod
    def _parse_envs(env: str) -> dict:
        """Parse a ``k1=v1, k2=v2`` string into a dict of stripped keys/values.

        Blank segments (e.g. from a trailing comma) are ignored and only the
        first ``=`` of each entry separates key from value, so values may
        themselves contain ``=``. Values cannot contain commas.

        Raises:
            ValueError: If an entry has no ``=`` or an empty key.
        """
        envs = {}
        if not env:
            return envs
        for kv in env.split(","):
            if not kv.strip():
                continue
            key, sep, value = kv.partition("=")
            key = key.strip()
            if not sep or not key:
                raise ValueError(
                    f"invalid --env entry {kv.strip()!r}, expected key=value"
                )
            envs[key] = value.strip()
        return envs

    @staticmethod
    def handler(args: argparse.Namespace):
        """Submit the builder job described by *args* to the SPG server.

        Optionally validates the repository first (``--validity_check``),
        then POSTs the job request as JSON and prints the server's JSON
        response.

        Args:
            args: Parsed command-line arguments of the ``builder`` command.

        Raises:
            ValueError: If ``--env`` is malformed or validation fails on the
                entry script.
            FileNotFoundError: If validation fails on the init script.
            requests.HTTPError: If the server answers with an error status.
        """
        if args.validity_check:
            BuilderJobSubmit.validity_check(args)

        command = BuilderJobSubmit._build_command(args)
        envs = BuilderJobSubmit._parse_envs(args.env)

        # An explicit --project_id wins over the project configuration.
        if args.project_id is not None:
            project_id = int(args.project_id)
        else:
            project_id = int(KAG_PROJECT_CONF.project_id)

        req = {
            "projectId": project_id,
            "command": command,
            "workerNum": args.num_workers,
            "workerCpu": args.num_cpus,
            "workerGpu": args.num_gpus,
            # CLI values are in GB; presumably the server expects MB.
            "workerMemory": args.memory * 1024,
            "workerStorage": args.storage * 1024,
            "envs": envs,
        }
        # Optional fields are only sent when the user supplied them.
        if args.image:
            req["image"] = args.image
        if args.pool:
            req["workerPool"] = args.pool
        if args.user_number:
            req["userNumber"] = args.user_number

        if args.host_addr is not None:
            host_addr = args.host_addr.rstrip("/")
        else:
            host_addr = KAG_PROJECT_CONF.host_addr.rstrip("/")
        url = host_addr + "/public/v1/builder/kag/submit"

        rsp = requests.post(url, json=req)
        rsp.raise_for_status()
        print(f"{bold}{green}Success submit job to server, info:{reset}")
        print(json.dumps(rsp.json(), indent=4))
|