mirror of
https://github.com/OpenSPG/KAG.git
synced 2025-06-27 03:20:08 +00:00
chore(examples): domain KG inject example (#249)
* add timeout param for llm and embedding model * add example * fix title
This commit is contained in:
parent
fb15dcec26
commit
e1fccef44c
@ -46,7 +46,7 @@ class DefaultExternalGraphLoader(ExternalGraphLoaderABC):
|
||||
edges (List[Edge]): A list of Edge objects representing the edges in the graph.
|
||||
match_config (MatchConfig): The configuration for matching query str to graph nodes.
|
||||
"""
|
||||
super().__init__()
|
||||
super().__init__(match_config)
|
||||
self.schema = SchemaClient(project_id=KAG_PROJECT_CONF.project_id).load()
|
||||
for node in nodes:
|
||||
if node.label not in self.schema:
|
||||
|
@ -20,7 +20,9 @@ from kag.interface import (
|
||||
PostProcessorABC,
|
||||
SinkWriterABC,
|
||||
KAGBuilderChain,
|
||||
ExternalGraphLoaderABC,
|
||||
)
|
||||
|
||||
from kag.common.utils import generate_hash_id
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -188,3 +190,41 @@ class DefaultUnstructuredBuilderChain(KAGBuilderChain):
|
||||
ret = inner_future.result()
|
||||
result.append(ret)
|
||||
return result
|
||||
|
||||
|
||||
@KAGBuilderChain.register("domain_kg_inject_chain")
class DomainKnowledgeInjectChain(KAGBuilderChain):
    """
    Builder chain that pipes an external graph loader into a writer,
    optionally passing through a vectorizer in between.

    Registered under the name ``domain_kg_inject_chain``.
    """

    def __init__(
        self,
        external_graph: ExternalGraphLoaderABC,
        writer: SinkWriterABC,
        vectorizer: VectorizerABC = None,
    ):
        """
        Initializes the DomainKnowledgeInjectChain instance.

        Args:
            external_graph (ExternalGraphLoaderABC): Loader component that supplies the external graph.
            writer (SinkWriterABC): Writer component placed at the end of the chain.
            vectorizer (VectorizerABC, optional): Vectorizer component inserted between the
                loader and the writer when provided. Defaults to None.
        """
        self.external_graph = external_graph
        self.writer = writer
        self.vectorizer = vectorizer

    def build(self, **kwargs):
        """
        Compose the components into a chain with the ``>>`` operator.

        Args:
            **kwargs: Additional keyword arguments (not used by this implementation).

        Returns:
            The result of ``external_graph >> vectorizer >> writer`` when a vectorizer
            is set, otherwise the result of ``external_graph >> writer``.
        """
        # Without a (truthy) vectorizer the loader feeds the writer directly.
        if not self.vectorizer:
            return self.external_graph >> self.writer

        return self.external_graph >> self.vectorizer >> self.writer
|
||||
|
82
kag/examples/domain_kg/README.md
Normal file
82
kag/examples/domain_kg/README.md
Normal file
@ -0,0 +1,82 @@
|
||||
# KAG Example: DomainKG
|
||||
|
||||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
This example provides a case of knowledge injection in the medical domain, where the nodes of the domain knowledge graph are medical terms, and the relationships are defined as "isA." The document contains an introduction to a selection of medical terms.
|
||||
|
||||
## 1. Precondition
|
||||
|
||||
Please refer to [Quick Start](https://openspg.yuque.com/ndx6g9/cwh47i/rs7gr8g4s538b1n7) to install KAG and its dependency OpenSPG server, and learn about using KAG in developer mode.
|
||||
|
||||
## 2. Steps to reproduce
|
||||
|
||||
### Step 1: Enter the example directory
|
||||
|
||||
```bash
|
||||
cd kag/examples/domain_kg
|
||||
```
|
||||
|
||||
### Step 2: Configure models
|
||||
|
||||
Update the generative model configurations ``openie_llm`` and ``chat_llm`` and the representational model configuration ``vectorize_model`` in [kag_config.yaml](./kag_config.yaml).
|
||||
|
||||
You need to fill in correct ``api_key``s. If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
|
||||
|
||||
### Step 3: Project initialization
|
||||
|
||||
Initiate the project with the following command.
|
||||
|
||||
```bash
|
||||
knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
|
||||
```
|
||||
|
||||
### Step 4: Commit the schema
|
||||
|
||||
Execute the following command to commit the schema [DomainKG.schema](./schema/DomainKG.schema).
|
||||
|
||||
```bash
|
||||
knext schema commit
|
||||
```
|
||||
|
||||
### Step 5: Build the knowledge graph
|
||||
We first need to inject the domain knowledge graph into the graph database. This allows the PostProcessor component to link the extracted nodes with the nodes of the domain knowledge graph, thereby standardizing them during the construction of the graph from unstructured documents.
|
||||
|
||||
Execute [injection.py](./builder/injection.py) in the [builder](./builder) directory to inject the domain KG.
|
||||
|
||||
```bash
|
||||
cd builder && python injection.py && cd ..
|
||||
```
|
||||
|
||||
Note that KAG provides a special implementation of the ``KAGBuilderChain`` for domain knowledge graph injection, known as the ``DomainKnowledgeInjectChain``, which is registered under the name ``domain_kg_inject_chain``. Since domain knowledge injection does not involve scanning files or directories, you can directly call the ``invoke`` interface of the chain to initiate the task.
|
||||
|
||||
|
||||
Next, execute [indexer.py](./builder/indexer.py) in the [builder](./builder) directory to build the KG from the unstructured document.
|
||||
|
||||
```bash
|
||||
cd builder && python indexer.py && cd ..
|
||||
```
|
||||
|
||||
|
||||
### Step 6: Execute the QA tasks
|
||||
|
||||
Execute [qa.py](./solver/qa.py) in the [solver](./solver) directory to generate the answer to the question.
|
||||
|
||||
```bash
|
||||
cd solver && python qa.py && cd ..
|
||||
```
|
||||
|
||||
### Step 7: (Optional) Cleanup
|
||||
|
||||
To delete the checkpoints, execute the following command.
|
||||
|
||||
```bash
|
||||
rm -rf ./builder/ckpt
|
||||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute a command similar to the following. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
82
kag/examples/domain_kg/README_cn.md
Normal file
82
kag/examples/domain_kg/README_cn.md
Normal file
@ -0,0 +1,82 @@
|
||||
# KAG 示例:DomainKG
|
||||
|
||||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
本示例提供了一个医疗领域知识注入的案例,其中领域知识图谱的节点为医学名词,关系为isA。文档内容为部分医学名词的介绍。
|
||||
|
||||
|
||||
## 1. 前置条件
|
||||
|
||||
参考文档 [快速开始](https://openspg.yuque.com/ndx6g9/0.6/quzq24g4esal7q17) 安装 KAG 及其依赖的 OpenSPG server,了解开发者模式 KAG 的使用流程。
|
||||
|
||||
## 2. 复现步骤
|
||||
|
||||
### Step 1:进入示例目录
|
||||
|
||||
```bash
|
||||
cd kag/examples/domain_kg
|
||||
```
|
||||
|
||||
### Step 2:配置模型
|
||||
|
||||
更新 [kag_config.yaml](./kag_config.yaml) 中的生成模型配置 ``openie_llm`` 和 ``chat_llm`` 和表示模型配置 ``vectorize_model``。
|
||||
|
||||
您需要设置正确的 ``api_key``。如果使用的模型供应商和模型名与默认值不同,您还需要更新 ``base_url`` 和 ``model``。
|
||||
|
||||
### Step 3:初始化项目
|
||||
|
||||
先对项目进行初始化。
|
||||
|
||||
```bash
|
||||
knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
|
||||
```
|
||||
|
||||
### Step 4:提交 schema
|
||||
|
||||
执行以下命令提交 schema [DomainKG.schema](./schema/DomainKG.schema)。
|
||||
|
||||
```bash
|
||||
knext schema commit
|
||||
```
|
||||
|
||||
### Step 5:构建知识图谱
|
||||
|
||||
|
||||
我们首先需要将领域知识图谱注入到图数据库中,这样在对非结构化文档进行图谱构建的时候,PostProcessor组件可以将抽取出的节点与领域知识图谱节点进行链指(标准化)。
|
||||
在 [builder](./builder) 目录执行 [injection.py](./builder/injection.py) ,注入图数据。
|
||||
|
||||
```bash
|
||||
cd builder && python injection.py && cd ..
|
||||
```
|
||||
|
||||
注意,KAG为领域知识图谱注入提供了一个特殊的KAGBuilderChain实现,即DomainKnowledgeInjectChain,其注册名为domain_kg_inject_chain。由于领域知识注入不涉及到扫描文件或目录,可以直接调用builder chain 的invoke接口启动任务。
|
||||
|
||||
接下来,在 [builder](./builder) 目录执行 [indexer.py](./builder/indexer.py) 构建知识图谱。
|
||||
|
||||
```bash
|
||||
cd builder && python indexer.py && cd ..
|
||||
```
|
||||
|
||||
### Step 6:执行 QA 任务
|
||||
|
||||
在 [solver](./solver) 目录执行 [qa.py](./solver/qa.py) 生成问题的答案。
|
||||
|
||||
```bash
|
||||
cd solver && python qa.py && cd ..
|
||||
```
|
||||
|
||||
### Step 7:(可选)清理
|
||||
|
||||
若要删除 checkpoint,可执行以下命令。
|
||||
|
||||
```bash
|
||||
rm -rf ./builder/ckpt
|
||||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
7
kag/examples/domain_kg/builder/data/doc.txt
Normal file
7
kag/examples/domain_kg/builder/data/doc.txt
Normal file
@ -0,0 +1,7 @@
|
||||
生长激素(Human Growth Hormone,HGH)是由人体脑垂体前叶分泌的一种肽类激素,由191个氨基酸组成,能促进骨骼、内脏和全身生长.促进蛋白质合成,影响脂肪和矿物质代谢,在人体生长发育中起着关键性作用。一般情况下,生长激素是注射用人生长激素 [5]的简称(曾用名:注射用重组人生长激素 [2]),是通过基因重组技术生产的,在氨基酸含量、序列和蛋白质结构上与人垂体生长激素完全一致。在儿科领域,采用生长激素进行替代治疗,可以明显促进儿童的身高增长。同时,生长激素在生殖领域、烧伤领域及抗衰老领域也有着重要的作用。已经广泛应用于临床。
|
||||
肾上腺皮质激素(简称皮质激素),是肾上腺皮质受脑垂体前叶分泌的促肾上腺皮质激素刺激所产生的一类激素,对维持生命有重大意义。按其生理作用特点可分为盐皮质激素和糖皮质激素,前者主要调节机体水、盐代谢和维持电解质平衡;后者主要与糖、脂肪、蛋白质代谢和生长发育等有关。盐皮质激素基本无临床使用价值,而糖皮质激素在临床上具有极为重要的价值。临床常用药物有氢化可的松、醋酸地塞米松、地塞米松磷酸钠和曲安奈德等。肾上腺皮质由外到内分三带:球状带、束状带、网状带。分别分泌盐皮质激素、糖皮质激素、性激素。
|
||||
皮质激素有抗炎、抗过敏、增加β受体兴奋性、改善毛细血管通透性等作用。
|
||||
1.抗炎作用:对抗各种原因如物理、化学、生物、免疫等引起的炎症;改善红、肿、热、痛症状。在炎症后期可抑制毛细血管和成纤维细胞的增生,减轻后遗症。
|
||||
2.免疫抑制作用:抑制巨噬细胞对抗原的吞噬和处理,减少循环血中的淋巴细胞数量。
|
||||
3.抗休克:扩张痉挛收缩的血管和加强心肌收缩力;降低血管对某些收缩血管活性物质的敏感性,使微循环血流动力学恢复正常,改善休克状态;稳定溶酶体膜。
|
||||
4.其他作用:血液与造血系统,糖皮质激素能刺激骨髓造血功能;中枢神经系统,提高中枢神经系统的兴奋性;消化系统,使胃酸和胃蛋白酶分泌增多。
|
156
kag/examples/domain_kg/builder/data/edges.json
Normal file
156
kag/examples/domain_kg/builder/data/edges.json
Normal file
@ -0,0 +1,156 @@
|
||||
[
|
||||
{
|
||||
"id": "(缩)肾上腺皮质激素-促肾上腺皮质激素",
|
||||
"from": "(缩)肾上腺皮质激素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA",
|
||||
"properties": {}
|
||||
},
|
||||
{
|
||||
"id": "促肾皮素-促肾上腺皮质激素",
|
||||
"from": "促肾皮素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA",
|
||||
"properties": {}
|
||||
},
|
||||
{
|
||||
"id": "促皮质素-促肾上腺皮质激素",
|
||||
"from": "促皮质素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质[激]素-促肾上腺皮质激素",
|
||||
"from": "促肾上腺皮质[激]素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "ACTH-促肾上腺皮质激素",
|
||||
"from": "ACTH",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "促皮质激素-促肾上腺皮质激素",
|
||||
"from": "促皮质激素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质素-促肾上腺皮质激素",
|
||||
"from": "促肾上腺皮质素",
|
||||
"fromType": "Concept",
|
||||
"to": "促肾上腺皮质激素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "人生长激素-促生长素",
|
||||
"from": "人生长激素",
|
||||
"fromType": "Concept",
|
||||
"to": "促生长素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "生长激素-促生长素",
|
||||
"from": "生长激素",
|
||||
"fromType": "Concept",
|
||||
"to": "促生长素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "生长激素释放抑制激素-生长抑素",
|
||||
"from": "生长激素释放抑制激素",
|
||||
"fromType": "Concept",
|
||||
"to": "生长抑素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "促生长素抑制素-生长抑素",
|
||||
"from": "促生长素抑制素",
|
||||
"fromType": "Concept",
|
||||
"to": "生长抑素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "生长抑素醋酸盐-生长抑素",
|
||||
"from": "生长抑素醋酸盐",
|
||||
"fromType": "Concept",
|
||||
"to": "生长抑素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "胃泌激素-促胃液素",
|
||||
"from": "胃泌激素",
|
||||
"fromType": "Concept",
|
||||
"to": "促胃液素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "胃泌素-促胃液素",
|
||||
"from": "胃泌素",
|
||||
"fromType": "Concept",
|
||||
"to": "促胃液素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "促乳素-催乳素",
|
||||
"from": "促乳素",
|
||||
"fromType": "Concept",
|
||||
"to": "催乳素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "泌乳素-催乳素",
|
||||
"from": "泌乳素",
|
||||
"fromType": "Concept",
|
||||
"to": "催乳素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "催乳激素-催乳素",
|
||||
"from": "催乳激素",
|
||||
"fromType": "Concept",
|
||||
"to": "催乳素",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "蛋白水解酶-内肽酶",
|
||||
"from": "蛋白水解酶",
|
||||
"fromType": "Concept",
|
||||
"to": "内肽酶",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
},
|
||||
{
|
||||
"id": "蛋白酶-内肽酶",
|
||||
"from": "蛋白酶",
|
||||
"fromType": "Concept",
|
||||
"to": "内肽酶",
|
||||
"toType": "Concept",
|
||||
"label": "isA"
|
||||
}
|
||||
]
|
195
kag/examples/domain_kg/builder/data/nodes.json
Normal file
195
kag/examples/domain_kg/builder/data/nodes.json
Normal file
@ -0,0 +1,195 @@
|
||||
[
|
||||
{
|
||||
"id": "(缩)肾上腺皮质激素",
|
||||
"name": "(缩)肾上腺皮质激素",
|
||||
"label": "Concept",
|
||||
"properties": {}
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept",
|
||||
"properties": {}
|
||||
},
|
||||
{
|
||||
"id": "促肾皮素",
|
||||
"name": "促肾皮素",
|
||||
"label": "Concept",
|
||||
"properties": {}
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促皮质素",
|
||||
"name": "促皮质素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质[激]素",
|
||||
"name": "促肾上腺皮质[激]素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "ACTH",
|
||||
"name": "ACTH",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促皮质激素",
|
||||
"name": "促皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质素",
|
||||
"name": "促肾上腺皮质素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促肾上腺皮质激素",
|
||||
"name": "促肾上腺皮质激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "人生长激素",
|
||||
"name": "人生长激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促生长素",
|
||||
"name": "促生长素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长激素",
|
||||
"name": "生长激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促生长素",
|
||||
"name": "促生长素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长激素释放抑制激素",
|
||||
"name": "生长激素释放抑制激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长抑素",
|
||||
"name": "生长抑素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促生长素抑制素",
|
||||
"name": "促生长素抑制素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长抑素",
|
||||
"name": "生长抑素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长抑素醋酸盐",
|
||||
"name": "生长抑素醋酸盐",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "生长抑素",
|
||||
"name": "生长抑素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "胃泌激素",
|
||||
"name": "胃泌激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促胃液素",
|
||||
"name": "促胃液素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "胃泌素",
|
||||
"name": "胃泌素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促胃液素",
|
||||
"name": "促胃液素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "促乳素",
|
||||
"name": "促乳素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "催乳素",
|
||||
"name": "催乳素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "泌乳素",
|
||||
"name": "泌乳素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "催乳素",
|
||||
"name": "催乳素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "催乳激素",
|
||||
"name": "催乳激素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "催乳素",
|
||||
"name": "催乳素",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "蛋白水解酶",
|
||||
"name": "蛋白水解酶",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "内肽酶",
|
||||
"name": "内肽酶",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "蛋白酶",
|
||||
"name": "蛋白酶",
|
||||
"label": "Concept"
|
||||
},
|
||||
{
|
||||
"id": "内肽酶",
|
||||
"name": "内肽酶",
|
||||
"label": "Concept"
|
||||
}
|
||||
]
|
36
kag/examples/domain_kg/builder/indexer.py
Normal file
36
kag/examples/domain_kg/builder/indexer.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright 2023 OpenSPG Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
# in compliance with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied.
|
||||
import os
|
||||
import logging
|
||||
from kag.common.registry import import_modules_from_path
|
||||
|
||||
from kag.builder.runner import BuilderChainRunner
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def buildKB(file_path):
    """
    Run the configured builder pipeline on the given input path.

    The runner is created from the ``kag_builder_pipeline`` section of the
    KAG configuration and invoked with ``file_path``.

    Args:
        file_path: Path handed to the runner's ``invoke`` method.
    """
    # Imported inside the function, as in the original script.
    from kag.common.conf import KAG_CONFIG

    pipeline_conf = KAG_CONFIG.all_config["kag_builder_pipeline"]
    chain_runner = BuilderChainRunner.from_config(pipeline_conf)
    chain_runner.invoke(file_path)

    logger.info(f"\n\nbuildKB successfully for {file_path}\n\n")


if __name__ == "__main__":
    import_modules_from_path(".")
    # The document to index lives next to this script, under data/.
    builder_dir = os.path.dirname(__file__)
    buildKB(os.path.join(builder_dir, "data/doc.txt"))
|
35
kag/examples/domain_kg/builder/injection.py
Normal file
35
kag/examples/domain_kg/builder/injection.py
Normal file
@ -0,0 +1,35 @@
|
||||
# Copyright 2023 OpenSPG Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
# in compliance with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied.
|
||||
import os
|
||||
import logging
|
||||
from kag.common.registry import import_modules_from_path
|
||||
|
||||
from kag.builder.runner import BuilderChainRunner
|
||||
from kag.interface import KAGBuilderChain
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def buildKB():
    """
    Inject the domain knowledge graph by running the configured inject chain.

    The chain is created from the ``domain_kg_inject_chain`` section of the
    KAG configuration and invoked once with ``None`` as its input.
    """
    # Imported inside the function, as in the original script.
    from kag.common.conf import KAG_CONFIG

    # Build the injection chain from its configuration section.
    domain_knowledge_graph_chain = KAGBuilderChain.from_config(
        KAG_CONFIG.all_config["domain_kg_inject_chain"]
    )

    # The chain is invoked directly with no input, without going through a runner.
    domain_knowledge_graph_chain.invoke(None)

    # Plain string: the message has no placeholders, so no f-string is needed.
    logger.info("Done dump domain kg to graph store")


if __name__ == "__main__":
    buildKB()
|
148
kag/examples/domain_kg/kag_config.yaml
Normal file
148
kag/examples/domain_kg/kag_config.yaml
Normal file
@ -0,0 +1,148 @@
|
||||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
api_key: key
|
||||
base_url: https://api.deepseek.com
|
||||
model: deepseek-chat
|
||||
type: maas
|
||||
|
||||
chat_llm: &chat_llm
|
||||
api_key: key
|
||||
base_url: https://api.deepseek.com
|
||||
model: deepseek-chat
|
||||
type: maas
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
vectorizer: *vectorize_model
|
||||
|
||||
log:
|
||||
level: INFO
|
||||
|
||||
project:
|
||||
biz_scene: default
|
||||
host_addr: http://127.0.0.1:8887
|
||||
id: '2'
|
||||
language: zh
|
||||
namespace: DomainKG
|
||||
#------------project configuration end----------------#
|
||||
|
||||
#------------domain kg injection configuration start----------------#
|
||||
|
||||
external_graph_loader: &external_graph_loader
|
||||
type: base
|
||||
node_file_path: data/nodes.json
|
||||
edge_file_path: data/edges.json
|
||||
match_config:
|
||||
k: 1
|
||||
threshold: 0.9
|
||||
|
||||
domain_kg_inject_chain:
|
||||
type: domain_kg_inject_chain
|
||||
external_graph: *external_graph_loader
|
||||
vectorizer:
|
||||
type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer
|
||||
vectorize_model: *vectorize_model
|
||||
writer:
|
||||
type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter
|
||||
#------------domain kg injection configuration end----------------#
|
||||
|
||||
#------------kag-builder configuration start----------------#
|
||||
kag_builder_pipeline:
|
||||
chain:
|
||||
type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain
|
||||
extractor:
|
||||
type: schema_free_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor
|
||||
llm: *openie_llm
|
||||
ner_prompt:
|
||||
type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt
|
||||
std_prompt:
|
||||
type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt
|
||||
triple_prompt:
|
||||
type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt
|
||||
external_graph: *external_graph_loader
|
||||
reader:
|
||||
type: txt_reader # kag.builder.component.reader.text_reader.TXTReader
|
||||
post_processor:
|
||||
type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor
|
||||
similarity_threshold: 0.9
|
||||
external_graph: *external_graph_loader
|
||||
splitter:
|
||||
type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter
|
||||
split_length: 100000
|
||||
window_length: 0
|
||||
vectorizer:
|
||||
type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer
|
||||
vectorize_model: *vectorize_model
|
||||
writer:
|
||||
type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter
|
||||
num_threads_per_chain: 1
|
||||
num_chains: 16
|
||||
scanner:
|
||||
type: file_scanner # kag.builder.component.scanner.file_scanner.FileScanner
|
||||
#------------kag-builder configuration end----------------#
|
||||
|
||||
#------------kag-solver configuration start----------------#
|
||||
search_api: &search_api
|
||||
type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI
|
||||
|
||||
graph_api: &graph_api
|
||||
type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi
|
||||
|
||||
exact_kg_retriever: &exact_kg_retriever
|
||||
type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever
|
||||
el_num: 5
|
||||
llm_client: *chat_llm
|
||||
search_api: *search_api
|
||||
graph_api: *graph_api
|
||||
|
||||
fuzzy_kg_retriever: &fuzzy_kg_retriever
|
||||
type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever
|
||||
el_num: 5
|
||||
vectorize_model: *vectorize_model
|
||||
llm_client: *chat_llm
|
||||
search_api: *search_api
|
||||
graph_api: *graph_api
|
||||
|
||||
chunk_retriever: &chunk_retriever
|
||||
type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever
|
||||
llm_client: *chat_llm
|
||||
recall_num: 10
|
||||
rerank_topk: 10
|
||||
|
||||
kag_solver_pipeline:
|
||||
memory:
|
||||
type: default_memory # kag.solver.implementation.default_memory.DefaultMemory
|
||||
llm_client: *chat_llm
|
||||
max_iterations: 3
|
||||
reasoner:
|
||||
type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner
|
||||
llm_client: *chat_llm
|
||||
lf_planner:
|
||||
type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner
|
||||
llm_client: *chat_llm
|
||||
vectorize_model: *vectorize_model
|
||||
lf_executor:
|
||||
type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor
|
||||
llm_client: *chat_llm
|
||||
force_chunk_retriever: true
|
||||
exact_kg_retriever: *exact_kg_retriever
|
||||
fuzzy_kg_retriever: *fuzzy_kg_retriever
|
||||
chunk_retriever: *chunk_retriever
|
||||
merger:
|
||||
type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger
|
||||
vectorize_model: *vectorize_model
|
||||
chunk_retriever: *chunk_retriever
|
||||
generator:
|
||||
type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator
|
||||
llm_client: *chat_llm
|
||||
generate_prompt:
|
||||
type: default_resp_generator # kag/examples/2wiki/solver/prompt/resp_generator.py
|
||||
reflector:
|
||||
type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector
|
||||
llm_client: *chat_llm
|
||||
|
||||
#------------kag-solver configuration end----------------#
|
20
kag/examples/domain_kg/reasoner/__init__.py
Normal file
20
kag/examples/domain_kg/reasoner/__init__.py
Normal file
@ -0,0 +1,20 @@
|
||||
# Copyright 2023 OpenSPG Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
# in compliance with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied.
|
||||
|
||||
"""
|
||||
Place the DSL file for graph reasoning in this directory.
|
||||
For example:
|
||||
|
||||
```company.dsl
|
||||
MATCH (s:DEFAULT.Company)
|
||||
RETURN s.id, s.address
|
||||
```
|
||||
"""
|
98
kag/examples/domain_kg/schema/DomainKG.schema
Normal file
98
kag/examples/domain_kg/schema/DomainKG.schema
Normal file
@ -0,0 +1,98 @@
|
||||
namespace DomainKG
|
||||
|
||||
Chunk(文本块): EntityType
|
||||
properties:
|
||||
content(内容): Text
|
||||
index: TextAndVector
|
||||
|
||||
ArtificialObject(人造物体): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Astronomy(天文学): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Building(建筑): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Creature(生物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Concept(概念): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Date(日期): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
GeographicLocation(地理位置): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Keyword(关键词): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Medicine(药物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
|
||||
NaturalScience(自然科学): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Organization(组织机构): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Person(人物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Others(其他): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
18
kag/examples/domain_kg/schema/__init__.py
Normal file
18
kag/examples/domain_kg/schema/__init__.py
Normal file
@ -0,0 +1,18 @@
|
||||
# Copyright 2023 OpenSPG Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
# in compliance with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied.
|
||||
|
||||
"""
|
||||
{{namespace}}.schema:
|
||||
The MarkLang file for the schema of this project.
|
||||
You can execute `knext schema commit` to commit your schema to SPG server.
|
||||
|
||||
|
||||
"""
|
0
kag/examples/domain_kg/solver/__init__.py
Normal file
0
kag/examples/domain_kg/solver/__init__.py
Normal file
31
kag/examples/domain_kg/solver/qa.py
Normal file
31
kag/examples/domain_kg/solver/qa.py
Normal file
@ -0,0 +1,31 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from kag.common.benchmarks.evaluate import Evaluate
|
||||
from kag.solver.logic.solver_pipeline import SolverPipeline
|
||||
from kag.common.conf import KAG_CONFIG
|
||||
from kag.common.registry import import_modules_from_path
|
||||
|
||||
from kag.common.checkpointer import CheckpointerManager
|
||||
|
||||
|
||||
def qa(query):
    """
    Answer a single question with the configured solver pipeline.

    A pipeline is created from the ``kag_solver_pipeline`` section of the KAG
    configuration, run on ``query``, and the answer and trace log are printed.

    Args:
        query: The question text passed to the pipeline's ``run`` method.

    Returns:
        A tuple ``(answer, trace_log)`` as returned by the pipeline's ``run``.
    """
    solver_conf = KAG_CONFIG.all_config["kag_solver_pipeline"]
    pipeline = SolverPipeline.from_config(solver_conf)
    answer, trace_log = pipeline.run(query)

    print(f"\n\nso the answer for '{query}' is: {answer}\n\n")
    print(trace_log)
    return answer, trace_log


if __name__ == "__main__":
    # Sample questions to run through the solver.
    queries = [
        "皮质激素有什么作用",
    ]
    for question in queries:
        qa(question)
|
Loading…
x
Reference in New Issue
Block a user