openspg/python/medical/builder/disease_chain.py
2023-12-22 14:11:31 +08:00

45 lines
1.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from knext.component.builder import CSVReader, KGWriter, LLMBasedExtractor, SubGraphMapping, SPGTypeMapping
from nn4k.invoker import LLMInvoker
from knext.api.operator import SPOPrompt
"""
1. 定义输入源CSV文件
"""
source = CSVReader(
    local_path="job/data/Disease.csv",
    columns=["id", "input"],
    start_row=2,  # presumably skips a header row -- confirm against the CSV
)

# 2. Define the LLM-based extraction component that extracts entities of the
#    disease type from long text. The prompt is built for that type together
#    with the "commonSymptom" and "applicableDrug" property names.
# NOTE(review): the original comment named the type "Medical.Disease" while
# the code uses "Medical1.Disease" -- confirm which namespace is intended.
DISEASE_TYPE = "Medical1.Disease"
extract = LLMBasedExtractor(
    llm=LLMInvoker.from_config("openai_infer.json"),
    prompt_ops=[
        SPOPrompt(DISEASE_TYPE, ["commonSymptom", "applicableDrug"]),
    ],
)

# 3. Define the sub-graph mapping for the disease type. Each field is paired
#    with an identically named counterpart (presumably source field ->
#    schema property; verify against the knext mapping API).
mapping = (
    SubGraphMapping(spg_type_name=DISEASE_TYPE)
    .add_mapping_field("id", "id")
    .add_mapping_field("name", "name")
    .add_mapping_field("commonSymptom", "commonSymptom")
    .add_mapping_field("applicableDrug", "applicableDrug")
)

# 4. Define the output to the knowledge graph.
sink = KGWriter()

# 5. Define builder_chain by piping the components together in order.
builder_chain = source >> extract >> mapping >> sink

# 6. Execute builder_chain, or alternatively publish it as a platform task
#    (swap the call below for the commented-out submit()).
builder_chain.invoke()
# builder_chain.submit()