openspg/python/medical/builder/operator/disease_extractor.py

33 lines
2.0 KiB
Python
Raw Normal View History

2023-12-19 15:32:58 +08:00
# -*- coding: utf-8 -*-
# Copyright 2023 Ant Group CO., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
from typing import Dict, List
from knext.operator.op import ExtractOp
from knext.operator.spg_record import SPGRecord
# Prompt templates (Chinese, medical-expert persona), keyed by task name:
#   "ner"     - asks for every entity in the text, typed against ${schema}.
#   "re"      - asks for subject/predicate/object relations constrained by
#               ${schema}, and ends with a worked example.
#   "convert" - follow-up instruction asking for the previous answer to be
#               returned as standard JSON only.
# The "ner" and "re" templates carry literal ``${schema}`` and ``${input}``
# placeholders; whatever substitutes them is not visible in this file.
# Each long prompt is split at its "\n" section boundaries purely for
# readability -- adjacent literals concatenate to the same single string.
prompt_template = {
    "ner": (
        '已知实体类型(entity_type)包括:${schema}。假设你是一个专业的医学专家,请从下列文本中抽取所有实体(entity)。\n'
        '----文本----\n'
        '${input}\n'
        '----回答要求----\n'
        '1. 答案格式为:[{"entity": ,"entity_type": },]'
    ),
    "re": (
        '假设你是一个专业的医学专家请从文本中抽取关系。我们会首先提供文本然后会提供知识图谱schema再提供回答的具体要求最后是一个举例。\n'
        '----文本----\n'
        '${input}\n'
        '----知识图谱schema----\n'
        '${schema}\n'
        '----回答要求----\n'
        '1. 答案格式为json格式[{"subject":,"predicate":,"object":},]\n'
        '2. object要求简洁必须是中文如果object包含多个值请用英文逗号分隔\n'
        '3. 每一条关系必须属于知识图谱schema。\n'
        '----举例----\n'
        '文本为:急性扁桃体炎通常伴有咽痛,声嘶,发热等症状。回答为:{"subject":"急性扁桃体炎","predicate":"症状","object":"咽痛,声嘶,发热"}'
    ),
    "convert": "请把上面的回答转成标准的json格式并只返回json数据作为回答不需要其他说明",
}
class DiseaseExtractor(ExtractOp):
    """Placeholder extraction operator producing ``Medical.Disease`` records.

    As written, no real extraction happens: ``eval`` echoes its inputs to
    stdout and returns one fixed record regardless of what it was given.
    """

    def __init__(self, params: Dict[str, str] = None):
        """Pass ``params`` (default ``None``) straight to the parent initializer."""
        super().__init__(params)

    def eval(self, record: Dict[str, str]) -> List[SPGRecord]:
        """Print the inputs and return a single hard-coded disease record.

        Args:
            record: The incoming record; it is only printed, never inspected.

        Returns:
            A one-element list holding an ``SPGRecord`` of type
            ``"Medical.Disease"`` whose ``id`` and ``name`` are both ``"123"``.
        """
        # Debug echo of the inputs. ``self.params`` is presumably populated by
        # the parent initializer -- confirm against ExtractOp.
        print(record)
        print(self.params)

        # Fixed placeholder properties; nothing is derived from ``record``.
        placeholder_properties = {"id": "123", "name": "123"}
        disease = SPGRecord("Medical.Disease", placeholder_properties)
        return [disease]