openspg/python/medical/builder/operator/disease_extractor.py
2023-12-19 15:32:58 +08:00

33 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# Copyright 2023 Ant Group CO., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
from typing import Dict, List
from knext.operator.op import ExtractOp
from knext.operator.spg_record import SPGRecord
# Prompt templates (written in Chinese) keyed by task name. The strings carry
# ``${schema}`` and ``${input}`` placeholders; nothing visible in this file
# fills them in, so substitution presumably happens in a caller
# (string.Template-style syntax) -- TODO confirm.
prompt_template = {
# "ner": lists the known entity types (${schema}) and asks for every entity in
# the text (${input}), answered as a list of {"entity", "entity_type"} items.
"ner": '已知实体类型(entity_type)包括:${schema}。假设你是一个专业的医学专家,请从下列文本中抽取所有实体(entity)。\n----文本----\n${input}\n----回答要求----\n1. 答案格式为:[{"entity": ,"entity_type": },]',
# "re": asks for relations in the text (${input}) that belong to the knowledge
# graph schema (${schema}), answered as JSON {"subject", "predicate", "object"}
# items; the prompt ends with one worked example.
"re": '假设你是一个专业的医学专家请从文本中抽取关系。我们会首先提供文本然后会提供知识图谱schema再提供回答的具体要求最后是一个举例。\n----文本----\n${input}\n----知识图谱schema----\n${schema}\n----回答要求----\n1. 答案格式为json格式[{"subject":,"predicate":,"object":},]\n2. object要求简洁必须是中文如果object包含多个值请用英文逗号分隔\n3. 每一条关系必须属于知识图谱schema。\n----举例----\n文本为:急性扁桃体炎通常伴有咽痛,声嘶,发热等症状。回答为:{"subject":"急性扁桃体炎","predicate":"症状","object":"咽痛,声嘶,发热"}',
# "convert": follow-up instruction asking that the previous answer be returned
# as standard JSON only, with no extra explanation. It has no placeholders.
"convert": "请把上面的回答转成标准的json格式并只返回json数据作为回答不需要其他说明",
}
class DiseaseExtractor(ExtractOp):
    """Extract operator that emits one fixed ``Medical.Disease`` record.

    The current implementation ignores the content of the incoming record:
    it prints its inputs and returns a hard-coded result.
    NOTE(review): this looks like a placeholder for a real extraction
    step -- confirm before relying on its output.
    """

    def __init__(self, params: Dict[str, str] = None):
        """Pass ``params`` through unchanged to the ``ExtractOp`` initializer.

        Args:
            params: Operator parameters; defaults to ``None``.
        """
        super().__init__(params)

    def eval(self, record: Dict[str, str]) -> List[SPGRecord]:
        """Print the inputs and return a single hard-coded disease record.

        Args:
            record: The input record; it is only printed, never inspected.

        Returns:
            A one-element list holding an ``SPGRecord`` of type
            ``Medical.Disease`` whose ``id`` and ``name`` are both ``"123"``.
        """
        # Debug output of the raw input and of the operator parameters
        # (``self.params`` is presumably set by the base class -- TODO confirm).
        print(record)
        print(self.params)

        properties = {"id": "123", "name": "123"}
        disease = SPGRecord("Medical.Disease", properties)
        return [disease]