# openspg/python/medical/builder/operator/disease_extractor.py

# -*- coding: utf-8 -*-
# Copyright 2023 Ant Group CO., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from typing import Dict, List

from knext.operator.op import ExtractOp
from knext.operator.spg_record import SPGRecord

# Prompt texts keyed by task name. "${schema}" and "${input}" are placeholder
# markers embedded in the text; nothing in this mapping substitutes them, so
# filling them in is left to whichever code consumes the templates.
# Each prompt is split at its "\n" boundaries using adjacent-literal
# concatenation purely for readability; the resulting strings are unchanged.
prompt_template = {
    # Entity extraction: lists the known entity types, then asks for every
    # entity in the text as [{"entity": ..., "entity_type": ...}].
    "ner": (
        "已知实体类型(entity_type)包括:${schema}。"
        "假设你是一个专业的医学专家，请从下列文本中抽取所有实体(entity)。\n"
        "----文本----\n"
        "${input}\n"
        "----回答要求----\n"
        '1. 答案格式为：[{"entity": ,"entity_type": },]'
    ),
    # Relation extraction: supplies the text, the knowledge-graph schema, the
    # answer requirements (JSON subject/predicate/object triples) and one
    # worked example.
    "re": (
        "假设你是一个专业的医学专家，请从文本中抽取关系。"
        "我们会首先提供文本，然后会提供知识图谱schema，再提供回答的具体要求，最后是一个举例。\n"
        "----文本----\n"
        "${input}\n"
        "----知识图谱schema----\n"
        "${schema}\n"
        "----回答要求----\n"
        '1. 答案格式为json格式：[{"subject":,"predicate":,"object":},]\n'
        "2. object要求简洁，必须是中文，如果object包含多个值请用英文逗号分隔；\n"
        "3. 每一条关系必须属于知识图谱schema。\n"
        "----举例----\n"
        "文本为：急性扁桃体炎通常伴有咽痛，声嘶，发热等症状。"
        '回答为：{"subject":"急性扁桃体炎","predicate":"症状","object":"咽痛,声嘶,发热"}'
    ),
    # Follow-up instruction: asks for the previous answer to be re-emitted as
    # standard JSON only, with no extra explanation.
    "convert": (
        "请把上面的回答转成标准的json格式，"
        "并只返回json数据作为回答，"
        "不需要其他说明"
    ),
}


class DiseaseExtractor(ExtractOp):
    """Extraction operator stub that emits a ``Medical.Disease`` record.

    As written, ``eval`` performs no real extraction: it echoes its input and
    the operator params to stdout and returns one fixed placeholder record.
    """

    def __init__(self, params: Dict[str, str] = None):
        """Hand ``params`` unchanged to the parent class initializer."""
        super().__init__(params)

    def eval(self, record: Dict[str, str]) -> List[SPGRecord]:
        """Print the inputs and return a single hard-coded disease record.

        Args:
            record: The input record. It is only printed; its contents do not
                influence the returned value.

        Returns:
            A one-element list holding an ``SPGRecord`` constructed with
            ``"Medical.Disease"`` and the fixed properties
            ``{"id": "123", "name": "123"}``.
        """
        # Debug echo of the inputs. NOTE(review): self.params is presumably
        # populated by the parent initializer from the constructor argument;
        # confirm against ExtractOp.
        print(record)
        print(self.params)

        # Placeholder payload: the same values are returned for every record.
        placeholder_properties = {"id": "123", "name": "123"}
        placeholder = SPGRecord("Medical.Disease", placeholder_properties)
        return [placeholder]