From 711c2be2b13e030fc860389d3d621f3ed4fc7220 Mon Sep 17 00:00:00 2001 From: baifuyu Date: Mon, 8 Jan 2024 11:39:18 +0800 Subject: [PATCH] fix(example): update finance example (#65) --- python/knext/knext/examples/finance/README.md | 8 +- .../finance/builder/job/data/Indicator.csv | 14 ++ .../examples/finance/builder/job/indicator.py | 33 +++++ .../finance/builder/job/indicator_rel.py | 49 +++++++ .../builder/job/state_and_indicator.py | 50 ------- .../finance/builder/model/openai_infer.json | 5 +- .../operator/fuse/indicator_fuse_op.py | 36 ----- .../builder/operator/fuse/state_fuse_op.py | 36 ----- .../builder/operator/indicator_extract.py | 51 +++++++ .../builder/operator/indicator_fuse.py | 72 ++++++++++ .../builder/operator/indicator_link.py | 57 ++++++++ .../builder/operator/indicator_predict.py | 84 ++++++++++++ .../operator/predict/indicator_predict_op.py | 33 ----- .../operator/prompt/indicator_extraction.py | 47 ------- .../prompt/logic_relation_extraction.py | 56 -------- .../operator/prompt/relation_extraction.py | 33 ----- .../finance/builder/operator/prompts.py | 125 ++++++++++++++++++ .../examples/finance/schema/finance.schema | 50 +------ .../finance/schema/finance_schema_helper.py | 60 +-------- 19 files changed, 493 insertions(+), 406 deletions(-) create mode 100644 python/knext/knext/examples/finance/builder/job/data/Indicator.csv create mode 100644 python/knext/knext/examples/finance/builder/job/indicator.py create mode 100644 python/knext/knext/examples/finance/builder/job/indicator_rel.py delete mode 100644 python/knext/knext/examples/finance/builder/job/state_and_indicator.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/fuse/indicator_fuse_op.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/fuse/state_fuse_op.py create mode 100644 python/knext/knext/examples/finance/builder/operator/indicator_extract.py create mode 100644 python/knext/knext/examples/finance/builder/operator/indicator_fuse.py create mode 100644 python/knext/knext/examples/finance/builder/operator/indicator_link.py create mode 100644 python/knext/knext/examples/finance/builder/operator/indicator_predict.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/predict/indicator_predict_op.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/prompt/indicator_extraction.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/prompt/logic_relation_extraction.py delete mode 100644 python/knext/knext/examples/finance/builder/operator/prompt/relation_extraction.py create mode 100644 python/knext/knext/examples/finance/builder/operator/prompts.py diff --git a/python/knext/knext/examples/finance/README.md b/python/knext/knext/examples/finance/README.md index d242a2ea..5220490d 100644 --- a/python/knext/knext/examples/finance/README.md +++ b/python/knext/knext/examples/finance/README.md @@ -1,5 +1,5 @@ ```bash -knext project create --name 全风 --namespace Finance --desc 全风财政指标抽取 +knext project create --name Finance --namespace Finance --desc 这是一个示例项目 ``` ```bash @@ -7,11 +7,7 @@ knext schema commit ``` ```bash -knext operator publish DemoExtractOp -``` - -```bash -knext builder submit Demo +knext builder execute Demo ``` ```bash diff --git a/python/knext/knext/examples/finance/builder/job/data/Indicator.csv b/python/knext/knext/examples/finance/builder/job/data/Indicator.csv new file mode 100644 index 00000000..7e401fe0 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/job/data/Indicator.csv @@ -0,0 +1,14 @@ +id +财政收入质量 +财政自给能力 +土地出让收入 +一般公共预算收入 +留抵退税 +税收收入 +税收收入/一般公共预算收入 +一般公共预算支出 +财政自给率 +政府性基金收入 +转移性收入 +综合财力 + diff --git a/python/knext/knext/examples/finance/builder/job/indicator.py b/python/knext/knext/examples/finance/builder/job/indicator.py new file mode 100644 index 00000000..44d889a2 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/job/indicator.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +from knext.client.model.builder_job import BuilderJob +from knext.component.builder import CSVReader, SPGTypeMapping, KGWriter +from schema.finance_schema_helper import Finance + + +class Indicator(BuilderJob): + def build(self): + source = CSVReader( + local_path="./builder/job/data/Indicator.csv", + columns=["id"], + start_row=2, + ) + + mapping = ( + SPGTypeMapping(spg_type_name=Finance.Indicator) + .add_property_mapping("id", Finance.Indicator.id) + .add_property_mapping("id", Finance.Indicator.name) + ) + + sink = KGWriter() + + return source >> mapping >> sink diff --git a/python/knext/knext/examples/finance/builder/job/indicator_rel.py b/python/knext/knext/examples/finance/builder/job/indicator_rel.py new file mode 100644 index 00000000..5f103f47 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/job/indicator_rel.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + + +from schema.finance_schema_helper import Finance + +from knext.api.component import ( + CSVReader, + UserDefinedExtractor, + KGWriter, + SPGTypeMapping, +) +from knext.client.model.builder_job import BuilderJob +from nn4k.invoker import NNInvoker + +from builder.operator.indicator_extract import IndicatorExtractOp + + +class IndicatorRel(BuilderJob): + def build(self): + source = CSVReader( + local_path="builder/job/data/document.csv", columns=["input"], start_row=2 + ) + + extract = UserDefinedExtractor( + extract_op=IndicatorExtractOp( + params={"url": "http://localhost:9999/generate"} + ), + ) + + indicator_mapping = ( + SPGTypeMapping(spg_type_name=Finance.Indicator) + .add_property_mapping("id", Finance.Indicator.id) + .add_property_mapping("name", Finance.Indicator.name) + .add_predicting_relation("isA", Finance.Indicator) + ) + + sink = KGWriter() + + return source >> extract >> [indicator_mapping] >> sink diff --git a/python/knext/knext/examples/finance/builder/job/state_and_indicator.py b/python/knext/knext/examples/finance/builder/job/state_and_indicator.py deleted file mode 100644 index 4679a9ee..00000000 --- a/python/knext/knext/examples/finance/builder/job/state_and_indicator.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - - -from schema.finance_schema_helper import Finance - -from knext.api.component import CSVReader, LLMBasedExtractor, KGWriter, SPGTypeMapping -from knext.client.model.builder_job import BuilderJob -from nn4k.invoker import NNInvoker - - -class StateAndIndicator(BuilderJob): - def build(self): - source = CSVReader( - local_path="builder/job/data/document.csv", columns=["input"], start_row=2 - ) - - from builder.operator.prompt.indicator_extraction import IndicatorNER - from builder.operator.prompt.relation_extraction import IndicatorREL - from builder.operator.prompt.logic_relation_extraction import ( - IndicatorLogic, - ) - - extract = LLMBasedExtractor( - llm=NNInvoker.from_config("builder/model/openai_infer.json"), - prompt_ops=[IndicatorNER(), IndicatorREL(), IndicatorLogic()], - ) - - state_mapping = ( - SPGTypeMapping(spg_type_name=Finance.State) - .add_property_mapping("id", Finance.State.id) - .add_property_mapping("name", Finance.State.name) - .add_relation_mapping("causes", Finance.State.causes, Finance.State) - .add_predicting_relation(Finance.State.derivedFrom, Finance.Indicator) - ) - - indicator_mapping = SPGTypeMapping(spg_type_name=Finance.Indicator) - - sink = KGWriter() - - return source >> extract >> [state_mapping, indicator_mapping] >> sink diff --git a/python/knext/knext/examples/finance/builder/model/openai_infer.json b/python/knext/knext/examples/finance/builder/model/openai_infer.json index 570a1976..4eee4404 100644 --- a/python/knext/knext/examples/finance/builder/model/openai_infer.json +++ b/python/knext/knext/examples/finance/builder/model/openai_infer.json @@ -1,6 +1,7 @@ { - "nn_name": "gpt-3.5-turbo", + "invoker_type": "OpenAI", "openai_api_key": "EMPTY", "openai_api_base": "http://127.0.0.1:38080/v1", + "openai_model_name": "gpt-3.5-turbo", "openai_max_tokens": 2000 -} +} \ No newline at end of file diff --git a/python/knext/knext/examples/finance/builder/operator/fuse/indicator_fuse_op.py b/python/knext/knext/examples/finance/builder/operator/fuse/indicator_fuse_op.py deleted file mode 100644 index 6a6039c7..00000000 --- a/python/knext/knext/examples/finance/builder/operator/fuse/indicator_fuse_op.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -from typing import List - -from knext.client.search import SearchClient -from knext.operator.op import FuseOp -from knext.operator.spg_record import SPGRecord - -from schema.finance_schema_helper import Finance - - -class IndicatorFuse(FuseOp): - bind_to = Finance.Indicator - - def __init__(self): - super().__init__() - self.search_client = SearchClient(self.bind_to) - - def link(self, subject_record: SPGRecord) -> SPGRecord: - linked_record = self.search_client.exact_search(subject_record, "name") - - return linked_record - - def merge(self, subject_record: SPGRecord, linked_record: SPGRecord) -> SPGRecord: - if linked_record: - return linked_record - return subject_record diff --git a/python/knext/knext/examples/finance/builder/operator/fuse/state_fuse_op.py b/python/knext/knext/examples/finance/builder/operator/fuse/state_fuse_op.py deleted file mode 100644 index 5bd64cb5..00000000 --- a/python/knext/knext/examples/finance/builder/operator/fuse/state_fuse_op.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -from typing import List - -from knext.client.search import SearchClient -from knext.operator.op import FuseOp -from knext.operator.spg_record import SPGRecord - -from schema.finance_schema_helper import Finance - - -class StateFuse(FuseOp): - bind_to = Finance.State - - def __init__(self): - super().__init__() - self.search_client = SearchClient(self.bind_to) - - def link(self, subject_record: SPGRecord) -> SPGRecord: - linked_record = self.search_client.exact_search(subject_record, "name") - - return linked_record - - def merge(self, subject_record: SPGRecord, linked_record: SPGRecord) -> SPGRecord: - if linked_record: - return linked_record - return subject_record diff --git a/python/knext/knext/examples/finance/builder/operator/indicator_extract.py b/python/knext/knext/examples/finance/builder/operator/indicator_extract.py new file mode 100644 index 00000000..adc2ad3c --- /dev/null +++ b/python/knext/knext/examples/finance/builder/operator/indicator_extract.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +import requests +from typing import List, Dict +from knext.api.operator import ExtractOp +from knext.api.record import SPGRecord + + +class IndicatorExtractOp(ExtractOp): + def __init__(self, params: Dict[str, str] = None): + super().__init__(params) + # Address for LLM service + self.url = self.params["url"] + from builder.operator.prompts import IndicatorNERPrompt + + self.prompt_op = IndicatorNERPrompt() + + def generate(self, input_data, adapter_name): + # Request LLM service to get the extraction results + req = { + "input": input_data, + "adapter_name": adapter_name, + "max_input_len": 1024, + "max_output_len": 1024, + } + try: + rsp = requests.post(self.url, req) + rsp.raise_for_status() + return rsp.json() + except Exception as e: + return {"output": ""} + + def invoke(self, record: Dict[str, str]) -> List[SPGRecord]: + # Building LLM inputs with IndicatorNERPrompt + ner_input = self.prompt_op.build_prompt(record) + ner_output = self.generate(ner_input, "ner") + record["ner"] = ner_output["output"] + # Parsing the LLM output with IndicatorNERPrompt to construct SPGRecords + ner_result = self.prompt_op.parse_response(record["ner"]) + + return ner_result diff --git a/python/knext/knext/examples/finance/builder/operator/indicator_fuse.py b/python/knext/knext/examples/finance/builder/operator/indicator_fuse.py new file mode 100644 index 00000000..c938b676 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/operator/indicator_fuse.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +import requests +from knext.api.operator import FuseOp +from knext.api.record import SPGRecord +from knext.api.client import SearchClient + +from schema.finance_schema_helper import Finance + + +class IndicatorFuseOp(FuseOp): + bind_to = Finance.Indicator + + def __init__(self): + super().__init__() + from builder.operator.prompts import IndicatorFusePrompt + + self.prompt_op = IndicatorFusePrompt() + self.search_client = SearchClient(Finance.Indicator) + + def generate(self, input_data): + req = { + "input": input_data, + "max_input_len": 1024, + "max_output_len": 1024, + } + url = "http://localhost:9999/generate" + try: + rsp = requests.post(url, req) + rsp.raise_for_status() + return rsp.json() + except Exception as e: + return {"output": ""} + + def link(self, subject_record: SPGRecord) -> SPGRecord: + # Retrieve relevant indicators from KG based on indicator name + recall_records = self.search_client.fuzzy_search(subject_record, "name", size=1) + if len(recall_records) == 0: + return subject_record + return recall_records[0] + + def merge(self, subject_record: SPGRecord, linked_record: SPGRecord) -> SPGRecord: + # Merge the recalled indicators with LLM + data = { + "name": subject_record.get_property("name"), + "candidates": [linked_record.properties["name"]], + } + merge_input = self.prompt_op.build_prompt(data) + merge_result = self.generate(merge_input) + merge_result = self.prompt_op.parse_response(merge_result) + # If the KG already contains `subject_record`, return the existing record + # (you can also update the properties of existing record as well), + # otherwise return `subject_record` + + if merge_result is not None: + tmp = merge_result[0] + is_a_relation = subject_record.get_relation("isA", Finance.Indicator) + if is_a_relation is not None: + tmp.upsert_relation("isA", Finance.Indicator, is_a_relation) + return tmp + else: + return subject_record diff --git a/python/knext/knext/examples/finance/builder/operator/indicator_link.py b/python/knext/knext/examples/finance/builder/operator/indicator_link.py new file mode 100644 index 00000000..78d0b0f5 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/operator/indicator_link.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +import requests +from typing import List +from knext.api.operator import LinkOp +from knext.api.record import SPGRecord +from knext.api.client import SearchClient + +from schema.finance_schema_helper import Finance + + +class IndicatorLinkOp(LinkOp): + bind_to = Finance.Indicator + + def __init__(self): + super().__init__() + from builder.operator.prompts import IndicatorLinkPrompt + + self.prompt_op = IndicatorLinkPrompt() + self.search_client = SearchClient(self.bind_to) + + def generate(self, input_data): + req = { + "input": input_data, + "max_input_len": 1024, + "max_output_len": 1024, + } + url = "http://localhost:9999/generate" + try: + rsp = requests.post(url, req) + rsp.raise_for_status() + return rsp.json() + except Exception as e: + return {"output": ""} + + def invoke(self, property: str, subject_record: SPGRecord) -> List[SPGRecord]: + # Retrieve relevant indicators from KG based on indicator name + name = property + recall_records = self.search_client.fuzzy_search_by_property(property, name) + # Reranking the realled records with LLM to get final linking result + data = { + "input": name, + "candidates": [x.properties["name"] for x in recall_records], + } + link_input = self.prompt_op.build_prompt(data) + link_result = self.generate(link_input) + return self.prompt_op.parse_response(link_result) diff --git a/python/knext/knext/examples/finance/builder/operator/indicator_predict.py b/python/knext/knext/examples/finance/builder/operator/indicator_predict.py new file mode 100644 index 00000000..3abb93a3 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/operator/indicator_predict.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +import requests +from typing import List +from knext.api.operator import PredictOp +from knext.api.record import SPGRecord +from knext.api.client import SearchClient + +from schema.finance_schema_helper import Finance + + +class IndicatorPredictOp(PredictOp): + bind_to = (Finance.Indicator, "isA", Finance.Indicator) + + def __init__(self): + super().__init__() + from builder.operator.prompts import IndicatorPredictPrompt + + self.prompt_op = IndicatorPredictPrompt() + self.search_client = SearchClient(Finance.Indicator) + + def generate(self, input_data): + # Request LLM to get hypernym predictions + req = { + "input": input_data, + "max_input_len": 1024, + "max_output_len": 1024, + } + url = "http://localhost:9999/generate" + try: + rsp = requests.post(url, req) + rsp.raise_for_status() + return rsp.json() + except Exception as e: + return {"output": ""} + + def _recall(self, indicator): + recall_records = self.search_client.fuzzy_search(indicator, "name", size=1) + if len(recall_records) == 0: + return None + else: + record = SPGRecord( + Finance.Indicator, + ) + record.upsert_properties( + { + "id": recall_records[0].properties["name"], + "name": recall_records[0].properties["name"], + } + ) + + return record + + def invoke(self, subject_record: SPGRecord) -> List[SPGRecord]: + # Predict the hypernym indicators with LLM based on the indicator name. For example: + # 一般公共预算收入-税收收入-增值税-土地增值税 + name = subject_record.get_property("name") + data = {"name": name} + predict_input = self.prompt_op.build_prompt(data) + predict_result = self.generate(predict_input) + predict_result = self.prompt_op.parse_response(predict_result) + print(f"predict_result = {predict_result}") + output = [] + if len(predict_result) == 0: + return output + for item in predict_result: + recalled_record = self._recall(item) + print(item, recalled_record) + if recalled_record is not None: + recalled_record.upsert_relation( + "isA", Finance.Indicator, subject_record.get_property("id") + ) + output.append(recalled_record) + return output diff --git a/python/knext/knext/examples/finance/builder/operator/predict/indicator_predict_op.py b/python/knext/knext/examples/finance/builder/operator/predict/indicator_predict_op.py deleted file mode 100644 index 813efb7b..00000000 --- a/python/knext/knext/examples/finance/builder/operator/predict/indicator_predict_op.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -from typing import List - -from knext.client.search import SearchClient -from knext.operator.op import PredictOp -from knext.operator.spg_record import SPGRecord - -from schema.finance_schema_helper import Finance - - -class IndicatorPredict(PredictOp): - - bind_to = (Finance.State, Finance.State.derivedFrom, Finance.Indicator) - - def __init__(self): - super().__init__() - self.search_client = SearchClient(Finance.Indicator) - - def invoke(self, subject_record: SPGRecord) -> List[SPGRecord]: - recall_records = self.search_client.fuzzy_search(subject_record, "name") - if recall_records: - return [recall_records[0]] - return [] diff --git a/python/knext/knext/examples/finance/builder/operator/prompt/indicator_extraction.py b/python/knext/knext/examples/finance/builder/operator/prompt/indicator_extraction.py deleted file mode 100644 index 86be5a21..00000000 --- a/python/knext/knext/examples/finance/builder/operator/prompt/indicator_extraction.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -import json -from typing import Dict, List - -from knext.operator.op import PromptOp -from knext.operator.spg_record import SPGRecord - -from schema.finance_schema_helper import Finance - - -class IndicatorNER(PromptOp): - template = """ -请从以下文本中提取所有指标并给出指标类型,以json格式输出 -##### -输出格式: -[{{"XXX": ["XXX", "XXX"]}}, {{"XXX": ["XXX", "XXX"]}}] -##### -文本: -${input} -""" - - def build_prompt(self, variables: Dict[str, str]): - template = self.template.replace("${input}", variables.get("input")) - return template - - def parse_response(self, response: str) -> List[SPGRecord]: - output_list = json.loads(response.replace("'", '"')) - ner_result = [] - for output in output_list: - for category, indicator_list in output.items(): - for indicator in indicator_list: - ner_result.append( - SPGRecord(Finance.Indicator) - .upsert_property("id", indicator) - .upsert_property("name", indicator) - ) - return ner_result diff --git a/python/knext/knext/examples/finance/builder/operator/prompt/logic_relation_extraction.py b/python/knext/knext/examples/finance/builder/operator/prompt/logic_relation_extraction.py deleted file mode 100644 index b299af0a..00000000 --- a/python/knext/knext/examples/finance/builder/operator/prompt/logic_relation_extraction.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -import json -from typing import Dict, List - -from knext.operator.op import PromptOp -from knext.operator.spg_record import SPGRecord - -from schema.finance_schema_helper import Finance - - -class IndicatorLogic(PromptOp): - template = """ -请根据给定文本和文本中的指标及其指标关系,梳理逻辑链,以json格式输出 -##### -输出格式: -[{"subject": "XXX", "predicate": "顺承", "object": ["XXX", "XXX"]}, {"subject": "XXX", "predicate": "顺承", "object": ["XXX", "XXX"]}] -文本: -${input} -指标: -${ner} -指标关系: -${rel} -""" - - def build_prompt(self, variables: Dict[str, str]): - template = ( - self.template.replace("${input}", variables.get("input")) - .replace("${ner}", variables.get("IndicatorNER")) - .replace("${rel}", variables.get("IndicatorREL")) - ) - - return template - - def parse_response(self, response: str) -> List[SPGRecord]: - output_list = json.loads(response) - - logic_result = [] - for output in output_list: - result = SPGRecord(Finance.State) - for k, v in output.items(): - if k == "subject": - result.upsert_property("id", v).upsert_property("name", v) - elif k == "object": - result.upsert_relation("causes", Finance.State, ",".join(v)) - logic_result.append(result) - return logic_result diff --git a/python/knext/knext/examples/finance/builder/operator/prompt/relation_extraction.py b/python/knext/knext/examples/finance/builder/operator/prompt/relation_extraction.py deleted file mode 100644 index 5f64c700..00000000 --- a/python/knext/knext/examples/finance/builder/operator/prompt/relation_extraction.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2023 Ant Group CO., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. - -from typing import Dict - -from knext.api.operator import PromptOp - - -class IndicatorREL(PromptOp): - template = """ -请根据给定文本和文本中的指标,理解这些指标之间的关联关系,以json格式输出 -##### -输出格式: -[{{"subject": "XXX", "predicate": "包含", "object": ["XXX", "XXX"]}}, {{"subject": "XXX", "predicate": "包含", "object": ["XXX", "XXX"]}}] -文本: -${input} -指标: -${ner} -""" - - def build_prompt(self, variables: Dict[str, str]) -> str: - template = self.template.replace("${input}", variables.get("input")).replace( - "${ner}", variables.get("IndicatorNER") - ) - return template diff --git a/python/knext/knext/examples/finance/builder/operator/prompts.py b/python/knext/knext/examples/finance/builder/operator/prompts.py new file mode 100644 index 00000000..01cd1d08 --- /dev/null +++ b/python/knext/knext/examples/finance/builder/operator/prompts.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Ant Group CO., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. +import numpy as np +from typing import List, Dict +from knext.api.operator import PromptOp +from knext.api.record import SPGRecord + + +def get_mock_spg_records(size: int = 10): + mock_data = [ + "财政收入质量", + "财政自给能力", + "土地出让收入", + "一般公共预算收入", + "留抵退税", + "税收收入", + "税收收入/一般公共预算收入", + "一般公共预算支出", + "财政自给率", + "政府性基金收入", + "转移性收入", + "综合财力", + ] + output = [] + np.random.shuffle(mock_data) + for data in mock_data[:size]: + tmp = SPGRecord("Finance.Indicator") + tmp.upsert_properties( + { + "id": data, + "name": data, + } + ) + output.append(tmp) + return output + + +class IndicatorNERPrompt(PromptOp): + template = """ +请从以下文本中提取所有指标并给出指标类型,以json格式输出 +##### +输出格式: +[{{"XXX": ["XXX", "XXX"]}}, {{"XXX": ["XXX", "XXX"]}}] +##### +文本: +{input} +""" + + def build_prompt(self, variables: Dict[str, str]): + return self.template.format(input=variables.get("input", "")) + + def parse_response(self, response: str) -> List[SPGRecord]: + return get_mock_spg_records(5) + + +class IndicatorLinkPrompt(PromptOp): + template = """ +判断在指标列表{candidates}中,有无与指标{input}相同的指标名称,如果有,则返回相同指标名称, +没有则返回空字符串。 +##### +输出格式: +{{"same_indicator": "XXX"}} +##### +文本: +{input} +""" + + def build_prompt(self, variables: Dict[str, str]): + return self.template.format( + input=variables.get("input", ""), + candidates=variables.get("candidates", [""]), + ) + + def parse_response(self, response: str) -> List[SPGRecord]: + return get_mock_spg_records(1) + + +class IndicatorFusePrompt(PromptOp): + template = """ +判断在指标列表{candidates}中,有无与指标{input}相同的指标名称,如果有,则返回相同指标名称, +没有则返回空字符串。 +##### +输出格式: +{{"same_indicator": "XXX"}} +##### +文本: +{input} +""" + + def build_prompt(self, variables: Dict[str, str]): + return self.template.format( + input=variables.get("input", ""), + candidates=variables.get("candidates", [""]), + ) + + def parse_response(self, response: str) -> List[SPGRecord]: + return get_mock_spg_records(3) + + +class IndicatorPredictPrompt(PromptOp): + template = """ +请在你所知的指标关系中,寻找指标{input}的最多5个上位指标名称,如果有,则返回相同指标名称, +没有则返回空。 +##### +输出格式: +{{"hypernym": ["XXX"]}} +""" + + def build_prompt(self, variables: Dict[str, str]): + return self.template.format( + input=variables.get("input", ""), + ) + + def parse_response(self, response: str) -> List[SPGRecord]: + return get_mock_spg_records(5) diff --git a/python/knext/knext/examples/finance/schema/finance.schema b/python/knext/knext/examples/finance/schema/finance.schema index 18f9d494..5ddff0b9 100644 --- a/python/knext/knext/examples/finance/schema/finance.schema +++ b/python/knext/knext/examples/finance/schema/finance.schema @@ -1,52 +1,4 @@ namespace Finance -STD.USCCode(统一社会信用代码): StandardType - desc: 统一社会信用代码由五个部分组成,分别是注册登记管理部门代码(1-2位)、机构类别代码(3位)、登记管理机关行政区划码(4-6位)、组织机构代码(9位)和校验码(最后一位) - spreadable: True - regular: ^([0-9A-HJ-NPQRTUWXY]{2}\\d{6}[0-9A-HJ-NPQRTUWXY]{10}|[1-9]\\d{14})$ - -AdministrativeArea(行政区划): ConceptType - desc: 为四级结构,由上到下分别是国家、省、市、区。例如,中国-浙江省-杭州市-西湖区 - hypernymPredicate: locateAt - -AreaRiskEvent(区域风险事件): EventType - properties: - subject(主体): AdministrativeArea - object(客体): Text - -Company(公司): EntityType - desc: 公司是一种法律实体,它是一个经济组织,可以在商业活动中进行交易、持有财产和进行法律诉讼 - properties: - orgCertNo(企业证件号码): STD.USCCode - businessScope(经营范围): Text - regArea(注册地区): AdministrativeArea - establishDate(成立日期): STD.Date - desc: 数字格式,4位表达年2位表达月2位表达日的一共8位数字,比如2023年9月1日为20230901 - legalPerson(法人): Text - desc: 是指在法律上具有权利和义务的实体,可以独立于其成员和创办者存在并行使权利和承担义务 - regCapital(注册资本): Text - desc: 是指公司在成立时向政府注册部门报告的实收股本或股东认缴的出资额 - -CompanyEvent(公司事件): EventType - properties: - subject(主体): Company - object(客体): Text - location(发生区域): AdministrativeArea - happenedTime(发生时间): STD.Date - - -# 财政-土地出让收入、财政-税收收入 -# -Indicator(区域经济指标): ConceptType +Indicator(指标概念): ConceptType hypernymPredicate: isA - -# 状态有:土地出让收入大幅下降、综合财力明显下滑 -# 因果关系:土地出让收入大幅下降 -导致-> 综合财力明显下滑 -# -State(指标状态): ConceptType - desc: 区域经济指标的状态 - relations: - CAU#causes(导致): State - desc: 区域经济指标的状态之间的因果关系 - INC#derivedFrom(源自于): Indicator - desc: 状态源自于哪些区域经济指标 \ No newline at end of file diff --git a/python/knext/knext/examples/finance/schema/finance_schema_helper.py b/python/knext/knext/examples/finance/schema/finance_schema_helper.py index 70451310..58cb172e 100644 --- a/python/knext/knext/examples/finance/schema/finance_schema_helper.py +++ b/python/knext/knext/examples/finance/schema/finance_schema_helper.py @@ -19,70 +19,14 @@ from knext.common.schema_helper import SPGTypeHelper, PropertyHelper, RelationHe class Finance: - class AdministrativeArea(SPGTypeHelper): - - name = PropertyHelper("name") - id = PropertyHelper("id") - stdId = PropertyHelper("stdId") - description = PropertyHelper("description") - alias = PropertyHelper("alias") - - class AreaRiskEvent(SPGTypeHelper): - - name = PropertyHelper("name") - id = PropertyHelper("id") - subject = PropertyHelper("subject") - eventTime = PropertyHelper("eventTime") - description = PropertyHelper("description") - object = PropertyHelper("object") - - class Company(SPGTypeHelper): - - regArea = PropertyHelper("regArea") - name = PropertyHelper("name") - businessScope = PropertyHelper("businessScope") - id = PropertyHelper("id") - regCapital = PropertyHelper("regCapital") - description = PropertyHelper("description") - legalPerson = PropertyHelper("legalPerson") - orgCertNo = PropertyHelper("orgCertNo") - establishDate = PropertyHelper("establishDate") - - class CompanyEvent(SPGTypeHelper): - - name = PropertyHelper("name") - location = PropertyHelper("location") - id = PropertyHelper("id") - happenedTime = PropertyHelper("happenedTime") - subject = PropertyHelper("subject") - eventTime = PropertyHelper("eventTime") - description = PropertyHelper("description") - object = PropertyHelper("object") - class Indicator(SPGTypeHelper): - name = PropertyHelper("name") - id = PropertyHelper("id") stdId = PropertyHelper("stdId") - description = PropertyHelper("description") - alias = PropertyHelper("alias") - - class State(SPGTypeHelper): - - name = PropertyHelper("name") id = PropertyHelper("id") - stdId = PropertyHelper("stdId") - description = PropertyHelper("description") alias = PropertyHelper("alias") + description = PropertyHelper("description") + name = PropertyHelper("name") - derivedFrom = RelationHelper("derivedFrom") - causes = RelationHelper("causes") - - AdministrativeArea = AdministrativeArea("Finance.AdministrativeArea") - AreaRiskEvent = AreaRiskEvent("Finance.AreaRiskEvent") - Company = Company("Finance.Company") - CompanyEvent = CompanyEvent("Finance.CompanyEvent") Indicator = Indicator("Finance.Indicator") - State = State("Finance.State") pass