diff --git a/python/knext/knext/examples/financial/builder/operator/IndicatorFuse.py b/python/knext/knext/examples/financial/builder/operator/IndicatorFuse.py index 780f1877..788ec5c0 100644 --- a/python/knext/knext/examples/financial/builder/operator/IndicatorFuse.py +++ b/python/knext/knext/examples/financial/builder/operator/IndicatorFuse.py @@ -14,24 +14,18 @@ class IndicatorFuse(FuseOp): self.search_client = SearchClient("Financial.Indicator") def invoke(self, subject_records: List[SPGRecord]) -> List[SPGRecord]: - print("##########IndicatorFuse###########") + print("####################IndicatorFuse#####################") print("IndicatorFuse(Input): ") - print(subject_records) + print("----------------------") + [print(r) for r in subject_records] fused_records = [] for record in subject_records: query = {"match": {"name": record.get_property("name", "")}} recall_records = self.search_client.search(query, start=0, size=10) if recall_records is not None and len(recall_records) > 0: - rerank_record = SPGRecord( - "Financial.Indicator", - { - "id": recall_records[0].doc_id, - "name": recall_records[0].properties.get("name", ""), - }, - ) - rerank_record.update_property("name", record.get_property("name")) - fused_records.append(rerank_record) + continue + fused_records.append(record) print("IndicatorFuse(Output): ") - print(fused_records) - print("##########IndicatorFuse###########") + print("----------------------") + [print(r) for r in fused_records] return fused_records diff --git a/python/knext/knext/examples/financial/builder/operator/IndicatorLOGIC.py b/python/knext/knext/examples/financial/builder/operator/IndicatorLOGIC.py index a9438d4f..8f2c8c82 100644 --- a/python/knext/knext/examples/financial/builder/operator/IndicatorLOGIC.py +++ b/python/knext/knext/examples/financial/builder/operator/IndicatorLOGIC.py @@ -20,35 +20,18 @@ ${rel} """ def build_prompt(self, variables: Dict[str, str]): - """ - record: { - "input": "济南市财政收入质量及自给能力均较好,但土地出让收入大幅下降致综合财力明显下滑。济南市财政收入质量及自给能力均较好,但土地出让收入大幅下降致综合 - 财力明显下滑。2022年济南市一般公共预算收入1,000.21亿元,扣除留 抵退税因素后同比增长8%,规模在山东省下辖地市中排名第2位;其中税收收入690.31亿元,税收占比69.02%;一般公共 预算支出1,260.23亿元,财政自给率79.37%。政 - 府性基金收入547.29亿元,同比大幅下降48.38%,主要系土地出让收入 同比由966.74亿元降至453.74亿元;转移性收入285.78亿元(上年同期为233.11亿元);综合财力约1,833.28亿元(上年 同期为2,301.02亿元)。" - "ner": "[{'财政': ['财政收入质量', '财政自给能力', '土地出让收入', '一般公共预算收入', '留抵退税', '税收收入', '税收收入/一般公共预算收入', '一般公共预算支出', '财政自给率', '政府性基金收入', '转移性收入', '综合财力']}]", - "rel": "[{'subject': '一般公共预算收入', 'predicate': '包含', 'object': ['税收收入']}, {'subject': '税收收入', 'predicate': '包含', 'object': ['留抵退税']}, {'subject': '政府性基金收入', 'predicate': '包含', 'object': ['土地出让收入', '转移性收入']}, {'subject': '综合财力', 'predicate': '包含', 'object': ['一般公共预算收入', '政府性基金收入']}]", - "id": "财政", - "name": "财政", - "hasA": "财政收入质量,财政自给能力,土地出让收入....." - } - """ - return ( + template = ( self.template.replace("${input}", variables.get("input")) - .replace("${ner}", variables.get("ner")) - .replace("${rel}", variables.get("rel")) + .replace("${ner}", variables.get("IndicatorNER")) + .replace("${rel}", variables.get("IndicatorREL")) ) + print("####################IndicatorLOGIC(状态逻辑抽取)#####################") + print("LLM(Input): ") + print("----------------------") + print(template) + return template def parse_response(self, response: str) -> List[SPGRecord]: - """ - response: "[{\"subject\": \"土地出让收入大幅下降\", \"predicate\": \"顺承\", \"object\": [\"综合财力明显下滑\"]}]" - """ - response = ( - '[{"subject": "土地出让收入大幅下降", "predicate": "顺承", "object": ["综合财力明显下滑"]}]' - ) - print("##########IndicatorLOGIC###########") - print("IndicatorLOGIC(Input): ") - print(response) - output_list = json.loads(response) logic_result = [] @@ -61,7 +44,4 @@ ${rel} elif k == "object": properties["causeOf"] = ",".join(v) logic_result.append(SPGRecord("Financial.State", properties=properties)) - print("IndicatorLOGIC(Output): ") - print(logic_result) - print("##########IndicatorLOGIC###########") return logic_result diff --git a/python/knext/knext/examples/financial/builder/operator/IndicatorNER.py b/python/knext/knext/examples/financial/builder/operator/IndicatorNER.py index 6d8ecfe1..4bc72dc4 100644 --- a/python/knext/knext/examples/financial/builder/operator/IndicatorNER.py +++ b/python/knext/knext/examples/financial/builder/operator/IndicatorNER.py @@ -17,22 +17,18 @@ ${input} """ def build_prompt(self, variables: Dict[str, str]): - return self.template.replace("${input}", variables.get("input")) + template = self.template.replace("${input}", variables.get("input")) + print("####################IndicatorNER(指标抽取)#####################") + print("LLM(Input): ") + print("----------------------") + print(template) + return template def parse_response(self, response: str) -> List[SPGRecord]: - response = "[{'财政': ['财政收入质量', '财政自给能力', '土地出让收入', '一般公共预算收入', '留抵退税', '税收收入', '税收收入/一般公共预算收入', '一般公共预算支出', '财政自给率', '政府性基金收入', '转移性收入', '综合财力']}]" - - print("##########IndicatorNER###########") - print("IndicatorNER(Input): ") - print(response) - output_list = json.loads(response.replace("'", '"')) ner_result = [] - # IF hasA for output in output_list: - # {'财政': ['财政收入....} for category, indicator_list in output.items(): - # '财政', ['财政收入....] for indicator in indicator_list: ner_result.append( SPGRecord( @@ -40,17 +36,4 @@ ${input} properties={"id": indicator, "name": indicator}, ) ) - print("IndicatorNER(Output): ") - print(ner_result) - print("##########IndicatorNER###########") return ner_result - - def build_next_variables( - self, variables: Dict[str, str], response: str - ) -> List[Dict[str, str]]: - """ - response: "[{'subject': '一般公共预算收入', 'predicate': '包含', 'object': ['税收收入']}, {'subject': '税收收入', 'predicate': '包含', 'object': ['留抵退税']}, {'subject': '政府性基金收入', 'predicate': '包含', 'object': ['土地出让收入', '转移性收入']}, {'subject': '综合财力', 'predicate': '包含', 'object': ['一般公共预算收入', '政府性基金收入']}]" - """ - response = "[{'财政': ['财政收入质量', '财政自给能力', '土地出让收入', '一般公共预算收入', '留抵退税', '税收收入', '税收收入/一般公共预算收入', '一般公共预算支出', '财政自给率', '政府性基金收入', '转移性收入', '综合财力']}]" - - return [{"input": variables["input"], "ner": response}] diff --git a/python/knext/knext/examples/financial/builder/operator/IndicatorPredict.py b/python/knext/knext/examples/financial/builder/operator/IndicatorPredict.py index c67bd8b7..9cbd48ea 100644 --- a/python/knext/knext/examples/financial/builder/operator/IndicatorPredict.py +++ b/python/knext/knext/examples/financial/builder/operator/IndicatorPredict.py @@ -14,8 +14,9 @@ class IndicatorPredict(PredictOp): self.search_client = SearchClient("Financial.Indicator") def invoke(self, subject_record: SPGRecord) -> List[SPGRecord]: - print("##########IndicatorPredict###########") + print("####################IndicatorPredict(状态关联指标预测)#####################") print("IndicatorPredict(Input): ") + print("----------------------") print(subject_record) predicted_records = [] query = {"match": {"name": subject_record.get_property("name", "")}} @@ -30,6 +31,6 @@ class IndicatorPredict(PredictOp): ) predicted_records.append(rerank_record) print("IndicatorPredict(Output): ") - print(predicted_records) - print("##########IndicatorPredict###########") + print("----------------------") + [print(r) for r in predicted_records] return predicted_records diff --git a/python/knext/knext/examples/financial/builder/operator/IndicatorREL.py b/python/knext/knext/examples/financial/builder/operator/IndicatorREL.py index f80648df..6c90fc47 100644 --- a/python/knext/knext/examples/financial/builder/operator/IndicatorREL.py +++ b/python/knext/knext/examples/financial/builder/operator/IndicatorREL.py @@ -16,26 +16,11 @@ ${ner} """ def build_prompt(self, variables: Dict[str, str]) -> str: - """ - record: { - "input": "济南市财政收入质量及自给能力均较好,但土地出让收入大幅下降致综合财力明显下滑。济南市财政收入质量及自给能力均较好,但土地出让收入大幅下降致综合 - 财力明显下滑。2022年济南市一般公共预算收入1,000.21亿元,扣除留 抵退税因素后同比增长8%,规模在山东省下辖地市中排名第2位;其中税收收入690.31亿元,税收占比69.02%;一般公共 预算支出1,260.23亿元,财政自给率79.37%。政 - 府性基金收入547.29亿元,同比大幅下降48.38%,主要系土地出让收入 同比由966.74亿元降至453.74亿元;转移性收入285.78亿元(上年同期为233.11亿元);综合财力约1,833.28亿元(上年 同期为2,301.02亿元)。" - "ner": "[{'财政': ['财政收入质量', '财政自给能力', '土地出让收入', '一般公共预算收入', '留抵退税', '税收收入', '税收收入/一般公共预算收入', '一般公共预算支出', '财政自给率', '政府性基金收入', '转移性收入', '综合财力']}]", - "id": "财政", - "name": "财政", - "hasA": "财政收入质量,财政自给能力,土地出让收入....." - } - """ - return self.template.replace("${input}", variables.get("input")).replace( - "${ner}", variables.get("ner") + template = self.template.replace("${input}", variables.get("input")).replace( + "${ner}", variables.get("IndicatorNER") ) - - def build_next_variables( - self, variables: Dict[str, str], response: str - ) -> List[Dict[str, str]]: - """ - response: "[{'subject': '一般公共预算收入', 'predicate': '包含', 'object': ['税收收入']}, {'subject': '税收收入', 'predicate': '包含', 'object': ['留抵退税']}, {'subject': '政府性基金收入', 'predicate': '包含', 'object': ['土地出让收入', '转移性收入']}, {'subject': '综合财力', 'predicate': '包含', 'object': ['一般公共预算收入', '政府性基金收入']}]" - """ - response = "[{'subject': '一般公共预算收入', 'predicate': '包含', 'object': ['税收收入']}, {'subject': '税收收入', 'predicate': '包含', 'object': ['留抵退税']}, {'subject': '政府性基金收入', 'predicate': '包含', 'object': ['土地出让收入', '转移性收入']}, {'subject': '综合财力', 'predicate': '包含', 'object': ['一般公共预算收入', '政府性基金收入']}]" - return [{"input": variables["input"], "ner": variables["ner"], "rel": response}] + print("####################IndicatorREL(指标关系抽取)#####################") + print("LLM(Input): ") + print("----------------------") + print(template) + return template diff --git a/python/knext/knext/examples/financial/builder/operator/StateFuse.py b/python/knext/knext/examples/financial/builder/operator/StateFuse.py index db9dbd88..bede4121 100644 --- a/python/knext/knext/examples/financial/builder/operator/StateFuse.py +++ b/python/knext/knext/examples/financial/builder/operator/StateFuse.py @@ -14,9 +14,10 @@ class StateFuse(FuseOp): self.search_client = SearchClient("Financial.State") def invoke(self, subject_records: List[SPGRecord]) -> List[SPGRecord]: - print("##########StateFuse###########") + print("####################StateFuse(状态融合)#####################") print("StateFuse(Input): ") - print(subject_records) + print("----------------------") + [print(r) for r in subject_records] fused_records = [] for record in subject_records: query = {"match": {"name": record.get_property("name", "")}} @@ -32,6 +33,6 @@ class StateFuse(FuseOp): rerank_record.update_property("name", record.get_property("name")) fused_records.append(rerank_record) print("StateFuse(Output): ") - print(fused_records) - print("##########StateFuse###########") + print("----------------------") + [print(r) for r in fused_records] return fused_records diff --git a/python/knext/knext/operator/builtin/auto_prompt.py b/python/knext/knext/operator/builtin/auto_prompt.py index d9a704eb..1b20f0e5 100644 --- a/python/knext/knext/operator/builtin/auto_prompt.py +++ b/python/knext/knext/operator/builtin/auto_prompt.py @@ -99,6 +99,7 @@ input:${input} def _render(self, spg_type: BaseSpgType, property_names: List[str]): spos = [] + repeat_desc = [] for property_name in property_names: if property_name in ["id", "name", "description"]: continue @@ -108,11 +109,11 @@ input:${input} if object_type: object_desc = object_type.desc spos.append( - f"{spg_type.name_zh}({spg_type.desc or spg_type.name_zh})" - f"-{prop.name_zh}({prop.desc or prop.name_zh})" + f"{spg_type.name_zh}" + (f"({spg_type.desc or spg_type.name_zh})" if spg_type.name_zh not in repeat_desc else "") + + f"-{prop.name_zh}({prop.desc or prop.name_zh})" f"-{prop.object_type_name_zh}({object_desc or prop.object_type_name_zh})" ) - schema_text = "[" + ",".join(spos) + "]" + schema_text = "\n[" + ",\n".join(spos) + "]\n" self.template = self.template.replace("${schema}", schema_text) diff --git a/python/knext/knext/operator/builtin/online_runner.py b/python/knext/knext/operator/builtin/online_runner.py index 0f8bd734..3bfd6e58 100644 --- a/python/knext/knext/operator/builtin/online_runner.py +++ b/python/knext/knext/operator/builtin/online_runner.py @@ -52,8 +52,16 @@ class _BuiltInOnlineExtractor(ExtractOp): while retry_times < self.max_retry_times: try: query = op.build_prompt(input_param) - # response = self.model.remote_inference(query) - response = "test" + op_name = op.__class__.__name__ + if op_name == "IndicatorNER": + response = "[{'财政': ['财政收入质量', '财政自给能力', '土地出让收入', '一般公共预算收入', '留抵退税', '税收收入', '税收收入/一般公共预算收入', '一般公共预算支出', '财政自给率', '政府性基金收入', '转移性收入', '综合财力']}]" + elif op_name == "IndicatorREL": + response = "[{'subject': '一般公共预算收入', 'predicate': '包含', 'object': ['税收收入']}, {'subject': '税收收入', 'predicate': '包含', 'object': ['留抵退税']}, {'subject': '政府性基金收入', 'predicate': '包含', 'object': ['土地出让收入', '转移性收入']}, {'subject': '综合财力', 'predicate': '包含', 'object': ['一般公共预算收入', '政府性基金收入']}]" + elif op_name == "IndicatorLOGIC": + response = '[{"subject": "土地出让收入大幅下降", "predicate": "顺承", "object": ["综合财力明显下滑"]}]' + else: + print(query) + response = self.model.remote_inference(query) collector.extend(op.parse_response(response)) next_params.extend( op.build_next_variables(input_param, response) @@ -61,6 +69,9 @@ class _BuiltInOnlineExtractor(ExtractOp): break except Exception as e: retry_times += 1 + print(e) raise e input_params = next_params + print("####################抽取结果#####################") + [print(c) for c in collector] return collector diff --git a/python/knext/knext/operator/op.py b/python/knext/knext/operator/op.py index 6b63c7cf..d8f2a447 100644 --- a/python/knext/knext/operator/op.py +++ b/python/knext/knext/operator/op.py @@ -127,7 +127,11 @@ class PromptOp(BaseOp, ABC): def build_next_variables( self, variables: Dict[str, str], response: str ) -> List[Dict[str, str]]: - return [] + variables.update({f"{self.__class__.__name__}": response}) + print("LLM(Output): ") + print("----------------------") + print([variables]) + return [variables] def invoke(self, *args): pass