128 lines
3.9 KiB
Python
Raw Normal View History

2023-12-18 13:46:44 +08:00
import json
2023-12-11 10:44:37 +08:00
from typing import Union, Dict, List, Sequence
2023-12-06 17:26:39 +08:00
2023-12-18 13:46:44 +08:00
from knext.client.operator import OperatorClient
2023-12-11 10:44:37 +08:00
from knext.common.runnable import Input, Output
2023-12-08 11:25:26 +08:00
from knext.component.builder.base import SPGExtractor
from knext.operator.spg_record import SPGRecord
2023-12-06 17:26:39 +08:00
from knext import rest
2023-12-08 11:25:26 +08:00
from knext.operator.op import PromptOp, ExtractOp
2023-12-06 17:26:39 +08:00
2023-12-11 23:13:19 +08:00
# try:
2023-12-22 19:49:39 +08:00
from nn4k.invoker.base import NNInvoker # noqa: F403
2023-12-13 15:22:41 +08:00
2023-12-11 23:13:19 +08:00
# except ImportError:
# pass
2023-12-11 10:44:37 +08:00
2023-12-06 17:26:39 +08:00
2023-12-08 11:25:26 +08:00
class LLMBasedExtractor(SPGExtractor):
    """Extractor component driven by an NN invoker and a list of prompt operators.

    The component is a declarative description only: `invoke` and `submit`
    both raise `NotImplementedError`. Its useful entry point is `to_rest`,
    which packs the invoker configuration and the prompt operators into a
    built-in online extract operator and wraps that in a `rest.Node`.
    """

    # Invoker whose `_nn_config` is JSON-serialised into the "model_config"
    # parameter by `to_rest`.
    llm: NNInvoker

    # Prompt operators; each one is converted with `to_rest()` and serialised
    # into the "prompt_config" parameter by `to_rest`.
    prompt_ops: List[PromptOp]

    @property
    def input_types(self) -> Input:
        """Typing construct describing the accepted input: `Dict[str, str]`."""
        return Dict[str, str]

    @property
    def output_types(self) -> Output:
        """Typing construct describing the output: a str-to-str dict or an `SPGRecord`."""
        return Union[Dict[str, str], SPGRecord]

    @property
    def input_keys(self):
        """Always `None`."""
        return None

    @property
    def output_keys(self):
        """Return `self.output_fields`."""
        # NOTE(review): `output_fields` is not declared on this class; it is
        # presumably provided by a base class or set externally -- confirm.
        return self.output_fields

    def invoke(self, input: Input) -> Sequence[Output]:
        """Not supported for this component.

        Raises:
            NotImplementedError: always.
        """
        message = f"{self.__class__.__name__} does not support being invoked separately."
        raise NotImplementedError(message)

    def submit(self):
        """Not supported for this component.

        Raises:
            NotImplementedError: always.
        """
        message = f"{self.__class__.__name__} does not support being submitted separately."
        raise NotImplementedError(message)

    def to_rest(self):
        """Transforms `LLMBasedExtractor` to a `rest.Node` holding a `UserDefinedExtractNodeConfig`.

        Returns:
            A `rest.Node` built from `super().to_dict()` plus a node config
            whose operator config comes from a `_BuiltInOnlineExtractor`
            parameterised with "model_config" and "prompt_config" JSON strings.
        """
        model_config = json.dumps(self.llm._nn_config)

        # The REST api client is used only for its model-to-plain-data
        # sanitiser, so that the prompt operators can be dumped as JSON.
        serializer = OperatorClient()._rest_client.api_client
        prompt_payload = []
        for prompt_op in self.prompt_ops:
            prompt_payload.append(
                serializer.sanitize_for_serialization(prompt_op.to_rest())
            )
        # ensure_ascii=False keeps non-ASCII prompt text unescaped.
        prompt_config = json.dumps(prompt_payload, ensure_ascii=False)

        # Function-scope import kept from the original (presumably to avoid
        # an import cycle -- confirm before hoisting to module level).
        from knext.operator.builtin.online_runner import _BuiltInOnlineExtractor

        builtin_op = _BuiltInOnlineExtractor(
            {"model_config": model_config, "prompt_config": prompt_config}
        )
        node_config = rest.UserDefinedExtractNodeConfig(
            operator_config=builtin_op.to_rest()
        )
        return rest.Node(**super().to_dict(), node_config=node_config)

    @classmethod
    def from_rest(cls, node: rest.Node):
        """Placeholder: no reconstruction is implemented; returns `None`."""
        return None
2023-12-08 11:25:26 +08:00
class UserDefinedExtractor(SPGExtractor):
    """A process component that transforms unstructured data into structured data
    through a user-chosen extract operator.

    The component cannot be invoked or submitted on its own (both raise
    `NotImplementedError`); it is converted to a REST node via `to_rest`.

    Examples:
        extract = UserDefinedExtractor(
            output_fields=["id", 'riskMark', 'useCert']
        ).set_operator("DemoExtractOp")
    """

    # All output column names after knowledge extraction processing.
    output_fields: List[str]

    # Knowledge extract operator of this component; assigned by `set_operator`.
    extract_op: ExtractOp

    @property
    def input_types(self) -> Input:
        """Typing construct describing the accepted input: `Dict[str, str]`."""
        return Dict[str, str]

    @property
    def output_types(self) -> Output:
        """Typing construct describing the output: a str-to-str dict or an `SPGRecord`."""
        return Union[Dict[str, str], SPGRecord]

    @property
    def input_keys(self):
        """Always `None`."""
        return None

    @property
    def output_keys(self):
        """Return the configured `output_fields`."""
        return self.output_fields

    def set_operator(self, op_name: str, params: Dict[str, str] = None):
        """Sets knowledge extract operator to this component.

        Args:
            op_name: Name passed to `ExtractOp.by_name` to look up the operator.
            params: Passed as the single argument when instantiating the
                looked-up operator; defaults to `None`.

        Returns:
            This component, so calls can be chained.
        """
        operator_cls = ExtractOp.by_name(op_name)
        self.extract_op = operator_cls(params)
        return self

    def invoke(self, input: Input) -> Sequence[Output]:
        """Not supported for this component.

        Raises:
            NotImplementedError: always.
        """
        message = f"{self.__class__.__name__} does not support being invoked separately."
        raise NotImplementedError(message)

    def submit(self):
        """Not supported for this component.

        Raises:
            NotImplementedError: always.
        """
        message = f"{self.__class__.__name__} does not support being submitted separately."
        raise NotImplementedError(message)

    def to_rest(self):
        """Transforms `UserDefinedExtractor` to a `rest.Node` holding a `UserDefinedExtractNodeConfig`.

        Returns:
            A `rest.Node` built from `super().to_dict()` plus a node config
            whose operator config is `self.extract_op.to_rest()`.
        """
        node_config = rest.UserDefinedExtractNodeConfig(
            operator_config=self.extract_op.to_rest()
        )
        return rest.Node(**super().to_dict(), node_config=node_config)

    @classmethod
    def from_rest(cls, node: rest.Node):
        """Return a new instance built with no arguments; `node` is not read."""
        return cls()