106 lines
3.0 KiB
Python
Raw Normal View History

2023-12-08 11:25:26 +08:00
from typing import Union, Dict, List
2023-12-06 17:26:39 +08:00
2023-12-08 11:25:26 +08:00
from knext.component.builder.base import SPGExtractor
from knext.operator.spg_record import SPGRecord
from nn4k.invoker.base import NNInvoker
2023-12-06 17:26:39 +08:00
from knext import rest
2023-12-08 11:25:26 +08:00
from knext.component.base import SPGTypeHelper, PropertyHelper
from knext.operator.op import PromptOp, ExtractOp
2023-12-06 17:26:39 +08:00
2023-12-08 11:25:26 +08:00
class LLMBasedExtractor(SPGExtractor):
    """A process component that transforms unstructured data into structured data.

    The component is described by the fields declared below: the output
    column names, an ``NNInvoker`` (``llm``), a list of ``PromptOp`` objects,
    and the target SPG type together with its property names.

    NOTE: ``invoke``, ``from_rest`` and ``submit`` are placeholders; each one
    currently does nothing and returns ``None``.
    """

    # All output column names after knowledge extraction processing.
    output_fields: List[str]
    # Invoker held by this component (presumably the LLM used for extraction;
    # nothing in this class calls it yet).
    llm: NNInvoker
    # Prompt operators attached to this component.
    prompt_ops: List[PromptOp]
    # Target SPG type, given either by name or as an `SPGTypeHelper`.
    spg_type_name: Union[str, SPGTypeHelper]
    # Properties of the target type, given by name or as `PropertyHelper`s.
    property_names: List[Union[str, PropertyHelper]]

    @property
    def input_types(self):
        """Type accepted as input: the typing construct ``Dict[str, str]``."""
        return Dict[str, str]

    @property
    def output_types(self):
        """Type produced as output: the ``SPGRecord`` class."""
        return SPGRecord

    def to_rest(self):
        """Transforms `LLMBasedExtractor` to REST model `ExtractNodeConfig`.

        Returns:
            A ``rest.Node`` built from the parent's ``to_dict()`` entries, with
            the ``ExtractNodeConfig`` attached as ``node_config``.
        """
        # The operator config is always sent empty here; no operator is
        # serialised into the node config.
        node_config = rest.ExtractNodeConfig(
            output_fields=self.output_fields, operator_config={}
        )
        base_kwargs = super().to_dict()
        return rest.Node(node_config=node_config, **base_kwargs)

    def invoke(self, input):
        """Placeholder: ignores ``input`` and returns ``None``."""
        return None

    @classmethod
    def from_rest(cls, node: rest.Node):
        """Placeholder: ignores ``node`` and returns ``None``."""
        return None

    def submit(self):
        """Placeholder: does nothing and returns ``None``."""
        return None
2023-12-08 11:25:26 +08:00
class UserDefinedExtractor(SPGExtractor):
    """A process component that transforms unstructured data into structured data.

    Examples:
        extract = UserDefinedExtractor(
            output_fields=["id", 'riskMark', 'useCert']
        ).set_operator("DemoExtractOp")
    """

    # All output column names after knowledge extraction processing.
    output_fields: List[str]
    # Knowledge extract operator of this component (assigned by `set_operator`).
    extract_op: ExtractOp

    @property
    def input_types(self):
        """Type accepted as input: the typing construct ``Dict[str, str]``."""
        return Dict[str, str]

    @property
    def output_types(self):
        """Type produced as output: the typing construct ``Dict[str, str]``."""
        return Dict[str, str]

    @property
    def name(self):
        """Name of this component: the name of the instance's class."""
        return self.__class__.__name__

    def set_operator(self, op_name: str, params: Union[Dict[str, str], None] = None):
        """Sets knowledge extract operator to this component.

        Args:
            op_name: Name passed to ``ExtractOp.by_name`` to look up the
                operator.
            params: Value passed as the single argument when instantiating the
                looked-up operator. May be omitted, in which case ``None`` is
                passed through unchanged.

        Returns:
            This component, so calls can be chained.
        """
        operator_cls = ExtractOp.by_name(op_name)
        self.extract_op = operator_cls(params)
        return self

    def to_rest(self):
        """Transforms `UserDefinedExtractor` to REST model `ExtractNodeConfig`.

        Returns:
            A ``rest.Node`` built from the parent's ``to_dict()`` entries, with
            the ``ExtractNodeConfig`` attached as ``node_config``.
        """
        # NOTE(review): the operator config is always sent empty, so
        # `extract_op` (its name and params) is not serialised into the node
        # config. Confirm whether that is intended.
        operator_config = {}
        config = rest.ExtractNodeConfig(
            output_fields=self.output_fields, operator_config=operator_config
        )
        return rest.Node(**super().to_dict(), node_config=config)