Mirror of https://github.com/OpenSPG/openspg.git, synced 2025-11-06 21:30:14 +00:00
overwrite __rshift__

commit fe18880fcf
parent ae25cce855
@@ -1,12 +0,0 @@
-from knext.api.operator import (
-    BaseOp,
-    ExtractOp,
-    KnowledgeExtractOp,
-    EntityLinkOp,
-    LinkOp,
-    EntityFuseOp,
-    FuseOp,
-    PropertyNormalizeOp,
-    NormalizeOp,
-    PromptOp,
-)
@@ -0,0 +1,14 @@
+
+from knext.client.builder import BuilderClient
+from knext.client.schema import SchemaClient
+from knext.client.reasoner import ReasonerClient
+from knext.client.operator import OperatorClient
+from knext.client.search import SearchClient
+
+__all__ = [
+    "BuilderClient",
+    "SchemaClient",
+    "ReasonerClient",
+    "OperatorClient",
+    "SearchClient"
+]
@@ -0,0 +1,17 @@
+
+from knext.component.builder import UserDefinedExtractor, LLMBasedExtractor
+from knext.component.builder import SPGTypeMapping, RelationMapping
+from knext.component.builder import CsvSourceReader
+from knext.component.builder import KGSinkWriter
+from knext.component.base import Component
+
+
+__all__ = [
+    "UserDefinedExtractor",
+    "LLMBasedExtractor",
+    "CsvSourceReader",
+    "SPGTypeMapping",
+    "RelationMapping",
+    "KGSinkWriter",
+    "Component"
+]
@@ -1,22 +0,0 @@
-from knext.core.builder.operator.model.op import BaseOp, ExtractOp, LinkOp, FuseOp, NormalizeOp, PromptOp
-
-
-KnowledgeExtractOp = ExtractOp
-EntityLinkOp = LinkOp
-EntityFuseOp = FuseOp
-PropertyNormalizeOp = NormalizeOp
-
-__all__ = [
-    "BaseOp",
-    "ExtractOp",
-    "LinkOp",
-    "FuseOp",
-    "NormalizeOp",
-    "PromptOp",
-    "LinkOp",
-] + [
-    "KnowledgeExtractOp",
-    "EntityLinkOp",
-    "EntityFuseOp",
-    "PropertyNormalizeOp",
-]
@@ -1,8 +1,54 @@
 from abc import ABC
+from typing import Union, Type, List
+
+import networkx as nx
+
+from knext.common.restable import RESTable
+from knext.common.runnable import Runnable


-class Chain(ABC):
+class Chain(Runnable, RESTable):

-    def dag(self):
+    dag: nx.DiGraph
+
+    def submit(self):
         pass

+    def to_rest(self):
+        pass
+
+    def __rshift__(self, other: Union[
+        Type['Chain'],
+        List[Type['Chain']],
+        Type['Component'],
+        List[Type['Component']],
+        None
+    ]):
+        from knext.component.base import Component
+        if not other:
+            return self
+        if not isinstance(other, list):
+            other = [other]
+        dag_list = []
+        for o in other:
+            if not o:
+                dag_list.append(o.dag)
+            if isinstance(o, Component):
+                end_nodes = [node for node, out_degree in self.dag.out_degree() if out_degree == 0 or node.last]
+                dag = nx.DiGraph(self.dag)
+                if len(end_nodes) > 0:
+                    for end_node in end_nodes:
+                        dag.add_edge(end_node, o)
+                dag.add_node(o)
+                dag_list.append(dag)
+            elif isinstance(o, Chain):
+                combined_dag = nx.compose(self.dag, o.dag)
+                end_nodes = [node for node, out_degree in self.dag.out_degree() if out_degree == 0 or node.last]
+                start_nodes = [node for node, in_degree in o.dag.in_degree() if in_degree == 0]
+
+                if len(end_nodes) > 0 and len(start_nodes) > 0:
+                    for end_node in end_nodes:
+                        for start_node in start_nodes:
+                            combined_dag.add_edge(end_node, start_node)
+        final_dag = nx.compose_all(dag_list)
+        return Chain(dag=final_dag)
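The tail-selection step above is plain networkx. A standalone sketch of just that step, not part of the commit, using string nodes as stand-ins (so the commit's extra `node.last` check is omitted):

# Illustration only: how Chain.__rshift__ finds the DAG's tail before attaching the next component.
import networkx as nx

dag = nx.DiGraph()
dag.add_edge("source", "mapping")
dag.add_edge("mapping", "sink")

# Nodes with no outgoing edges are the current end of the chain.
end_nodes = [node for node, out_degree in dag.out_degree() if out_degree == 0]
print(end_nodes)  # ['sink']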
@@ -12,7 +12,7 @@ class BuilderChain(RESTable, Chain):

     source: SourceReader

-    process: Union[SPGExtractor, Mapping]
+    process: Union[SPGExtractor, Mapping, ]

     sink: SinkWriter

@@ -24,3 +24,6 @@ class BuilderChain(RESTable, Chain):
     def output_types(self):
         return None

+    @classmethod
+    def from_config(cls):
+        return cls()
@@ -233,7 +233,7 @@ class SPGConceptRuleMarkLang:
         Load and then parse the script file
         """

-        file = open(filename, "r")
+        file = open(filename, "r", encoding="utf-8")
         lines = file.read().splitlines()
         last_indent_level = 0

python/knext/common/restable.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+from abc import ABC
+
+from knext import rest
+
+
+class RESTable(ABC):
+
+    @property
+    def upstream_types(self):
+        raise NotImplementedError("To be implemented in subclass")
+
+    @property
+    def downstream_types(self):
+        raise NotImplementedError("To be implemented in subclass")
+
+    def to_rest(self):
+        raise NotImplementedError("To be implemented in subclass")
+
+    @classmethod
+    def from_rest(cls, node: rest.Node):
+        raise NotImplementedError("To be implemented in subclass")
+
+    def submit(self):
+        raise NotImplementedError("To be implemented in subclass")
python/knext/common/runnable.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+from pydantic import BaseConfig, BaseModel
+
+
+class Runnable(BaseModel):
+
+    last: bool = False
+
+    @property
+    def input_types(self):
+        return
+
+    @property
+    def output_types(self):
+        return
+
+    def invoke(self, input):
+        raise NotImplementedError("To be implemented in subclass")
+
+    def __rshift__(self, other):
+        raise NotImplementedError("To be implemented in subclass")
+
+    class Config(BaseConfig):
+
+        arbitrary_types_allowed = True
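A side note on the nested `Config`: `arbitrary_types_allowed = True` is what lets subclasses such as Chain declare a raw `networkx.DiGraph` field on a pydantic model. A minimal sketch of the same pattern, assuming pydantic v1 semantics as the `BaseConfig` import implies; the class name is invented for illustration:

# Illustration only, not part of the commit.
import networkx as nx
from pydantic import BaseConfig, BaseModel


class DagHolder(BaseModel):
    dag: nx.DiGraph  # non-pydantic type; rejected unless arbitrary types are allowed

    class Config(BaseConfig):
        arbitrary_types_allowed = True


holder = DagHolder(dag=nx.DiGraph())  # validates only because of the Config flag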
@@ -11,12 +11,14 @@
 # or implied.


-from abc import ABC, abstractmethod
+from abc import ABC
 from enum import Enum
-from typing import List, Union, TypeVar, Generic, Any, Dict, Tuple, Type
+from typing import List, Union, TypeVar, Type

-from knext import rest
+import networkx as nx

+from knext.common.restable import RESTable
+from knext.common.runnable import Runnable

 Other = TypeVar("Other")

@@ -46,52 +48,7 @@ class PropertyHelper:
     pass


-class RESTable(ABC):
-
-    @property
-    def upstream_types(self):
-        raise NotImplementedError("To be implemented in subclass")
-
-    @property
-    def downstream_types(self):
-        raise NotImplementedError("To be implemented in subclass")
-
-    @abstractmethod
-    def to_rest(self):
-        raise NotImplementedError("To be implemented in subclass")
-
-    @classmethod
-    def from_rest(cls, node: rest.Node):
-        raise NotImplementedError("To be implemented in subclass")
-
-    @abstractmethod
-    def submit(self):
-        raise NotImplementedError("To be implemented in subclass")
-
-
-class Runnable(ABC):
-
-    @property
-    def input_types(self) -> Input:
-        return
-
-    @property
-    def output_types(self) -> Output:
-        return
-
-    @abstractmethod
-    def invoke(self, input: Input) -> Output:
-        raise NotImplementedError("To be implemented in subclass")
-
-    def __rshift__(
-        self,
-        other: Type['Runnable']
-    ) -> Type['Runnable']:
-        """Compose this runnable with another object to create a RunnableSequence."""
-        return Chain(first=self, last=coerce_to_runnable(other))
-
-
-class Component(ABC):
+class Component(Runnable, RESTable, ABC):
     """
     Base class for all component.
     """
@@ -114,3 +71,50 @@ class Component(ABC):
     def to_dict(self):
         return self.__dict__

+    def __hash__(self):
+        return id(self)
+
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
+    def __rshift__(self, other: Union[
+        Type['Chain'],
+        List[Type['Chain']],
+        Type['Component'],
+        List[Type['Component']],
+        None
+    ]):
+        from knext.chain.base import Chain
+        if not other:
+            return self
+        if not isinstance(other, list):
+            other = [other]
+        dag_list = []
+        for o in other:
+            if not o:
+                dag = nx.DiGraph()
+                self.last = True
+                dag.add_node(self)
+                print(dag.nodes)
+                dag_list.append(dag)
+            if isinstance(o, Component):
+                dag = nx.DiGraph()
+                dag.add_node(self)
+                dag.add_node(o)
+                dag.add_edge(self, o)
+                dag_list.append(dag)
+            elif isinstance(o, Chain):
+                dag = nx.DiGraph()
+                dag.add_node(self)
+                end_nodes = [node for node, out_degree in dag.out_degree() if out_degree == 0 or node.last]
+                start_nodes = [node for node, in_degree in o.dag.in_degree() if in_degree == 0]
+
+                if len(end_nodes) > 0 and len(start_nodes) > 0:
+                    for end_node in end_nodes:
+                        for start_node in start_nodes:
+                            combined_dag.add_edge(end_node, start_node)
+                combined_dag = nx.compose(dag, o.dag)
+                dag_list.append(combined_dag)
+        final_dag = nx.compose_all(dag_list)
+
+        return Chain(dag=final_dag)
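To make the new operator concrete, here is a hypothetical sketch of what `Component.__rshift__` is meant to produce. The subclasses `A` and `B` are invented for illustration and assume `Component` needs no extra constructor arguments; the chain_test.py added later in this commit is the authoritative usage:

# Hypothetical sketch, not from the commit: two bare components wired with >>.
from knext.chain.base import Chain
from knext.component.base import Component


class A(Component):
    pass


class B(Component):
    pass


a, b = A(), B()
chain = a >> b                 # Component.__rshift__ builds a one-edge DAG: a -> b
assert isinstance(chain, Chain)
print(list(chain.dag.edges))   # expected: [(a, b)]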
@@ -0,0 +1,19 @@
+
+from knext.component.builder.extractor import UserDefinedExtractor, LLMBasedExtractor, SPGExtractor
+from knext.component.builder.mapping import SPGTypeMapping, RelationMapping, Mapping
+from knext.component.builder.source_reader import CsvSourceReader, SourceReader
+from knext.component.builder.sink_writer import KGSinkWriter, SinkWriter
+
+
+__all__ = [
+    "UserDefinedExtractor",
+    "LLMBasedExtractor",
+    "CsvSourceReader",
+    "SPGTypeMapping",
+    "RelationMapping",
+    "KGSinkWriter",
+    "SPGExtractor",
+    "Mapping",
+    "SourceReader",
+    "SinkWriter",
+]
python/knext/component/builder/base.py (new file, 80 lines)
@@ -0,0 +1,80 @@
+from abc import ABC
+from typing import Union
+
+from knext.component.base import Component, ComponentTypeEnum, ComponentLabelEnum
+
+
+class SPGExtractor(Component, ABC):
+
+    @property
+    def upstream_types(self):
+        return Union[SourceReader, SPGExtractor]
+
+    @property
+    def downstream_types(self):
+        return Union[SPGExtractor, Mapping]
+
+    @property
+    def type(self):
+        return ComponentTypeEnum.Builder
+
+    @property
+    def label(self):
+        return ComponentLabelEnum.Extractor
+
+
+class Mapping(Component, ABC):
+
+    @property
+    def upstream_types(self):
+        return Union[SourceReader, SPGExtractor]
+
+    @property
+    def downstream_types(self):
+        return Union[SinkWriter]
+
+    @property
+    def type(self):
+        return ComponentTypeEnum.Builder
+
+    @property
+    def label(self):
+        return ComponentLabelEnum.Mapping
+
+
+class SinkWriter(Component, ABC):
+
+    @property
+    def upstream_types(self):
+        return Union[Mapping]
+
+    @property
+    def downstream_types(self):
+        return None
+
+    @property
+    def type(self):
+        return ComponentTypeEnum.Builder
+
+    @property
+    def label(self):
+        return ComponentLabelEnum.SinkWriter
+
+
+class SourceReader(Component, ABC):
+
+    @property
+    def upstream_types(self):
+        return None
+
+    @property
+    def downstream_types(self):
+        return Union[SPGExtractor, Mapping]
+
+    @property
+    def type(self):
+        return ComponentTypeEnum.Builder
+
+    @property
+    def label(self):
+        return ComponentLabelEnum.SourceReader
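One detail worth noting in the file above: the upstream/downstream declarations reuse `typing.Union` as a lightweight type set, and `Union` with a single argument simply collapses to that argument. A quick check, illustrative and not from the commit:

from typing import Union

# Union of one type is just that type, so a declaration like Union[SinkWriter]
# effectively returns the SinkWriter class itself rather than a real union.
assert Union[int] is int
print(Union[int, str])  # typing.Union[int, str]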
@@ -1,33 +1,14 @@
-from abc import ABC
-from typing import Union, Mapping, Dict, List
+from typing import Union, Dict, List

-from NN4K.invoker.base import ModelInvoker
+from knext.component.builder.base import SPGExtractor
+from knext.operator.spg_record import SPGRecord
+from nn4k.invoker.base import NNInvoker
 from knext import rest
-from knext.component.base import RESTable, Component, ComponentTypeEnum, ComponentLabelEnum, Runnable, Input, Output
-from knext.component.builder.source_reader import SourceReader
-from knext.core.builder.operator.model.op import PromptOp
+from knext.component.base import SPGTypeHelper, PropertyHelper
+from knext.operator.op import PromptOp, ExtractOp


-class SPGExtractor(RESTable, Component, ABC):
-
-    @property
-    def upstream_types(self):
-        return Union[SourceReader, SPGExtractor]
-
-    @property
-    def downstream_types(self):
-        return Union[SPGExtractor, Mapping]
-
-    @property
-    def type(self):
-        return ComponentTypeEnum.Builder
-
-    @property
-    def label(self):
-        return ComponentLabelEnum.Extractor
-
-
-class LLMBasedExtractor(Runnable, SPGExtractor):
+class LLMBasedExtractor(SPGExtractor):
     """A Process Component that transforming unstructured data into structured data.

     Examples:
@@ -40,7 +21,7 @@ class LLMBasedExtractor(Runnable, SPGExtractor):
     """All output column names after knowledge extraction processing."""
     output_fields: List[str]
     """Knowledge extract operator of this component."""
-    llm: ModelInvoker
+    llm: NNInvoker

     prompt_ops: List[PromptOp]

@@ -49,11 +30,11 @@ class LLMBasedExtractor(Runnable, SPGExtractor):
     property_names: List[Union[str, PropertyHelper]]

     @property
-    def input_types(self) -> Input:
+    def input_types(self):
         return Dict[str, str]

     @property
-    def output_types(self) -> Output:
+    def output_types(self):
         return SPGRecord

     def to_rest(self):
@@ -68,7 +49,7 @@ class LLMBasedExtractor(Runnable, SPGExtractor):

         return rest.Node(**super().to_dict(), node_config=config)

-    def invoke(self, input: Input) -> Output:
+    def invoke(self, input):
         pass

     @classmethod
@@ -79,7 +60,7 @@ class LLMBasedExtractor(Runnable, SPGExtractor):
         pass


-class UserDefinedExtractor(Runnable[Dict[str, str], Dict[str, str]], SPGExtractor):
+class UserDefinedExtractor(SPGExtractor):
     """A Process Component that transforming unstructured data into structured data.

     Examples:
@@ -95,11 +76,11 @@ class UserDefinedExtractor(Runnable[Dict[str, str], Dict[str, str]], SPGExtractor):
     extract_op: ExtractOp

     @property
-    def input_types(self) -> Input:
+    def input_types(self):
         return Dict[str, str]

     @property
-    def output_types(self) -> Output:
+    def output_types(self):
         return Dict[str, str]

     @property
@@ -1,34 +1,20 @@
 from abc import ABC
+from collections import defaultdict
 from typing import Union, Dict, List, Tuple

-from knext.component.base import RESTable, Component, ComponentTypeEnum, ComponentLabelEnum, Runnable
-from knext.component.builder.extractor import SPGExtractor
-from knext.component.builder.sink_writer import SinkWriter
-from knext.component.builder.source_reader import SourceReader
+from knext import rest
+from knext.component.base import SPGTypeHelper, PropertyHelper, MappingTypeEnum
+from knext.component.builder.base import Mapping
+from knext.operator.op import LinkOp
 from knext.operator.spg_record import SPGRecord


-class Mapping(RESTable, Component, ABC):
-
-    @property
-    def upstream_types(self):
-        return Union[SourceReader, SPGExtractor]
-
-    @property
-    def downstream_types(self):
-        return Union[SinkWriter]
-
-    @property
-    def type(self):
-        return ComponentTypeEnum.Builder
-
-    @property
-    def label(self):
-        return ComponentLabelEnum.Mapping
-
-
-class SPGTypeMapping(Runnable[Dict[str, str], SPGRecord], Mapping):
+class NormalizeOp:
+    pass
+
+
+class SPGTypeMapping(Mapping):
     """A Process Component that mapping data to entity/event/concept type.

     Args:
@@ -49,8 +35,8 @@ class SPGTypeMapping(Runnable[Dict[str, str], SPGRecord], Mapping):

     filters: List[Tuple[str, str]] = list()

-    def add_field(self, source_field: str, target_field: Union[str, PropertyHelper], link_op: LinkOp,
-                  norm_op: NormalizeOp):
+    def add_field(self, source_field: str, target_field: Union[str, PropertyHelper], link_op: LinkOp = None,
+                  norm_op: NormalizeOp = None):
         """Adds a field mapping from source data to property of spg_type.

         :param source_field: The source field to be mapped.
@@ -124,8 +110,11 @@ class SPGTypeMapping(Runnable[Dict[str, str], SPGRecord], Mapping):
         )
         return rest.Node(**super().to_dict(), node_config=config)

+    def submit(self):
+        pass
+

-class RelationMappingComponent(Component):
+class RelationMapping(Mapping):
     """A Process Component that mapping data to relation type.

     Args:
@@ -150,8 +139,6 @@ class RelationMappingComponent(Component):

     filters: List[Tuple[str, str]] = list()

-    RELATION_BASE_FIELDS = ["srcId", "dstId"]
-
     def add_field(self, source_field: str, target_field: str):
         """Adds a field mapping from source data to property of spg_type.

@@ -173,13 +160,8 @@ class RelationMappingComponent(Component):
         self.filters.append((column_name, column_value))
         return self

-    def _to_rest(self):
+    def to_rest(self):
         """Transforms `RelationMappingComponent` to REST model `MappingNodeConfig`."""
-        assert all(
-            field in self.mapping.keys()
-            for field in RelationMappingComponent.RELATION_BASE_FIELDS
-        ), f"{self.__class__.__name__} must include mapping to {str(RelationMappingComponent.RELATION_BASE_FIELDS)}"
-
         mapping = defaultdict(list)
         for dst_name, src_name in self.mapping.items():
             mapping[src_name].append(dst_name)
@@ -1,30 +1,12 @@
 from abc import ABC
 from ctypes import Union
+from typing import Dict

-from knext.component.base import RESTable, Component
+from knext.component.base import Runnable
+from knext.component.builder.base import SinkWriter


-class SinkWriter(RESTable, Component, ABC):
-
-    @property
-    def upstream_types(self):
-        return Union[Mapping, Evaluator]
-
-    @property
-    def downstream_types(self):
-        return None
-
-    @property
-    def type(self):
-        return ComponentTypeEnum.Builder
-
-    @property
-    def label(self):
-        return ComponentLabelEnum.SinkWriter
-
-
-
-class KGSinkWriter(Runnable[Dict[str, str], None], SinkWriter):
+class KGSinkWriter(SinkWriter):
     """The Sink Component that writing data to KG storage.

     Args:
@@ -35,14 +17,14 @@ class KGSinkWriter(Runnable[Dict[str, str], None], SinkWriter):
     """

     @property
-    def input_types(self) -> Input:
+    def input_types(self):
         return Dict[str, str]

     @property
-    def output_types(self) -> Output:
+    def output_types(self):
         return None

-    def invoke(self, input: Input) -> Output:
+    def invoke(self, input):
         pass

     def to_rest(self):
@@ -53,3 +35,6 @@ class KGSinkWriter(Runnable[Dict[str, str], None], SinkWriter):
             },
             **super().to_dict(),
         )
+
+    def submit(self):
+        pass
@@ -2,29 +2,7 @@ from abc import ABC
 from typing import Union, List, Dict

 from knext import rest
-from knext.component.base import RESTable, Component, ComponentTypeEnum, ComponentLabelEnum, Runnable, Input, Output
-from knext.component.builder.extractor import SPGExtractor
-from knext.component.builder.mapping import Mapping
-
-
-class SourceReader(RESTable, Component, ABC):
-
-    @property
-    def upstream_types(self):
-        return None
-
-    @property
-    def downstream_types(self):
-        return Union[SPGExtractor, Mapping]
-
-    @property
-    def type(self):
-        return ComponentTypeEnum.Builder
-
-    @property
-    def label(self):
-        return ComponentLabelEnum.SourceReader
-
+from knext.component.builder.base import SourceReader


 class CsvSourceReader(SourceReader):
@@ -51,7 +29,7 @@ class CsvSourceReader(SourceReader):
     If the CSV file includes a header, it needs to be greater than or equal to 2."""
     start_row: int

-    def invoke(self, input: Input) -> Output:
+    def invoke(self, input):
         pass

     def submit(self):
@@ -15,7 +15,7 @@ from enum import Enum
 from typing import List, Dict, Any, Type

 from knext.operator.eval_result import EvalResult
-from knext.operator.spg_record import Vertex
+from knext.operator.spg_record import SPGRecord


 class OperatorTypeEnum(str, Enum):
@@ -31,31 +31,6 @@ class ExtractOp(BaseOp, ABC):
         return EvalResult[List[SPGRecord]](output).to_dict()


-class NormalizeOp(BaseOp, ABC):
-    """Base class for all property normalize operators."""
-
-    def __init__(self, params: Dict[str, str] = None):
-        super().__init__(params)
-
-    def eval(self, property: str, record: SPGRecord) -> str:
-        raise NotImplementedError(
-            f"{self.__class__.__name__} need to implement `eval` method."
-        )
-
-    @staticmethod
-    def _pre_process(*inputs):
-        return inputs[0], SPGRecord.from_dict(inputs[1])
-
-    @staticmethod
-    def _post_process(output) -> Dict[str, Any]:
-        if isinstance(output, EvalResult):
-            return output.to_dict()
-        if isinstance(output, tuple):
-            return EvalResult[str](*output[:3]).to_dict()
-        else:
-            return EvalResult[str](output).to_dict()
-
-
 class LinkOp(BaseOp, ABC):
     """Base class for all entity link operators."""

@@ -0,0 +1,13 @@
+
+# ATTENTION!
+# This file is generated by Schema automatically, it will be refreshed after schema has been committed
+# PLEASE DO NOT MODIFY THIS FILE!!!
+
+class ${namespace}:
+    def __init__(self):
+        self.BodyPart = self.BodyPart()
+        self.Disease = self.Disease()
+        self.Symptom = self.Symptom()
+        self.Drug = self.Drug()
+        self.HospitalDepartment = self.HospitalDepartment()
+        self.Indicator = self.Indicator()
python/tests/__init__.py (new file, 0 lines)
python/tests/chain_test.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+import networkx as nx
+import matplotlib.pyplot as plt
+
+from knext.api.component import SPGTypeMapping
+from knext.api.component import KGSinkWriter
+from knext.api.component import CsvSourceReader
+
+if __name__ == '__main__':
+    source = CsvSourceReader(
+        local_path="./builder/job/data/BodyPart.csv", columns=["id"], start_row=1
+    )
+
+    mapping1 = SPGTypeMapping(spg_type_name="Medical.BodyPart").add_field(
+        "id", "Medical.BodyPart.id"
+    )
+
+    mapping2 = SPGTypeMapping(spg_type_name="Medical.BodyPart").add_field(
+        "id", "Medical.BodyPart.id1"
+    )
+
+    sink = KGSinkWriter()
+    sink2 = KGSinkWriter()
+
+    builder_chain = source >> [mapping1, None] >> sink2
+
+    print(builder_chain.dag.edges)
+
+    G = builder_chain.dag
+    # Draw the graph
+    nx.draw(G, with_labels=True, arrows=True)
+
+    # Show the figure
+    plt.show()