openspg/python/knext/component/builder/source_reader.py

81 lines
2.7 KiB
Python
Raw Normal View History

2023-12-18 13:46:44 +08:00
# -*- coding: utf-8 -*-
# Copyright 2023 Ant Group CO., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
from typing import List, Dict
2023-12-06 17:26:39 +08:00
2023-12-15 17:33:54 +08:00
from pydantic import Field
2023-12-06 17:26:39 +08:00
from knext import rest
2023-12-11 10:44:37 +08:00
from knext.common.runnable import Input, Output
2023-12-08 11:25:26 +08:00
from knext.component.builder.base import SourceReader
2023-12-06 17:26:39 +08:00
class CsvSourceReader(SourceReader):
    """Source component that reads data from a CSV file.

    Args:
        local_path: The local path of the CSV file.
        columns: The column names that need to be read from the CSV file.
        start_row: The row number at which reading of the CSV file starts.
            If the CSV file includes a header, it needs to be greater than
            or equal to 2.

    Examples:
        source = CsvSourceReader(
            local_path="./builder/job/data/App.csv",
            columns=["id", 'riskMark', 'useCert'],
            start_row=2
        )
    """

    # The local path of the CSV file.
    local_path: str
    # The column names that need to be read from the CSV file.
    columns: List[str]
    # The row number at which reading starts; validated to be >= 1.
    # If the CSV file includes a header, it needs to be >= 2.
    start_row: int = Field(ge=1)

    @property
    def input_types(self) -> Input:
        """Return the input type of this component, which is always `None`."""
        return None

    @property
    def output_types(self) -> Output:
        """Return the output type of this component: `Dict[str, str]`."""
        return Dict[str, str]

    @property
    def input_keys(self):
        """Return the input keys of this component, which is always `None`."""
        return None

    @property
    def output_keys(self):
        """Return the output keys, i.e. the configured `columns`."""
        return self.columns

    def invoke(self, input: Input):
        """Not supported for this component.

        Raises:
            NotImplementedError: Always.
        """
        message = f"{self.__class__.__name__} does not support being invoked separately."
        raise NotImplementedError(message)

    def submit(self):
        """Not supported for this component.

        Raises:
            NotImplementedError: Always.
        """
        message = f"{self.__class__.__name__} does not support being submitted separately."
        raise NotImplementedError(message)

    def to_rest(self):
        """Transforms `CsvSourceReader` to REST model `CsvSourceNodeConfig`.

        Returns:
            A `rest.Node` built from the parent's `to_dict()` fields plus a
            `rest.CsvSourceNodeConfig` carrying `local_path` (as `url`),
            `columns` and `start_row`.
        """
        node_config = rest.CsvSourceNodeConfig(
            start_row=self.start_row,
            url=self.local_path,
            columns=self.columns,
        )
        base_fields = super().to_dict()
        return rest.Node(**base_fields, node_config=node_config)

    @classmethod
    def from_rest(cls, node: rest.Node):
        """Build a `CsvSourceReader` from a REST `Node`.

        The node's `node_config` supplies `url` (used as `local_path`),
        `columns` and `start_row`.
        """
        cfg = node.node_config
        return cls(
            local_path=cfg.url,
            columns=cfg.columns,
            start_row=cfg.start_row,
        )