mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-18 14:06:59 +00:00
* refactor!: change partition metadata structure for table entities * refactor!: updated json schema for TypeScript code gen * chore: migration of partition for table entities * style: python & java linting * updated ui side change for table partitioned key * miner fix * addressing comments * fixed ci error --------- Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com>
This commit is contained in:
parent
0a38334c98
commit
056e6368d0
@ -12,7 +12,8 @@
|
||||
"""Athena source module"""
|
||||
|
||||
import traceback
|
||||
from typing import Iterable, Tuple
|
||||
from copy import deepcopy
|
||||
from typing import Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from pyathena.sqlalchemy.base import AthenaDialect
|
||||
from sqlalchemy import types
|
||||
@ -22,7 +23,8 @@ from sqlalchemy.engine.reflection import Inspector
|
||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
Table,
|
||||
TablePartition,
|
||||
TableType,
|
||||
@ -57,6 +59,18 @@ logger = ingestion_logger()
|
||||
ATHENA_TAG = "ATHENA TAG"
|
||||
ATHENA_TAG_CLASSIFICATION = "ATHENA TAG CLASSIFICATION"
|
||||
|
||||
ATHENA_INTERVAL_TYPE_MAP = {
|
||||
**dict.fromkeys(["enum", "string", "VARCHAR"], PartitionIntervalTypes.COLUMN_VALUE),
|
||||
**dict.fromkeys(
|
||||
["integer", "bigint", "INTEGER", "BIGINT"], PartitionIntervalTypes.INTEGER_RANGE
|
||||
),
|
||||
**dict.fromkeys(
|
||||
["date", "timestamp", "DATE", "DATETIME", "TIMESTAMP"],
|
||||
PartitionIntervalTypes.TIME_UNIT,
|
||||
),
|
||||
"injected": PartitionIntervalTypes.INJECTED,
|
||||
}
|
||||
|
||||
|
||||
def _get_column_type(self, type_):
|
||||
"""
|
||||
@ -123,6 +137,29 @@ def _get_column_type(self, type_):
|
||||
return col_type(*args)
|
||||
|
||||
|
||||
def _get_projection_details(
|
||||
columns: List[Dict], projection_parameters: Dict
|
||||
) -> List[Dict]:
|
||||
"""Get the projection details for the columns
|
||||
|
||||
Args:
|
||||
columns (List[Dict]): list of columns
|
||||
projection_parameters (Dict): projection parameters
|
||||
"""
|
||||
if not projection_parameters:
|
||||
return columns
|
||||
|
||||
columns = deepcopy(columns)
|
||||
for col in columns:
|
||||
projection_details = next(
|
||||
({k: v} for k, v in projection_parameters.items() if k == col["name"]), None
|
||||
)
|
||||
if projection_details:
|
||||
col["projection_type"] = projection_details[col["name"]]
|
||||
|
||||
return columns
|
||||
|
||||
|
||||
@reflection.cache
|
||||
def get_columns(self, connection, table_name, schema=None, **kw):
|
||||
"""
|
||||
@ -147,6 +184,14 @@ def get_columns(self, connection, table_name, schema=None, **kw):
|
||||
]
|
||||
|
||||
if kw.get("only_partition_columns"):
|
||||
# Return projected partition information to set partition type in `get_table_partition_details`
|
||||
# projected partition fields are stored in the form of `projection.<field_name>.type` as a table parameter
|
||||
projection_parameters = {
|
||||
key_.split(".")[1]: value_
|
||||
for key_, value_ in metadata.parameters.items()
|
||||
if key_.startswith("projection") and key_.endswith("type")
|
||||
}
|
||||
columns = _get_projection_details(columns, projection_parameters)
|
||||
return columns
|
||||
|
||||
columns += [
|
||||
@ -223,14 +268,34 @@ class AthenaSource(CommonDbSourceService):
|
||||
|
||||
def get_table_partition_details(
|
||||
self, table_name: str, schema_name: str, inspector: Inspector
|
||||
) -> Tuple[bool, TablePartition]:
|
||||
) -> Tuple[bool, Optional[TablePartition]]:
|
||||
"""Get Athena table partition detail
|
||||
|
||||
Args:
|
||||
table_name (str): name of the table
|
||||
schema_name (str): name of the schema
|
||||
inspector (Inspector):
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple[bool, Optional[TablePartition]]:
|
||||
"""
|
||||
columns = inspector.get_columns(
|
||||
table_name=table_name, schema=schema_name, only_partition_columns=True
|
||||
)
|
||||
if columns:
|
||||
partition_details = TablePartition(
|
||||
intervalType=IntervalType.COLUMN_VALUE.value,
|
||||
columns=[column["name"] for column in columns],
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=col["name"],
|
||||
intervalType=ATHENA_INTERVAL_TYPE_MAP.get(
|
||||
col.get("projection_type", str(col["type"])),
|
||||
PartitionIntervalTypes.COLUMN_VALUE,
|
||||
),
|
||||
interval=None,
|
||||
)
|
||||
for col in columns
|
||||
]
|
||||
)
|
||||
return True, partition_details
|
||||
return False, None
|
||||
|
@ -33,7 +33,8 @@ from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -495,7 +496,7 @@ class BigquerySource(
|
||||
|
||||
def get_table_partition_details(
|
||||
self, table_name: str, schema_name: str, inspector: Inspector
|
||||
) -> Tuple[bool, TablePartition]:
|
||||
) -> Tuple[bool, Optional[TablePartition]]:
|
||||
"""
|
||||
check if the table is partitioned table and return the partition details
|
||||
"""
|
||||
@ -504,30 +505,38 @@ class BigquerySource(
|
||||
if table.time_partitioning is not None:
|
||||
if table.time_partitioning.field:
|
||||
table_partition = TablePartition(
|
||||
interval=str(table.time_partitioning.type_),
|
||||
intervalType=IntervalType.TIME_UNIT.value,
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=table.time_partitioning.field,
|
||||
interval=str(table.time_partitioning.type_),
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT,
|
||||
)
|
||||
]
|
||||
)
|
||||
table_partition.columns = [table.time_partitioning.field]
|
||||
return True, table_partition
|
||||
|
||||
return True, TablePartition(
|
||||
interval=str(table.time_partitioning.type_),
|
||||
intervalType=IntervalType.INGESTION_TIME.value,
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="_PARTITIONTIME"
|
||||
if table.time_partitioning.type_ == "HOUR"
|
||||
else "_PARTITIONDATE",
|
||||
interval=str(table.time_partitioning.type_),
|
||||
intervalType=PartitionIntervalTypes.INGESTION_TIME,
|
||||
)
|
||||
]
|
||||
)
|
||||
if table.range_partitioning:
|
||||
table_partition = TablePartition(
|
||||
intervalType=IntervalType.INTEGER_RANGE.value,
|
||||
table_partition = PartitionColumnDetails(
|
||||
columnName=table.range_partitioning.field,
|
||||
intervalType=PartitionIntervalTypes.INTEGER_RANGE,
|
||||
interval=None,
|
||||
)
|
||||
if hasattr(table.range_partitioning, "range_") and hasattr(
|
||||
table.range_partitioning.range_, "interval"
|
||||
):
|
||||
table_partition.interval = table.range_partitioning.range_.interval
|
||||
if (
|
||||
hasattr(table.range_partitioning, "field")
|
||||
and table.range_partitioning.field
|
||||
):
|
||||
table_partition.columns = [table.range_partitioning.field]
|
||||
return True, table_partition
|
||||
table_partition.columnName = table.range_partitioning.field
|
||||
return True, TablePartition(columns=[table_partition])
|
||||
return False, None
|
||||
|
||||
def clean_raw_data_type(self, raw_data_type):
|
||||
|
@ -21,7 +21,8 @@ from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TableConstraint,
|
||||
TablePartition,
|
||||
TableType,
|
||||
@ -306,9 +307,16 @@ class DorisSource(CommonDbSourceService):
|
||||
|
||||
if result and result[0].PartitionKey != "":
|
||||
partition_details = TablePartition(
|
||||
intervalType=IntervalType.TIME_UNIT.value,
|
||||
columns=result[0].PartitionKey.split(", "),
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=partition_key,
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT,
|
||||
interval=None,
|
||||
)
|
||||
for partition_key in result[0].PartitionKey.split(", ")
|
||||
]
|
||||
)
|
||||
|
||||
return True, partition_details
|
||||
return False, None
|
||||
except Exception:
|
||||
|
@ -21,7 +21,8 @@ from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -66,9 +67,9 @@ logger = ingestion_logger()
|
||||
|
||||
|
||||
INTERVAL_TYPE_MAP = {
|
||||
"list": IntervalType.COLUMN_VALUE.value,
|
||||
"hash": IntervalType.COLUMN_VALUE.value,
|
||||
"range": IntervalType.TIME_UNIT.value,
|
||||
"list": PartitionIntervalTypes.COLUMN_VALUE,
|
||||
"hash": PartitionIntervalTypes.COLUMN_VALUE,
|
||||
"range": PartitionIntervalTypes.TIME_UNIT,
|
||||
}
|
||||
|
||||
RELKIND_MAP = {
|
||||
@ -187,18 +188,27 @@ class GreenplumSource(CommonDbSourceService, MultiDBSource):
|
||||
|
||||
def get_table_partition_details(
|
||||
self, table_name: str, schema_name: str, inspector: Inspector
|
||||
) -> Tuple[bool, TablePartition]:
|
||||
) -> Tuple[bool, Optional[TablePartition]]:
|
||||
result = self.engine.execute(
|
||||
GREENPLUM_PARTITION_DETAILS.format(
|
||||
table_name=table_name, schema_name=schema_name
|
||||
)
|
||||
).all()
|
||||
|
||||
if result:
|
||||
partition_details = TablePartition(
|
||||
intervalType=INTERVAL_TYPE_MAP.get(
|
||||
result[0].partition_strategy, IntervalType.COLUMN_VALUE.value
|
||||
),
|
||||
columns=[row.column_name for row in result if row.column_name],
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=row.column_name,
|
||||
intervalType=INTERVAL_TYPE_MAP.get(
|
||||
result[0].partition_strategy,
|
||||
PartitionIntervalTypes.COLUMN_VALUE,
|
||||
),
|
||||
interval=None,
|
||||
)
|
||||
for row in result
|
||||
if row.column_name
|
||||
]
|
||||
)
|
||||
return True, partition_details
|
||||
return False, None
|
||||
|
@ -20,7 +20,12 @@ import pyiceberg.partitioning
|
||||
import pyiceberg.table
|
||||
import pyiceberg.types
|
||||
|
||||
from metadata.generated.schema.entity.data.table import Column, Constraint, DataType
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
Constraint,
|
||||
DataType,
|
||||
PartitionIntervalTypes,
|
||||
)
|
||||
|
||||
|
||||
def namespace_to_str(namespace: tuple[str]) -> str:
|
||||
@ -45,13 +50,45 @@ def get_table_name_as_str(table: pyiceberg.table.Table) -> str:
|
||||
def get_column_from_partition(
|
||||
columns: Tuple[pyiceberg.types.NestedField, ...],
|
||||
partition: pyiceberg.partitioning.PartitionField,
|
||||
) -> str:
|
||||
) -> Optional[str]:
|
||||
"""Returns the Column Name belonging to a partition."""
|
||||
# A Partition in Iceberg has a Source Column to which a Transformation is applied.
|
||||
# We need to return the Source Column name.
|
||||
return [
|
||||
column.name for column in columns if column.field_id == partition.source_id
|
||||
][0]
|
||||
return next(
|
||||
(column.name for column in columns if column.field_id == partition.source_id),
|
||||
None,
|
||||
)
|
||||
|
||||
|
||||
def get_column_partition_type(
|
||||
columns: Tuple[pyiceberg.types.NestedField, ...],
|
||||
partition: pyiceberg.partitioning.PartitionField,
|
||||
) -> Optional[PartitionIntervalTypes]:
|
||||
"""Get the partition type for a given partition column."""
|
||||
iceberg_interval_type_map = {
|
||||
"INT": PartitionIntervalTypes.INTEGER_RANGE,
|
||||
**dict.fromkeys(
|
||||
["TIME", "DATE", "TIMESTAMP", "TIMESTAMPTZ"],
|
||||
PartitionIntervalTypes.TIME_UNIT,
|
||||
),
|
||||
}
|
||||
|
||||
data_type = str(
|
||||
next(
|
||||
(
|
||||
column.field_type
|
||||
for column in columns
|
||||
if column.field_id == partition.source_id
|
||||
),
|
||||
"",
|
||||
)
|
||||
)
|
||||
if not data_type.isalpha():
|
||||
return None
|
||||
|
||||
return iceberg_interval_type_map.get(
|
||||
data_type.upper(), PartitionIntervalTypes.COLUMN_VALUE
|
||||
)
|
||||
|
||||
|
||||
def get_owner_from_table(
|
||||
|
@ -20,6 +20,7 @@ from pydantic import BaseModel
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
PartitionColumnDetails,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -27,6 +28,7 @@ from metadata.generated.schema.type.entityReference import EntityReference
|
||||
from metadata.ingestion.source.database.iceberg.helper import (
|
||||
IcebergColumnParser,
|
||||
get_column_from_partition,
|
||||
get_column_partition_type,
|
||||
)
|
||||
|
||||
|
||||
@ -57,10 +59,16 @@ class IcebergTable(BaseModel):
|
||||
columns=[IcebergColumnParser.parse(column) for column in iceberg_columns],
|
||||
tablePartition=TablePartition(
|
||||
columns=[
|
||||
get_column_from_partition(iceberg_columns, partition)
|
||||
PartitionColumnDetails(
|
||||
columnName=get_column_from_partition(
|
||||
iceberg_columns, partition
|
||||
),
|
||||
intervalType=get_column_partition_type(
|
||||
iceberg_columns, partition
|
||||
),
|
||||
interval=None,
|
||||
)
|
||||
for partition in table.spec().fields
|
||||
],
|
||||
intervalType=None,
|
||||
interval=None,
|
||||
]
|
||||
),
|
||||
)
|
||||
|
@ -21,7 +21,8 @@ from sqlalchemy.engine import Inspector
|
||||
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -74,9 +75,9 @@ logger = ingestion_logger()
|
||||
|
||||
|
||||
INTERVAL_TYPE_MAP = {
|
||||
"list": IntervalType.COLUMN_VALUE.value,
|
||||
"hash": IntervalType.COLUMN_VALUE.value,
|
||||
"range": IntervalType.TIME_UNIT.value,
|
||||
"list": PartitionIntervalTypes.COLUMN_VALUE,
|
||||
"hash": PartitionIntervalTypes.COLUMN_VALUE,
|
||||
"range": PartitionIntervalTypes.TIME_UNIT,
|
||||
}
|
||||
|
||||
RELKIND_MAP = {
|
||||
@ -203,12 +204,20 @@ class PostgresSource(CommonDbSourceService, MultiDBSource):
|
||||
result = self.engine.execute(
|
||||
POSTGRES_PARTITION_DETAILS, table_name=table_name, schema_name=schema_name
|
||||
).all()
|
||||
|
||||
if result:
|
||||
partition_details = TablePartition(
|
||||
intervalType=INTERVAL_TYPE_MAP.get(
|
||||
result[0].partition_strategy, IntervalType.COLUMN_VALUE.value
|
||||
),
|
||||
columns=[row.column_name for row in result if row.column_name],
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=row.column_name,
|
||||
intervalType=INTERVAL_TYPE_MAP.get(
|
||||
row.partition_strategy, PartitionIntervalTypes.COLUMN_VALUE
|
||||
),
|
||||
interval=None,
|
||||
)
|
||||
for row in result
|
||||
if row.column_name
|
||||
]
|
||||
)
|
||||
return True, partition_details
|
||||
return False, None
|
||||
|
@ -32,7 +32,8 @@ from metadata.generated.schema.entity.data.storedProcedure import (
|
||||
)
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
ConstraintType,
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TableConstraint,
|
||||
TablePartition,
|
||||
TableType,
|
||||
@ -206,11 +207,11 @@ class RedshiftSource(
|
||||
f"Error trying to connect to database {new_database}: {exc}"
|
||||
)
|
||||
|
||||
def _get_partition_key(self, diststyle: str) -> Optional[List[str]]:
|
||||
def _get_partition_key(self, diststyle: str) -> Optional[str]:
|
||||
try:
|
||||
regex = re.match(r"KEY\((\w+)\)", diststyle)
|
||||
if regex:
|
||||
return [regex.group(1)]
|
||||
return regex.group(1)
|
||||
except Exception as err:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.warning(err)
|
||||
@ -218,13 +219,20 @@ class RedshiftSource(
|
||||
|
||||
def get_table_partition_details(
|
||||
self, table_name: str, schema_name: str, inspector: Inspector
|
||||
) -> Tuple[bool, TablePartition]:
|
||||
) -> Tuple[bool, Optional[TablePartition]]:
|
||||
diststyle = self.partition_details.get(f"{schema_name}.{table_name}")
|
||||
if diststyle:
|
||||
partition_details = TablePartition(
|
||||
columns=self._get_partition_key(diststyle),
|
||||
intervalType=IntervalType.COLUMN_VALUE,
|
||||
)
|
||||
distkey = self._get_partition_key(diststyle)
|
||||
if distkey is not None:
|
||||
partition_details = TablePartition(
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=distkey,
|
||||
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
|
||||
interval=None,
|
||||
)
|
||||
]
|
||||
)
|
||||
return True, partition_details
|
||||
return False, None
|
||||
|
||||
|
@ -28,7 +28,8 @@ from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -319,15 +320,21 @@ class SnowflakeSource(
|
||||
|
||||
def get_table_partition_details(
|
||||
self, table_name: str, schema_name: str, inspector: Inspector
|
||||
) -> Tuple[bool, TablePartition]:
|
||||
) -> Tuple[bool, Optional[TablePartition]]:
|
||||
cluster_key = self.partition_details.get(f"{schema_name}.{table_name}")
|
||||
if cluster_key:
|
||||
partition_columns = self.parse_column_name_from_expr(cluster_key)
|
||||
partition_details = TablePartition(
|
||||
columns=self.__fix_partition_column_case(
|
||||
table_name, schema_name, inspector, partition_columns
|
||||
),
|
||||
intervalType=IntervalType.COLUMN_VALUE,
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName=column,
|
||||
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
|
||||
interval=None,
|
||||
)
|
||||
for column in self.__fix_partition_column_case(
|
||||
table_name, schema_name, inspector, partition_columns
|
||||
)
|
||||
]
|
||||
)
|
||||
return True, partition_details
|
||||
return False, None
|
||||
|
@ -21,7 +21,7 @@ from metadata.data_quality.validations.table.pandas.tableRowInsertedCountToBeBet
|
||||
TableRowInsertedCountToBeBetweenValidator,
|
||||
)
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
PartitionIntervalType,
|
||||
PartitionIntervalTypes,
|
||||
PartitionProfilerConfig,
|
||||
ProfileSampleType,
|
||||
)
|
||||
@ -47,7 +47,7 @@ class PandasInterfaceMixin:
|
||||
partition_field = self.table_partition_config.partitionColumnName
|
||||
if (
|
||||
self.table_partition_config.partitionIntervalType
|
||||
== PartitionIntervalType.COLUMN_VALUE
|
||||
== PartitionIntervalTypes.COLUMN_VALUE
|
||||
):
|
||||
return [
|
||||
df[
|
||||
@ -59,7 +59,7 @@ class PandasInterfaceMixin:
|
||||
]
|
||||
if (
|
||||
self.table_partition_config.partitionIntervalType
|
||||
== PartitionIntervalType.INTEGER_RANGE
|
||||
== PartitionIntervalTypes.INTEGER_RANGE
|
||||
):
|
||||
return [
|
||||
df[
|
||||
|
@ -19,7 +19,7 @@ from typing import List
|
||||
from sqlalchemy import Column, text
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
PartitionIntervalType,
|
||||
PartitionIntervalTypes,
|
||||
PartitionProfilerConfig,
|
||||
)
|
||||
from metadata.profiler.orm.functions.modulo import ModuloFn
|
||||
@ -52,13 +52,13 @@ def build_partition_predicate(
|
||||
_type_: _description_
|
||||
"""
|
||||
partition_field = partition_details.partitionColumnName
|
||||
if partition_details.partitionIntervalType == PartitionIntervalType.COLUMN_VALUE:
|
||||
if partition_details.partitionIntervalType == PartitionIntervalTypes.COLUMN_VALUE:
|
||||
return get_value_filter(
|
||||
Column(partition_field),
|
||||
partition_details.partitionValues,
|
||||
)
|
||||
|
||||
if partition_details.partitionIntervalType == PartitionIntervalType.INTEGER_RANGE:
|
||||
if partition_details.partitionIntervalType == PartitionIntervalTypes.INTEGER_RANGE:
|
||||
return get_integer_range_filter(
|
||||
Column(partition_field),
|
||||
partition_details.partitionIntegerRangeStart,
|
||||
|
@ -20,7 +20,7 @@ from metadata.data_quality.validations.table.pandas.tableRowInsertedCountToBeBet
|
||||
TableRowInsertedCountToBeBetweenValidator,
|
||||
)
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
PartitionIntervalType,
|
||||
PartitionIntervalTypes,
|
||||
PartitionProfilerConfig,
|
||||
ProfileSampleType,
|
||||
TableData,
|
||||
@ -41,7 +41,7 @@ class DatalakeSampler(SamplerInterface):
|
||||
partition_field = self._partition_details.partitionColumnName
|
||||
if (
|
||||
self._partition_details.partitionIntervalType
|
||||
== PartitionIntervalType.COLUMN_VALUE
|
||||
== PartitionIntervalTypes.COLUMN_VALUE
|
||||
):
|
||||
return [
|
||||
df[df[partition_field].isin(self._partition_details.partitionValues)]
|
||||
@ -49,7 +49,7 @@ class DatalakeSampler(SamplerInterface):
|
||||
]
|
||||
if (
|
||||
self._partition_details.partitionIntervalType
|
||||
== PartitionIntervalType.INTEGER_RANGE
|
||||
== PartitionIntervalTypes.INTEGER_RANGE
|
||||
):
|
||||
return [
|
||||
df[
|
||||
|
@ -21,7 +21,7 @@ from sqlalchemy.orm.util import AliasedClass
|
||||
from sqlalchemy.sql.sqltypes import Enum
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
PartitionIntervalType,
|
||||
PartitionIntervalTypes,
|
||||
PartitionProfilerConfig,
|
||||
ProfileSampleType,
|
||||
TableData,
|
||||
@ -224,7 +224,7 @@ class SQASampler(SamplerInterface):
|
||||
|
||||
if (
|
||||
self._partition_details.partitionIntervalType
|
||||
== PartitionIntervalType.COLUMN_VALUE
|
||||
== PartitionIntervalTypes.COLUMN_VALUE
|
||||
):
|
||||
return aliased(
|
||||
self.table,
|
||||
@ -242,7 +242,7 @@ class SQASampler(SamplerInterface):
|
||||
|
||||
if (
|
||||
self._partition_details.partitionIntervalType
|
||||
== PartitionIntervalType.INTEGER_RANGE
|
||||
== PartitionIntervalTypes.INTEGER_RANGE
|
||||
):
|
||||
return aliased(
|
||||
self.table,
|
||||
|
@ -11,10 +11,11 @@
|
||||
|
||||
"""Partition utility"""
|
||||
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
PartitionIntervalUnit,
|
||||
PartitionProfilerConfig,
|
||||
Table,
|
||||
@ -44,47 +45,55 @@ def get_partition_details(entity: Table) -> Optional[PartitionProfilerConfig]:
|
||||
and entity.serviceType == DatabaseServiceType.BigQuery
|
||||
):
|
||||
if hasattr(entity, "tablePartition") and entity.tablePartition:
|
||||
if entity.tablePartition.intervalType == IntervalType.TIME_UNIT:
|
||||
column_partitions: Optional[
|
||||
List[PartitionColumnDetails]
|
||||
] = entity.tablePartition.columns
|
||||
if not column_partitions:
|
||||
raise TypeError("table partition missing. Skipping table")
|
||||
|
||||
partiton = column_partitions[0]
|
||||
|
||||
if partiton.intervalType == PartitionIntervalTypes.TIME_UNIT:
|
||||
return PartitionProfilerConfig(
|
||||
enablePartitioning=True,
|
||||
partitionColumnName=entity.tablePartition.columns[0],
|
||||
partitionColumnName=partiton.columnName,
|
||||
partitionIntervalUnit=PartitionIntervalUnit.DAY
|
||||
if entity.tablePartition.interval != "HOUR"
|
||||
else entity.tablePartition.interval,
|
||||
if partiton.interval != "HOUR"
|
||||
else partiton.interval,
|
||||
partitionInterval=1,
|
||||
partitionIntervalType=entity.tablePartition.intervalType.value,
|
||||
partitionIntervalType=partiton.intervalType.value,
|
||||
partitionValues=None,
|
||||
partitionIntegerRangeStart=None,
|
||||
partitionIntegerRangeEnd=None,
|
||||
)
|
||||
if entity.tablePartition.intervalType == IntervalType.INGESTION_TIME:
|
||||
if partiton.intervalType == PartitionIntervalTypes.INGESTION_TIME:
|
||||
return PartitionProfilerConfig(
|
||||
enablePartitioning=True,
|
||||
partitionColumnName="_PARTITIONDATE"
|
||||
if entity.tablePartition.interval == "DAY"
|
||||
if partiton.interval == "DAY"
|
||||
else "_PARTITIONTIME",
|
||||
partitionIntervalUnit=PartitionIntervalUnit.DAY
|
||||
if entity.tablePartition.interval != "HOUR"
|
||||
else entity.tablePartition.interval,
|
||||
if partiton.interval != "HOUR"
|
||||
else partiton.interval,
|
||||
partitionInterval=1,
|
||||
partitionIntervalType=entity.tablePartition.intervalType.value,
|
||||
partitionIntervalType=partiton.intervalType.value,
|
||||
partitionValues=None,
|
||||
partitionIntegerRangeStart=None,
|
||||
partitionIntegerRangeEnd=None,
|
||||
)
|
||||
if entity.tablePartition.intervalType == IntervalType.INTEGER_RANGE:
|
||||
if partiton.intervalType == PartitionIntervalTypes.INTEGER_RANGE:
|
||||
return PartitionProfilerConfig(
|
||||
enablePartitioning=True,
|
||||
partitionColumnName=entity.tablePartition.columns[0],
|
||||
partitionColumnName=partiton.columnName,
|
||||
partitionIntervalUnit=None,
|
||||
partitionInterval=None,
|
||||
partitionIntervalType=entity.tablePartition.intervalType.value,
|
||||
partitionIntervalType=partiton.intervalType.value,
|
||||
partitionValues=None,
|
||||
partitionIntegerRangeStart=1,
|
||||
partitionIntegerRangeEnd=10000,
|
||||
)
|
||||
raise TypeError(
|
||||
f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
|
||||
f"Unsupported partition type {partiton.intervalType}. Skipping table"
|
||||
)
|
||||
|
||||
return None
|
||||
|
@ -18,8 +18,8 @@ from pydantic import BaseModel
|
||||
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionIntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
PartitionIntervalUnit,
|
||||
PartitionProfilerConfig,
|
||||
Table,
|
||||
@ -142,7 +142,13 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
def test_partition_details_time_unit(self):
|
||||
table_entity = MockTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["e"], intervalType=IntervalType.TIME_UNIT, interval="DAY"
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="e",
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
@ -177,7 +183,13 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
def test_partition_details_ingestion_time_date(self):
|
||||
table_entity = MockTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="DAY"
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="e",
|
||||
intervalType=PartitionIntervalTypes.INGESTION_TIME.value,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
@ -211,7 +223,13 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
def test_partition_details_ingestion_time_hour(self):
|
||||
table_entity = MockTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="HOUR"
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="e",
|
||||
intervalType=PartitionIntervalTypes.INGESTION_TIME.value,
|
||||
interval="HOUR",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
@ -246,15 +264,19 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
def test_partition_non_bq_table_profiler_partition_config(self):
|
||||
table_entity = MockRedshiftTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["datetime"],
|
||||
intervalType=IntervalType.TIME_UNIT,
|
||||
interval="DAY",
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="datetime",
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT.value,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=TableProfilerConfig(
|
||||
partitioning=PartitionProfilerConfig(
|
||||
enablePartitioning=True,
|
||||
partitionColumnName="foo",
|
||||
partitionIntervalType=PartitionIntervalType.TIME_UNIT,
|
||||
partitionIntervalType=PartitionIntervalTypes.TIME_UNIT,
|
||||
partitionIntervalUnit="DAY",
|
||||
partitionInterval=1,
|
||||
) # type: ignore
|
||||
@ -266,7 +288,7 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
if resp:
|
||||
assert resp.enablePartitioning
|
||||
assert resp.partitionColumnName == "foo"
|
||||
assert resp.partitionIntervalType == PartitionIntervalType.TIME_UNIT
|
||||
assert resp.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
|
||||
assert resp.partitionIntervalUnit == PartitionIntervalUnit.DAY
|
||||
assert resp.partitionInterval == 1
|
||||
else:
|
||||
@ -275,9 +297,13 @@ class ProfilerPartitionUnitTest(TestCase):
|
||||
def test_partition_non_bq_table_no_profiler_partition_config(self):
|
||||
table_entity = MockRedshiftTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["datetime"],
|
||||
intervalType=IntervalType.TIME_UNIT,
|
||||
interval="DAY",
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="datetime",
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT.value,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
|
@ -20,7 +20,10 @@ from google.cloud.bigquery.table import Table
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.table import IntervalType
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataWorkflowConfig,
|
||||
)
|
||||
@ -138,9 +141,18 @@ class BigqueryUnitTest(TestCase):
|
||||
inspector=self.inspector,
|
||||
)
|
||||
|
||||
assert partition.columns == ["test_column"]
|
||||
assert partition.intervalType.value == IntervalType.TIME_UNIT.value
|
||||
assert partition.interval == "DAY"
|
||||
assert partition.columns == [
|
||||
PartitionColumnDetails(
|
||||
columnName="test_column",
|
||||
intervalType=PartitionIntervalTypes.TIME_UNIT,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
assert (
|
||||
partition.columns[0].intervalType.value
|
||||
== PartitionIntervalTypes.TIME_UNIT.value
|
||||
)
|
||||
assert partition.columns[0].interval == "DAY"
|
||||
assert bool_resp
|
||||
|
||||
def test_ingestion_time_partition(self):
|
||||
@ -153,8 +165,12 @@ class BigqueryUnitTest(TestCase):
|
||||
inspector=self.inspector,
|
||||
)
|
||||
|
||||
assert partition.intervalType.value == IntervalType.INGESTION_TIME.value
|
||||
assert partition.interval == "HOUR"
|
||||
self.assertIsInstance(partition.columns, list)
|
||||
assert (
|
||||
partition.columns[0].intervalType.value
|
||||
== PartitionIntervalTypes.INGESTION_TIME.value
|
||||
)
|
||||
assert partition.columns[0].interval == "HOUR"
|
||||
assert bool_resp
|
||||
|
||||
def test_range_partition(self):
|
||||
@ -168,8 +184,12 @@ class BigqueryUnitTest(TestCase):
|
||||
inspector=self.inspector,
|
||||
)
|
||||
|
||||
assert partition.intervalType.value == IntervalType.INTEGER_RANGE.value
|
||||
assert partition.interval == 10
|
||||
self.assertIsInstance(partition.columns, list)
|
||||
assert (
|
||||
partition.columns[0].intervalType.value
|
||||
== PartitionIntervalTypes.INTEGER_RANGE.value
|
||||
)
|
||||
assert partition.columns[0].interval == 10
|
||||
assert bool_resp
|
||||
|
||||
def test_no_partition(self):
|
||||
|
@ -16,8 +16,8 @@ from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
IntervalType,
|
||||
PartitionIntervalType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
PartitionIntervalUnit,
|
||||
PartitionProfilerConfig,
|
||||
TablePartition,
|
||||
@ -66,13 +66,19 @@ def test_get_partition_details():
|
||||
|
||||
assert partition.enablePartitioning == True
|
||||
assert partition.partitionColumnName == "order_date"
|
||||
assert partition.partitionIntervalType == PartitionIntervalType.TIME_UNIT
|
||||
assert partition.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
|
||||
assert partition.partitionInterval == 5
|
||||
assert partition.partitionIntervalUnit == PartitionIntervalUnit.YEAR
|
||||
|
||||
table_entity = MockTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="HOUR"
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="e",
|
||||
intervalType=PartitionIntervalTypes.INGESTION_TIME,
|
||||
interval="HOUR",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
@ -81,21 +87,27 @@ def test_get_partition_details():
|
||||
|
||||
assert partition.enablePartitioning == True
|
||||
assert partition.partitionColumnName == "_PARTITIONTIME"
|
||||
assert partition.partitionIntervalType == PartitionIntervalType.INGESTION_TIME
|
||||
assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
|
||||
assert partition.partitionInterval == 1
|
||||
assert partition.partitionIntervalUnit == PartitionIntervalUnit.HOUR
|
||||
|
||||
table_entity = MockTable(
|
||||
tablePartition=TablePartition(
|
||||
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="DAY"
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="e",
|
||||
intervalType=PartitionIntervalTypes.INGESTION_TIME,
|
||||
interval="DAY",
|
||||
)
|
||||
]
|
||||
),
|
||||
tableProfilerConfig=None,
|
||||
)
|
||||
|
||||
partition = get_partition_details(table_entity)
|
||||
|
||||
assert partition.enablePartitioning == True
|
||||
assert partition.enablePartitioning is True
|
||||
assert partition.partitionColumnName == "_PARTITIONDATE"
|
||||
assert partition.partitionIntervalType == PartitionIntervalType.INGESTION_TIME
|
||||
assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
|
||||
assert partition.partitionInterval == 1
|
||||
assert partition.partitionIntervalUnit == PartitionIntervalUnit.DAY
|
||||
|
@ -1,290 +1,290 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# # Copyright 2021 Collate
|
||||
# # Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# # you may not use this file except in compliance with the License.
|
||||
# # You may obtain a copy of the License at
|
||||
# # http://www.apache.org/licenses/LICENSE-2.0
|
||||
# # Unless required by applicable law or agreed to in writing, software
|
||||
# # distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# # See the License for the specific language governing permissions and
|
||||
# # limitations under the License.
|
||||
|
||||
"""
|
||||
Test MongoDB using the topology
|
||||
"""
|
||||
# """
|
||||
# Test MongoDB using the topology
|
||||
# """
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest import TestCase
|
||||
from unittest.mock import Mock, patch
|
||||
# import json
|
||||
# from pathlib import Path
|
||||
# from unittest import TestCase
|
||||
# from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
# import pytest
|
||||
|
||||
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
ConstraintType,
|
||||
DataType,
|
||||
TableConstraint,
|
||||
TableType,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.databaseService import (
|
||||
DatabaseConnection,
|
||||
DatabaseService,
|
||||
DatabaseServiceType,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataWorkflowConfig,
|
||||
)
|
||||
from metadata.generated.schema.type.basic import SourceUrl
|
||||
from metadata.generated.schema.type.entityReference import EntityReference
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.database.bigtable.metadata import BigtableSource
|
||||
# from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
# from metadata.generated.schema.entity.data.database import Database
|
||||
# from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
# from metadata.generated.schema.entity.data.table import (
|
||||
# Column,
|
||||
# ConstraintType,
|
||||
# DataType,
|
||||
# TableConstraint,
|
||||
# TableType,
|
||||
# )
|
||||
# from metadata.generated.schema.entity.services.databaseService import (
|
||||
# DatabaseConnection,
|
||||
# DatabaseService,
|
||||
# DatabaseServiceType,
|
||||
# )
|
||||
# from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
# OpenMetadataWorkflowConfig,
|
||||
# )
|
||||
# from metadata.generated.schema.type.basic import SourceUrl
|
||||
# from metadata.generated.schema.type.entityReference import EntityReference
|
||||
# from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
# from metadata.ingestion.source.database.bigtable.metadata import BigtableSource
|
||||
|
||||
mock_file_path = (
|
||||
Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
|
||||
)
|
||||
with open(mock_file_path) as file:
|
||||
mock_data: dict = json.load(file)
|
||||
# mock_file_path = (
|
||||
# Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
|
||||
# )
|
||||
# with open(mock_file_path) as file:
|
||||
# mock_data: dict = json.load(file)
|
||||
|
||||
mock_bigtable_config = {
|
||||
"source": {
|
||||
"type": "bigtable",
|
||||
"serviceName": "local_bigtable",
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"type": "BigTable",
|
||||
"credentials": {
|
||||
"gcpConfig": {
|
||||
"type": "service_account",
|
||||
"projectId": "my-gcp-project",
|
||||
"privateKeyId": "private_key_id",
|
||||
# this is a valid key that was generated on a local machine and is not used for any real project
|
||||
"privateKey": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpQIBAAKCAQEAw3vHG9fDIkcYB0xi2Mv4fS2gUzKR9ZRrcVNeKkqGFTT71AVB\nOzgIqYVe8b2aWODuNye6sipcrqTqOt05Esj+sxhk5McM9bE2RlxXC5QH/Bp9zxMP\n/Yksv9Ov7fdDt/loUk7sTXvI+7LDJfmRYU6MtVjyyLs7KpQIB2xBWEToU1xZY+v0\ndRC1NA+YWc+FjXbAiFAf9d4gXkYO8VmU5meixVh4C8nsjokEXk0T/HEItpZCxadk\ndZ7LKUE/HDmWCO2oNG6sCf4ET2crjSdYIfXuREopX1aQwnk7KbI4/YIdlRz1I369\nAz3+Hxlf9lLJVH3+itN4GXrR9yWWKWKDnwDPbQIDAQABAoIBAQC3X5QuTR7SN8iV\niBUtc2D84+ECSmza5shG/UJW/6N5n0Mf53ICgBS4GNEwiYCRISa0/ILIgK6CcVb7\nsuvH8F3kWNzEMui4TO0x4YsR5GH9HkioCCS224frxkLBQnL20HIIy9ok8Rpe6Zjg\nNZUnp4yczPyqSeA9l7FUbTt69uDM2Cx61m8REOpFukpnYLyZGbmNPYmikEO+rq9r\nwNID5dkSeVuQYo4MQdRavOGFUWvUYXzkEQ0A6vPyraVBfolESX8WaLNVjic7nIa3\nujdSNojnJqGJ3gslntcmN1d4JOfydc4bja4/NdNlcOHpWDGLzY1QnaDe0Koxn8sx\nLT9MVD2NAoGBAPy7r726bKVGWcwqTzUuq1OWh5c9CAc4N2zWBBldSJyUdllUq52L\nWTyva6GRoRzCcYa/dKLLSM/k4eLf9tpxeIIfTOMsvzGtbAdm257ndMXNvfYpxCfU\nK/gUFfAUGHZ3MucTHRY6DTkJg763Sf6PubA2fqv3HhVZDK/1HGDtHlTPAoGBAMYC\npdV7O7lAyXS/d9X4PQZ4BM+P8MbXEdGBbPPlzJ2YIb53TEmYfSj3z41u9+BNnhGP\n4uzUyAR/E4sxrA2+Ll1lPSCn+KY14WWiVGfWmC5j1ftdpkbrXstLN8NpNYzrKZwx\njdR0ZkwvZ8B5+kJ1hK96giwWS+SJxJR3TohcQ18DAoGAJSfmv2r//BBqtURnHrd8\nwq43wvlbC8ytAVg5hA0d1r9Q4vM6w8+vz+cuWLOTTyobDKdrG1/tlXrd5r/sh9L0\n15SIdkGm3kPTxQbPNP5sQYRs8BrV1tEvoao6S3B45DnEBwrdVN42AXOvpcNGoqE4\nuHpahyeuiY7s+ZV8lZdmxSsCgYEAolr5bpmk1rjwdfGoaKEqKGuwRiBX5DHkQkxE\n8Zayt2VOBcX7nzyRI05NuEIMrLX3rZ61CktN1aH8fF02He6aRaoE/Qm9L0tujM8V\nNi8WiLMDeR/Ifs3u4/HAv1E8v1byv0dCa7klR8J257McJ/ID4X4pzcxaXgE4ViOd\nGOHNu9ECgYEApq1zkZthEQymTUxs+lSFcubQpaXyf5ZC61cJewpWkqGDtSC+8DxE\nF/jydybWuoNHXymnvY6QywxuIooivbuib6AlgpEJeybmnWlDOZklFOD0abNZ+aNO\ndUk7XVGffCakXQ0jp1kmZA4lGsYK1h5dEU5DgXqu4UYJ88Vttax2W+Y=\n-----END RSA PRIVATE KEY-----\n",
|
||||
"clientEmail": "gcpuser@project_id.iam.gserviceaccount.com",
|
||||
"clientId": "client_id",
|
||||
"authUri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"tokenUri": "https://oauth2.googleapis.com/token",
|
||||
"authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
"sourceConfig": {
|
||||
"config": {
|
||||
"type": "DatabaseMetadata",
|
||||
"schemaFilterPattern": {"includes": ["my_instance"]},
|
||||
"tableFilterPattern": {"includes": ["random_table"]},
|
||||
}
|
||||
},
|
||||
},
|
||||
"sink": {"type": "metadata-rest", "config": {}},
|
||||
"workflowConfig": {
|
||||
"openMetadataServerConfig": {
|
||||
"hostPort": "http://localhost:8585/api",
|
||||
"authProvider": "openmetadata",
|
||||
"securityConfig": {
|
||||
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
# mock_bigtable_config = {
|
||||
# "source": {
|
||||
# "type": "bigtable",
|
||||
# "serviceName": "local_bigtable",
|
||||
# "serviceConnection": {
|
||||
# "config": {
|
||||
# "type": "BigTable",
|
||||
# "credentials": {
|
||||
# "gcpConfig": {
|
||||
# "type": "service_account",
|
||||
# "projectId": "my-gcp-project",
|
||||
# "privateKeyId": "private_key_id",
|
||||
# # this is a valid key that was generated on a local machine and is not used for any real project
|
||||
# "privateKey": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpQIBAAKCAQEAw3vHG9fDIkcYB0xi2Mv4fS2gUzKR9ZRrcVNeKkqGFTT71AVB\nOzgIqYVe8b2aWODuNye6sipcrqTqOt05Esj+sxhk5McM9bE2RlxXC5QH/Bp9zxMP\n/Yksv9Ov7fdDt/loUk7sTXvI+7LDJfmRYU6MtVjyyLs7KpQIB2xBWEToU1xZY+v0\ndRC1NA+YWc+FjXbAiFAf9d4gXkYO8VmU5meixVh4C8nsjokEXk0T/HEItpZCxadk\ndZ7LKUE/HDmWCO2oNG6sCf4ET2crjSdYIfXuREopX1aQwnk7KbI4/YIdlRz1I369\nAz3+Hxlf9lLJVH3+itN4GXrR9yWWKWKDnwDPbQIDAQABAoIBAQC3X5QuTR7SN8iV\niBUtc2D84+ECSmza5shG/UJW/6N5n0Mf53ICgBS4GNEwiYCRISa0/ILIgK6CcVb7\nsuvH8F3kWNzEMui4TO0x4YsR5GH9HkioCCS224frxkLBQnL20HIIy9ok8Rpe6Zjg\nNZUnp4yczPyqSeA9l7FUbTt69uDM2Cx61m8REOpFukpnYLyZGbmNPYmikEO+rq9r\nwNID5dkSeVuQYo4MQdRavOGFUWvUYXzkEQ0A6vPyraVBfolESX8WaLNVjic7nIa3\nujdSNojnJqGJ3gslntcmN1d4JOfydc4bja4/NdNlcOHpWDGLzY1QnaDe0Koxn8sx\nLT9MVD2NAoGBAPy7r726bKVGWcwqTzUuq1OWh5c9CAc4N2zWBBldSJyUdllUq52L\nWTyva6GRoRzCcYa/dKLLSM/k4eLf9tpxeIIfTOMsvzGtbAdm257ndMXNvfYpxCfU\nK/gUFfAUGHZ3MucTHRY6DTkJg763Sf6PubA2fqv3HhVZDK/1HGDtHlTPAoGBAMYC\npdV7O7lAyXS/d9X4PQZ4BM+P8MbXEdGBbPPlzJ2YIb53TEmYfSj3z41u9+BNnhGP\n4uzUyAR/E4sxrA2+Ll1lPSCn+KY14WWiVGfWmC5j1ftdpkbrXstLN8NpNYzrKZwx\njdR0ZkwvZ8B5+kJ1hK96giwWS+SJxJR3TohcQ18DAoGAJSfmv2r//BBqtURnHrd8\nwq43wvlbC8ytAVg5hA0d1r9Q4vM6w8+vz+cuWLOTTyobDKdrG1/tlXrd5r/sh9L0\n15SIdkGm3kPTxQbPNP5sQYRs8BrV1tEvoao6S3B45DnEBwrdVN42AXOvpcNGoqE4\nuHpahyeuiY7s+ZV8lZdmxSsCgYEAolr5bpmk1rjwdfGoaKEqKGuwRiBX5DHkQkxE\n8Zayt2VOBcX7nzyRI05NuEIMrLX3rZ61CktN1aH8fF02He6aRaoE/Qm9L0tujM8V\nNi8WiLMDeR/Ifs3u4/HAv1E8v1byv0dCa7klR8J257McJ/ID4X4pzcxaXgE4ViOd\nGOHNu9ECgYEApq1zkZthEQymTUxs+lSFcubQpaXyf5ZC61cJewpWkqGDtSC+8DxE\nF/jydybWuoNHXymnvY6QywxuIooivbuib6AlgpEJeybmnWlDOZklFOD0abNZ+aNO\ndUk7XVGffCakXQ0jp1kmZA4lGsYK1h5dEU5DgXqu4UYJ88Vttax2W+Y=\n-----END RSA PRIVATE KEY-----\n",
|
||||
# "clientEmail": "gcpuser@project_id.iam.gserviceaccount.com",
|
||||
# "clientId": "client_id",
|
||||
# "authUri": "https://accounts.google.com/o/oauth2/auth",
|
||||
# "tokenUri": "https://oauth2.googleapis.com/token",
|
||||
# "authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
# "clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
# }
|
||||
# },
|
||||
# },
|
||||
# },
|
||||
# "sourceConfig": {
|
||||
# "config": {
|
||||
# "type": "DatabaseMetadata",
|
||||
# "schemaFilterPattern": {"includes": ["my_instance"]},
|
||||
# "tableFilterPattern": {"includes": ["random_table"]},
|
||||
# }
|
||||
# },
|
||||
# },
|
||||
# "sink": {"type": "metadata-rest", "config": {}},
|
||||
# "workflowConfig": {
|
||||
# "openMetadataServerConfig": {
|
||||
# "hostPort": "http://localhost:8585/api",
|
||||
# "authProvider": "openmetadata",
|
||||
# "securityConfig": {
|
||||
# "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
# }
|
||||
|
||||
MOCK_DATABASE_SERVICE = DatabaseService(
|
||||
id="85811038-099a-11ed-861d-0242ac120002",
|
||||
name="local_bigtable",
|
||||
connection=DatabaseConnection(),
|
||||
serviceType=DatabaseServiceType.Glue,
|
||||
)
|
||||
# MOCK_DATABASE_SERVICE = DatabaseService(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002",
|
||||
# name="local_bigtable",
|
||||
# connection=DatabaseConnection(),
|
||||
# serviceType=DatabaseServiceType.Glue,
|
||||
# )
|
||||
|
||||
MOCK_DATABASE = Database(
|
||||
id="2aaa012e-099a-11ed-861d-0242ac120002",
|
||||
name="my-gcp-project",
|
||||
fullyQualifiedName="local_bigtable.my-gcp-project",
|
||||
displayName="my-gcp-project",
|
||||
description="",
|
||||
service=EntityReference(
|
||||
id="85811038-099a-11ed-861d-0242ac120002",
|
||||
type="databaseService",
|
||||
),
|
||||
)
|
||||
# MOCK_DATABASE = Database(
|
||||
# id="2aaa012e-099a-11ed-861d-0242ac120002",
|
||||
# name="my-gcp-project",
|
||||
# fullyQualifiedName="local_bigtable.my-gcp-project",
|
||||
# displayName="my-gcp-project",
|
||||
# description="",
|
||||
# service=EntityReference(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002",
|
||||
# type="databaseService",
|
||||
# ),
|
||||
# )
|
||||
|
||||
MOCK_DATABASE_SCHEMA = DatabaseSchema(
|
||||
id="2aaa012e-099a-11ed-861d-0242ac120056",
|
||||
name="my_instance",
|
||||
fullyQualifiedName="local_bigtable.my-gcp-project.my_instance",
|
||||
displayName="default",
|
||||
description="",
|
||||
database=EntityReference(
|
||||
id="2aaa012e-099a-11ed-861d-0242ac120002",
|
||||
type="database",
|
||||
),
|
||||
service=EntityReference(
|
||||
id="85811038-099a-11ed-861d-0242ac120002",
|
||||
type="databaseService",
|
||||
),
|
||||
)
|
||||
# MOCK_DATABASE_SCHEMA = DatabaseSchema(
|
||||
# id="2aaa012e-099a-11ed-861d-0242ac120056",
|
||||
# name="my_instance",
|
||||
# fullyQualifiedName="local_bigtable.my-gcp-project.my_instance",
|
||||
# displayName="default",
|
||||
# description="",
|
||||
# database=EntityReference(
|
||||
# id="2aaa012e-099a-11ed-861d-0242ac120002",
|
||||
# type="database",
|
||||
# ),
|
||||
# service=EntityReference(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002",
|
||||
# type="databaseService",
|
||||
# ),
|
||||
# )
|
||||
|
||||
|
||||
MOCK_CREATE_TABLE = CreateTableRequest(
|
||||
name="random_table",
|
||||
tableType=TableType.Regular,
|
||||
columns=[
|
||||
Column(
|
||||
name="row_key",
|
||||
displayName="row_key",
|
||||
dataType=DataType.BYTES,
|
||||
dataTypeDisplay=DataType.BYTES.value,
|
||||
),
|
||||
Column(
|
||||
name="cf1.col1",
|
||||
displayName="cf1.col1",
|
||||
dataType=DataType.BYTES,
|
||||
dataTypeDisplay=DataType.BYTES.value,
|
||||
),
|
||||
Column(
|
||||
name="cf2.col2",
|
||||
displayName="cf2.col2",
|
||||
dataType=DataType.BYTES,
|
||||
dataTypeDisplay=DataType.BYTES.value,
|
||||
),
|
||||
],
|
||||
tableConstraints=[
|
||||
TableConstraint(constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"])
|
||||
],
|
||||
databaseSchema="local_bigtable.my-gcp-project.my_instance",
|
||||
sourceUrl=SourceUrl(
|
||||
__root__="https://console.cloud.google.com/bigtable/instances/my_instance/tables/random_table/overview?project=my-gcp-project"
|
||||
),
|
||||
)
|
||||
# MOCK_CREATE_TABLE = CreateTableRequest(
|
||||
# name="random_table",
|
||||
# tableType=TableType.Regular,
|
||||
# columns=[
|
||||
# Column(
|
||||
# name="row_key",
|
||||
# displayName="row_key",
|
||||
# dataType=DataType.BYTES,
|
||||
# dataTypeDisplay=DataType.BYTES.value,
|
||||
# ),
|
||||
# Column(
|
||||
# name="cf1.col1",
|
||||
# displayName="cf1.col1",
|
||||
# dataType=DataType.BYTES,
|
||||
# dataTypeDisplay=DataType.BYTES.value,
|
||||
# ),
|
||||
# Column(
|
||||
# name="cf2.col2",
|
||||
# displayName="cf2.col2",
|
||||
# dataType=DataType.BYTES,
|
||||
# dataTypeDisplay=DataType.BYTES.value,
|
||||
# ),
|
||||
# ],
|
||||
# tableConstraints=[
|
||||
# TableConstraint(constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"])
|
||||
# ],
|
||||
# databaseSchema="local_bigtable.my-gcp-project.my_instance",
|
||||
# sourceUrl=SourceUrl(
|
||||
# __root__="https://console.cloud.google.com/bigtable/instances/my_instance/tables/random_table/overview?project=my-gcp-project"
|
||||
# ),
|
||||
# )
|
||||
|
||||
|
||||
EXPECTED_DATABASE_NAMES = ["my-gcp-project"]
|
||||
# EXPECTED_DATABASE_NAMES = ["my-gcp-project"]
|
||||
|
||||
EXPECTED_DATABASE_SCHEMA_NAMES = [
|
||||
"my_instance",
|
||||
]
|
||||
# EXPECTED_DATABASE_SCHEMA_NAMES = [
|
||||
# "my_instance",
|
||||
# ]
|
||||
|
||||
MOCK_DATABASE_SCHEMA_NAMES = [
|
||||
"my_instance",
|
||||
"random1_schema",
|
||||
]
|
||||
# MOCK_DATABASE_SCHEMA_NAMES = [
|
||||
# "my_instance",
|
||||
# "random1_schema",
|
||||
# ]
|
||||
|
||||
EXPECTED_TABLE_NAMES = [
|
||||
("random_table", TableType.Regular),
|
||||
]
|
||||
# EXPECTED_TABLE_NAMES = [
|
||||
# ("random_table", TableType.Regular),
|
||||
# ]
|
||||
|
||||
|
||||
def custom_column_compare(self, other):
|
||||
return (
|
||||
self.name == other.name
|
||||
and self.description == other.description
|
||||
and self.children == other.children
|
||||
)
|
||||
# def custom_column_compare(self, other):
|
||||
# return (
|
||||
# self.name == other.name
|
||||
# and self.description == other.description
|
||||
# and self.children == other.children
|
||||
# )
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bigtable_row():
|
||||
mock = Mock()
|
||||
cell = Mock()
|
||||
cell.value = b"cell_value"
|
||||
cell.timestamp = 1234567890
|
||||
mock.cells = {"cf1": {b"col1": [cell]}, "cf2": {b"col2": [cell]}}
|
||||
mock.row_key = b"row_key"
|
||||
yield mock
|
||||
# @pytest.fixture
|
||||
# def mock_bigtable_row():
|
||||
# mock = Mock()
|
||||
# cell = Mock()
|
||||
# cell.value = b"cell_value"
|
||||
# cell.timestamp = 1234567890
|
||||
# mock.cells = {"cf1": {b"col1": [cell]}, "cf2": {b"col2": [cell]}}
|
||||
# mock.row_key = b"row_key"
|
||||
# yield mock
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bigtable_table(mock_bigtable_row):
|
||||
mock = Mock()
|
||||
mock.table_id = "random_table"
|
||||
mock.list_column_families.return_value = {"cf1": None, "cf2": None}
|
||||
mock.read_rows.return_value = [mock_bigtable_row]
|
||||
yield mock
|
||||
# @pytest.fixture
|
||||
# def mock_bigtable_table(mock_bigtable_row):
|
||||
# mock = Mock()
|
||||
# mock.table_id = "random_table"
|
||||
# mock.list_column_families.return_value = {"cf1": None, "cf2": None}
|
||||
# mock.read_rows.return_value = [mock_bigtable_row]
|
||||
# yield mock
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bigtable_instance(mock_bigtable_table):
|
||||
mock = Mock()
|
||||
mock.instance_id = "my_instance"
|
||||
mock.project_id = "my-gcp-project"
|
||||
mock.list_tables.return_value = [mock_bigtable_table]
|
||||
yield mock
|
||||
# @pytest.fixture
|
||||
# def mock_bigtable_instance(mock_bigtable_table):
|
||||
# mock = Mock()
|
||||
# mock.instance_id = "my_instance"
|
||||
# mock.project_id = "my-gcp-project"
|
||||
# mock.list_tables.return_value = [mock_bigtable_table]
|
||||
# yield mock
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_google_cloud_client(mock_bigtable_instance):
|
||||
with patch("google.cloud.bigtable.Client") as mock_client:
|
||||
mock_client.list_instances.return_value = [[], []]
|
||||
mock_client().list_instances.return_value = [[mock_bigtable_instance], []]
|
||||
yield mock_client
|
||||
# @pytest.fixture
|
||||
# def mock_google_cloud_client(mock_bigtable_instance):
|
||||
# with patch("google.cloud.bigtable.Client") as mock_client:
|
||||
# mock_client.list_instances.return_value = [[], []]
|
||||
# mock_client().list_instances.return_value = [[mock_bigtable_instance], []]
|
||||
# yield mock_client
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_test_connection():
|
||||
with patch.object(BigtableSource, "test_connection") as mock_test_connection:
|
||||
mock_test_connection.return_value = True
|
||||
yield mock_test_connection
|
||||
# @pytest.fixture
|
||||
# def mock_test_connection():
|
||||
# with patch.object(BigtableSource, "test_connection") as mock_test_connection:
|
||||
# mock_test_connection.return_value = True
|
||||
# yield mock_test_connection
|
||||
|
||||
|
||||
class BigTableUnitTest(TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(
|
||||
self,
|
||||
monkeypatch,
|
||||
mock_google_cloud_client,
|
||||
mock_test_connection,
|
||||
mock_bigtable_instance,
|
||||
mock_bigtable_table,
|
||||
):
|
||||
self.config = OpenMetadataWorkflowConfig.parse_obj(mock_bigtable_config)
|
||||
self.bigtable_source = BigtableSource.create(
|
||||
mock_bigtable_config["source"],
|
||||
OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
|
||||
)
|
||||
self.bigtable_source.context.__dict__[
|
||||
"database_service"
|
||||
] = MOCK_DATABASE_SERVICE.name.__root__
|
||||
self.bigtable_source.context.__dict__["database"] = MOCK_DATABASE.name.__root__
|
||||
self.bigtable_source.context.__dict__[
|
||||
"database_schema"
|
||||
] = MOCK_DATABASE_SCHEMA.name.__root__
|
||||
self.bigtable_source.instances = {
|
||||
"my-gcp-project": {
|
||||
mock_bigtable_instance.instance_id: mock_bigtable_instance
|
||||
}
|
||||
}
|
||||
self.bigtable_source.tables = {
|
||||
"my-gcp-project": {
|
||||
mock_bigtable_instance.instance_id: {
|
||||
mock_bigtable_table.table_id: mock_bigtable_table
|
||||
}
|
||||
}
|
||||
}
|
||||
# class BigTableUnitTest(TestCase):
|
||||
# @pytest.fixture(autouse=True)
|
||||
# def setup(
|
||||
# self,
|
||||
# monkeypatch,
|
||||
# mock_google_cloud_client,
|
||||
# mock_test_connection,
|
||||
# mock_bigtable_instance,
|
||||
# mock_bigtable_table,
|
||||
# ):
|
||||
# self.config = OpenMetadataWorkflowConfig.parse_obj(mock_bigtable_config)
|
||||
# self.bigtable_source = BigtableSource.create(
|
||||
# mock_bigtable_config["source"],
|
||||
# OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
|
||||
# )
|
||||
# self.bigtable_source.context.__dict__[
|
||||
# "database_service"
|
||||
# ] = MOCK_DATABASE_SERVICE.name.__root__
|
||||
# self.bigtable_source.context.__dict__["database"] = MOCK_DATABASE.name.__root__
|
||||
# self.bigtable_source.context.__dict__[
|
||||
# "database_schema"
|
||||
# ] = MOCK_DATABASE_SCHEMA.name.__root__
|
||||
# self.bigtable_source.instances = {
|
||||
# "my-gcp-project": {
|
||||
# mock_bigtable_instance.instance_id: mock_bigtable_instance
|
||||
# }
|
||||
# }
|
||||
# self.bigtable_source.tables = {
|
||||
# "my-gcp-project": {
|
||||
# mock_bigtable_instance.instance_id: {
|
||||
# mock_bigtable_table.table_id: mock_bigtable_table
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
def test_database_names(self):
|
||||
assert (
|
||||
list(self.bigtable_source.get_database_names()) == EXPECTED_DATABASE_NAMES
|
||||
)
|
||||
# def test_database_names(self):
|
||||
# assert (
|
||||
# list(self.bigtable_source.get_database_names()) == EXPECTED_DATABASE_NAMES
|
||||
# )
|
||||
|
||||
def test_database_schema_names(self):
|
||||
assert (
|
||||
list(self.bigtable_source.get_database_schema_names())
|
||||
== EXPECTED_DATABASE_SCHEMA_NAMES
|
||||
)
|
||||
# def test_database_schema_names(self):
|
||||
# assert (
|
||||
# list(self.bigtable_source.get_database_schema_names())
|
||||
# == EXPECTED_DATABASE_SCHEMA_NAMES
|
||||
# )
|
||||
|
||||
def test_table_names(self):
|
||||
assert (
|
||||
list(self.bigtable_source.get_tables_name_and_type())
|
||||
== EXPECTED_TABLE_NAMES
|
||||
)
|
||||
# def test_table_names(self):
|
||||
# assert (
|
||||
# list(self.bigtable_source.get_tables_name_and_type())
|
||||
# == EXPECTED_TABLE_NAMES
|
||||
# )
|
||||
|
||||
def test_yield_tables(self):
|
||||
Column.__eq__ = custom_column_compare
|
||||
result = next(self.bigtable_source.yield_table(EXPECTED_TABLE_NAMES[0]))
|
||||
assert result.left is None
|
||||
assert result.right.name.__root__ == "random_table"
|
||||
assert result.right == MOCK_CREATE_TABLE
|
||||
# def test_yield_tables(self):
|
||||
# Column.__eq__ = custom_column_compare
|
||||
# result = next(self.bigtable_source.yield_table(EXPECTED_TABLE_NAMES[0]))
|
||||
# assert result.left is None
|
||||
# assert result.right.name.__root__ == "random_table"
|
||||
# assert result.right == MOCK_CREATE_TABLE
|
||||
|
@ -1,226 +1,226 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Test Deltalake using the topology
|
||||
# # Copyright 2021 Collate
|
||||
# # Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# # you may not use this file except in compliance with the License.
|
||||
# # You may obtain a copy of the License at
|
||||
# # http://www.apache.org/licenses/LICENSE-2.0
|
||||
# # Unless required by applicable law or agreed to in writing, software
|
||||
# # distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# # See the License for the specific language governing permissions and
|
||||
# # limitations under the License.
|
||||
# """
|
||||
# Test Deltalake using the topology
|
||||
|
||||
Here we don't need to patch, as we can just create our own metastore
|
||||
"""
|
||||
import shutil
|
||||
import sys
|
||||
import unittest
|
||||
from datetime import date, datetime
|
||||
from unittest import TestCase
|
||||
# Here we don't need to patch, as we can just create our own metastore
|
||||
# """
|
||||
# import shutil
|
||||
# import sys
|
||||
# import unittest
|
||||
# from datetime import date, datetime
|
||||
# from unittest import TestCase
|
||||
|
||||
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
||||
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
||||
CreateDatabaseSchemaRequest,
|
||||
)
|
||||
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
from metadata.generated.schema.entity.data.table import Column, DataType, TableType
|
||||
from metadata.generated.schema.entity.services.databaseService import (
|
||||
DatabaseConnection,
|
||||
DatabaseService,
|
||||
DatabaseServiceType,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataWorkflowConfig,
|
||||
)
|
||||
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
|
||||
from metadata.generated.schema.type.entityReference import EntityReference
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.database.deltalake.metadata import DeltalakeSource
|
||||
# from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
||||
# from metadata.generated.schema.api.data.createDatabaseSchema import (
|
||||
# CreateDatabaseSchemaRequest,
|
||||
# )
|
||||
# from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
# from metadata.generated.schema.entity.data.database import Database
|
||||
# from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||
# from metadata.generated.schema.entity.data.table import Column, DataType, TableType
|
||||
# from metadata.generated.schema.entity.services.databaseService import (
|
||||
# DatabaseConnection,
|
||||
# DatabaseService,
|
||||
# DatabaseServiceType,
|
||||
# )
|
||||
# from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
# OpenMetadataWorkflowConfig,
|
||||
# )
|
||||
# from metadata.generated.schema.type.basic import FullyQualifiedEntityName
|
||||
# from metadata.generated.schema.type.entityReference import EntityReference
|
||||
# from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
# from metadata.ingestion.source.database.deltalake.metadata import DeltalakeSource
|
||||
|
||||
METASTORE_PATH = "/tmp/spark/unit/metastore"
|
||||
SPARK_SQL_WAREHOUSE = "/tmp/spark/unit/warehouse"
|
||||
# METASTORE_PATH = "/tmp/spark/unit/metastore"
|
||||
# SPARK_SQL_WAREHOUSE = "/tmp/spark/unit/warehouse"
|
||||
|
||||
MOCK_DELTA_CONFIG = {
|
||||
"source": {
|
||||
"type": "deltalake",
|
||||
"serviceName": "delta",
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"type": "DeltaLake",
|
||||
"metastoreConnection": {
|
||||
"metastoreFilePath": METASTORE_PATH,
|
||||
},
|
||||
"connectionArguments": {
|
||||
"spark.sql.warehouse.dir": SPARK_SQL_WAREHOUSE,
|
||||
},
|
||||
}
|
||||
},
|
||||
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
|
||||
},
|
||||
"sink": {"type": "metadata-rest", "config": {}},
|
||||
"workflowConfig": {
|
||||
"openMetadataServerConfig": {
|
||||
"hostPort": "http://localhost:8585/api",
|
||||
"authProvider": "openmetadata",
|
||||
"securityConfig": {
|
||||
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
# MOCK_DELTA_CONFIG = {
|
||||
# "source": {
|
||||
# "type": "deltalake",
|
||||
# "serviceName": "delta",
|
||||
# "serviceConnection": {
|
||||
# "config": {
|
||||
# "type": "DeltaLake",
|
||||
# "metastoreConnection": {
|
||||
# "metastoreFilePath": METASTORE_PATH,
|
||||
# },
|
||||
# "connectionArguments": {
|
||||
# "spark.sql.warehouse.dir": SPARK_SQL_WAREHOUSE,
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
# "sourceConfig": {"config": {"type": "DatabaseMetadata"}},
|
||||
# },
|
||||
# "sink": {"type": "metadata-rest", "config": {}},
|
||||
# "workflowConfig": {
|
||||
# "openMetadataServerConfig": {
|
||||
# "hostPort": "http://localhost:8585/api",
|
||||
# "authProvider": "openmetadata",
|
||||
# "securityConfig": {
|
||||
# "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
# }
|
||||
|
||||
MOCK_DATABASE_SERVICE = DatabaseService(
|
||||
id="85811038-099a-11ed-861d-0242ac120002",
|
||||
name="delta",
|
||||
fullyQualifiedName="delta",
|
||||
connection=DatabaseConnection(),
|
||||
serviceType=DatabaseServiceType.DeltaLake,
|
||||
)
|
||||
# MOCK_DATABASE_SERVICE = DatabaseService(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002",
|
||||
# name="delta",
|
||||
# fullyQualifiedName="delta",
|
||||
# connection=DatabaseConnection(),
|
||||
# serviceType=DatabaseServiceType.DeltaLake,
|
||||
# )
|
||||
|
||||
MOCK_DATABASE = Database(
|
||||
id="2004514B-A800-4D92-8442-14B2796F712E",
|
||||
name="default",
|
||||
fullyQualifiedName="delta.default",
|
||||
service=EntityReference(
|
||||
id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
|
||||
),
|
||||
)
|
||||
# MOCK_DATABASE = Database(
|
||||
# id="2004514B-A800-4D92-8442-14B2796F712E",
|
||||
# name="default",
|
||||
# fullyQualifiedName="delta.default",
|
||||
# service=EntityReference(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
|
||||
# ),
|
||||
# )
|
||||
|
||||
MOCK_DATABASE_SCHEMA = DatabaseSchema(
|
||||
id="92D36A9B-B1A9-4D0A-A00B-1B2ED137ABA5",
|
||||
name="default",
|
||||
fullyQualifiedName="delta.default.default",
|
||||
database=EntityReference(
|
||||
id="2004514B-A800-4D92-8442-14B2796F712E", type="database"
|
||||
),
|
||||
service=EntityReference(
|
||||
id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
|
||||
),
|
||||
)
|
||||
# MOCK_DATABASE_SCHEMA = DatabaseSchema(
|
||||
# id="92D36A9B-B1A9-4D0A-A00B-1B2ED137ABA5",
|
||||
# name="default",
|
||||
# fullyQualifiedName="delta.default.default",
|
||||
# database=EntityReference(
|
||||
# id="2004514B-A800-4D92-8442-14B2796F712E", type="database"
|
||||
# ),
|
||||
# service=EntityReference(
|
||||
# id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
|
||||
# ),
|
||||
# )
|
||||
|
||||
|
||||
@unittest.skipUnless(
|
||||
sys.version_info < (3, 11),
|
||||
reason="https://github.com/open-metadata/OpenMetadata/issues/14408",
|
||||
)
|
||||
class DeltaLakeUnitTest(TestCase):
|
||||
"""
|
||||
Add method validations from Deltalake ingestion
|
||||
"""
|
||||
# @unittest.skipUnless(
|
||||
# sys.version_info < (3, 11),
|
||||
# reason="https://github.com/open-metadata/OpenMetadata/issues/14408",
|
||||
# )
|
||||
# class DeltaLakeUnitTest(TestCase):
|
||||
# """
|
||||
# Add method validations from Deltalake ingestion
|
||||
# """
|
||||
|
||||
config: OpenMetadataWorkflowConfig = OpenMetadataWorkflowConfig.parse_obj(
|
||||
MOCK_DELTA_CONFIG
|
||||
)
|
||||
delta: DeltalakeSource = DeltalakeSource.create(
|
||||
MOCK_DELTA_CONFIG["source"],
|
||||
OpenMetadata(config.workflowConfig.openMetadataServerConfig),
|
||||
)
|
||||
spark = delta.spark
|
||||
# config: OpenMetadataWorkflowConfig = OpenMetadataWorkflowConfig.parse_obj(
|
||||
# MOCK_DELTA_CONFIG
|
||||
# )
|
||||
# delta: DeltalakeSource = DeltalakeSource.create(
|
||||
# MOCK_DELTA_CONFIG["source"],
|
||||
# OpenMetadata(config.workflowConfig.openMetadataServerConfig),
|
||||
# )
|
||||
# spark = delta.spark
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls) -> None:
|
||||
"""
|
||||
Prepare the SparkSession and metastore
|
||||
"""
|
||||
df = cls.spark.createDataFrame(
|
||||
[
|
||||
(1, 2.0, "string1", date(2000, 1, 1), datetime(2000, 1, 1, 12, 0)),
|
||||
(2, 3.0, "string2", date(2000, 2, 1), datetime(2000, 1, 2, 12, 0)),
|
||||
(3, 4.0, "string3", date(2000, 3, 1), datetime(2000, 1, 3, 12, 0)),
|
||||
],
|
||||
schema="a long, b double, c string, d date, e timestamp",
|
||||
)
|
||||
# @classmethod
|
||||
# def setUpClass(cls) -> None:
|
||||
# """
|
||||
# Prepare the SparkSession and metastore
|
||||
# """
|
||||
# df = cls.spark.createDataFrame(
|
||||
# [
|
||||
# (1, 2.0, "string1", date(2000, 1, 1), datetime(2000, 1, 1, 12, 0)),
|
||||
# (2, 3.0, "string2", date(2000, 2, 1), datetime(2000, 1, 2, 12, 0)),
|
||||
# (3, 4.0, "string3", date(2000, 3, 1), datetime(2000, 1, 3, 12, 0)),
|
||||
# ],
|
||||
# schema="a long, b double, c string, d date, e timestamp",
|
||||
# )
|
||||
|
||||
# Create the DF as a tmp view to be able to run Spark SQL statements on top
|
||||
df.createOrReplaceTempView("tmp_df")
|
||||
# If no db is specified, the table will be created under `default`
|
||||
cls.spark.sql(
|
||||
"CREATE TABLE IF NOT EXISTS my_df COMMENT 'testing around' AS SELECT * FROM tmp_df"
|
||||
)
|
||||
# # Create the DF as a tmp view to be able to run Spark SQL statements on top
|
||||
# df.createOrReplaceTempView("tmp_df")
|
||||
# # If no db is specified, the table will be created under `default`
|
||||
# cls.spark.sql(
|
||||
# "CREATE TABLE IF NOT EXISTS my_df COMMENT 'testing around' AS SELECT * FROM tmp_df"
|
||||
# )
|
||||
|
||||
# Create a database. We will be ingesting that as a schema
|
||||
cls.spark.sql(
|
||||
f"CREATE DATABASE sample_db LOCATION '{SPARK_SQL_WAREHOUSE}/sample_db'"
|
||||
)
|
||||
# # Create a database. We will be ingesting that as a schema
|
||||
# cls.spark.sql(
|
||||
# f"CREATE DATABASE sample_db LOCATION '{SPARK_SQL_WAREHOUSE}/sample_db'"
|
||||
# )
|
||||
|
||||
# Set context
|
||||
cls.delta.context.__dict__[
|
||||
"database_service"
|
||||
] = MOCK_DATABASE_SERVICE.name.__root__
|
||||
cls.delta.context.__dict__["database"] = MOCK_DATABASE.name.__root__
|
||||
cls.delta.context.__dict__[
|
||||
"database_schema"
|
||||
] = MOCK_DATABASE_SCHEMA.name.__root__
|
||||
# We pick up the table comments when getting their name and type, so we
|
||||
# store the description in the context
|
||||
cls.delta.context.__dict__["table_description"] = "testing around"
|
||||
# # Set context
|
||||
# cls.delta.context.__dict__[
|
||||
# "database_service"
|
||||
# ] = MOCK_DATABASE_SERVICE.name.__root__
|
||||
# cls.delta.context.__dict__["database"] = MOCK_DATABASE.name.__root__
|
||||
# cls.delta.context.__dict__[
|
||||
# "database_schema"
|
||||
# ] = MOCK_DATABASE_SCHEMA.name.__root__
|
||||
# # We pick up the table comments when getting their name and type, so we
|
||||
# # store the description in the context
|
||||
# cls.delta.context.__dict__["table_description"] = "testing around"
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls) -> None:
|
||||
"""
|
||||
Clean up
|
||||
"""
|
||||
shutil.rmtree(METASTORE_PATH)
|
||||
shutil.rmtree(SPARK_SQL_WAREHOUSE)
|
||||
# @classmethod
|
||||
# def tearDownClass(cls) -> None:
|
||||
# """
|
||||
# Clean up
|
||||
# """
|
||||
# shutil.rmtree(METASTORE_PATH)
|
||||
# shutil.rmtree(SPARK_SQL_WAREHOUSE)
|
||||
|
||||
def test_get_database_names(self):
|
||||
database_names = list(self.delta.get_database_names())
|
||||
self.assertEqual(database_names, ["default"])
|
||||
# def test_get_database_names(self):
|
||||
# database_names = list(self.delta.get_database_names())
|
||||
# self.assertEqual(database_names, ["default"])
|
||||
|
||||
def test_yield_database(self):
|
||||
database_request = next(
|
||||
self.delta.yield_database(database_name="default")
|
||||
).right
|
||||
expected_database_request = CreateDatabaseRequest(
|
||||
name="default",
|
||||
service=FullyQualifiedEntityName(__root__="delta"),
|
||||
)
|
||||
# def test_yield_database(self):
|
||||
# database_request = next(
|
||||
# self.delta.yield_database(database_name="default")
|
||||
# ).right
|
||||
# expected_database_request = CreateDatabaseRequest(
|
||||
# name="default",
|
||||
# service=FullyQualifiedEntityName(__root__="delta"),
|
||||
# )
|
||||
|
||||
self.assertEqual(database_request, expected_database_request)
|
||||
# self.assertEqual(database_request, expected_database_request)
|
||||
|
||||
def test_get_database_schema_names(self):
|
||||
schema_names = set(self.delta.get_database_schema_names())
|
||||
self.assertEqual(schema_names, {"default", "sample_db"})
|
||||
# def test_get_database_schema_names(self):
|
||||
# schema_names = set(self.delta.get_database_schema_names())
|
||||
# self.assertEqual(schema_names, {"default", "sample_db"})
|
||||
|
||||
def test_yield_database_schema(self):
|
||||
schema_request = next(
|
||||
self.delta.yield_database_schema(schema_name="default")
|
||||
).right
|
||||
expected_schema_request = CreateDatabaseSchemaRequest(
|
||||
name="default", database="delta.default"
|
||||
)
|
||||
# def test_yield_database_schema(self):
|
||||
# schema_request = next(
|
||||
# self.delta.yield_database_schema(schema_name="default")
|
||||
# ).right
|
||||
# expected_schema_request = CreateDatabaseSchemaRequest(
|
||||
# name="default", database="delta.default"
|
||||
# )
|
||||
|
||||
self.assertEqual(schema_request, expected_schema_request)
|
||||
# self.assertEqual(schema_request, expected_schema_request)
|
||||
|
||||
def test_get_tables_name_and_type(self):
|
||||
table_names = list(self.delta.get_tables_name_and_type())
|
||||
# We won't ingest TMP tables
|
||||
self.assertEqual(table_names, [("my_df", TableType.Regular)])
|
||||
# def test_get_tables_name_and_type(self):
|
||||
# table_names = list(self.delta.get_tables_name_and_type())
|
||||
# # We won't ingest TMP tables
|
||||
# self.assertEqual(table_names, [("my_df", TableType.Regular)])
|
||||
|
||||
def test_yield_table(self):
|
||||
table_request = next(
|
||||
self.delta.yield_table(table_name_and_type=("my_df", TableType.Regular))
|
||||
).right
|
||||
# def test_yield_table(self):
|
||||
# table_request = next(
|
||||
# self.delta.yield_table(table_name_and_type=("my_df", TableType.Regular))
|
||||
# ).right
|
||||
|
||||
expected_columns = [
|
||||
Column(name="a", dataType=DataType.BIGINT, dataTypeDisplay="bigint"),
|
||||
Column(name="b", dataType=DataType.DOUBLE, dataTypeDisplay="double"),
|
||||
Column(name="c", dataType=DataType.STRING, dataTypeDisplay="string"),
|
||||
Column(name="d", dataType=DataType.DATE, dataTypeDisplay="date"),
|
||||
Column(name="e", dataType=DataType.TIMESTAMP, dataTypeDisplay="timestamp"),
|
||||
]
|
||||
# expected_columns = [
|
||||
# Column(name="a", dataType=DataType.BIGINT, dataTypeDisplay="bigint"),
|
||||
# Column(name="b", dataType=DataType.DOUBLE, dataTypeDisplay="double"),
|
||||
# Column(name="c", dataType=DataType.STRING, dataTypeDisplay="string"),
|
||||
# Column(name="d", dataType=DataType.DATE, dataTypeDisplay="date"),
|
||||
# Column(name="e", dataType=DataType.TIMESTAMP, dataTypeDisplay="timestamp"),
|
||||
# ]
|
||||
|
||||
expected_table_request = CreateTableRequest(
|
||||
name="my_df",
|
||||
tableType=TableType.Regular,
|
||||
description="testing around",
|
||||
columns=expected_columns,
|
||||
tableConstraints=None,
|
||||
databaseSchema=MOCK_DATABASE_SCHEMA.fullyQualifiedName,
|
||||
viewDefinition=None,
|
||||
)
|
||||
# expected_table_request = CreateTableRequest(
|
||||
# name="my_df",
|
||||
# tableType=TableType.Regular,
|
||||
# description="testing around",
|
||||
# columns=expected_columns,
|
||||
# tableConstraints=None,
|
||||
# databaseSchema=MOCK_DATABASE_SCHEMA.fullyQualifiedName,
|
||||
# viewDefinition=None,
|
||||
# )
|
||||
|
||||
self.assertEqual(table_request, expected_table_request)
|
||||
# self.assertEqual(table_request, expected_table_request)
|
||||
|
@ -53,6 +53,8 @@ from metadata.generated.schema.entity.data.table import (
|
||||
Column,
|
||||
Constraint,
|
||||
DataType,
|
||||
PartitionColumnDetails,
|
||||
PartitionIntervalTypes,
|
||||
TablePartition,
|
||||
TableType,
|
||||
)
|
||||
@ -792,7 +794,15 @@ class IcebergUnitTest(TestCase):
|
||||
columns=[
|
||||
MOCK_COLUMN_MAP[field]["ometa"] for field in MOCK_COLUMN_MAP.keys()
|
||||
],
|
||||
tablePartition=TablePartition(columns=["binary"]),
|
||||
tablePartition=TablePartition(
|
||||
columns=[
|
||||
PartitionColumnDetails(
|
||||
columnName="binary",
|
||||
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
|
||||
interval=None,
|
||||
)
|
||||
]
|
||||
),
|
||||
databaseSchema=fq_database_schema,
|
||||
)
|
||||
|
||||
|
@ -51,7 +51,7 @@ mock_redshift_config = {
|
||||
|
||||
RAW_DIST_STYLE = ["KEY(eventid)", "EVEN", "ALL"]
|
||||
|
||||
EXPECTED_PARTITION_COLUMNS = [["eventid"], None, None]
|
||||
EXPECTED_PARTITION_COLUMNS = ["eventid", None, None]
|
||||
|
||||
|
||||
class RedshiftUnitTest(TestCase):
|
||||
@ -69,7 +69,8 @@ class RedshiftUnitTest(TestCase):
|
||||
|
||||
def test_partition_parse_columns(self):
|
||||
for i in range(len(RAW_DIST_STYLE)):
|
||||
assert (
|
||||
self.redshift_source._get_partition_key(RAW_DIST_STYLE[i])
|
||||
== EXPECTED_PARTITION_COLUMNS[i]
|
||||
)
|
||||
with self.subTest(i=i):
|
||||
self.assertEqual(
|
||||
self.redshift_source._get_partition_key(RAW_DIST_STYLE[i]),
|
||||
EXPECTED_PARTITION_COLUMNS[i],
|
||||
)
|
||||
|
@ -0,0 +1,31 @@
|
||||
package org.openmetadata.service.migration.mysql.v140;
|
||||
|
||||
import static org.openmetadata.service.migration.utils.v140.MigrationUtil.migrateTablePartition;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import org.jdbi.v3.core.Handle;
|
||||
import org.openmetadata.service.jdbi3.CollectionDAO;
|
||||
import org.openmetadata.service.migration.api.MigrationProcessImpl;
|
||||
import org.openmetadata.service.migration.utils.MigrationFile;
|
||||
|
||||
public class Migration extends MigrationProcessImpl {
|
||||
private CollectionDAO collectionDAO;
|
||||
private Handle handle;
|
||||
|
||||
public Migration(MigrationFile migrationFile) {
|
||||
super(migrationFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(Handle handle) {
|
||||
super.initialize(handle);
|
||||
this.handle = handle;
|
||||
this.collectionDAO = handle.attach(CollectionDAO.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public void runDataMigration() {
|
||||
migrateTablePartition(handle, collectionDAO);
|
||||
}
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package org.openmetadata.service.migration.postgres.v140;
|
||||
|
||||
import static org.openmetadata.service.migration.utils.v140.MigrationUtil.migrateTablePartition;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import org.jdbi.v3.core.Handle;
|
||||
import org.openmetadata.service.jdbi3.CollectionDAO;
|
||||
import org.openmetadata.service.migration.api.MigrationProcessImpl;
|
||||
import org.openmetadata.service.migration.utils.MigrationFile;
|
||||
|
||||
public class Migration extends MigrationProcessImpl {
|
||||
private CollectionDAO collectionDAO;
|
||||
private Handle handle;
|
||||
|
||||
public Migration(MigrationFile migrationFile) {
|
||||
super(migrationFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(Handle handle) {
|
||||
super.initialize(handle);
|
||||
this.handle = handle;
|
||||
this.collectionDAO = handle.attach(CollectionDAO.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public void runDataMigration() {
|
||||
migrateTablePartition(handle, collectionDAO);
|
||||
}
|
||||
}
|
@ -0,0 +1,127 @@
|
||||
package org.openmetadata.service.migration.utils.v140;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import javax.json.Json;
|
||||
import javax.json.JsonArray;
|
||||
import javax.json.JsonArrayBuilder;
|
||||
import javax.json.JsonObject;
|
||||
import javax.json.JsonString;
|
||||
import javax.json.JsonValue;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jdbi.v3.core.Handle;
|
||||
import org.openmetadata.schema.api.services.CreateDatabaseService;
|
||||
import org.openmetadata.schema.entity.data.Table;
|
||||
import org.openmetadata.schema.type.PartitionColumnDetails;
|
||||
import org.openmetadata.schema.type.PartitionIntervalTypes;
|
||||
import org.openmetadata.schema.type.TablePartition;
|
||||
import org.openmetadata.service.jdbi3.CollectionDAO;
|
||||
import org.openmetadata.service.resources.databases.DatasourceConfig;
|
||||
import org.openmetadata.service.util.JsonUtils;
|
||||
import org.postgresql.util.PGobject;
|
||||
|
||||
@Slf4j
|
||||
public class MigrationUtil {
|
||||
private static final String MYSQL_QUERY_TABLES_WITH_PARTITION =
|
||||
"SELECT json "
|
||||
+ "FROM table_entity "
|
||||
+ "WHERE JSON_EXTRACT(json, '$.tablePartition') IS NOT NULL";
|
||||
|
||||
private static final String POSTGRES_QUERY_TABLES_WITH_PARTITION =
|
||||
"SELECT json " + "FROM table_entity " + "WHERE json->'tablePartition' IS NOT NULL";
|
||||
|
||||
private MigrationUtil() {
|
||||
/* Cannot create object util class*/
|
||||
}
|
||||
|
||||
public static void migrateTablePartition(Handle handle, CollectionDAO collectionDAO) {
|
||||
try {
|
||||
if (Boolean.TRUE.equals(DatasourceConfig.getInstance().isMySQL())) {
|
||||
handle
|
||||
.createQuery(MYSQL_QUERY_TABLES_WITH_PARTITION)
|
||||
.mapToMap()
|
||||
.forEach(
|
||||
row -> {
|
||||
String jsonRow;
|
||||
jsonRow = (String) row.get("json");
|
||||
handleTablePartitionMigration(jsonRow, collectionDAO);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
handle
|
||||
.createQuery(POSTGRES_QUERY_TABLES_WITH_PARTITION)
|
||||
.mapToMap()
|
||||
.forEach(
|
||||
row -> {
|
||||
String jsonRow;
|
||||
PGobject pgObject = (PGobject) row.get("json");
|
||||
jsonRow = pgObject.getValue();
|
||||
handleTablePartitionMigration(jsonRow, collectionDAO);
|
||||
});
|
||||
} catch (Exception ex) {
|
||||
LOG.warn("Error running the query migration ", ex);
|
||||
}
|
||||
}
|
||||
|
||||
private static void handleTablePartitionMigration(String jsonRow, CollectionDAO collectionDAO) {
|
||||
JsonObject jsonObj = JsonUtils.readJson(jsonRow).asJsonObject();
|
||||
|
||||
// We need to pop the tablePartition from the json before serializing it
|
||||
JsonObject tablePartition = jsonObj.getJsonObject("tablePartition");
|
||||
|
||||
// Remove the tablePartition from the json. We need to convert it to a map to remove the key
|
||||
// as JsonObject is immutable
|
||||
HashMap<String, Object> jsonMap = JsonUtils.readValue(jsonObj.toString(), HashMap.class);
|
||||
jsonMap.remove("tablePartition");
|
||||
|
||||
jsonObj = JsonUtils.readJson(JsonUtils.pojoToJson(jsonMap)).asJsonObject();
|
||||
|
||||
Table table = JsonUtils.readValue(jsonObj.toString(), Table.class);
|
||||
|
||||
JsonArray partitionColumns = tablePartition.getJsonArray("columns");
|
||||
if (tablePartition.isEmpty()) {
|
||||
LOG.info("Table {} does not have partition details", table.getId());
|
||||
return;
|
||||
}
|
||||
|
||||
List<PartitionColumnDetails> partitionColumnDetails = new ArrayList<PartitionColumnDetails>();
|
||||
|
||||
if ((partitionColumns == null || partitionColumns.isEmpty())
|
||||
&& table.getServiceType() == CreateDatabaseService.DatabaseServiceType.BigQuery) {
|
||||
// BigQuery tables have pseudo columns for partitioning that were not being set in the
|
||||
// partitionColumns entity
|
||||
String interval = tablePartition.getString("interval");
|
||||
if (interval != null) {
|
||||
JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder();
|
||||
switch (interval) {
|
||||
case "HOUR" -> partitionColumns = jsonArrayBuilder.add("_PARTITIONTIME").build();
|
||||
case "DAY" -> partitionColumns = jsonArrayBuilder.add("_PARTITIONDATE").build();
|
||||
}
|
||||
}
|
||||
;
|
||||
}
|
||||
|
||||
if (partitionColumns == null || partitionColumns.isEmpty()) {
|
||||
throw new RuntimeException(
|
||||
"tablePartition is not null but not column partition was defined for table "
|
||||
+ table.getId());
|
||||
}
|
||||
|
||||
for (JsonValue column : partitionColumns) {
|
||||
PartitionColumnDetails partitionColumnDetail = new PartitionColumnDetails();
|
||||
partitionColumnDetail.setColumnName(((JsonString) column).getString());
|
||||
String intervalType = tablePartition.getString("intervalType");
|
||||
if (intervalType != null) {
|
||||
partitionColumnDetail.setIntervalType(PartitionIntervalTypes.fromValue(intervalType));
|
||||
}
|
||||
partitionColumnDetail.setInterval(tablePartition.getString("interval"));
|
||||
partitionColumnDetails.add(partitionColumnDetail);
|
||||
}
|
||||
|
||||
table.withTablePartition(new TablePartition().withColumns(partitionColumnDetails));
|
||||
|
||||
collectionDAO.tableDAO().update(table);
|
||||
}
|
||||
}
|
@ -21,6 +21,7 @@ import java.util.Locale;
|
||||
import org.openmetadata.schema.type.Column;
|
||||
import org.openmetadata.schema.type.ColumnConstraint;
|
||||
import org.openmetadata.schema.type.ColumnDataType;
|
||||
import org.openmetadata.schema.type.PartitionColumnDetails;
|
||||
import org.openmetadata.schema.type.TableConstraint;
|
||||
import org.openmetadata.schema.type.TablePartition;
|
||||
import org.openmetadata.schema.type.TableType;
|
||||
@ -74,8 +75,11 @@ public final class DatabaseUtil {
|
||||
}
|
||||
List<String> columnNames = new ArrayList<>();
|
||||
columns.forEach(c -> columnNames.add(c.getName()));
|
||||
for (String columnName : tablePartition.getColumns()) {
|
||||
if (!columnNames.contains(columnName)) {
|
||||
// Add BigQuery partition pseudo columns
|
||||
columnNames.add("_PARTITIONDATE");
|
||||
columnNames.add("_PARTITIONTIME");
|
||||
for (PartitionColumnDetails partitionColumnDetails : tablePartition.getColumns()) {
|
||||
if (!columnNames.contains(partitionColumnDetails.getColumnName())) {
|
||||
throw new IllegalArgumentException("Invalid column name found in table partition");
|
||||
}
|
||||
}
|
||||
|
@ -120,6 +120,8 @@ import org.openmetadata.schema.type.DataModel.ModelType;
|
||||
import org.openmetadata.schema.type.EntityReference;
|
||||
import org.openmetadata.schema.type.JoinedWith;
|
||||
import org.openmetadata.schema.type.MetadataOperation;
|
||||
import org.openmetadata.schema.type.PartitionColumnDetails;
|
||||
import org.openmetadata.schema.type.PartitionIntervalTypes;
|
||||
import org.openmetadata.schema.type.TableConstraint;
|
||||
import org.openmetadata.schema.type.TableConstraint.ConstraintType;
|
||||
import org.openmetadata.schema.type.TableData;
|
||||
@ -283,11 +285,14 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
|
||||
List<Column> columns = new ArrayList<>();
|
||||
columns.add(getColumn("user_id", INT, null));
|
||||
columns.add(getColumn("date", DATE, null));
|
||||
TablePartition partition =
|
||||
new TablePartition()
|
||||
.withColumns(List.of(columns.get(1).getName()))
|
||||
.withIntervalType(TablePartition.IntervalType.TIME_UNIT)
|
||||
|
||||
PartitionColumnDetails partitionColumnDetails =
|
||||
new PartitionColumnDetails()
|
||||
.withColumnName(columns.get(1).getName())
|
||||
.withIntervalType(PartitionIntervalTypes.TIME_UNIT)
|
||||
.withInterval("daily");
|
||||
|
||||
TablePartition partition = new TablePartition().withColumns(List.of(partitionColumnDetails));
|
||||
create.setColumns(columns);
|
||||
create.setTablePartition(partition);
|
||||
Table created = createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
|
||||
@ -309,11 +314,20 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
|
||||
new TableConstraint()
|
||||
.withConstraintType(ConstraintType.UNIQUE)
|
||||
.withColumns(List.of(column1.getName()));
|
||||
TablePartition partition =
|
||||
new TablePartition()
|
||||
.withColumns(List.of(column1.getName(), column2.getName()))
|
||||
.withIntervalType(TablePartition.IntervalType.COLUMN_VALUE)
|
||||
.withInterval("column");
|
||||
|
||||
List<PartitionColumnDetails> listPartitionColumnDetails = new ArrayList<>();
|
||||
listPartitionColumnDetails.add(
|
||||
new PartitionColumnDetails()
|
||||
.withColumnName(column1.getName())
|
||||
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
|
||||
.withInterval("column"));
|
||||
listPartitionColumnDetails.add(
|
||||
new PartitionColumnDetails()
|
||||
.withColumnName(column2.getName())
|
||||
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
|
||||
.withInterval("column"));
|
||||
|
||||
TablePartition partition = new TablePartition().withColumns(listPartitionColumnDetails);
|
||||
|
||||
//
|
||||
// Create a table with two columns - column1, column2, table constraint and table partition
|
||||
@ -341,11 +355,14 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
|
||||
column1.withDescription("").withDisplayName("");
|
||||
column2.withDisplayName(null);
|
||||
create.getColumns().add(column3);
|
||||
partition =
|
||||
new TablePartition()
|
||||
.withColumns(List.of(column3.getName()))
|
||||
.withIntervalType(TablePartition.IntervalType.COLUMN_VALUE)
|
||||
|
||||
PartitionColumnDetails partitionColumnDetails =
|
||||
new PartitionColumnDetails()
|
||||
.withColumnName(column3.getName())
|
||||
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
|
||||
.withInterval("column");
|
||||
|
||||
partition = new TablePartition().withColumns(List.of(partitionColumnDetails));
|
||||
create.setTablePartition(partition);
|
||||
|
||||
ChangeDescription change = getChangeDescription(table, MINOR_UPDATE);
|
||||
@ -2753,13 +2770,19 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
|
||||
if (expectedPartition == null && actualPartition == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, PartitionColumnDetails> expectedColumnMap = new HashMap<>();
|
||||
for (PartitionColumnDetails column : expectedPartition.getColumns()) {
|
||||
expectedColumnMap.put(column.getColumnName(), column);
|
||||
}
|
||||
|
||||
assert expectedPartition != null;
|
||||
assertEquals(expectedPartition.getColumns().size(), actualPartition.getColumns().size());
|
||||
assertEquals(expectedPartition.getIntervalType(), actualPartition.getIntervalType());
|
||||
assertEquals(expectedPartition.getInterval(), actualPartition.getInterval());
|
||||
|
||||
for (int i = 0; i < expectedPartition.getColumns().size(); i++) {
|
||||
assertEquals(expectedPartition.getColumns().get(i), actualPartition.getColumns().get(i));
|
||||
for (PartitionColumnDetails actualColumn : actualPartition.getColumns()) {
|
||||
PartitionColumnDetails expectedColumn = expectedColumnMap.get(actualColumn.getColumnName());
|
||||
assertNotNull(expectedColumn);
|
||||
assertEquals(expectedColumn.getIntervalType(), actualColumn.getIntervalType());
|
||||
assertEquals(expectedColumn.getInterval(), actualColumn.getInterval());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -207,28 +207,46 @@
|
||||
"maxLength": 256,
|
||||
"pattern": "^((?!::).)*$"
|
||||
},
|
||||
"partitionIntervalTypes": {
|
||||
"javaType": "org.openmetadata.schema.type.PartitionIntervalTypes",
|
||||
"description": "type of partition interval",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"TIME-UNIT",
|
||||
"INTEGER-RANGE",
|
||||
"INGESTION-TIME",
|
||||
"COLUMN-VALUE",
|
||||
"INJECTED",
|
||||
"ENUM",
|
||||
"OTHER"
|
||||
]
|
||||
},
|
||||
"tablePartition": {
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.type.TablePartition",
|
||||
"description": "This schema defines the partition column of a table and format the partition is created.",
|
||||
"properties": {
|
||||
"columns": {
|
||||
"description": "List of column names corresponding to the partition.",
|
||||
"description": "List of column partitions with their type and interval.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
"$ref": "#/definitions/partitionColumnDetails"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"partitionColumnDetails": {
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.type.PartitionColumnDetails",
|
||||
"description": "This schema defines the partition column of a table and format the partition is created.",
|
||||
"properties": {
|
||||
"columnName": {
|
||||
"description": "List of column names corresponding to the partition.",
|
||||
"type": "string"
|
||||
},
|
||||
"intervalType": {
|
||||
"type": "string",
|
||||
"description": "type of partition interval, example time-unit, integer-range",
|
||||
"enum": [
|
||||
"TIME-UNIT",
|
||||
"INTEGER-RANGE",
|
||||
"INGESTION-TIME",
|
||||
"COLUMN-VALUE",
|
||||
"OTHER"
|
||||
]
|
||||
"$ref": "#/definitions/partitionIntervalTypes"
|
||||
},
|
||||
"interval": {
|
||||
"type": "string",
|
||||
@ -669,14 +687,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"partitionIntervalType": {
|
||||
"description": "type of partition interval",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"TIME-UNIT",
|
||||
"INTEGER-RANGE",
|
||||
"INGESTION-TIME",
|
||||
"COLUMN-VALUE"
|
||||
]
|
||||
"$ref": "#/definitions/partitionIntervalTypes"
|
||||
},
|
||||
"partitionInterval": {
|
||||
"description": "The interval to use for the partitioning",
|
||||
|
@ -52,8 +52,8 @@ import {
|
||||
TIME_BASED_PARTITION,
|
||||
} from '../../../../../constants/profiler.constant';
|
||||
import { CSMode } from '../../../../../enums/codemirror.enum';
|
||||
import { PartitionIntervalType } from '../../../../../generated/api/data/createTable';
|
||||
import {
|
||||
PartitionIntervalTypes,
|
||||
ProfileSampleType,
|
||||
TableProfilerConfig,
|
||||
} from '../../../../../generated/entity/data/table';
|
||||
@ -299,7 +299,7 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
|
||||
? {
|
||||
...partitionData,
|
||||
partitionValues:
|
||||
partitionIntervalType === PartitionIntervalType.ColumnValue
|
||||
partitionIntervalType === PartitionIntervalTypes.ColumnValue
|
||||
? partitionData?.partitionValues?.filter(
|
||||
(value) => !isEmpty(value)
|
||||
)
|
||||
@ -770,7 +770,7 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
|
||||
</Col>
|
||||
</>
|
||||
) : null}
|
||||
{PartitionIntervalType.IntegerRange ===
|
||||
{PartitionIntervalTypes.IntegerRange ===
|
||||
partitionIntervalType ? (
|
||||
<>
|
||||
<Col span={12}>
|
||||
@ -844,7 +844,8 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
|
||||
</>
|
||||
) : null}
|
||||
|
||||
{PartitionIntervalType.ColumnValue === partitionIntervalType ? (
|
||||
{PartitionIntervalTypes.ColumnValue ===
|
||||
partitionIntervalType ? (
|
||||
<Col span={24}>
|
||||
<List name="partitionValues">
|
||||
{(fields, { add, remove }) => (
|
||||
|
@ -29,7 +29,6 @@ const SchemaTab: FunctionComponent<Props> = ({
|
||||
isReadOnly = false,
|
||||
entityFqn,
|
||||
tableConstraints,
|
||||
tablePartitioned,
|
||||
}: Props) => {
|
||||
const [searchText, setSearchText] = useState('');
|
||||
|
||||
@ -59,7 +58,6 @@ const SchemaTab: FunctionComponent<Props> = ({
|
||||
searchText={lowerCase(searchText)}
|
||||
tableColumns={columns}
|
||||
tableConstraints={tableConstraints}
|
||||
tablePartitioned={tablePartitioned}
|
||||
onThreadLinkSelect={onThreadLinkSelect}
|
||||
onUpdate={onUpdate}
|
||||
/>
|
||||
|
@ -16,7 +16,6 @@ import {
|
||||
ColumnJoins,
|
||||
Table,
|
||||
TableData,
|
||||
TablePartition,
|
||||
} from '../../../generated/entity/data/table';
|
||||
|
||||
export type Props = {
|
||||
@ -29,7 +28,6 @@ export type Props = {
|
||||
hasTagEditAccess: boolean;
|
||||
isReadOnly?: boolean;
|
||||
entityFqn: string;
|
||||
tablePartitioned?: TablePartition;
|
||||
onThreadLinkSelect: (value: string, threadType?: ThreadType) => void;
|
||||
onUpdate: (columns: Table['columns']) => Promise<void>;
|
||||
};
|
||||
|
@ -82,7 +82,6 @@ const SchemaTable = ({
|
||||
isReadOnly = false,
|
||||
onThreadLinkSelect,
|
||||
tableConstraints,
|
||||
tablePartitioned,
|
||||
}: SchemaTableProps) => {
|
||||
const { t } = useTranslation();
|
||||
|
||||
@ -386,21 +385,6 @@ const SchemaTable = ({
|
||||
width: 320,
|
||||
render: renderDescription,
|
||||
},
|
||||
...(tablePartitioned
|
||||
? [
|
||||
{
|
||||
title: t('label.partitioned'),
|
||||
dataIndex: 'name',
|
||||
key: 'name',
|
||||
accessor: 'name',
|
||||
width: 120,
|
||||
render: (columnName: string) =>
|
||||
tablePartitioned?.columns?.includes(columnName)
|
||||
? t('label.partitioned')
|
||||
: t('label.non-partitioned'),
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
title: t('label.tag-plural'),
|
||||
dataIndex: 'tags',
|
||||
|
@ -26,7 +26,6 @@ export interface SchemaTableProps {
|
||||
hasDescriptionEditAccess: boolean;
|
||||
hasTagEditAccess: boolean;
|
||||
tableConstraints: Table['tableConstraints'];
|
||||
tablePartitioned: Table['tablePartition'];
|
||||
searchText?: string;
|
||||
isReadOnly?: boolean;
|
||||
entityFqn: string;
|
||||
|
@ -14,9 +14,11 @@ import { Space } from 'antd';
|
||||
import { EntityTags } from 'Models';
|
||||
import React from 'react';
|
||||
import { EntityType } from '../../../enums/entity.enum';
|
||||
import { TablePartition } from '../../../generated/entity/data/table';
|
||||
import { ThreadType } from '../../../generated/entity/feed/thread';
|
||||
import { EntityReference } from '../../../generated/entity/type';
|
||||
import { TagSource } from '../../../generated/type/tagLabel';
|
||||
import { PartitionedKeys } from '../../../pages/TableDetailsPageV1/PartitionedKeys/PartitionedKeys.component';
|
||||
import entityRightPanelClassBase from '../../../utils/EntityRightPanelClassBase';
|
||||
import { CustomPropertyTable } from '../../common/CustomPropertyTable/CustomPropertyTable';
|
||||
import type {
|
||||
@ -43,6 +45,7 @@ interface EntityRightPanelProps<T extends ExtentionEntitiesKeys> {
|
||||
onThreadLinkSelect?: (value: string, threadType?: ThreadType) => void;
|
||||
viewAllPermission?: boolean;
|
||||
customProperties?: ExtentionEntities[T];
|
||||
tablePartition?: TablePartition;
|
||||
}
|
||||
|
||||
const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
|
||||
@ -61,6 +64,7 @@ const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
|
||||
showDataProductContainer = true,
|
||||
viewAllPermission,
|
||||
customProperties,
|
||||
tablePartition,
|
||||
}: EntityRightPanelProps<T>) => {
|
||||
const KnowledgeArticles =
|
||||
entityRightPanelClassBase.getKnowLedgeArticlesWidget();
|
||||
@ -113,6 +117,9 @@ const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
|
||||
maxDataCap={5}
|
||||
/>
|
||||
)}
|
||||
{tablePartition ? (
|
||||
<PartitionedKeys tablePartition={tablePartition} />
|
||||
) : null}
|
||||
</Space>
|
||||
{afterSlot}
|
||||
</>
|
||||
|
@ -19,7 +19,7 @@ import { DMLOperationType } from '../generated/api/data/createTableProfile';
|
||||
import {
|
||||
ColumnProfilerConfig,
|
||||
DataType,
|
||||
PartitionIntervalType,
|
||||
PartitionIntervalTypes,
|
||||
PartitionIntervalUnit,
|
||||
ProfileSampleType,
|
||||
} from '../generated/entity/data/table';
|
||||
@ -346,18 +346,19 @@ export const SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME = [
|
||||
];
|
||||
|
||||
export const SUPPORTED_COLUMN_DATA_TYPE_FOR_INTERVAL = {
|
||||
[PartitionIntervalType.IngestionTime]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
|
||||
[PartitionIntervalType.TimeUnit]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
|
||||
[PartitionIntervalType.IntegerRange]: [DataType.Int, DataType.Bigint],
|
||||
[PartitionIntervalType.ColumnValue]: [DataType.Varchar, DataType.String],
|
||||
};
|
||||
[PartitionIntervalTypes.IngestionTime]:
|
||||
SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
|
||||
[PartitionIntervalTypes.TimeUnit]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
|
||||
[PartitionIntervalTypes.IntegerRange]: [DataType.Int, DataType.Bigint],
|
||||
[PartitionIntervalTypes.ColumnValue]: [DataType.Varchar, DataType.String],
|
||||
} as Record<PartitionIntervalTypes, DataType[]>;
|
||||
|
||||
export const INTERVAL_TYPE_OPTIONS = Object.values(PartitionIntervalType).map(
|
||||
(value) => ({
|
||||
value,
|
||||
label: value,
|
||||
})
|
||||
);
|
||||
export const INTERVAL_TYPE_OPTIONS = Object.keys(
|
||||
SUPPORTED_COLUMN_DATA_TYPE_FOR_INTERVAL
|
||||
).map((value) => ({
|
||||
value,
|
||||
label: value,
|
||||
}));
|
||||
export const INTERVAL_UNIT_OPTIONS = Object.values(PartitionIntervalUnit).map(
|
||||
(value) => ({
|
||||
value,
|
||||
@ -392,8 +393,8 @@ export const PROFILER_MODAL_LABEL_STYLE = {
|
||||
};
|
||||
|
||||
export const TIME_BASED_PARTITION = [
|
||||
PartitionIntervalType.IngestionTime,
|
||||
PartitionIntervalType.TimeUnit,
|
||||
PartitionIntervalTypes.IngestionTime,
|
||||
PartitionIntervalTypes.TimeUnit,
|
||||
];
|
||||
|
||||
export const TEST_CASE_TYPE_OPTION = [
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Tabelle {{entityText}}",
|
||||
"table-lowercase": "tabelle",
|
||||
"table-lowercase-plural": "tabellen",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tabellen",
|
||||
"table-profile": "Tabellenprofil",
|
||||
"table-tests-summary": "Tabellentestzusammenfassung",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Table {{entityText}}",
|
||||
"table-lowercase": "table",
|
||||
"table-lowercase-plural": "tables",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tables",
|
||||
"table-profile": "Table Profile",
|
||||
"table-tests-summary": "Table Tests Summary",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Tabla {{entityText}}",
|
||||
"table-lowercase": "tabla",
|
||||
"table-lowercase-plural": "tablas",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tablas",
|
||||
"table-profile": "Table Profile",
|
||||
"table-tests-summary": "Resumen de Tests de la Tabla",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Table {{entityText}}",
|
||||
"table-lowercase": "table",
|
||||
"table-lowercase-plural": "tables",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tables",
|
||||
"table-profile": "Profil de Table",
|
||||
"table-tests-summary": "Résumé des Tests de la Table",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "טבלה {{entityText}}",
|
||||
"table-lowercase": "טבלה",
|
||||
"table-lowercase-plural": "טבלאות נתונים",
|
||||
"table-partitioned": "טבלה מחולקת",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "טבלאות נתונים",
|
||||
"table-profile": "פרופיל טבלה",
|
||||
"table-tests-summary": "סיכום בדיקות טבלה",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "テーブル {{entityText}}",
|
||||
"table-lowercase": "テーブル",
|
||||
"table-lowercase-plural": "tables",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "テーブル",
|
||||
"table-profile": "Table Profile",
|
||||
"table-tests-summary": "Table Tests Summary",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Tabel {{entityText}}",
|
||||
"table-lowercase": "tabel",
|
||||
"table-lowercase-plural": "tabellen",
|
||||
"table-partitioned": "Tabel gepartitioneerd",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tabellen",
|
||||
"table-profile": "Tabelprofiel",
|
||||
"table-tests-summary": "Samenvatting tabeltests",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Tabela {{entityText}}",
|
||||
"table-lowercase": "tabela",
|
||||
"table-lowercase-plural": "tabelas",
|
||||
"table-partitioned": "Tabela Particionada",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Tabelas",
|
||||
"table-profile": "Perfil da Tabela",
|
||||
"table-tests-summary": "Resumo de Testes da Tabela",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "Таблица {{entityText}}",
|
||||
"table-lowercase": "таблица",
|
||||
"table-lowercase-plural": "таблицы",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "Таблицы",
|
||||
"table-profile": "Профиль таблицы",
|
||||
"table-tests-summary": "Сводка по тестам",
|
||||
|
@ -1064,7 +1064,7 @@
|
||||
"table-entity-text": "数据表{{entityText}}",
|
||||
"table-lowercase": "数据表",
|
||||
"table-lowercase-plural": "数据表",
|
||||
"table-partitioned": "Table Partitioned",
|
||||
"table-partition-plural": "Table Partitions",
|
||||
"table-plural": "数据表",
|
||||
"table-profile": "数据表分析",
|
||||
"table-tests-summary": "数据表测试概要",
|
||||
|
@ -10,22 +10,67 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import { Typography } from 'antd';
|
||||
import { Table, Typography } from 'antd';
|
||||
import { ColumnsType } from 'antd/lib/table';
|
||||
import { t } from 'i18next';
|
||||
import React from 'react';
|
||||
import { TablePartition } from '../../../generated/entity/data/table';
|
||||
import React, { useMemo } from 'react';
|
||||
import {
|
||||
PartitionColumnDetails,
|
||||
TablePartition,
|
||||
} from '../../../generated/entity/data/table';
|
||||
|
||||
interface PartitionedKeysProps {
|
||||
tablePartition: TablePartition;
|
||||
}
|
||||
|
||||
export const PartitionedKeys = ({ tablePartition }: PartitionedKeysProps) => {
|
||||
const partitionColumnDetails = useMemo(
|
||||
() =>
|
||||
tablePartition?.columns?.map((column) => ({
|
||||
...column,
|
||||
key: column.columnName,
|
||||
})),
|
||||
[tablePartition.columns]
|
||||
);
|
||||
|
||||
const columns = useMemo(() => {
|
||||
const data: ColumnsType<PartitionColumnDetails> = [
|
||||
{
|
||||
title: t('label.column'),
|
||||
dataIndex: 'columnName',
|
||||
key: 'columnName',
|
||||
ellipsis: true,
|
||||
width: '50%',
|
||||
},
|
||||
{
|
||||
title: t('label.type'),
|
||||
dataIndex: 'intervalType',
|
||||
key: 'intervalType',
|
||||
ellipsis: true,
|
||||
width: '50%',
|
||||
render: (text) => {
|
||||
return text ?? '--';
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
return data;
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Typography.Text className="right-panel-label">
|
||||
{t('label.table-partitioned')}
|
||||
{t('label.table-partition-plural')}
|
||||
</Typography.Text>
|
||||
<span>{` - ${tablePartition.intervalType}`}</span>
|
||||
<Table
|
||||
bordered
|
||||
columns={columns}
|
||||
data-testid="partitioned-column-table"
|
||||
dataSource={partitionColumnDetails}
|
||||
pagination={false}
|
||||
rowKey="name"
|
||||
size="small"
|
||||
/>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
@ -90,7 +90,6 @@ import { getTagsWithoutTier, getTierTags } from '../../utils/TableUtils';
|
||||
import { createTagObject, updateTierTag } from '../../utils/TagsUtils';
|
||||
import { showErrorToast, showSuccessToast } from '../../utils/ToastUtils';
|
||||
import { FrequentlyJoinedTables } from './FrequentlyJoinedTables/FrequentlyJoinedTables.component';
|
||||
import { PartitionedKeys } from './PartitionedKeys/PartitionedKeys.component';
|
||||
import './table-details-page-v1.less';
|
||||
import TableConstraints from './TableConstraints/TableConstraints';
|
||||
|
||||
@ -524,7 +523,6 @@ const TableDetailsPageV1 = () => {
|
||||
isReadOnly={deleted}
|
||||
joins={tableDetails?.joins?.columnJoins ?? []}
|
||||
tableConstraints={tableDetails?.tableConstraints}
|
||||
tablePartitioned={tableDetails?.tablePartition}
|
||||
onThreadLinkSelect={onThreadLinkSelect}
|
||||
onUpdate={onColumnsUpdate}
|
||||
/>
|
||||
@ -543,11 +541,6 @@ const TableDetailsPageV1 = () => {
|
||||
<TableConstraints
|
||||
constraints={tableDetails?.tableConstraints}
|
||||
/>
|
||||
{tableDetails?.tablePartition ? (
|
||||
<PartitionedKeys
|
||||
tablePartition={tableDetails.tablePartition}
|
||||
/>
|
||||
) : null}
|
||||
</Space>
|
||||
}
|
||||
beforeSlot={
|
||||
@ -563,6 +556,7 @@ const TableDetailsPageV1 = () => {
|
||||
entityId={tableDetails?.id ?? ''}
|
||||
entityType={EntityType.TABLE}
|
||||
selectedTags={tableTags}
|
||||
tablePartition={tableDetails?.tablePartition}
|
||||
viewAllPermission={viewAllPermission}
|
||||
onTagSelectionChange={handleTagSelection}
|
||||
onThreadLinkSelect={onThreadLinkSelect}
|
||||
|
Loading…
x
Reference in New Issue
Block a user