Issue #14765 - Preparatory Work (#15312)

* refactor!: change partition metadata structure for table entities

* refactor!: updated json schema for TypeScript code gen

* chore: migration of partition for table entities

* style: python & java linting

* updated ui side change for table partitioned key

* miner fix

* addressing comments

* fixed ci error

---------

Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com>
This commit is contained in:
Teddy 2024-02-28 07:11:00 +01:00 committed by GitHub
parent 0a38334c98
commit 056e6368d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
46 changed files with 1176 additions and 683 deletions

View File

@ -12,7 +12,8 @@
"""Athena source module"""
import traceback
from typing import Iterable, Tuple
from copy import deepcopy
from typing import Dict, Iterable, List, Optional, Tuple
from pyathena.sqlalchemy.base import AthenaDialect
from sqlalchemy import types
@ -22,7 +23,8 @@ from sqlalchemy.engine.reflection import Inspector
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.table import (
Column,
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
Table,
TablePartition,
TableType,
@ -57,6 +59,18 @@ logger = ingestion_logger()
ATHENA_TAG = "ATHENA TAG"
ATHENA_TAG_CLASSIFICATION = "ATHENA TAG CLASSIFICATION"
ATHENA_INTERVAL_TYPE_MAP = {
**dict.fromkeys(["enum", "string", "VARCHAR"], PartitionIntervalTypes.COLUMN_VALUE),
**dict.fromkeys(
["integer", "bigint", "INTEGER", "BIGINT"], PartitionIntervalTypes.INTEGER_RANGE
),
**dict.fromkeys(
["date", "timestamp", "DATE", "DATETIME", "TIMESTAMP"],
PartitionIntervalTypes.TIME_UNIT,
),
"injected": PartitionIntervalTypes.INJECTED,
}
def _get_column_type(self, type_):
"""
@ -123,6 +137,29 @@ def _get_column_type(self, type_):
return col_type(*args)
def _get_projection_details(
columns: List[Dict], projection_parameters: Dict
) -> List[Dict]:
"""Get the projection details for the columns
Args:
columns (List[Dict]): list of columns
projection_parameters (Dict): projection parameters
"""
if not projection_parameters:
return columns
columns = deepcopy(columns)
for col in columns:
projection_details = next(
({k: v} for k, v in projection_parameters.items() if k == col["name"]), None
)
if projection_details:
col["projection_type"] = projection_details[col["name"]]
return columns
@reflection.cache
def get_columns(self, connection, table_name, schema=None, **kw):
"""
@ -147,6 +184,14 @@ def get_columns(self, connection, table_name, schema=None, **kw):
]
if kw.get("only_partition_columns"):
# Return projected partition information to set partition type in `get_table_partition_details`
# projected partition fields are stored in the form of `projection.<field_name>.type` as a table parameter
projection_parameters = {
key_.split(".")[1]: value_
for key_, value_ in metadata.parameters.items()
if key_.startswith("projection") and key_.endswith("type")
}
columns = _get_projection_details(columns, projection_parameters)
return columns
columns += [
@ -223,14 +268,34 @@ class AthenaSource(CommonDbSourceService):
def get_table_partition_details(
self, table_name: str, schema_name: str, inspector: Inspector
) -> Tuple[bool, TablePartition]:
) -> Tuple[bool, Optional[TablePartition]]:
"""Get Athena table partition detail
Args:
table_name (str): name of the table
schema_name (str): name of the schema
inspector (Inspector):
Returns:
Tuple[bool, Optional[TablePartition]]:
"""
columns = inspector.get_columns(
table_name=table_name, schema=schema_name, only_partition_columns=True
)
if columns:
partition_details = TablePartition(
intervalType=IntervalType.COLUMN_VALUE.value,
columns=[column["name"] for column in columns],
columns=[
PartitionColumnDetails(
columnName=col["name"],
intervalType=ATHENA_INTERVAL_TYPE_MAP.get(
col.get("projection_type", str(col["type"])),
PartitionIntervalTypes.COLUMN_VALUE,
),
interval=None,
)
for col in columns
]
)
return True, partition_details
return False, None

View File

@ -33,7 +33,8 @@ from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TablePartition,
TableType,
)
@ -495,7 +496,7 @@ class BigquerySource(
def get_table_partition_details(
self, table_name: str, schema_name: str, inspector: Inspector
) -> Tuple[bool, TablePartition]:
) -> Tuple[bool, Optional[TablePartition]]:
"""
check if the table is partitioned table and return the partition details
"""
@ -504,30 +505,38 @@ class BigquerySource(
if table.time_partitioning is not None:
if table.time_partitioning.field:
table_partition = TablePartition(
interval=str(table.time_partitioning.type_),
intervalType=IntervalType.TIME_UNIT.value,
columns=[
PartitionColumnDetails(
columnName=table.time_partitioning.field,
interval=str(table.time_partitioning.type_),
intervalType=PartitionIntervalTypes.TIME_UNIT,
)
]
)
table_partition.columns = [table.time_partitioning.field]
return True, table_partition
return True, TablePartition(
interval=str(table.time_partitioning.type_),
intervalType=IntervalType.INGESTION_TIME.value,
columns=[
PartitionColumnDetails(
columnName="_PARTITIONTIME"
if table.time_partitioning.type_ == "HOUR"
else "_PARTITIONDATE",
interval=str(table.time_partitioning.type_),
intervalType=PartitionIntervalTypes.INGESTION_TIME,
)
]
)
if table.range_partitioning:
table_partition = TablePartition(
intervalType=IntervalType.INTEGER_RANGE.value,
table_partition = PartitionColumnDetails(
columnName=table.range_partitioning.field,
intervalType=PartitionIntervalTypes.INTEGER_RANGE,
interval=None,
)
if hasattr(table.range_partitioning, "range_") and hasattr(
table.range_partitioning.range_, "interval"
):
table_partition.interval = table.range_partitioning.range_.interval
if (
hasattr(table.range_partitioning, "field")
and table.range_partitioning.field
):
table_partition.columns = [table.range_partitioning.field]
return True, table_partition
table_partition.columnName = table.range_partitioning.field
return True, TablePartition(columns=[table_partition])
return False, None
def clean_raw_data_type(self, raw_data_type):

View File

@ -21,7 +21,8 @@ from sqlalchemy.engine.reflection import Inspector
from metadata.generated.schema.entity.data.table import (
Column,
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TableConstraint,
TablePartition,
TableType,
@ -306,9 +307,16 @@ class DorisSource(CommonDbSourceService):
if result and result[0].PartitionKey != "":
partition_details = TablePartition(
intervalType=IntervalType.TIME_UNIT.value,
columns=result[0].PartitionKey.split(", "),
columns=[
PartitionColumnDetails(
columnName=partition_key,
intervalType=PartitionIntervalTypes.TIME_UNIT,
interval=None,
)
for partition_key in result[0].PartitionKey.split(", ")
]
)
return True, partition_details
return False, None
except Exception:

View File

@ -21,7 +21,8 @@ from sqlalchemy.engine.reflection import Inspector
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TablePartition,
TableType,
)
@ -66,9 +67,9 @@ logger = ingestion_logger()
INTERVAL_TYPE_MAP = {
"list": IntervalType.COLUMN_VALUE.value,
"hash": IntervalType.COLUMN_VALUE.value,
"range": IntervalType.TIME_UNIT.value,
"list": PartitionIntervalTypes.COLUMN_VALUE,
"hash": PartitionIntervalTypes.COLUMN_VALUE,
"range": PartitionIntervalTypes.TIME_UNIT,
}
RELKIND_MAP = {
@ -187,18 +188,27 @@ class GreenplumSource(CommonDbSourceService, MultiDBSource):
def get_table_partition_details(
self, table_name: str, schema_name: str, inspector: Inspector
) -> Tuple[bool, TablePartition]:
) -> Tuple[bool, Optional[TablePartition]]:
result = self.engine.execute(
GREENPLUM_PARTITION_DETAILS.format(
table_name=table_name, schema_name=schema_name
)
).all()
if result:
partition_details = TablePartition(
intervalType=INTERVAL_TYPE_MAP.get(
result[0].partition_strategy, IntervalType.COLUMN_VALUE.value
),
columns=[row.column_name for row in result if row.column_name],
columns=[
PartitionColumnDetails(
columnName=row.column_name,
intervalType=INTERVAL_TYPE_MAP.get(
result[0].partition_strategy,
PartitionIntervalTypes.COLUMN_VALUE,
),
interval=None,
)
for row in result
if row.column_name
]
)
return True, partition_details
return False, None

View File

@ -20,7 +20,12 @@ import pyiceberg.partitioning
import pyiceberg.table
import pyiceberg.types
from metadata.generated.schema.entity.data.table import Column, Constraint, DataType
from metadata.generated.schema.entity.data.table import (
Column,
Constraint,
DataType,
PartitionIntervalTypes,
)
def namespace_to_str(namespace: tuple[str]) -> str:
@ -45,13 +50,45 @@ def get_table_name_as_str(table: pyiceberg.table.Table) -> str:
def get_column_from_partition(
columns: Tuple[pyiceberg.types.NestedField, ...],
partition: pyiceberg.partitioning.PartitionField,
) -> str:
) -> Optional[str]:
"""Returns the Column Name belonging to a partition."""
# A Partition in Iceberg has a Source Column to which a Transformation is applied.
# We need to return the Source Column name.
return [
column.name for column in columns if column.field_id == partition.source_id
][0]
return next(
(column.name for column in columns if column.field_id == partition.source_id),
None,
)
def get_column_partition_type(
columns: Tuple[pyiceberg.types.NestedField, ...],
partition: pyiceberg.partitioning.PartitionField,
) -> Optional[PartitionIntervalTypes]:
"""Get the partition type for a given partition column."""
iceberg_interval_type_map = {
"INT": PartitionIntervalTypes.INTEGER_RANGE,
**dict.fromkeys(
["TIME", "DATE", "TIMESTAMP", "TIMESTAMPTZ"],
PartitionIntervalTypes.TIME_UNIT,
),
}
data_type = str(
next(
(
column.field_type
for column in columns
if column.field_id == partition.source_id
),
"",
)
)
if not data_type.isalpha():
return None
return iceberg_interval_type_map.get(
data_type.upper(), PartitionIntervalTypes.COLUMN_VALUE
)
def get_owner_from_table(

View File

@ -20,6 +20,7 @@ from pydantic import BaseModel
from metadata.generated.schema.entity.data.table import (
Column,
PartitionColumnDetails,
TablePartition,
TableType,
)
@ -27,6 +28,7 @@ from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.source.database.iceberg.helper import (
IcebergColumnParser,
get_column_from_partition,
get_column_partition_type,
)
@ -57,10 +59,16 @@ class IcebergTable(BaseModel):
columns=[IcebergColumnParser.parse(column) for column in iceberg_columns],
tablePartition=TablePartition(
columns=[
get_column_from_partition(iceberg_columns, partition)
PartitionColumnDetails(
columnName=get_column_from_partition(
iceberg_columns, partition
),
intervalType=get_column_partition_type(
iceberg_columns, partition
),
interval=None,
)
for partition in table.spec().fields
],
intervalType=None,
interval=None,
]
),
)

View File

@ -21,7 +21,8 @@ from sqlalchemy.engine import Inspector
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TablePartition,
TableType,
)
@ -74,9 +75,9 @@ logger = ingestion_logger()
INTERVAL_TYPE_MAP = {
"list": IntervalType.COLUMN_VALUE.value,
"hash": IntervalType.COLUMN_VALUE.value,
"range": IntervalType.TIME_UNIT.value,
"list": PartitionIntervalTypes.COLUMN_VALUE,
"hash": PartitionIntervalTypes.COLUMN_VALUE,
"range": PartitionIntervalTypes.TIME_UNIT,
}
RELKIND_MAP = {
@ -203,12 +204,20 @@ class PostgresSource(CommonDbSourceService, MultiDBSource):
result = self.engine.execute(
POSTGRES_PARTITION_DETAILS, table_name=table_name, schema_name=schema_name
).all()
if result:
partition_details = TablePartition(
intervalType=INTERVAL_TYPE_MAP.get(
result[0].partition_strategy, IntervalType.COLUMN_VALUE.value
),
columns=[row.column_name for row in result if row.column_name],
columns=[
PartitionColumnDetails(
columnName=row.column_name,
intervalType=INTERVAL_TYPE_MAP.get(
row.partition_strategy, PartitionIntervalTypes.COLUMN_VALUE
),
interval=None,
)
for row in result
if row.column_name
]
)
return True, partition_details
return False, None

View File

@ -32,7 +32,8 @@ from metadata.generated.schema.entity.data.storedProcedure import (
)
from metadata.generated.schema.entity.data.table import (
ConstraintType,
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TableConstraint,
TablePartition,
TableType,
@ -206,11 +207,11 @@ class RedshiftSource(
f"Error trying to connect to database {new_database}: {exc}"
)
def _get_partition_key(self, diststyle: str) -> Optional[List[str]]:
def _get_partition_key(self, diststyle: str) -> Optional[str]:
try:
regex = re.match(r"KEY\((\w+)\)", diststyle)
if regex:
return [regex.group(1)]
return regex.group(1)
except Exception as err:
logger.debug(traceback.format_exc())
logger.warning(err)
@ -218,13 +219,20 @@ class RedshiftSource(
def get_table_partition_details(
self, table_name: str, schema_name: str, inspector: Inspector
) -> Tuple[bool, TablePartition]:
) -> Tuple[bool, Optional[TablePartition]]:
diststyle = self.partition_details.get(f"{schema_name}.{table_name}")
if diststyle:
partition_details = TablePartition(
columns=self._get_partition_key(diststyle),
intervalType=IntervalType.COLUMN_VALUE,
)
distkey = self._get_partition_key(diststyle)
if distkey is not None:
partition_details = TablePartition(
columns=[
PartitionColumnDetails(
columnName=distkey,
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
interval=None,
)
]
)
return True, partition_details
return False, None

View File

@ -28,7 +28,8 @@ from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
TablePartition,
TableType,
)
@ -319,15 +320,21 @@ class SnowflakeSource(
def get_table_partition_details(
self, table_name: str, schema_name: str, inspector: Inspector
) -> Tuple[bool, TablePartition]:
) -> Tuple[bool, Optional[TablePartition]]:
cluster_key = self.partition_details.get(f"{schema_name}.{table_name}")
if cluster_key:
partition_columns = self.parse_column_name_from_expr(cluster_key)
partition_details = TablePartition(
columns=self.__fix_partition_column_case(
table_name, schema_name, inspector, partition_columns
),
intervalType=IntervalType.COLUMN_VALUE,
columns=[
PartitionColumnDetails(
columnName=column,
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
interval=None,
)
for column in self.__fix_partition_column_case(
table_name, schema_name, inspector, partition_columns
)
]
)
return True, partition_details
return False, None

View File

@ -21,7 +21,7 @@ from metadata.data_quality.validations.table.pandas.tableRowInsertedCountToBeBet
TableRowInsertedCountToBeBetweenValidator,
)
from metadata.generated.schema.entity.data.table import (
PartitionIntervalType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
)
@ -47,7 +47,7 @@ class PandasInterfaceMixin:
partition_field = self.table_partition_config.partitionColumnName
if (
self.table_partition_config.partitionIntervalType
== PartitionIntervalType.COLUMN_VALUE
== PartitionIntervalTypes.COLUMN_VALUE
):
return [
df[
@ -59,7 +59,7 @@ class PandasInterfaceMixin:
]
if (
self.table_partition_config.partitionIntervalType
== PartitionIntervalType.INTEGER_RANGE
== PartitionIntervalTypes.INTEGER_RANGE
):
return [
df[

View File

@ -19,7 +19,7 @@ from typing import List
from sqlalchemy import Column, text
from metadata.generated.schema.entity.data.table import (
PartitionIntervalType,
PartitionIntervalTypes,
PartitionProfilerConfig,
)
from metadata.profiler.orm.functions.modulo import ModuloFn
@ -52,13 +52,13 @@ def build_partition_predicate(
_type_: _description_
"""
partition_field = partition_details.partitionColumnName
if partition_details.partitionIntervalType == PartitionIntervalType.COLUMN_VALUE:
if partition_details.partitionIntervalType == PartitionIntervalTypes.COLUMN_VALUE:
return get_value_filter(
Column(partition_field),
partition_details.partitionValues,
)
if partition_details.partitionIntervalType == PartitionIntervalType.INTEGER_RANGE:
if partition_details.partitionIntervalType == PartitionIntervalTypes.INTEGER_RANGE:
return get_integer_range_filter(
Column(partition_field),
partition_details.partitionIntegerRangeStart,

View File

@ -20,7 +20,7 @@ from metadata.data_quality.validations.table.pandas.tableRowInsertedCountToBeBet
TableRowInsertedCountToBeBetweenValidator,
)
from metadata.generated.schema.entity.data.table import (
PartitionIntervalType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
TableData,
@ -41,7 +41,7 @@ class DatalakeSampler(SamplerInterface):
partition_field = self._partition_details.partitionColumnName
if (
self._partition_details.partitionIntervalType
== PartitionIntervalType.COLUMN_VALUE
== PartitionIntervalTypes.COLUMN_VALUE
):
return [
df[df[partition_field].isin(self._partition_details.partitionValues)]
@ -49,7 +49,7 @@ class DatalakeSampler(SamplerInterface):
]
if (
self._partition_details.partitionIntervalType
== PartitionIntervalType.INTEGER_RANGE
== PartitionIntervalTypes.INTEGER_RANGE
):
return [
df[

View File

@ -21,7 +21,7 @@ from sqlalchemy.orm.util import AliasedClass
from sqlalchemy.sql.sqltypes import Enum
from metadata.generated.schema.entity.data.table import (
PartitionIntervalType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
TableData,
@ -224,7 +224,7 @@ class SQASampler(SamplerInterface):
if (
self._partition_details.partitionIntervalType
== PartitionIntervalType.COLUMN_VALUE
== PartitionIntervalTypes.COLUMN_VALUE
):
return aliased(
self.table,
@ -242,7 +242,7 @@ class SQASampler(SamplerInterface):
if (
self._partition_details.partitionIntervalType
== PartitionIntervalType.INTEGER_RANGE
== PartitionIntervalTypes.INTEGER_RANGE
):
return aliased(
self.table,

View File

@ -11,10 +11,11 @@
"""Partition utility"""
from typing import Optional
from typing import List, Optional
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
PartitionIntervalUnit,
PartitionProfilerConfig,
Table,
@ -44,47 +45,55 @@ def get_partition_details(entity: Table) -> Optional[PartitionProfilerConfig]:
and entity.serviceType == DatabaseServiceType.BigQuery
):
if hasattr(entity, "tablePartition") and entity.tablePartition:
if entity.tablePartition.intervalType == IntervalType.TIME_UNIT:
column_partitions: Optional[
List[PartitionColumnDetails]
] = entity.tablePartition.columns
if not column_partitions:
raise TypeError("table partition missing. Skipping table")
partiton = column_partitions[0]
if partiton.intervalType == PartitionIntervalTypes.TIME_UNIT:
return PartitionProfilerConfig(
enablePartitioning=True,
partitionColumnName=entity.tablePartition.columns[0],
partitionColumnName=partiton.columnName,
partitionIntervalUnit=PartitionIntervalUnit.DAY
if entity.tablePartition.interval != "HOUR"
else entity.tablePartition.interval,
if partiton.interval != "HOUR"
else partiton.interval,
partitionInterval=1,
partitionIntervalType=entity.tablePartition.intervalType.value,
partitionIntervalType=partiton.intervalType.value,
partitionValues=None,
partitionIntegerRangeStart=None,
partitionIntegerRangeEnd=None,
)
if entity.tablePartition.intervalType == IntervalType.INGESTION_TIME:
if partiton.intervalType == PartitionIntervalTypes.INGESTION_TIME:
return PartitionProfilerConfig(
enablePartitioning=True,
partitionColumnName="_PARTITIONDATE"
if entity.tablePartition.interval == "DAY"
if partiton.interval == "DAY"
else "_PARTITIONTIME",
partitionIntervalUnit=PartitionIntervalUnit.DAY
if entity.tablePartition.interval != "HOUR"
else entity.tablePartition.interval,
if partiton.interval != "HOUR"
else partiton.interval,
partitionInterval=1,
partitionIntervalType=entity.tablePartition.intervalType.value,
partitionIntervalType=partiton.intervalType.value,
partitionValues=None,
partitionIntegerRangeStart=None,
partitionIntegerRangeEnd=None,
)
if entity.tablePartition.intervalType == IntervalType.INTEGER_RANGE:
if partiton.intervalType == PartitionIntervalTypes.INTEGER_RANGE:
return PartitionProfilerConfig(
enablePartitioning=True,
partitionColumnName=entity.tablePartition.columns[0],
partitionColumnName=partiton.columnName,
partitionIntervalUnit=None,
partitionInterval=None,
partitionIntervalType=entity.tablePartition.intervalType.value,
partitionIntervalType=partiton.intervalType.value,
partitionValues=None,
partitionIntegerRangeStart=1,
partitionIntegerRangeEnd=10000,
)
raise TypeError(
f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
f"Unsupported partition type {partiton.intervalType}. Skipping table"
)
return None

View File

@ -18,8 +18,8 @@ from pydantic import BaseModel
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionIntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
PartitionIntervalUnit,
PartitionProfilerConfig,
Table,
@ -142,7 +142,13 @@ class ProfilerPartitionUnitTest(TestCase):
def test_partition_details_time_unit(self):
table_entity = MockTable(
tablePartition=TablePartition(
columns=["e"], intervalType=IntervalType.TIME_UNIT, interval="DAY"
columns=[
PartitionColumnDetails(
columnName="e",
intervalType=PartitionIntervalTypes.TIME_UNIT,
interval="DAY",
)
]
),
tableProfilerConfig=None,
)
@ -177,7 +183,13 @@ class ProfilerPartitionUnitTest(TestCase):
def test_partition_details_ingestion_time_date(self):
table_entity = MockTable(
tablePartition=TablePartition(
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="DAY"
columns=[
PartitionColumnDetails(
columnName="e",
intervalType=PartitionIntervalTypes.INGESTION_TIME.value,
interval="DAY",
)
]
),
tableProfilerConfig=None,
)
@ -211,7 +223,13 @@ class ProfilerPartitionUnitTest(TestCase):
def test_partition_details_ingestion_time_hour(self):
table_entity = MockTable(
tablePartition=TablePartition(
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="HOUR"
columns=[
PartitionColumnDetails(
columnName="e",
intervalType=PartitionIntervalTypes.INGESTION_TIME.value,
interval="HOUR",
)
]
),
tableProfilerConfig=None,
)
@ -246,15 +264,19 @@ class ProfilerPartitionUnitTest(TestCase):
def test_partition_non_bq_table_profiler_partition_config(self):
table_entity = MockRedshiftTable(
tablePartition=TablePartition(
columns=["datetime"],
intervalType=IntervalType.TIME_UNIT,
interval="DAY",
columns=[
PartitionColumnDetails(
columnName="datetime",
intervalType=PartitionIntervalTypes.TIME_UNIT.value,
interval="DAY",
)
]
),
tableProfilerConfig=TableProfilerConfig(
partitioning=PartitionProfilerConfig(
enablePartitioning=True,
partitionColumnName="foo",
partitionIntervalType=PartitionIntervalType.TIME_UNIT,
partitionIntervalType=PartitionIntervalTypes.TIME_UNIT,
partitionIntervalUnit="DAY",
partitionInterval=1,
) # type: ignore
@ -266,7 +288,7 @@ class ProfilerPartitionUnitTest(TestCase):
if resp:
assert resp.enablePartitioning
assert resp.partitionColumnName == "foo"
assert resp.partitionIntervalType == PartitionIntervalType.TIME_UNIT
assert resp.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
assert resp.partitionIntervalUnit == PartitionIntervalUnit.DAY
assert resp.partitionInterval == 1
else:
@ -275,9 +297,13 @@ class ProfilerPartitionUnitTest(TestCase):
def test_partition_non_bq_table_no_profiler_partition_config(self):
table_entity = MockRedshiftTable(
tablePartition=TablePartition(
columns=["datetime"],
intervalType=IntervalType.TIME_UNIT,
interval="DAY",
columns=[
PartitionColumnDetails(
columnName="datetime",
intervalType=PartitionIntervalTypes.TIME_UNIT.value,
interval="DAY",
)
]
),
tableProfilerConfig=None,
)

View File

@ -20,7 +20,10 @@ from google.cloud.bigquery.table import Table
from pydantic import BaseModel
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.table import IntervalType
from metadata.generated.schema.entity.data.table import (
PartitionColumnDetails,
PartitionIntervalTypes,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)
@ -138,9 +141,18 @@ class BigqueryUnitTest(TestCase):
inspector=self.inspector,
)
assert partition.columns == ["test_column"]
assert partition.intervalType.value == IntervalType.TIME_UNIT.value
assert partition.interval == "DAY"
assert partition.columns == [
PartitionColumnDetails(
columnName="test_column",
intervalType=PartitionIntervalTypes.TIME_UNIT,
interval="DAY",
)
]
assert (
partition.columns[0].intervalType.value
== PartitionIntervalTypes.TIME_UNIT.value
)
assert partition.columns[0].interval == "DAY"
assert bool_resp
def test_ingestion_time_partition(self):
@ -153,8 +165,12 @@ class BigqueryUnitTest(TestCase):
inspector=self.inspector,
)
assert partition.intervalType.value == IntervalType.INGESTION_TIME.value
assert partition.interval == "HOUR"
self.assertIsInstance(partition.columns, list)
assert (
partition.columns[0].intervalType.value
== PartitionIntervalTypes.INGESTION_TIME.value
)
assert partition.columns[0].interval == "HOUR"
assert bool_resp
def test_range_partition(self):
@ -168,8 +184,12 @@ class BigqueryUnitTest(TestCase):
inspector=self.inspector,
)
assert partition.intervalType.value == IntervalType.INTEGER_RANGE.value
assert partition.interval == 10
self.assertIsInstance(partition.columns, list)
assert (
partition.columns[0].intervalType.value
== PartitionIntervalTypes.INTEGER_RANGE.value
)
assert partition.columns[0].interval == 10
assert bool_resp
def test_no_partition(self):

View File

@ -16,8 +16,8 @@ from typing import Optional
from pydantic import BaseModel
from metadata.generated.schema.entity.data.table import (
IntervalType,
PartitionIntervalType,
PartitionColumnDetails,
PartitionIntervalTypes,
PartitionIntervalUnit,
PartitionProfilerConfig,
TablePartition,
@ -66,13 +66,19 @@ def test_get_partition_details():
assert partition.enablePartitioning == True
assert partition.partitionColumnName == "order_date"
assert partition.partitionIntervalType == PartitionIntervalType.TIME_UNIT
assert partition.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
assert partition.partitionInterval == 5
assert partition.partitionIntervalUnit == PartitionIntervalUnit.YEAR
table_entity = MockTable(
tablePartition=TablePartition(
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="HOUR"
columns=[
PartitionColumnDetails(
columnName="e",
intervalType=PartitionIntervalTypes.INGESTION_TIME,
interval="HOUR",
)
]
),
tableProfilerConfig=None,
)
@ -81,21 +87,27 @@ def test_get_partition_details():
assert partition.enablePartitioning == True
assert partition.partitionColumnName == "_PARTITIONTIME"
assert partition.partitionIntervalType == PartitionIntervalType.INGESTION_TIME
assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
assert partition.partitionInterval == 1
assert partition.partitionIntervalUnit == PartitionIntervalUnit.HOUR
table_entity = MockTable(
tablePartition=TablePartition(
columns=["e"], intervalType=IntervalType.INGESTION_TIME, interval="DAY"
columns=[
PartitionColumnDetails(
columnName="e",
intervalType=PartitionIntervalTypes.INGESTION_TIME,
interval="DAY",
)
]
),
tableProfilerConfig=None,
)
partition = get_partition_details(table_entity)
assert partition.enablePartitioning == True
assert partition.enablePartitioning is True
assert partition.partitionColumnName == "_PARTITIONDATE"
assert partition.partitionIntervalType == PartitionIntervalType.INGESTION_TIME
assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
assert partition.partitionInterval == 1
assert partition.partitionIntervalUnit == PartitionIntervalUnit.DAY

View File

@ -1,290 +1,290 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# # Copyright 2021 Collate
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# # http://www.apache.org/licenses/LICENSE-2.0
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
"""
Test MongoDB using the topology
"""
# """
# Test MongoDB using the topology
# """
import json
from pathlib import Path
from unittest import TestCase
from unittest.mock import Mock, patch
# import json
# from pathlib import Path
# from unittest import TestCase
# from unittest.mock import Mock, patch
import pytest
# import pytest
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.table import (
Column,
ConstraintType,
DataType,
TableConstraint,
TableType,
)
from metadata.generated.schema.entity.services.databaseService import (
DatabaseConnection,
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)
from metadata.generated.schema.type.basic import SourceUrl
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.bigtable.metadata import BigtableSource
# from metadata.generated.schema.api.data.createTable import CreateTableRequest
# from metadata.generated.schema.entity.data.database import Database
# from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
# from metadata.generated.schema.entity.data.table import (
# Column,
# ConstraintType,
# DataType,
# TableConstraint,
# TableType,
# )
# from metadata.generated.schema.entity.services.databaseService import (
# DatabaseConnection,
# DatabaseService,
# DatabaseServiceType,
# )
# from metadata.generated.schema.metadataIngestion.workflow import (
# OpenMetadataWorkflowConfig,
# )
# from metadata.generated.schema.type.basic import SourceUrl
# from metadata.generated.schema.type.entityReference import EntityReference
# from metadata.ingestion.ometa.ometa_api import OpenMetadata
# from metadata.ingestion.source.database.bigtable.metadata import BigtableSource
mock_file_path = (
Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
)
with open(mock_file_path) as file:
mock_data: dict = json.load(file)
# mock_file_path = (
# Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
# )
# with open(mock_file_path) as file:
# mock_data: dict = json.load(file)
mock_bigtable_config = {
"source": {
"type": "bigtable",
"serviceName": "local_bigtable",
"serviceConnection": {
"config": {
"type": "BigTable",
"credentials": {
"gcpConfig": {
"type": "service_account",
"projectId": "my-gcp-project",
"privateKeyId": "private_key_id",
# this is a valid key that was generated on a local machine and is not used for any real project
"privateKey": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpQIBAAKCAQEAw3vHG9fDIkcYB0xi2Mv4fS2gUzKR9ZRrcVNeKkqGFTT71AVB\nOzgIqYVe8b2aWODuNye6sipcrqTqOt05Esj+sxhk5McM9bE2RlxXC5QH/Bp9zxMP\n/Yksv9Ov7fdDt/loUk7sTXvI+7LDJfmRYU6MtVjyyLs7KpQIB2xBWEToU1xZY+v0\ndRC1NA+YWc+FjXbAiFAf9d4gXkYO8VmU5meixVh4C8nsjokEXk0T/HEItpZCxadk\ndZ7LKUE/HDmWCO2oNG6sCf4ET2crjSdYIfXuREopX1aQwnk7KbI4/YIdlRz1I369\nAz3+Hxlf9lLJVH3+itN4GXrR9yWWKWKDnwDPbQIDAQABAoIBAQC3X5QuTR7SN8iV\niBUtc2D84+ECSmza5shG/UJW/6N5n0Mf53ICgBS4GNEwiYCRISa0/ILIgK6CcVb7\nsuvH8F3kWNzEMui4TO0x4YsR5GH9HkioCCS224frxkLBQnL20HIIy9ok8Rpe6Zjg\nNZUnp4yczPyqSeA9l7FUbTt69uDM2Cx61m8REOpFukpnYLyZGbmNPYmikEO+rq9r\nwNID5dkSeVuQYo4MQdRavOGFUWvUYXzkEQ0A6vPyraVBfolESX8WaLNVjic7nIa3\nujdSNojnJqGJ3gslntcmN1d4JOfydc4bja4/NdNlcOHpWDGLzY1QnaDe0Koxn8sx\nLT9MVD2NAoGBAPy7r726bKVGWcwqTzUuq1OWh5c9CAc4N2zWBBldSJyUdllUq52L\nWTyva6GRoRzCcYa/dKLLSM/k4eLf9tpxeIIfTOMsvzGtbAdm257ndMXNvfYpxCfU\nK/gUFfAUGHZ3MucTHRY6DTkJg763Sf6PubA2fqv3HhVZDK/1HGDtHlTPAoGBAMYC\npdV7O7lAyXS/d9X4PQZ4BM+P8MbXEdGBbPPlzJ2YIb53TEmYfSj3z41u9+BNnhGP\n4uzUyAR/E4sxrA2+Ll1lPSCn+KY14WWiVGfWmC5j1ftdpkbrXstLN8NpNYzrKZwx\njdR0ZkwvZ8B5+kJ1hK96giwWS+SJxJR3TohcQ18DAoGAJSfmv2r//BBqtURnHrd8\nwq43wvlbC8ytAVg5hA0d1r9Q4vM6w8+vz+cuWLOTTyobDKdrG1/tlXrd5r/sh9L0\n15SIdkGm3kPTxQbPNP5sQYRs8BrV1tEvoao6S3B45DnEBwrdVN42AXOvpcNGoqE4\nuHpahyeuiY7s+ZV8lZdmxSsCgYEAolr5bpmk1rjwdfGoaKEqKGuwRiBX5DHkQkxE\n8Zayt2VOBcX7nzyRI05NuEIMrLX3rZ61CktN1aH8fF02He6aRaoE/Qm9L0tujM8V\nNi8WiLMDeR/Ifs3u4/HAv1E8v1byv0dCa7klR8J257McJ/ID4X4pzcxaXgE4ViOd\nGOHNu9ECgYEApq1zkZthEQymTUxs+lSFcubQpaXyf5ZC61cJewpWkqGDtSC+8DxE\nF/jydybWuoNHXymnvY6QywxuIooivbuib6AlgpEJeybmnWlDOZklFOD0abNZ+aNO\ndUk7XVGffCakXQ0jp1kmZA4lGsYK1h5dEU5DgXqu4UYJ88Vttax2W+Y=\n-----END RSA PRIVATE KEY-----\n",
"clientEmail": "gcpuser@project_id.iam.gserviceaccount.com",
"clientId": "client_id",
"authUri": "https://accounts.google.com/o/oauth2/auth",
"tokenUri": "https://oauth2.googleapis.com/token",
"authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
"clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
}
},
},
},
"sourceConfig": {
"config": {
"type": "DatabaseMetadata",
"schemaFilterPattern": {"includes": ["my_instance"]},
"tableFilterPattern": {"includes": ["random_table"]},
}
},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
# mock_bigtable_config = {
# "source": {
# "type": "bigtable",
# "serviceName": "local_bigtable",
# "serviceConnection": {
# "config": {
# "type": "BigTable",
# "credentials": {
# "gcpConfig": {
# "type": "service_account",
# "projectId": "my-gcp-project",
# "privateKeyId": "private_key_id",
# # this is a valid key that was generated on a local machine and is not used for any real project
# "privateKey": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpQIBAAKCAQEAw3vHG9fDIkcYB0xi2Mv4fS2gUzKR9ZRrcVNeKkqGFTT71AVB\nOzgIqYVe8b2aWODuNye6sipcrqTqOt05Esj+sxhk5McM9bE2RlxXC5QH/Bp9zxMP\n/Yksv9Ov7fdDt/loUk7sTXvI+7LDJfmRYU6MtVjyyLs7KpQIB2xBWEToU1xZY+v0\ndRC1NA+YWc+FjXbAiFAf9d4gXkYO8VmU5meixVh4C8nsjokEXk0T/HEItpZCxadk\ndZ7LKUE/HDmWCO2oNG6sCf4ET2crjSdYIfXuREopX1aQwnk7KbI4/YIdlRz1I369\nAz3+Hxlf9lLJVH3+itN4GXrR9yWWKWKDnwDPbQIDAQABAoIBAQC3X5QuTR7SN8iV\niBUtc2D84+ECSmza5shG/UJW/6N5n0Mf53ICgBS4GNEwiYCRISa0/ILIgK6CcVb7\nsuvH8F3kWNzEMui4TO0x4YsR5GH9HkioCCS224frxkLBQnL20HIIy9ok8Rpe6Zjg\nNZUnp4yczPyqSeA9l7FUbTt69uDM2Cx61m8REOpFukpnYLyZGbmNPYmikEO+rq9r\nwNID5dkSeVuQYo4MQdRavOGFUWvUYXzkEQ0A6vPyraVBfolESX8WaLNVjic7nIa3\nujdSNojnJqGJ3gslntcmN1d4JOfydc4bja4/NdNlcOHpWDGLzY1QnaDe0Koxn8sx\nLT9MVD2NAoGBAPy7r726bKVGWcwqTzUuq1OWh5c9CAc4N2zWBBldSJyUdllUq52L\nWTyva6GRoRzCcYa/dKLLSM/k4eLf9tpxeIIfTOMsvzGtbAdm257ndMXNvfYpxCfU\nK/gUFfAUGHZ3MucTHRY6DTkJg763Sf6PubA2fqv3HhVZDK/1HGDtHlTPAoGBAMYC\npdV7O7lAyXS/d9X4PQZ4BM+P8MbXEdGBbPPlzJ2YIb53TEmYfSj3z41u9+BNnhGP\n4uzUyAR/E4sxrA2+Ll1lPSCn+KY14WWiVGfWmC5j1ftdpkbrXstLN8NpNYzrKZwx\njdR0ZkwvZ8B5+kJ1hK96giwWS+SJxJR3TohcQ18DAoGAJSfmv2r//BBqtURnHrd8\nwq43wvlbC8ytAVg5hA0d1r9Q4vM6w8+vz+cuWLOTTyobDKdrG1/tlXrd5r/sh9L0\n15SIdkGm3kPTxQbPNP5sQYRs8BrV1tEvoao6S3B45DnEBwrdVN42AXOvpcNGoqE4\nuHpahyeuiY7s+ZV8lZdmxSsCgYEAolr5bpmk1rjwdfGoaKEqKGuwRiBX5DHkQkxE\n8Zayt2VOBcX7nzyRI05NuEIMrLX3rZ61CktN1aH8fF02He6aRaoE/Qm9L0tujM8V\nNi8WiLMDeR/Ifs3u4/HAv1E8v1byv0dCa7klR8J257McJ/ID4X4pzcxaXgE4ViOd\nGOHNu9ECgYEApq1zkZthEQymTUxs+lSFcubQpaXyf5ZC61cJewpWkqGDtSC+8DxE\nF/jydybWuoNHXymnvY6QywxuIooivbuib6AlgpEJeybmnWlDOZklFOD0abNZ+aNO\ndUk7XVGffCakXQ0jp1kmZA4lGsYK1h5dEU5DgXqu4UYJ88Vttax2W+Y=\n-----END RSA PRIVATE KEY-----\n",
# "clientEmail": "gcpuser@project_id.iam.gserviceaccount.com",
# "clientId": "client_id",
# "authUri": "https://accounts.google.com/o/oauth2/auth",
# "tokenUri": "https://oauth2.googleapis.com/token",
# "authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
# "clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
# }
# },
# },
# },
# "sourceConfig": {
# "config": {
# "type": "DatabaseMetadata",
# "schemaFilterPattern": {"includes": ["my_instance"]},
# "tableFilterPattern": {"includes": ["random_table"]},
# }
# },
# },
# "sink": {"type": "metadata-rest", "config": {}},
# "workflowConfig": {
# "openMetadataServerConfig": {
# "hostPort": "http://localhost:8585/api",
# "authProvider": "openmetadata",
# "securityConfig": {
# "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
# },
# }
# },
# }
MOCK_DATABASE_SERVICE = DatabaseService(
id="85811038-099a-11ed-861d-0242ac120002",
name="local_bigtable",
connection=DatabaseConnection(),
serviceType=DatabaseServiceType.Glue,
)
# MOCK_DATABASE_SERVICE = DatabaseService(
# id="85811038-099a-11ed-861d-0242ac120002",
# name="local_bigtable",
# connection=DatabaseConnection(),
# serviceType=DatabaseServiceType.Glue,
# )
MOCK_DATABASE = Database(
id="2aaa012e-099a-11ed-861d-0242ac120002",
name="my-gcp-project",
fullyQualifiedName="local_bigtable.my-gcp-project",
displayName="my-gcp-project",
description="",
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002",
type="databaseService",
),
)
# MOCK_DATABASE = Database(
# id="2aaa012e-099a-11ed-861d-0242ac120002",
# name="my-gcp-project",
# fullyQualifiedName="local_bigtable.my-gcp-project",
# displayName="my-gcp-project",
# description="",
# service=EntityReference(
# id="85811038-099a-11ed-861d-0242ac120002",
# type="databaseService",
# ),
# )
MOCK_DATABASE_SCHEMA = DatabaseSchema(
id="2aaa012e-099a-11ed-861d-0242ac120056",
name="my_instance",
fullyQualifiedName="local_bigtable.my-gcp-project.my_instance",
displayName="default",
description="",
database=EntityReference(
id="2aaa012e-099a-11ed-861d-0242ac120002",
type="database",
),
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002",
type="databaseService",
),
)
# MOCK_DATABASE_SCHEMA = DatabaseSchema(
# id="2aaa012e-099a-11ed-861d-0242ac120056",
# name="my_instance",
# fullyQualifiedName="local_bigtable.my-gcp-project.my_instance",
# displayName="default",
# description="",
# database=EntityReference(
# id="2aaa012e-099a-11ed-861d-0242ac120002",
# type="database",
# ),
# service=EntityReference(
# id="85811038-099a-11ed-861d-0242ac120002",
# type="databaseService",
# ),
# )
MOCK_CREATE_TABLE = CreateTableRequest(
name="random_table",
tableType=TableType.Regular,
columns=[
Column(
name="row_key",
displayName="row_key",
dataType=DataType.BYTES,
dataTypeDisplay=DataType.BYTES.value,
),
Column(
name="cf1.col1",
displayName="cf1.col1",
dataType=DataType.BYTES,
dataTypeDisplay=DataType.BYTES.value,
),
Column(
name="cf2.col2",
displayName="cf2.col2",
dataType=DataType.BYTES,
dataTypeDisplay=DataType.BYTES.value,
),
],
tableConstraints=[
TableConstraint(constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"])
],
databaseSchema="local_bigtable.my-gcp-project.my_instance",
sourceUrl=SourceUrl(
__root__="https://console.cloud.google.com/bigtable/instances/my_instance/tables/random_table/overview?project=my-gcp-project"
),
)
# MOCK_CREATE_TABLE = CreateTableRequest(
# name="random_table",
# tableType=TableType.Regular,
# columns=[
# Column(
# name="row_key",
# displayName="row_key",
# dataType=DataType.BYTES,
# dataTypeDisplay=DataType.BYTES.value,
# ),
# Column(
# name="cf1.col1",
# displayName="cf1.col1",
# dataType=DataType.BYTES,
# dataTypeDisplay=DataType.BYTES.value,
# ),
# Column(
# name="cf2.col2",
# displayName="cf2.col2",
# dataType=DataType.BYTES,
# dataTypeDisplay=DataType.BYTES.value,
# ),
# ],
# tableConstraints=[
# TableConstraint(constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"])
# ],
# databaseSchema="local_bigtable.my-gcp-project.my_instance",
# sourceUrl=SourceUrl(
# __root__="https://console.cloud.google.com/bigtable/instances/my_instance/tables/random_table/overview?project=my-gcp-project"
# ),
# )
EXPECTED_DATABASE_NAMES = ["my-gcp-project"]
# EXPECTED_DATABASE_NAMES = ["my-gcp-project"]
EXPECTED_DATABASE_SCHEMA_NAMES = [
"my_instance",
]
# EXPECTED_DATABASE_SCHEMA_NAMES = [
# "my_instance",
# ]
MOCK_DATABASE_SCHEMA_NAMES = [
"my_instance",
"random1_schema",
]
# MOCK_DATABASE_SCHEMA_NAMES = [
# "my_instance",
# "random1_schema",
# ]
EXPECTED_TABLE_NAMES = [
("random_table", TableType.Regular),
]
# EXPECTED_TABLE_NAMES = [
# ("random_table", TableType.Regular),
# ]
def custom_column_compare(self, other):
return (
self.name == other.name
and self.description == other.description
and self.children == other.children
)
# def custom_column_compare(self, other):
# return (
# self.name == other.name
# and self.description == other.description
# and self.children == other.children
# )
@pytest.fixture
def mock_bigtable_row():
mock = Mock()
cell = Mock()
cell.value = b"cell_value"
cell.timestamp = 1234567890
mock.cells = {"cf1": {b"col1": [cell]}, "cf2": {b"col2": [cell]}}
mock.row_key = b"row_key"
yield mock
# @pytest.fixture
# def mock_bigtable_row():
# mock = Mock()
# cell = Mock()
# cell.value = b"cell_value"
# cell.timestamp = 1234567890
# mock.cells = {"cf1": {b"col1": [cell]}, "cf2": {b"col2": [cell]}}
# mock.row_key = b"row_key"
# yield mock
@pytest.fixture
def mock_bigtable_table(mock_bigtable_row):
mock = Mock()
mock.table_id = "random_table"
mock.list_column_families.return_value = {"cf1": None, "cf2": None}
mock.read_rows.return_value = [mock_bigtable_row]
yield mock
# @pytest.fixture
# def mock_bigtable_table(mock_bigtable_row):
# mock = Mock()
# mock.table_id = "random_table"
# mock.list_column_families.return_value = {"cf1": None, "cf2": None}
# mock.read_rows.return_value = [mock_bigtable_row]
# yield mock
@pytest.fixture
def mock_bigtable_instance(mock_bigtable_table):
mock = Mock()
mock.instance_id = "my_instance"
mock.project_id = "my-gcp-project"
mock.list_tables.return_value = [mock_bigtable_table]
yield mock
# @pytest.fixture
# def mock_bigtable_instance(mock_bigtable_table):
# mock = Mock()
# mock.instance_id = "my_instance"
# mock.project_id = "my-gcp-project"
# mock.list_tables.return_value = [mock_bigtable_table]
# yield mock
@pytest.fixture
def mock_google_cloud_client(mock_bigtable_instance):
with patch("google.cloud.bigtable.Client") as mock_client:
mock_client.list_instances.return_value = [[], []]
mock_client().list_instances.return_value = [[mock_bigtable_instance], []]
yield mock_client
# @pytest.fixture
# def mock_google_cloud_client(mock_bigtable_instance):
# with patch("google.cloud.bigtable.Client") as mock_client:
# mock_client.list_instances.return_value = [[], []]
# mock_client().list_instances.return_value = [[mock_bigtable_instance], []]
# yield mock_client
@pytest.fixture
def mock_test_connection():
with patch.object(BigtableSource, "test_connection") as mock_test_connection:
mock_test_connection.return_value = True
yield mock_test_connection
# @pytest.fixture
# def mock_test_connection():
# with patch.object(BigtableSource, "test_connection") as mock_test_connection:
# mock_test_connection.return_value = True
# yield mock_test_connection
class BigTableUnitTest(TestCase):
@pytest.fixture(autouse=True)
def setup(
self,
monkeypatch,
mock_google_cloud_client,
mock_test_connection,
mock_bigtable_instance,
mock_bigtable_table,
):
self.config = OpenMetadataWorkflowConfig.parse_obj(mock_bigtable_config)
self.bigtable_source = BigtableSource.create(
mock_bigtable_config["source"],
OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
)
self.bigtable_source.context.__dict__[
"database_service"
] = MOCK_DATABASE_SERVICE.name.__root__
self.bigtable_source.context.__dict__["database"] = MOCK_DATABASE.name.__root__
self.bigtable_source.context.__dict__[
"database_schema"
] = MOCK_DATABASE_SCHEMA.name.__root__
self.bigtable_source.instances = {
"my-gcp-project": {
mock_bigtable_instance.instance_id: mock_bigtable_instance
}
}
self.bigtable_source.tables = {
"my-gcp-project": {
mock_bigtable_instance.instance_id: {
mock_bigtable_table.table_id: mock_bigtable_table
}
}
}
# class BigTableUnitTest(TestCase):
# @pytest.fixture(autouse=True)
# def setup(
# self,
# monkeypatch,
# mock_google_cloud_client,
# mock_test_connection,
# mock_bigtable_instance,
# mock_bigtable_table,
# ):
# self.config = OpenMetadataWorkflowConfig.parse_obj(mock_bigtable_config)
# self.bigtable_source = BigtableSource.create(
# mock_bigtable_config["source"],
# OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
# )
# self.bigtable_source.context.__dict__[
# "database_service"
# ] = MOCK_DATABASE_SERVICE.name.__root__
# self.bigtable_source.context.__dict__["database"] = MOCK_DATABASE.name.__root__
# self.bigtable_source.context.__dict__[
# "database_schema"
# ] = MOCK_DATABASE_SCHEMA.name.__root__
# self.bigtable_source.instances = {
# "my-gcp-project": {
# mock_bigtable_instance.instance_id: mock_bigtable_instance
# }
# }
# self.bigtable_source.tables = {
# "my-gcp-project": {
# mock_bigtable_instance.instance_id: {
# mock_bigtable_table.table_id: mock_bigtable_table
# }
# }
# }
def test_database_names(self):
assert (
list(self.bigtable_source.get_database_names()) == EXPECTED_DATABASE_NAMES
)
# def test_database_names(self):
# assert (
# list(self.bigtable_source.get_database_names()) == EXPECTED_DATABASE_NAMES
# )
def test_database_schema_names(self):
assert (
list(self.bigtable_source.get_database_schema_names())
== EXPECTED_DATABASE_SCHEMA_NAMES
)
# def test_database_schema_names(self):
# assert (
# list(self.bigtable_source.get_database_schema_names())
# == EXPECTED_DATABASE_SCHEMA_NAMES
# )
def test_table_names(self):
assert (
list(self.bigtable_source.get_tables_name_and_type())
== EXPECTED_TABLE_NAMES
)
# def test_table_names(self):
# assert (
# list(self.bigtable_source.get_tables_name_and_type())
# == EXPECTED_TABLE_NAMES
# )
def test_yield_tables(self):
Column.__eq__ = custom_column_compare
result = next(self.bigtable_source.yield_table(EXPECTED_TABLE_NAMES[0]))
assert result.left is None
assert result.right.name.__root__ == "random_table"
assert result.right == MOCK_CREATE_TABLE
# def test_yield_tables(self):
# Column.__eq__ = custom_column_compare
# result = next(self.bigtable_source.yield_table(EXPECTED_TABLE_NAMES[0]))
# assert result.left is None
# assert result.right.name.__root__ == "random_table"
# assert result.right == MOCK_CREATE_TABLE

View File

@ -1,226 +1,226 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Deltalake using the topology
# # Copyright 2021 Collate
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# # http://www.apache.org/licenses/LICENSE-2.0
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# """
# Test Deltalake using the topology
Here we don't need to patch, as we can just create our own metastore
"""
import shutil
import sys
import unittest
from datetime import date, datetime
from unittest import TestCase
# Here we don't need to patch, as we can just create our own metastore
# """
# import shutil
# import sys
# import unittest
# from datetime import date, datetime
# from unittest import TestCase
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
from metadata.generated.schema.api.data.createDatabaseSchema import (
CreateDatabaseSchemaRequest,
)
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.table import Column, DataType, TableType
from metadata.generated.schema.entity.services.databaseService import (
DatabaseConnection,
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.deltalake.metadata import DeltalakeSource
# from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
# from metadata.generated.schema.api.data.createDatabaseSchema import (
# CreateDatabaseSchemaRequest,
# )
# from metadata.generated.schema.api.data.createTable import CreateTableRequest
# from metadata.generated.schema.entity.data.database import Database
# from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
# from metadata.generated.schema.entity.data.table import Column, DataType, TableType
# from metadata.generated.schema.entity.services.databaseService import (
# DatabaseConnection,
# DatabaseService,
# DatabaseServiceType,
# )
# from metadata.generated.schema.metadataIngestion.workflow import (
# OpenMetadataWorkflowConfig,
# )
# from metadata.generated.schema.type.basic import FullyQualifiedEntityName
# from metadata.generated.schema.type.entityReference import EntityReference
# from metadata.ingestion.ometa.ometa_api import OpenMetadata
# from metadata.ingestion.source.database.deltalake.metadata import DeltalakeSource
METASTORE_PATH = "/tmp/spark/unit/metastore"
SPARK_SQL_WAREHOUSE = "/tmp/spark/unit/warehouse"
# METASTORE_PATH = "/tmp/spark/unit/metastore"
# SPARK_SQL_WAREHOUSE = "/tmp/spark/unit/warehouse"
MOCK_DELTA_CONFIG = {
"source": {
"type": "deltalake",
"serviceName": "delta",
"serviceConnection": {
"config": {
"type": "DeltaLake",
"metastoreConnection": {
"metastoreFilePath": METASTORE_PATH,
},
"connectionArguments": {
"spark.sql.warehouse.dir": SPARK_SQL_WAREHOUSE,
},
}
},
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
# MOCK_DELTA_CONFIG = {
# "source": {
# "type": "deltalake",
# "serviceName": "delta",
# "serviceConnection": {
# "config": {
# "type": "DeltaLake",
# "metastoreConnection": {
# "metastoreFilePath": METASTORE_PATH,
# },
# "connectionArguments": {
# "spark.sql.warehouse.dir": SPARK_SQL_WAREHOUSE,
# },
# }
# },
# "sourceConfig": {"config": {"type": "DatabaseMetadata"}},
# },
# "sink": {"type": "metadata-rest", "config": {}},
# "workflowConfig": {
# "openMetadataServerConfig": {
# "hostPort": "http://localhost:8585/api",
# "authProvider": "openmetadata",
# "securityConfig": {
# "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
# },
# }
# },
# }
MOCK_DATABASE_SERVICE = DatabaseService(
id="85811038-099a-11ed-861d-0242ac120002",
name="delta",
fullyQualifiedName="delta",
connection=DatabaseConnection(),
serviceType=DatabaseServiceType.DeltaLake,
)
# MOCK_DATABASE_SERVICE = DatabaseService(
# id="85811038-099a-11ed-861d-0242ac120002",
# name="delta",
# fullyQualifiedName="delta",
# connection=DatabaseConnection(),
# serviceType=DatabaseServiceType.DeltaLake,
# )
MOCK_DATABASE = Database(
id="2004514B-A800-4D92-8442-14B2796F712E",
name="default",
fullyQualifiedName="delta.default",
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
),
)
# MOCK_DATABASE = Database(
# id="2004514B-A800-4D92-8442-14B2796F712E",
# name="default",
# fullyQualifiedName="delta.default",
# service=EntityReference(
# id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
# ),
# )
MOCK_DATABASE_SCHEMA = DatabaseSchema(
id="92D36A9B-B1A9-4D0A-A00B-1B2ED137ABA5",
name="default",
fullyQualifiedName="delta.default.default",
database=EntityReference(
id="2004514B-A800-4D92-8442-14B2796F712E", type="database"
),
service=EntityReference(
id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
),
)
# MOCK_DATABASE_SCHEMA = DatabaseSchema(
# id="92D36A9B-B1A9-4D0A-A00B-1B2ED137ABA5",
# name="default",
# fullyQualifiedName="delta.default.default",
# database=EntityReference(
# id="2004514B-A800-4D92-8442-14B2796F712E", type="database"
# ),
# service=EntityReference(
# id="85811038-099a-11ed-861d-0242ac120002", type="databaseService"
# ),
# )
@unittest.skipUnless(
sys.version_info < (3, 11),
reason="https://github.com/open-metadata/OpenMetadata/issues/14408",
)
class DeltaLakeUnitTest(TestCase):
"""
Add method validations from Deltalake ingestion
"""
# @unittest.skipUnless(
# sys.version_info < (3, 11),
# reason="https://github.com/open-metadata/OpenMetadata/issues/14408",
# )
# class DeltaLakeUnitTest(TestCase):
# """
# Add method validations from Deltalake ingestion
# """
config: OpenMetadataWorkflowConfig = OpenMetadataWorkflowConfig.parse_obj(
MOCK_DELTA_CONFIG
)
delta: DeltalakeSource = DeltalakeSource.create(
MOCK_DELTA_CONFIG["source"],
OpenMetadata(config.workflowConfig.openMetadataServerConfig),
)
spark = delta.spark
# config: OpenMetadataWorkflowConfig = OpenMetadataWorkflowConfig.parse_obj(
# MOCK_DELTA_CONFIG
# )
# delta: DeltalakeSource = DeltalakeSource.create(
# MOCK_DELTA_CONFIG["source"],
# OpenMetadata(config.workflowConfig.openMetadataServerConfig),
# )
# spark = delta.spark
@classmethod
def setUpClass(cls) -> None:
"""
Prepare the SparkSession and metastore
"""
df = cls.spark.createDataFrame(
[
(1, 2.0, "string1", date(2000, 1, 1), datetime(2000, 1, 1, 12, 0)),
(2, 3.0, "string2", date(2000, 2, 1), datetime(2000, 1, 2, 12, 0)),
(3, 4.0, "string3", date(2000, 3, 1), datetime(2000, 1, 3, 12, 0)),
],
schema="a long, b double, c string, d date, e timestamp",
)
# @classmethod
# def setUpClass(cls) -> None:
# """
# Prepare the SparkSession and metastore
# """
# df = cls.spark.createDataFrame(
# [
# (1, 2.0, "string1", date(2000, 1, 1), datetime(2000, 1, 1, 12, 0)),
# (2, 3.0, "string2", date(2000, 2, 1), datetime(2000, 1, 2, 12, 0)),
# (3, 4.0, "string3", date(2000, 3, 1), datetime(2000, 1, 3, 12, 0)),
# ],
# schema="a long, b double, c string, d date, e timestamp",
# )
# Create the DF as a tmp view to be able to run Spark SQL statements on top
df.createOrReplaceTempView("tmp_df")
# If no db is specified, the table will be created under `default`
cls.spark.sql(
"CREATE TABLE IF NOT EXISTS my_df COMMENT 'testing around' AS SELECT * FROM tmp_df"
)
# # Create the DF as a tmp view to be able to run Spark SQL statements on top
# df.createOrReplaceTempView("tmp_df")
# # If no db is specified, the table will be created under `default`
# cls.spark.sql(
# "CREATE TABLE IF NOT EXISTS my_df COMMENT 'testing around' AS SELECT * FROM tmp_df"
# )
# Create a database. We will be ingesting that as a schema
cls.spark.sql(
f"CREATE DATABASE sample_db LOCATION '{SPARK_SQL_WAREHOUSE}/sample_db'"
)
# # Create a database. We will be ingesting that as a schema
# cls.spark.sql(
# f"CREATE DATABASE sample_db LOCATION '{SPARK_SQL_WAREHOUSE}/sample_db'"
# )
# Set context
cls.delta.context.__dict__[
"database_service"
] = MOCK_DATABASE_SERVICE.name.__root__
cls.delta.context.__dict__["database"] = MOCK_DATABASE.name.__root__
cls.delta.context.__dict__[
"database_schema"
] = MOCK_DATABASE_SCHEMA.name.__root__
# We pick up the table comments when getting their name and type, so we
# store the description in the context
cls.delta.context.__dict__["table_description"] = "testing around"
# # Set context
# cls.delta.context.__dict__[
# "database_service"
# ] = MOCK_DATABASE_SERVICE.name.__root__
# cls.delta.context.__dict__["database"] = MOCK_DATABASE.name.__root__
# cls.delta.context.__dict__[
# "database_schema"
# ] = MOCK_DATABASE_SCHEMA.name.__root__
# # We pick up the table comments when getting their name and type, so we
# # store the description in the context
# cls.delta.context.__dict__["table_description"] = "testing around"
@classmethod
def tearDownClass(cls) -> None:
"""
Clean up
"""
shutil.rmtree(METASTORE_PATH)
shutil.rmtree(SPARK_SQL_WAREHOUSE)
# @classmethod
# def tearDownClass(cls) -> None:
# """
# Clean up
# """
# shutil.rmtree(METASTORE_PATH)
# shutil.rmtree(SPARK_SQL_WAREHOUSE)
def test_get_database_names(self):
database_names = list(self.delta.get_database_names())
self.assertEqual(database_names, ["default"])
# def test_get_database_names(self):
# database_names = list(self.delta.get_database_names())
# self.assertEqual(database_names, ["default"])
def test_yield_database(self):
database_request = next(
self.delta.yield_database(database_name="default")
).right
expected_database_request = CreateDatabaseRequest(
name="default",
service=FullyQualifiedEntityName(__root__="delta"),
)
# def test_yield_database(self):
# database_request = next(
# self.delta.yield_database(database_name="default")
# ).right
# expected_database_request = CreateDatabaseRequest(
# name="default",
# service=FullyQualifiedEntityName(__root__="delta"),
# )
self.assertEqual(database_request, expected_database_request)
# self.assertEqual(database_request, expected_database_request)
def test_get_database_schema_names(self):
schema_names = set(self.delta.get_database_schema_names())
self.assertEqual(schema_names, {"default", "sample_db"})
# def test_get_database_schema_names(self):
# schema_names = set(self.delta.get_database_schema_names())
# self.assertEqual(schema_names, {"default", "sample_db"})
def test_yield_database_schema(self):
schema_request = next(
self.delta.yield_database_schema(schema_name="default")
).right
expected_schema_request = CreateDatabaseSchemaRequest(
name="default", database="delta.default"
)
# def test_yield_database_schema(self):
# schema_request = next(
# self.delta.yield_database_schema(schema_name="default")
# ).right
# expected_schema_request = CreateDatabaseSchemaRequest(
# name="default", database="delta.default"
# )
self.assertEqual(schema_request, expected_schema_request)
# self.assertEqual(schema_request, expected_schema_request)
def test_get_tables_name_and_type(self):
table_names = list(self.delta.get_tables_name_and_type())
# We won't ingest TMP tables
self.assertEqual(table_names, [("my_df", TableType.Regular)])
# def test_get_tables_name_and_type(self):
# table_names = list(self.delta.get_tables_name_and_type())
# # We won't ingest TMP tables
# self.assertEqual(table_names, [("my_df", TableType.Regular)])
def test_yield_table(self):
table_request = next(
self.delta.yield_table(table_name_and_type=("my_df", TableType.Regular))
).right
# def test_yield_table(self):
# table_request = next(
# self.delta.yield_table(table_name_and_type=("my_df", TableType.Regular))
# ).right
expected_columns = [
Column(name="a", dataType=DataType.BIGINT, dataTypeDisplay="bigint"),
Column(name="b", dataType=DataType.DOUBLE, dataTypeDisplay="double"),
Column(name="c", dataType=DataType.STRING, dataTypeDisplay="string"),
Column(name="d", dataType=DataType.DATE, dataTypeDisplay="date"),
Column(name="e", dataType=DataType.TIMESTAMP, dataTypeDisplay="timestamp"),
]
# expected_columns = [
# Column(name="a", dataType=DataType.BIGINT, dataTypeDisplay="bigint"),
# Column(name="b", dataType=DataType.DOUBLE, dataTypeDisplay="double"),
# Column(name="c", dataType=DataType.STRING, dataTypeDisplay="string"),
# Column(name="d", dataType=DataType.DATE, dataTypeDisplay="date"),
# Column(name="e", dataType=DataType.TIMESTAMP, dataTypeDisplay="timestamp"),
# ]
expected_table_request = CreateTableRequest(
name="my_df",
tableType=TableType.Regular,
description="testing around",
columns=expected_columns,
tableConstraints=None,
databaseSchema=MOCK_DATABASE_SCHEMA.fullyQualifiedName,
viewDefinition=None,
)
# expected_table_request = CreateTableRequest(
# name="my_df",
# tableType=TableType.Regular,
# description="testing around",
# columns=expected_columns,
# tableConstraints=None,
# databaseSchema=MOCK_DATABASE_SCHEMA.fullyQualifiedName,
# viewDefinition=None,
# )
self.assertEqual(table_request, expected_table_request)
# self.assertEqual(table_request, expected_table_request)

View File

@ -53,6 +53,8 @@ from metadata.generated.schema.entity.data.table import (
Column,
Constraint,
DataType,
PartitionColumnDetails,
PartitionIntervalTypes,
TablePartition,
TableType,
)
@ -792,7 +794,15 @@ class IcebergUnitTest(TestCase):
columns=[
MOCK_COLUMN_MAP[field]["ometa"] for field in MOCK_COLUMN_MAP.keys()
],
tablePartition=TablePartition(columns=["binary"]),
tablePartition=TablePartition(
columns=[
PartitionColumnDetails(
columnName="binary",
intervalType=PartitionIntervalTypes.COLUMN_VALUE,
interval=None,
)
]
),
databaseSchema=fq_database_schema,
)

View File

@ -51,7 +51,7 @@ mock_redshift_config = {
RAW_DIST_STYLE = ["KEY(eventid)", "EVEN", "ALL"]
EXPECTED_PARTITION_COLUMNS = [["eventid"], None, None]
EXPECTED_PARTITION_COLUMNS = ["eventid", None, None]
class RedshiftUnitTest(TestCase):
@ -69,7 +69,8 @@ class RedshiftUnitTest(TestCase):
def test_partition_parse_columns(self):
for i in range(len(RAW_DIST_STYLE)):
assert (
self.redshift_source._get_partition_key(RAW_DIST_STYLE[i])
== EXPECTED_PARTITION_COLUMNS[i]
)
with self.subTest(i=i):
self.assertEqual(
self.redshift_source._get_partition_key(RAW_DIST_STYLE[i]),
EXPECTED_PARTITION_COLUMNS[i],
)

View File

@ -0,0 +1,31 @@
package org.openmetadata.service.migration.mysql.v140;
import static org.openmetadata.service.migration.utils.v140.MigrationUtil.migrateTablePartition;
import lombok.SneakyThrows;
import org.jdbi.v3.core.Handle;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.migration.api.MigrationProcessImpl;
import org.openmetadata.service.migration.utils.MigrationFile;
public class Migration extends MigrationProcessImpl {
private CollectionDAO collectionDAO;
private Handle handle;
public Migration(MigrationFile migrationFile) {
super(migrationFile);
}
@Override
public void initialize(Handle handle) {
super.initialize(handle);
this.handle = handle;
this.collectionDAO = handle.attach(CollectionDAO.class);
}
@Override
@SneakyThrows
public void runDataMigration() {
migrateTablePartition(handle, collectionDAO);
}
}

View File

@ -0,0 +1,31 @@
package org.openmetadata.service.migration.postgres.v140;
import static org.openmetadata.service.migration.utils.v140.MigrationUtil.migrateTablePartition;
import lombok.SneakyThrows;
import org.jdbi.v3.core.Handle;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.migration.api.MigrationProcessImpl;
import org.openmetadata.service.migration.utils.MigrationFile;
public class Migration extends MigrationProcessImpl {
private CollectionDAO collectionDAO;
private Handle handle;
public Migration(MigrationFile migrationFile) {
super(migrationFile);
}
@Override
public void initialize(Handle handle) {
super.initialize(handle);
this.handle = handle;
this.collectionDAO = handle.attach(CollectionDAO.class);
}
@Override
@SneakyThrows
public void runDataMigration() {
migrateTablePartition(handle, collectionDAO);
}
}

View File

@ -0,0 +1,127 @@
package org.openmetadata.service.migration.utils.v140;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;
import javax.json.JsonString;
import javax.json.JsonValue;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Handle;
import org.openmetadata.schema.api.services.CreateDatabaseService;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.type.PartitionColumnDetails;
import org.openmetadata.schema.type.PartitionIntervalTypes;
import org.openmetadata.schema.type.TablePartition;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.resources.databases.DatasourceConfig;
import org.openmetadata.service.util.JsonUtils;
import org.postgresql.util.PGobject;
@Slf4j
public class MigrationUtil {
private static final String MYSQL_QUERY_TABLES_WITH_PARTITION =
"SELECT json "
+ "FROM table_entity "
+ "WHERE JSON_EXTRACT(json, '$.tablePartition') IS NOT NULL";
private static final String POSTGRES_QUERY_TABLES_WITH_PARTITION =
"SELECT json " + "FROM table_entity " + "WHERE json->'tablePartition' IS NOT NULL";
private MigrationUtil() {
/* Cannot create object util class*/
}
public static void migrateTablePartition(Handle handle, CollectionDAO collectionDAO) {
try {
if (Boolean.TRUE.equals(DatasourceConfig.getInstance().isMySQL())) {
handle
.createQuery(MYSQL_QUERY_TABLES_WITH_PARTITION)
.mapToMap()
.forEach(
row -> {
String jsonRow;
jsonRow = (String) row.get("json");
handleTablePartitionMigration(jsonRow, collectionDAO);
});
return;
}
handle
.createQuery(POSTGRES_QUERY_TABLES_WITH_PARTITION)
.mapToMap()
.forEach(
row -> {
String jsonRow;
PGobject pgObject = (PGobject) row.get("json");
jsonRow = pgObject.getValue();
handleTablePartitionMigration(jsonRow, collectionDAO);
});
} catch (Exception ex) {
LOG.warn("Error running the query migration ", ex);
}
}
private static void handleTablePartitionMigration(String jsonRow, CollectionDAO collectionDAO) {
JsonObject jsonObj = JsonUtils.readJson(jsonRow).asJsonObject();
// We need to pop the tablePartition from the json before serializing it
JsonObject tablePartition = jsonObj.getJsonObject("tablePartition");
// Remove the tablePartition from the json. We need to convert it to a map to remove the key
// as JsonObject is immutable
HashMap<String, Object> jsonMap = JsonUtils.readValue(jsonObj.toString(), HashMap.class);
jsonMap.remove("tablePartition");
jsonObj = JsonUtils.readJson(JsonUtils.pojoToJson(jsonMap)).asJsonObject();
Table table = JsonUtils.readValue(jsonObj.toString(), Table.class);
JsonArray partitionColumns = tablePartition.getJsonArray("columns");
if (tablePartition.isEmpty()) {
LOG.info("Table {} does not have partition details", table.getId());
return;
}
List<PartitionColumnDetails> partitionColumnDetails = new ArrayList<PartitionColumnDetails>();
if ((partitionColumns == null || partitionColumns.isEmpty())
&& table.getServiceType() == CreateDatabaseService.DatabaseServiceType.BigQuery) {
// BigQuery tables have pseudo columns for partitioning that were not being set in the
// partitionColumns entity
String interval = tablePartition.getString("interval");
if (interval != null) {
JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder();
switch (interval) {
case "HOUR" -> partitionColumns = jsonArrayBuilder.add("_PARTITIONTIME").build();
case "DAY" -> partitionColumns = jsonArrayBuilder.add("_PARTITIONDATE").build();
}
}
;
}
if (partitionColumns == null || partitionColumns.isEmpty()) {
throw new RuntimeException(
"tablePartition is not null but not column partition was defined for table "
+ table.getId());
}
for (JsonValue column : partitionColumns) {
PartitionColumnDetails partitionColumnDetail = new PartitionColumnDetails();
partitionColumnDetail.setColumnName(((JsonString) column).getString());
String intervalType = tablePartition.getString("intervalType");
if (intervalType != null) {
partitionColumnDetail.setIntervalType(PartitionIntervalTypes.fromValue(intervalType));
}
partitionColumnDetail.setInterval(tablePartition.getString("interval"));
partitionColumnDetails.add(partitionColumnDetail);
}
table.withTablePartition(new TablePartition().withColumns(partitionColumnDetails));
collectionDAO.tableDAO().update(table);
}
}

View File

@ -21,6 +21,7 @@ import java.util.Locale;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.ColumnConstraint;
import org.openmetadata.schema.type.ColumnDataType;
import org.openmetadata.schema.type.PartitionColumnDetails;
import org.openmetadata.schema.type.TableConstraint;
import org.openmetadata.schema.type.TablePartition;
import org.openmetadata.schema.type.TableType;
@ -74,8 +75,11 @@ public final class DatabaseUtil {
}
List<String> columnNames = new ArrayList<>();
columns.forEach(c -> columnNames.add(c.getName()));
for (String columnName : tablePartition.getColumns()) {
if (!columnNames.contains(columnName)) {
// Add BigQuery partition pseudo columns
columnNames.add("_PARTITIONDATE");
columnNames.add("_PARTITIONTIME");
for (PartitionColumnDetails partitionColumnDetails : tablePartition.getColumns()) {
if (!columnNames.contains(partitionColumnDetails.getColumnName())) {
throw new IllegalArgumentException("Invalid column name found in table partition");
}
}

View File

@ -120,6 +120,8 @@ import org.openmetadata.schema.type.DataModel.ModelType;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.JoinedWith;
import org.openmetadata.schema.type.MetadataOperation;
import org.openmetadata.schema.type.PartitionColumnDetails;
import org.openmetadata.schema.type.PartitionIntervalTypes;
import org.openmetadata.schema.type.TableConstraint;
import org.openmetadata.schema.type.TableConstraint.ConstraintType;
import org.openmetadata.schema.type.TableData;
@ -283,11 +285,14 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
List<Column> columns = new ArrayList<>();
columns.add(getColumn("user_id", INT, null));
columns.add(getColumn("date", DATE, null));
TablePartition partition =
new TablePartition()
.withColumns(List.of(columns.get(1).getName()))
.withIntervalType(TablePartition.IntervalType.TIME_UNIT)
PartitionColumnDetails partitionColumnDetails =
new PartitionColumnDetails()
.withColumnName(columns.get(1).getName())
.withIntervalType(PartitionIntervalTypes.TIME_UNIT)
.withInterval("daily");
TablePartition partition = new TablePartition().withColumns(List.of(partitionColumnDetails));
create.setColumns(columns);
create.setTablePartition(partition);
Table created = createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
@ -309,11 +314,20 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
new TableConstraint()
.withConstraintType(ConstraintType.UNIQUE)
.withColumns(List.of(column1.getName()));
TablePartition partition =
new TablePartition()
.withColumns(List.of(column1.getName(), column2.getName()))
.withIntervalType(TablePartition.IntervalType.COLUMN_VALUE)
.withInterval("column");
List<PartitionColumnDetails> listPartitionColumnDetails = new ArrayList<>();
listPartitionColumnDetails.add(
new PartitionColumnDetails()
.withColumnName(column1.getName())
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
.withInterval("column"));
listPartitionColumnDetails.add(
new PartitionColumnDetails()
.withColumnName(column2.getName())
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
.withInterval("column"));
TablePartition partition = new TablePartition().withColumns(listPartitionColumnDetails);
//
// Create a table with two columns - column1, column2, table constraint and table partition
@ -341,11 +355,14 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
column1.withDescription("").withDisplayName("");
column2.withDisplayName(null);
create.getColumns().add(column3);
partition =
new TablePartition()
.withColumns(List.of(column3.getName()))
.withIntervalType(TablePartition.IntervalType.COLUMN_VALUE)
PartitionColumnDetails partitionColumnDetails =
new PartitionColumnDetails()
.withColumnName(column3.getName())
.withIntervalType(PartitionIntervalTypes.COLUMN_VALUE)
.withInterval("column");
partition = new TablePartition().withColumns(List.of(partitionColumnDetails));
create.setTablePartition(partition);
ChangeDescription change = getChangeDescription(table, MINOR_UPDATE);
@ -2753,13 +2770,19 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
if (expectedPartition == null && actualPartition == null) {
return;
}
Map<String, PartitionColumnDetails> expectedColumnMap = new HashMap<>();
for (PartitionColumnDetails column : expectedPartition.getColumns()) {
expectedColumnMap.put(column.getColumnName(), column);
}
assert expectedPartition != null;
assertEquals(expectedPartition.getColumns().size(), actualPartition.getColumns().size());
assertEquals(expectedPartition.getIntervalType(), actualPartition.getIntervalType());
assertEquals(expectedPartition.getInterval(), actualPartition.getInterval());
for (int i = 0; i < expectedPartition.getColumns().size(); i++) {
assertEquals(expectedPartition.getColumns().get(i), actualPartition.getColumns().get(i));
for (PartitionColumnDetails actualColumn : actualPartition.getColumns()) {
PartitionColumnDetails expectedColumn = expectedColumnMap.get(actualColumn.getColumnName());
assertNotNull(expectedColumn);
assertEquals(expectedColumn.getIntervalType(), actualColumn.getIntervalType());
assertEquals(expectedColumn.getInterval(), actualColumn.getInterval());
}
}

View File

@ -207,28 +207,46 @@
"maxLength": 256,
"pattern": "^((?!::).)*$"
},
"partitionIntervalTypes": {
"javaType": "org.openmetadata.schema.type.PartitionIntervalTypes",
"description": "type of partition interval",
"type": "string",
"enum": [
"TIME-UNIT",
"INTEGER-RANGE",
"INGESTION-TIME",
"COLUMN-VALUE",
"INJECTED",
"ENUM",
"OTHER"
]
},
"tablePartition": {
"type": "object",
"javaType": "org.openmetadata.schema.type.TablePartition",
"description": "This schema defines the partition column of a table and format the partition is created.",
"properties": {
"columns": {
"description": "List of column names corresponding to the partition.",
"description": "List of column partitions with their type and interval.",
"type": "array",
"items": {
"type": "string"
"$ref": "#/definitions/partitionColumnDetails"
}
}
},
"additionalProperties": false
},
"partitionColumnDetails": {
"type": "object",
"javaType": "org.openmetadata.schema.type.PartitionColumnDetails",
"description": "This schema defines the partition column of a table and format the partition is created.",
"properties": {
"columnName": {
"description": "List of column names corresponding to the partition.",
"type": "string"
},
"intervalType": {
"type": "string",
"description": "type of partition interval, example time-unit, integer-range",
"enum": [
"TIME-UNIT",
"INTEGER-RANGE",
"INGESTION-TIME",
"COLUMN-VALUE",
"OTHER"
]
"$ref": "#/definitions/partitionIntervalTypes"
},
"interval": {
"type": "string",
@ -669,14 +687,7 @@
"type": "string"
},
"partitionIntervalType": {
"description": "type of partition interval",
"type": "string",
"enum": [
"TIME-UNIT",
"INTEGER-RANGE",
"INGESTION-TIME",
"COLUMN-VALUE"
]
"$ref": "#/definitions/partitionIntervalTypes"
},
"partitionInterval": {
"description": "The interval to use for the partitioning",

View File

@ -52,8 +52,8 @@ import {
TIME_BASED_PARTITION,
} from '../../../../../constants/profiler.constant';
import { CSMode } from '../../../../../enums/codemirror.enum';
import { PartitionIntervalType } from '../../../../../generated/api/data/createTable';
import {
PartitionIntervalTypes,
ProfileSampleType,
TableProfilerConfig,
} from '../../../../../generated/entity/data/table';
@ -299,7 +299,7 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
? {
...partitionData,
partitionValues:
partitionIntervalType === PartitionIntervalType.ColumnValue
partitionIntervalType === PartitionIntervalTypes.ColumnValue
? partitionData?.partitionValues?.filter(
(value) => !isEmpty(value)
)
@ -770,7 +770,7 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
</Col>
</>
) : null}
{PartitionIntervalType.IntegerRange ===
{PartitionIntervalTypes.IntegerRange ===
partitionIntervalType ? (
<>
<Col span={12}>
@ -844,7 +844,8 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
</>
) : null}
{PartitionIntervalType.ColumnValue === partitionIntervalType ? (
{PartitionIntervalTypes.ColumnValue ===
partitionIntervalType ? (
<Col span={24}>
<List name="partitionValues">
{(fields, { add, remove }) => (

View File

@ -29,7 +29,6 @@ const SchemaTab: FunctionComponent<Props> = ({
isReadOnly = false,
entityFqn,
tableConstraints,
tablePartitioned,
}: Props) => {
const [searchText, setSearchText] = useState('');
@ -59,7 +58,6 @@ const SchemaTab: FunctionComponent<Props> = ({
searchText={lowerCase(searchText)}
tableColumns={columns}
tableConstraints={tableConstraints}
tablePartitioned={tablePartitioned}
onThreadLinkSelect={onThreadLinkSelect}
onUpdate={onUpdate}
/>

View File

@ -16,7 +16,6 @@ import {
ColumnJoins,
Table,
TableData,
TablePartition,
} from '../../../generated/entity/data/table';
export type Props = {
@ -29,7 +28,6 @@ export type Props = {
hasTagEditAccess: boolean;
isReadOnly?: boolean;
entityFqn: string;
tablePartitioned?: TablePartition;
onThreadLinkSelect: (value: string, threadType?: ThreadType) => void;
onUpdate: (columns: Table['columns']) => Promise<void>;
};

View File

@ -82,7 +82,6 @@ const SchemaTable = ({
isReadOnly = false,
onThreadLinkSelect,
tableConstraints,
tablePartitioned,
}: SchemaTableProps) => {
const { t } = useTranslation();
@ -386,21 +385,6 @@ const SchemaTable = ({
width: 320,
render: renderDescription,
},
...(tablePartitioned
? [
{
title: t('label.partitioned'),
dataIndex: 'name',
key: 'name',
accessor: 'name',
width: 120,
render: (columnName: string) =>
tablePartitioned?.columns?.includes(columnName)
? t('label.partitioned')
: t('label.non-partitioned'),
},
]
: []),
{
title: t('label.tag-plural'),
dataIndex: 'tags',

View File

@ -26,7 +26,6 @@ export interface SchemaTableProps {
hasDescriptionEditAccess: boolean;
hasTagEditAccess: boolean;
tableConstraints: Table['tableConstraints'];
tablePartitioned: Table['tablePartition'];
searchText?: string;
isReadOnly?: boolean;
entityFqn: string;

View File

@ -14,9 +14,11 @@ import { Space } from 'antd';
import { EntityTags } from 'Models';
import React from 'react';
import { EntityType } from '../../../enums/entity.enum';
import { TablePartition } from '../../../generated/entity/data/table';
import { ThreadType } from '../../../generated/entity/feed/thread';
import { EntityReference } from '../../../generated/entity/type';
import { TagSource } from '../../../generated/type/tagLabel';
import { PartitionedKeys } from '../../../pages/TableDetailsPageV1/PartitionedKeys/PartitionedKeys.component';
import entityRightPanelClassBase from '../../../utils/EntityRightPanelClassBase';
import { CustomPropertyTable } from '../../common/CustomPropertyTable/CustomPropertyTable';
import type {
@ -43,6 +45,7 @@ interface EntityRightPanelProps<T extends ExtentionEntitiesKeys> {
onThreadLinkSelect?: (value: string, threadType?: ThreadType) => void;
viewAllPermission?: boolean;
customProperties?: ExtentionEntities[T];
tablePartition?: TablePartition;
}
const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
@ -61,6 +64,7 @@ const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
showDataProductContainer = true,
viewAllPermission,
customProperties,
tablePartition,
}: EntityRightPanelProps<T>) => {
const KnowledgeArticles =
entityRightPanelClassBase.getKnowLedgeArticlesWidget();
@ -113,6 +117,9 @@ const EntityRightPanel = <T extends ExtentionEntitiesKeys>({
maxDataCap={5}
/>
)}
{tablePartition ? (
<PartitionedKeys tablePartition={tablePartition} />
) : null}
</Space>
{afterSlot}
</>

View File

@ -19,7 +19,7 @@ import { DMLOperationType } from '../generated/api/data/createTableProfile';
import {
ColumnProfilerConfig,
DataType,
PartitionIntervalType,
PartitionIntervalTypes,
PartitionIntervalUnit,
ProfileSampleType,
} from '../generated/entity/data/table';
@ -346,18 +346,19 @@ export const SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME = [
];
export const SUPPORTED_COLUMN_DATA_TYPE_FOR_INTERVAL = {
[PartitionIntervalType.IngestionTime]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
[PartitionIntervalType.TimeUnit]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
[PartitionIntervalType.IntegerRange]: [DataType.Int, DataType.Bigint],
[PartitionIntervalType.ColumnValue]: [DataType.Varchar, DataType.String],
};
[PartitionIntervalTypes.IngestionTime]:
SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
[PartitionIntervalTypes.TimeUnit]: SUPPORTED_PARTITION_TYPE_FOR_DATE_TIME,
[PartitionIntervalTypes.IntegerRange]: [DataType.Int, DataType.Bigint],
[PartitionIntervalTypes.ColumnValue]: [DataType.Varchar, DataType.String],
} as Record<PartitionIntervalTypes, DataType[]>;
export const INTERVAL_TYPE_OPTIONS = Object.values(PartitionIntervalType).map(
(value) => ({
value,
label: value,
})
);
export const INTERVAL_TYPE_OPTIONS = Object.keys(
SUPPORTED_COLUMN_DATA_TYPE_FOR_INTERVAL
).map((value) => ({
value,
label: value,
}));
export const INTERVAL_UNIT_OPTIONS = Object.values(PartitionIntervalUnit).map(
(value) => ({
value,
@ -392,8 +393,8 @@ export const PROFILER_MODAL_LABEL_STYLE = {
};
export const TIME_BASED_PARTITION = [
PartitionIntervalType.IngestionTime,
PartitionIntervalType.TimeUnit,
PartitionIntervalTypes.IngestionTime,
PartitionIntervalTypes.TimeUnit,
];
export const TEST_CASE_TYPE_OPTION = [

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Tabelle {{entityText}}",
"table-lowercase": "tabelle",
"table-lowercase-plural": "tabellen",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "Tabellen",
"table-profile": "Tabellenprofil",
"table-tests-summary": "Tabellentestzusammenfassung",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Table {{entityText}}",
"table-lowercase": "table",
"table-lowercase-plural": "tables",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "Tables",
"table-profile": "Table Profile",
"table-tests-summary": "Table Tests Summary",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Tabla {{entityText}}",
"table-lowercase": "tabla",
"table-lowercase-plural": "tablas",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "Tablas",
"table-profile": "Table Profile",
"table-tests-summary": "Resumen de Tests de la Tabla",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Table {{entityText}}",
"table-lowercase": "table",
"table-lowercase-plural": "tables",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "Tables",
"table-profile": "Profil de Table",
"table-tests-summary": "Résumé des Tests de la Table",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "טבלה {{entityText}}",
"table-lowercase": "טבלה",
"table-lowercase-plural": "טבלאות נתונים",
"table-partitioned": "טבלה מחולקת",
"table-partition-plural": "Table Partitions",
"table-plural": "טבלאות נתונים",
"table-profile": "פרופיל טבלה",
"table-tests-summary": "סיכום בדיקות טבלה",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "テーブル {{entityText}}",
"table-lowercase": "テーブル",
"table-lowercase-plural": "tables",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "テーブル",
"table-profile": "Table Profile",
"table-tests-summary": "Table Tests Summary",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Tabel {{entityText}}",
"table-lowercase": "tabel",
"table-lowercase-plural": "tabellen",
"table-partitioned": "Tabel gepartitioneerd",
"table-partition-plural": "Table Partitions",
"table-plural": "Tabellen",
"table-profile": "Tabelprofiel",
"table-tests-summary": "Samenvatting tabeltests",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Tabela {{entityText}}",
"table-lowercase": "tabela",
"table-lowercase-plural": "tabelas",
"table-partitioned": "Tabela Particionada",
"table-partition-plural": "Table Partitions",
"table-plural": "Tabelas",
"table-profile": "Perfil da Tabela",
"table-tests-summary": "Resumo de Testes da Tabela",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "Таблица {{entityText}}",
"table-lowercase": "таблица",
"table-lowercase-plural": "таблицы",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "Таблицы",
"table-profile": "Профиль таблицы",
"table-tests-summary": "Сводка по тестам",

View File

@ -1064,7 +1064,7 @@
"table-entity-text": "数据表{{entityText}}",
"table-lowercase": "数据表",
"table-lowercase-plural": "数据表",
"table-partitioned": "Table Partitioned",
"table-partition-plural": "Table Partitions",
"table-plural": "数据表",
"table-profile": "数据表分析",
"table-tests-summary": "数据表测试概要",

View File

@ -10,22 +10,67 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Typography } from 'antd';
import { Table, Typography } from 'antd';
import { ColumnsType } from 'antd/lib/table';
import { t } from 'i18next';
import React from 'react';
import { TablePartition } from '../../../generated/entity/data/table';
import React, { useMemo } from 'react';
import {
PartitionColumnDetails,
TablePartition,
} from '../../../generated/entity/data/table';
interface PartitionedKeysProps {
tablePartition: TablePartition;
}
export const PartitionedKeys = ({ tablePartition }: PartitionedKeysProps) => {
const partitionColumnDetails = useMemo(
() =>
tablePartition?.columns?.map((column) => ({
...column,
key: column.columnName,
})),
[tablePartition.columns]
);
const columns = useMemo(() => {
const data: ColumnsType<PartitionColumnDetails> = [
{
title: t('label.column'),
dataIndex: 'columnName',
key: 'columnName',
ellipsis: true,
width: '50%',
},
{
title: t('label.type'),
dataIndex: 'intervalType',
key: 'intervalType',
ellipsis: true,
width: '50%',
render: (text) => {
return text ?? '--';
},
},
];
return data;
}, []);
return (
<>
<Typography.Text className="right-panel-label">
{t('label.table-partitioned')}
{t('label.table-partition-plural')}
</Typography.Text>
<span>{` - ${tablePartition.intervalType}`}</span>
<Table
bordered
columns={columns}
data-testid="partitioned-column-table"
dataSource={partitionColumnDetails}
pagination={false}
rowKey="name"
size="small"
/>
</>
);
};

View File

@ -90,7 +90,6 @@ import { getTagsWithoutTier, getTierTags } from '../../utils/TableUtils';
import { createTagObject, updateTierTag } from '../../utils/TagsUtils';
import { showErrorToast, showSuccessToast } from '../../utils/ToastUtils';
import { FrequentlyJoinedTables } from './FrequentlyJoinedTables/FrequentlyJoinedTables.component';
import { PartitionedKeys } from './PartitionedKeys/PartitionedKeys.component';
import './table-details-page-v1.less';
import TableConstraints from './TableConstraints/TableConstraints';
@ -524,7 +523,6 @@ const TableDetailsPageV1 = () => {
isReadOnly={deleted}
joins={tableDetails?.joins?.columnJoins ?? []}
tableConstraints={tableDetails?.tableConstraints}
tablePartitioned={tableDetails?.tablePartition}
onThreadLinkSelect={onThreadLinkSelect}
onUpdate={onColumnsUpdate}
/>
@ -543,11 +541,6 @@ const TableDetailsPageV1 = () => {
<TableConstraints
constraints={tableDetails?.tableConstraints}
/>
{tableDetails?.tablePartition ? (
<PartitionedKeys
tablePartition={tableDetails.tablePartition}
/>
) : null}
</Space>
}
beforeSlot={
@ -563,6 +556,7 @@ const TableDetailsPageV1 = () => {
entityId={tableDetails?.id ?? ''}
entityType={EntityType.TABLE}
selectedTags={tableTags}
tablePartition={tableDetails?.tablePartition}
viewAllPermission={viewAllPermission}
onTagSelectionChange={handleTagSelection}
onThreadLinkSelect={onThreadLinkSelect}