2022-11-10 10:54:31 +01:00
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Partition utility tests """
from typing import Optional
2024-02-28 15:20:59 +01:00
import pytest
2022-11-10 10:54:31 +01:00
from pydantic import BaseModel
from metadata . generated . schema . entity . data . table import (
2024-02-28 07:11:00 +01:00
PartitionColumnDetails ,
PartitionIntervalTypes ,
2022-11-10 10:54:31 +01:00
PartitionIntervalUnit ,
PartitionProfilerConfig ,
TablePartition ,
TableProfilerConfig ,
)
from metadata . generated . schema . entity . services . databaseService import (
DatabaseServiceType ,
)
2024-11-19 08:10:45 +01:00
from metadata . sampler . partition import get_partition_details
2022-11-10 10:54:31 +01:00
class MockTable(BaseModel):
    """Lightweight stand-in for a table entity on a BigQuery service.

    Declares only ``tablePartition``, ``tableProfilerConfig`` and
    ``serviceType``; the first two default to ``None`` so each test can set
    just the piece it exercises.
    """

    tablePartition: Optional[TablePartition] = None
    tableProfilerConfig: Optional[TableProfilerConfig] = None
    serviceType: DatabaseServiceType = DatabaseServiceType.BigQuery

    class Config:
        # Allow field types pydantic has no built-in validator for.
        arbitrary_types_allowed = True
class MockRedshiftTable(BaseModel):
    """Lightweight stand-in for a table entity on a Redshift service.

    Declares only ``tablePartition``, ``tableProfilerConfig`` and
    ``serviceType``; the first two default to ``None`` so each test can set
    just the piece it exercises.
    """

    tablePartition: Optional[TablePartition] = None
    tableProfilerConfig: Optional[TableProfilerConfig] = None
    serviceType: DatabaseServiceType = DatabaseServiceType.Redshift

    class Config:
        # Allow field types pydantic has no built-in validator for.
        arbitrary_types_allowed = True
2024-02-28 15:20:59 +01:00
class MockAthenaTable(BaseModel):
    """Lightweight stand-in for a table entity on an Athena service.

    Declares only ``tablePartition``, ``tableProfilerConfig`` and
    ``serviceType``; the first two default to ``None`` so each test can set
    just the piece it exercises.
    """

    tablePartition: Optional[TablePartition] = None
    tableProfilerConfig: Optional[TableProfilerConfig] = None
    serviceType: DatabaseServiceType = DatabaseServiceType.Athena

    class Config:
        # Allow field types pydantic has no built-in validator for.
        arbitrary_types_allowed = True
2022-11-10 10:54:31 +01:00
def test_get_partition_details():
    """Test get_partition_details for three table setups.

    1. Redshift mock with an explicit profiler partitioning config.
    2. BigQuery mock with an ingestion-time partition at HOUR interval.
    3. BigQuery mock with an ingestion-time partition at DAY interval.
    """
    # Scenario 1: partitioning comes from the table profiler config. The
    # interval type and unit are passed as plain strings and are expected to
    # come back as the matching enum members.
    table_entity = MockRedshiftTable(
        tableProfilerConfig=TableProfilerConfig(
            partitioning=PartitionProfilerConfig(
                enablePartitioning=True,
                partitionColumnName="order_date",
                partitionIntervalType="TIME-UNIT",
                partitionInterval=5,
                partitionIntervalUnit="YEAR",
                partitionValues=None,
            )
        )
    )  # type: ignore

    partition = get_partition_details(table_entity)

    assert partition.enablePartitioning is True
    assert partition.partitionColumnName == "order_date"
    assert partition.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
    assert partition.partitionInterval == 5
    assert partition.partitionIntervalUnit == PartitionIntervalUnit.YEAR

    # Scenario 2: no profiler config, ingestion-time partition with an HOUR
    # interval. The expected column is "_PARTITIONTIME", not the declared
    # column name "e".
    table_entity = MockTable(
        tablePartition=TablePartition(
            columns=[
                PartitionColumnDetails(
                    columnName="e",
                    intervalType=PartitionIntervalTypes.INGESTION_TIME,
                    interval="HOUR",
                )
            ]
        ),
        tableProfilerConfig=None,
    )

    partition = get_partition_details(table_entity)

    assert partition.enablePartitioning is True
    assert partition.partitionColumnName == "_PARTITIONTIME"
    assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
    assert partition.partitionInterval == 1
    assert partition.partitionIntervalUnit == PartitionIntervalUnit.HOUR

    # Scenario 3: same as above with a DAY interval; the expected column is
    # "_PARTITIONDATE".
    table_entity = MockTable(
        tablePartition=TablePartition(
            columns=[
                PartitionColumnDetails(
                    columnName="e",
                    intervalType=PartitionIntervalTypes.INGESTION_TIME,
                    interval="DAY",
                )
            ]
        ),
        tableProfilerConfig=None,
    )

    partition = get_partition_details(table_entity)

    assert partition.enablePartitioning is True
    assert partition.partitionColumnName == "_PARTITIONDATE"
    assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
    assert partition.partitionInterval == 1
    assert partition.partitionIntervalUnit == PartitionIntervalUnit.DAY
2024-02-28 15:20:59 +01:00
def test_athena_injected_partition():
    """Test injected partitioning for an Athena table.

    First checks that get_partition_details raises RuntimeError when the
    table has an INJECTED partition column but no profiler config, then that
    it returns the profiler-config partitioning once one is attached.
    """
    entity = MockAthenaTable(
        tablePartition=TablePartition(
            columns=[
                PartitionColumnDetails(
                    columnName="e",
                    intervalType=PartitionIntervalTypes.INJECTED,
                    interval=None,
                )
            ]
        ),
        tableProfilerConfig=None,
    )

    with pytest.raises(
        RuntimeError,
        match=(
            "Table profiler config is missing for table with injected partitioning. "
            "Please define the partitioning in the table profiler config for column e"
        ),
    ):
        # As athena table has injected partitioning, it should raise an error
        # since we have not provided any partitioning details for the injected partition
        get_partition_details(entity)

    # Attach an explicit column-value partitioning config and retry on the
    # same entity.
    profiler_config = TableProfilerConfig(
        partitioning=PartitionProfilerConfig(
            enablePartitioning=True,
            partitionColumnName="e",
            partitionIntervalType="COLUMN-VALUE",
            partitionValues=["red"],
        )
    )
    entity.tableProfilerConfig = profiler_config

    partition = get_partition_details(entity)

    assert partition.enablePartitioning is True
    assert partition.partitionColumnName == "e"
    assert partition.partitionIntervalType == PartitionIntervalTypes.COLUMN_VALUE
    assert partition.partitionValues == ["red"]