2025-04-03 10:39:47 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								#  Copyright 2025 Collate  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  Licensed under the Collate Community License, Version 1.0 (the "License");  
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								#  you may not use this file except in compliance with the License.  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  You may obtain a copy of the License at  
						 
					
						
							
								
									
										
										
										
											2025-04-03 10:39:47 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								#  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE  
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								#  Unless required by applicable law or agreed to in writing, software  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  distributed under the License is distributed on an "AS IS" BASIS,  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  See the License for the specific language governing permissions and  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#  limitations under the License.  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								""" Partition utility tests """  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  typing  import  Optional  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-28 15:20:59 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  pytest  
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  pydantic  import  BaseModel  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  metadata . generated . schema . entity . data . table  import  (  
						 
					
						
							
								
									
										
										
										
											2024-02-28 07:11:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    PartitionColumnDetails , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    PartitionIntervalTypes , 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    PartitionIntervalUnit , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    PartitionProfilerConfig , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    TablePartition , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    TableProfilerConfig , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  metadata . generated . schema . entity . services . databaseService  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    DatabaseServiceType , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2024-11-19 08:10:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  metadata . sampler . partition  import  get_partition_details  
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class MockTable(BaseModel):
								    """Minimal table stand-in handed to ``get_partition_details`` in these tests.

								    It only carries optional partition metadata, an optional profiler
								    configuration and a service type that defaults to BigQuery.
								    """

								    # Partition metadata of the mocked table; unset unless a test provides it.
								    tablePartition: Optional[TablePartition] = None
								    # Profiler configuration of the mocked table; unset unless a test provides it.
								    tableProfilerConfig: Optional[TableProfilerConfig] = None
								    # Service type reported by the mocked table.
								    serviceType: DatabaseServiceType = DatabaseServiceType.BigQuery

								    class Config:
								        # Pydantic setting: accept field types without a built-in validator.
								        arbitrary_types_allowed = True
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class MockRedshiftTable(BaseModel):
								    """Minimal table stand-in whose service type defaults to Redshift.

								    Used by the tests in this module as the argument of
								    ``get_partition_details``; both partition metadata and profiler
								    configuration are optional.
								    """

								    # Partition metadata of the mocked table; unset unless a test provides it.
								    tablePartition: Optional[TablePartition] = None
								    # Profiler configuration of the mocked table; unset unless a test provides it.
								    tableProfilerConfig: Optional[TableProfilerConfig] = None
								    # Service type reported by the mocked table.
								    serviceType: DatabaseServiceType = DatabaseServiceType.Redshift

								    class Config:
								        # Pydantic setting: accept field types without a built-in validator.
								        arbitrary_types_allowed = True
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-28 15:20:59 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class MockAthenaTable(BaseModel):
								    """Minimal table stand-in whose service type defaults to Athena.

								    Used by the injected-partition test in this module as the argument
								    of ``get_partition_details``; both partition metadata and profiler
								    configuration are optional.
								    """

								    # Partition metadata of the mocked table; unset unless a test provides it.
								    tablePartition: Optional[TablePartition] = None
								    # Profiler configuration of the mocked table; unset unless a test provides it.
								    tableProfilerConfig: Optional[TableProfilerConfig] = None
								    # Service type reported by the mocked table.
								    serviceType: DatabaseServiceType = DatabaseServiceType.Athena

								    class Config:
								        # Pydantic setting: accept field types without a built-in validator.
								        arbitrary_types_allowed = True
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-11-10 10:54:31 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								def test_get_partition_details():
								    """Check ``get_partition_details`` against three mocked tables.

								    1. A Redshift mock whose profiler config declares the partitioning:
								       the returned details are expected to carry the configured values.
								    2. A ``MockTable`` (BigQuery service type by default) partitioned by
								       ingestion time with an ``HOUR`` interval and no profiler config:
								       the ``_PARTITIONTIME`` column with a 1 ``HOUR`` interval is expected.
								    3. The same setup with a ``DAY`` interval: the ``_PARTITIONDATE``
								       column with a 1 ``DAY`` interval is expected.
								    """
								    # Scenario 1: partitioning declared through the table profiler config.
								    table_entity = MockRedshiftTable(
								        tableProfilerConfig=TableProfilerConfig(
								            partitioning=PartitionProfilerConfig(
								                enablePartitioning=True,
								                partitionColumnName="order_date",
								                partitionIntervalType="TIME-UNIT",
								                partitionInterval=5,
								                partitionIntervalUnit="YEAR",
								                partitionValues=None,
								            )
								        )
								    )  # type: ignore

								    partition = get_partition_details(table_entity)

								    assert partition.enablePartitioning is True
								    assert partition.partitionColumnName == "order_date"
								    assert partition.partitionIntervalType == PartitionIntervalTypes.TIME_UNIT
								    assert partition.partitionInterval == 5
								    assert partition.partitionIntervalUnit == PartitionIntervalUnit.YEAR

								    # Scenario 2: hourly ingestion-time partition, no profiler config.
								    table_entity = MockTable(
								        tablePartition=TablePartition(
								            columns=[
								                PartitionColumnDetails(
								                    columnName="e",
								                    intervalType=PartitionIntervalTypes.INGESTION_TIME,
								                    interval="HOUR",
								                )
								            ]
								        ),
								        tableProfilerConfig=None,
								    )

								    partition = get_partition_details(table_entity)

								    assert partition.enablePartitioning is True
								    assert partition.partitionColumnName == "_PARTITIONTIME"
								    assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
								    assert partition.partitionInterval == 1
								    assert partition.partitionIntervalUnit == PartitionIntervalUnit.HOUR

								    # Scenario 3: daily ingestion-time partition, no profiler config.
								    table_entity = MockTable(
								        tablePartition=TablePartition(
								            columns=[
								                PartitionColumnDetails(
								                    columnName="e",
								                    intervalType=PartitionIntervalTypes.INGESTION_TIME,
								                    interval="DAY",
								                )
								            ]
								        ),
								        tableProfilerConfig=None,
								    )

								    partition = get_partition_details(table_entity)

								    assert partition.enablePartitioning is True
								    assert partition.partitionColumnName == "_PARTITIONDATE"
								    assert partition.partitionIntervalType == PartitionIntervalTypes.INGESTION_TIME
								    assert partition.partitionInterval == 1
								    assert partition.partitionIntervalUnit == PartitionIntervalUnit.DAY
							 
						 
					
						
							
								
									
										
										
										
											2024-02-28 15:20:59 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def test_athena_injected_partition():
								    """Test injected partitioning for an Athena table.

								    First checks that ``get_partition_details`` raises ``RuntimeError``
								    when the table has an ``INJECTED`` partition column but no profiler
								    config. Then attaches a profiler config describing the partition and
								    checks that the returned details carry the configured values.
								    """
								    entity = MockAthenaTable(
								        tablePartition=TablePartition(
								            columns=[
								                PartitionColumnDetails(
								                    columnName="e",
								                    intervalType=PartitionIntervalTypes.INJECTED,
								                    interval=None,
								                )
								            ]
								        ),
								        tableProfilerConfig=None,
								    )

								    # ``match`` is applied as a regular-expression search on the error text.
								    with pytest.raises(
								        RuntimeError,
								        match="Table profiler config is missing for table with injected partitioning. Please define the partitioning in the table profiler config for column e",
								    ):
								        # As athena table has injected partitioning, it should raise an error
								        # since we have not provided any partitioning details for the injected partition
								        get_partition_details(entity)

								    # Provide the partitioning through the profiler config on the same entity.
								    profiler_config = TableProfilerConfig(
								        partitioning=PartitionProfilerConfig(
								            enablePartitioning=True,
								            partitionColumnName="e",
								            partitionIntervalType="COLUMN-VALUE",
								            partitionValues=["red"],
								        )
								    )

								    entity.tableProfilerConfig = profiler_config

								    partition = get_partition_details(entity)

								    assert partition.enablePartitioning is True
								    assert partition.partitionColumnName == "e"
								    assert partition.partitionIntervalType == PartitionIntervalTypes.COLUMN_VALUE
								    assert partition.partitionValues == ["red"]