mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	feat(ingest/presto-on-hive): enable partition key for presto-on-hive (#8380)
This commit is contained in:
		
							parent
							
								
									4f9fc671dc
								
							
						
					
					
						commit
						bf47d65412
					
				@ -34,6 +34,7 @@ public class SchemaFieldMapper {
 | 
			
		||||
            result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn));
 | 
			
		||||
        }
 | 
			
		||||
        result.setIsPartOfKey(input.isIsPartOfKey());
 | 
			
		||||
        result.setIsPartitioningKey(input.isIsPartitioningKey());
 | 
			
		||||
        return result;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -2872,6 +2872,11 @@ type SchemaField {
 | 
			
		||||
    Whether the field is part of a key schema
 | 
			
		||||
    """
 | 
			
		||||
    isPartOfKey: Boolean
 | 
			
		||||
 | 
			
		||||
    """
 | 
			
		||||
    Whether the field is part of a partitioning key schema
 | 
			
		||||
    """
 | 
			
		||||
    isPartitioningKey: Boolean
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
@ -549,6 +549,7 @@ export const dataset3WithSchema = {
 | 
			
		||||
                    type: SchemaFieldDataType.String,
 | 
			
		||||
                    nativeDataType: 'varchar(100)',
 | 
			
		||||
                    isPartOfKey: false,
 | 
			
		||||
                    isPartitioningKey: false,
 | 
			
		||||
                    jsonPath: null,
 | 
			
		||||
                    globalTags: null,
 | 
			
		||||
                    glossaryTerms: null,
 | 
			
		||||
@ -563,6 +564,7 @@ export const dataset3WithSchema = {
 | 
			
		||||
                    type: SchemaFieldDataType.String,
 | 
			
		||||
                    nativeDataType: 'boolean',
 | 
			
		||||
                    isPartOfKey: false,
 | 
			
		||||
                    isPartitioningKey: false,
 | 
			
		||||
                    jsonPath: null,
 | 
			
		||||
                    globalTags: null,
 | 
			
		||||
                    glossaryTerms: null,
 | 
			
		||||
 | 
			
		||||
@ -7,6 +7,7 @@ import { ExtendedSchemaFields } from './types';
 | 
			
		||||
import TypeLabel from '../../../../shared/tabs/Dataset/Schema/components/TypeLabel';
 | 
			
		||||
import { ForeignKeyConstraint, SchemaMetadata } from '../../../../../../types.generated';
 | 
			
		||||
import PrimaryKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PrimaryKeyLabel';
 | 
			
		||||
import PartitioningKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PartitioningKeyLabel';
 | 
			
		||||
import NullableLabel from '../../../../shared/tabs/Dataset/Schema/components/NullableLabel';
 | 
			
		||||
import ForeignKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/ForeignKeyLabel';
 | 
			
		||||
 | 
			
		||||
@ -62,6 +63,7 @@ export default function useSchemaTitleRenderer(
 | 
			
		||||
                    </FieldPathText>
 | 
			
		||||
                    <TypeLabel type={record.type} nativeDataType={record.nativeDataType} />
 | 
			
		||||
                    {(schemaMetadata?.primaryKeys?.includes(fieldPath) || record.isPartOfKey) && <PrimaryKeyLabel />}
 | 
			
		||||
                    {record.isPartitioningKey && <PartitioningKeyLabel />}
 | 
			
		||||
                    {record.nullable && <NullableLabel />}
 | 
			
		||||
                    {schemaMetadata?.foreignKeys
 | 
			
		||||
                        ?.filter(
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,21 @@
 | 
			
		||||
import React from 'react';
 | 
			
		||||
import { Badge } from 'antd';
 | 
			
		||||
import styled from 'styled-components';
 | 
			
		||||
import { blue } from '@ant-design/colors';
 | 
			
		||||
import { ANTD_GRAY } from '../../../../constants';
 | 
			
		||||
 | 
			
		||||
/**
 * Ant Design `Badge` restyled for the partition-key marker: a 4px left
 * margin, and a count bubble with a gray background, blue text and border,
 * 12px regular-weight text and a fixed 22px height.
 */
const PartitioningKeyBadge = styled(Badge)`
    margin-left: 4px;
    &&& .ant-badge-count {
        background-color: ${ANTD_GRAY[1]};
        color: ${blue[5]};
        border: 1px solid ${blue[2]};
        font-size: 12px;
        font-weight: 400;
        height: 22px;
    }
`;
 | 
			
		||||
 | 
			
		||||
/**
 * Renders the badge shown next to a schema field that is flagged as a
 * partitioning key. Takes no props; the badge text is fixed.
 */
export default function PartitioningKeyLabel() {
    const badgeText = 'Partition Key';
    return <PartitioningKeyBadge count={badgeText} />;
}
 | 
			
		||||
@ -678,6 +678,7 @@ fragment schemaFieldFields on SchemaField {
 | 
			
		||||
    nativeDataType
 | 
			
		||||
    recursive
 | 
			
		||||
    isPartOfKey
 | 
			
		||||
    isPartitioningKey
 | 
			
		||||
    globalTags {
 | 
			
		||||
        ...globalTagsFields
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -10,6 +10,7 @@ query getVersionedDataset($urn: String!, $versionStamp: String) {
 | 
			
		||||
                nativeDataType
 | 
			
		||||
                recursive
 | 
			
		||||
                isPartOfKey
 | 
			
		||||
                isPartitioningKey
 | 
			
		||||
            }
 | 
			
		||||
            lastObserved
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@ -524,6 +524,8 @@ class PrestoOnHiveSource(SQLAlchemySource):
 | 
			
		||||
            # add table schema fields
 | 
			
		||||
            schema_fields = self.get_schema_fields(dataset_name, columns)
 | 
			
		||||
 | 
			
		||||
            self._set_partition_key(columns, schema_fields)
 | 
			
		||||
 | 
			
		||||
            schema_metadata = get_schema_metadata(
 | 
			
		||||
                self.report,
 | 
			
		||||
                dataset_name,
 | 
			
		||||
@ -888,6 +890,18 @@ class PrestoOnHiveSource(SQLAlchemySource):
 | 
			
		||||
            default_nullable=True,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def _set_partition_key(self, columns, schema_fields):
 | 
			
		||||
        if len(columns) > 0:
 | 
			
		||||
            partition_key_names = set()
 | 
			
		||||
            for column in columns:
 | 
			
		||||
                if column["is_partition_col"]:
 | 
			
		||||
                    partition_key_names.add(column["col_name"])
 | 
			
		||||
 | 
			
		||||
            for schema_field in schema_fields:
 | 
			
		||||
                name = schema_field.fieldPath.split(".")[-1]
 | 
			
		||||
                if name in partition_key_names:
 | 
			
		||||
                    schema_field.isPartitioningKey = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SQLAlchemyClient:
 | 
			
		||||
    def __init__(self, config: SQLAlchemyConfig):
 | 
			
		||||
 | 
			
		||||
@ -1277,6 +1277,7 @@
 | 
			
		||||
                                "nativeDataType": "string",
 | 
			
		||||
                                "recursive": false,
 | 
			
		||||
                                "isPartOfKey": false,
 | 
			
		||||
                                "isPartitioningKey": true,
 | 
			
		||||
                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
 | 
			
		||||
                            },
 | 
			
		||||
                            {
 | 
			
		||||
 | 
			
		||||
@ -1277,6 +1277,7 @@
 | 
			
		||||
                                "nativeDataType": "string",
 | 
			
		||||
                                "recursive": false,
 | 
			
		||||
                                "isPartOfKey": false,
 | 
			
		||||
                                "isPartitioningKey": true,
 | 
			
		||||
                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
 | 
			
		||||
                            },
 | 
			
		||||
                            {
 | 
			
		||||
 | 
			
		||||
@ -1277,6 +1277,7 @@
 | 
			
		||||
                                "nativeDataType": "string",
 | 
			
		||||
                                "recursive": false,
 | 
			
		||||
                                "isPartOfKey": false,
 | 
			
		||||
                                "isPartitioningKey": true,
 | 
			
		||||
                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
 | 
			
		||||
                            },
 | 
			
		||||
                            {
 | 
			
		||||
 | 
			
		||||
@ -1277,6 +1277,7 @@
 | 
			
		||||
                                "nativeDataType": "string",
 | 
			
		||||
                                "recursive": false,
 | 
			
		||||
                                "isPartOfKey": false,
 | 
			
		||||
                                "isPartitioningKey": true,
 | 
			
		||||
                                "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
 | 
			
		||||
                            },
 | 
			
		||||
                            {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user