From 843f82b943b48c2e340cdb643b44fd97443a062c Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Tue, 1 Aug 2023 14:05:50 -0700 Subject: [PATCH] feat(presto-on-hive): allow v1 fieldpaths in the presto-on-hive source (#8474) --- .../datahub/ingestion/source/csv_enricher.py | 2 +- .../ingestion/source/sql/presto_on_hive.py | 7 + .../ingestion/source/sql/sql_common.py | 10 + .../datahub/ingestion/source/sql/sql_utils.py | 31 + .../src/datahub/utilities/sqlglot_lineage.py | 4 +- .../src/datahub/utilities/urns/dataset_urn.py | 2 +- .../presto_on_hive_mces_golden_5.json | 1693 +++++++++++++++++ .../presto-on-hive/test_presto_on_hive.py | 14 +- 8 files changed, 1754 insertions(+), 9 deletions(-) create mode 100644 metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index e41c02b462..f057862a34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -438,7 +438,7 @@ class CSVEnricherSource(Source): field_match = False for field_info in current_editable_schema_metadata.editableSchemaFieldInfo: if ( - DatasetUrn._get_simple_field_path_from_v2_field_path( + DatasetUrn.get_simple_field_path_from_v2_field_path( field_info.fieldPath ) == field_path diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index 55d4ebc5ff..a54cb9d50e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -134,6 +134,11 @@ class PrestoOnHiveConfig(BasicSQLAlchemyConfig): description="By default, the connector overwrites properties every time. Set this to True to enable merging of properties with what exists on the server.", ) + simplify_nested_field_paths: bool = Field( + default=False, + description="Simplify v2 field paths to v1 by default. If the schema has Union or Array types, still falls back to v2", + ) + def get_sql_alchemy_url( self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None ) -> str: @@ -527,6 +532,7 @@ class PrestoOnHiveSource(SQLAlchemySource): None, None, schema_fields, + self.config.simplify_nested_field_paths, ) dataset_snapshot.aspects.append(schema_metadata) @@ -756,6 +762,7 @@ class PrestoOnHiveSource(SQLAlchemySource): self.platform, dataset.columns, canonical_schema=schema_fields, + simplify_nested_field_paths=self.config.simplify_nested_field_paths, ) dataset_snapshot.aspects.append(schema_metadata) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 42ea7aed9b..4869df75d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -41,11 +41,13 @@ from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.sql.sql_config import SQLAlchemyConfig from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, + downgrade_schema_from_v2, gen_database_container, gen_database_key, gen_schema_container, gen_schema_key, get_domain_wu, + schema_requires_v2, ) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, @@ -287,7 +289,15 @@ def get_schema_metadata( pk_constraints: Optional[dict] = None, foreign_keys: Optional[List[ForeignKeyConstraint]] = None, canonical_schema: Optional[List[SchemaField]] = None, + simplify_nested_field_paths: bool = False, ) -> SchemaMetadata: + if ( + simplify_nested_field_paths + and canonical_schema is not None + and not schema_requires_v2(canonical_schema) + ): + canonical_schema = downgrade_schema_from_v2(canonical_schema) + schema_metadata = SchemaMetadata( schemaName=dataset_name, platform=make_data_platform_urn(platform), diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index a5f5034d17..c5baf148b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -17,9 +17,16 @@ from datahub.emitter.mcp_builder import ( ) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import DataPlatformInstanceClass from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.registries.domain_registry import DomainRegistry +from datahub.utilities.urns.dataset_urn import DatasetUrn + +ARRAY_TOKEN = "[type=array]" +UNION_TOKEN = "[type=union]" +KEY_SCHEMA_PREFIX = "[key=True]." +VERSION_PREFIX = "[version=2.0]." def gen_schema_key( @@ -223,3 +230,27 @@ def gen_lineage( for wu in lineage_workunits: yield wu + + +# downgrade a schema field +def downgrade_schema_field_from_v2(field: SchemaField) -> SchemaField: + field.fieldPath = DatasetUrn.get_simple_field_path_from_v2_field_path( + field.fieldPath + ) + return field + + +# downgrade a list of schema fields +def downgrade_schema_from_v2( + canonical_schema: List[SchemaField], +) -> List[SchemaField]: + return [downgrade_schema_field_from_v2(field) for field in canonical_schema] + + +# v2 is only required in case UNION or ARRAY types are present- all other types can be represented in v1 paths +def schema_requires_v2(canonical_schema: List[SchemaField]) -> bool: + for field in canonical_schema: + field_name = field.fieldPath + if ARRAY_TOKEN in field_name or UNION_TOKEN in field_name: + return True + return False diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 57f93f27e9..e5a9954802 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -328,7 +328,7 @@ class SchemaResolver(Closeable): cls, schema_metadata: SchemaMetadataClass ) -> SchemaInfo: return { - DatasetUrn._get_simple_field_path_from_v2_field_path(col.fieldPath): ( + DatasetUrn.get_simple_field_path_from_v2_field_path(col.fieldPath): ( # The actual types are more of a "nice to have". col.nativeDataType or "str" @@ -336,7 +336,7 @@ class SchemaResolver(Closeable): for col in schema_metadata.fields # TODO: We can't generate lineage to columns nested within structs yet. if "." - not in DatasetUrn._get_simple_field_path_from_v2_field_path(col.fieldPath) + not in DatasetUrn.get_simple_field_path_from_v2_field_path(col.fieldPath) } # TODO add a method to load all from graphql diff --git a/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py b/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py index 283d4ac926..3ed33c0684 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py +++ b/metadata-ingestion/src/datahub/utilities/urns/dataset_urn.py @@ -97,7 +97,7 @@ class DatasetUrn(Urn): """A helper function to extract simple . path notation from the v2 field path""" @staticmethod - def _get_simple_field_path_from_v2_field_path(field_path: str) -> str: + def get_simple_field_path_from_v2_field_path(field_path: str) -> str: if field_path.startswith("[version=2.0]"): # this is a v2 field path tokens = [ diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json new file mode 100644 index 0000000000..0b09920cf9 --- /dev/null +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json @@ -0,0 +1,1693 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "hive", + "env": "PROD", + "database": "hive" + }, + "name": "hive" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "hive", + "env": "PROD", + "database": "hive", + "schema": "db1" + }, + "name": "db1" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.map_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "keyvalue", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "recordid", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "string" + } + } + }, + "nativeDataType": "map", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"map\", \"key_type\": {\"type\": \"int\", \"native_data_type\": \"int\", \"_nullable\": true}, \"key_native_data_type\": \"int\"}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numFiles": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "rawDataSize": "0", + "numRows": "0", + "totalSize": "0", + "transient_lastDdlTime": "1688395014", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", + "create_date": "2023-07-03" + }, + "name": "map_test", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.union_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=struct].[type=union].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.UnionType": {} + } + }, + "nativeDataType": "union", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=int].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.UnionType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=double].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.UnionType": {} + } + }, + "nativeDataType": "double", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=array].[type=string].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.UnionType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.UnionType": {} + } + }, + "nativeDataType": "struct0", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo.[type=int].a", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=union].[type=struct0].foo.[type=string].b", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numFiles": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "rawDataSize": "0", + "numRows": "0", + "totalSize": "0", + "transient_lastDdlTime": "1688395014", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", + "create_date": "2023-07-03" + }, + "name": "union_test", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.nested_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "service.type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "service.provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct\"}" + }, + { + "fieldPath": "service.provider.name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"varchar(50)\", \"_nullable\": true}" + }, + { + "fieldPath": "service.provider.id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "tinyint", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"tinyint\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numFiles": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "rawDataSize": "0", + "numRows": "0", + "totalSize": "0", + "transient_lastDdlTime": "1688395014", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", + "create_date": "2023-07-03" + }, + "name": "nested_struct_test", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=int].property_id", + "nullable": true, + "description": "id of property", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "description": "service types and providers", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "comment": "This table has array of structs", + "numFiles": "1", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1688395011", + "rawDataSize": "32", + "numRows": "1", + "totalSize": "33", + "another.comment": "This table has no partitions", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", + "create_date": "2023-07-03" + }, + "name": "array_struct_test", + "description": "This table has array of structs", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=int].property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numFiles": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1688395008", + "rawDataSize": "0", + "numRows": "0", + "totalSize": "0", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", + "create_date": "2023-07-03" + }, + "name": "struct_test", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1._test_table_underscore", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numFiles": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1688395008", + "rawDataSize": "0", + "numRows": "0", + "totalSize": "0", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", + "create_date": "2023-07-03" + }, + "name": "_test_table_underscore", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.pokes", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "transient_lastDdlTime": "1688395005", + "table_type": "MANAGED_TABLE", + "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", + "create_date": "2023-07-03", + "partitioned_columns": "baz" + }, + "name": "pokes", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_presto_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_presto_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_presto_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dummy", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "is_view": "True" + }, + "name": "array_struct_test_presto_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "/* Presto View */", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_presto_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "view" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_presto_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "/* Presto View */", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_presto_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=int].property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "is_view": "True" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "view" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + }, + { + "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "presto-on-hive-test" + } +} +] diff --git a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py index 8828f0a92e..17e21f3790 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py +++ b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py @@ -53,12 +53,14 @@ def loaded_presto_on_hive(presto_on_hive_runner): @freeze_time(FROZEN_TIME) @pytest.mark.integration_batch_1 @pytest.mark.parametrize( - "mode,use_catalog_subtype,use_dataset_pascalcase_subtype,include_catalog_name_in_ids,test_suffix", + "mode,use_catalog_subtype,use_dataset_pascalcase_subtype,include_catalog_name_in_ids,simplify_nested_field_paths," + "test_suffix", [ - ("hive", False, False, False, "_1"), - ("presto-on-hive", True, True, False, "_2"), - ("hive", False, False, True, "_3"), - ("presto-on-hive", True, True, True, "_4"), + ("hive", False, False, False, False, "_1"), + ("presto-on-hive", True, True, False, False, "_2"), + ("hive", False, False, True, False, "_3"), + ("presto-on-hive", True, True, True, False, "_4"), + ("hive", False, False, False, True, "_5"), ], ) def test_presto_on_hive_ingest( @@ -71,6 +73,7 @@ def test_presto_on_hive_ingest( use_catalog_subtype, use_dataset_pascalcase_subtype, include_catalog_name_in_ids, + simplify_nested_field_paths, test_suffix, ): # Run the metadata ingestion pipeline. @@ -97,6 +100,7 @@ def test_presto_on_hive_ingest( "mode": mode, "use_catalog_subtype": use_catalog_subtype, "use_dataset_pascalcase_subtype": use_dataset_pascalcase_subtype, + "simplify_nested_field_paths": simplify_nested_field_paths, }, }, "sink": {