From e085a9e7dc678b9aa5024967ebad2cc38ef5f14a Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 23 Nov 2022 00:45:40 +0530 Subject: [PATCH] feat(ingest): add config for ingesting delta table without files (#6403) Closes undefined --- metadata-ingestion/setup.py | 2 +- .../ingestion/source/delta_lake/config.py | 8 + .../source/delta_lake/delta_lake_utils.py | 10 +- .../ingestion/source/delta_lake/source.py | 9 +- .../delta_lake_minio_mces_golden.json | 156 ++------ .../local/golden_mces_allow_table.json | 378 +++++++++--------- .../local/golden_mces_inner_table.json | 378 +++++++++--------- .../local/golden_mces_relative_path.json | 15 +- .../local/golden_mces_single_table.json | 8 +- .../sources/local/single_table.json | 3 +- 10 files changed, 468 insertions(+), 499 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7315341f10..f717bb1093 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -209,7 +209,7 @@ data_lake_profiling = { delta_lake = { *s3_base, - "deltalake", + "deltalake>=0.6.3", } powerbi_report_server = {"requests", "requests_ntlm"} diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py index ff68b1db15..d983538174 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py @@ -65,6 +65,14 @@ class DeltaLakeSourceConfig(PlatformSourceConfigBase, EnvBasedSourceConfigBase): description="Number of previous version histories to be ingested. Defaults to 1. If set to -1 all version history will be ingested.", ) + require_files: Optional[bool] = Field( + default=True, + description="Whether DeltaTable should track files. " + "Consider setting this to `False` for large delta tables, " + "resulting in significant memory reduction for ingestion process." + "When set to `False`, number_of_files in delta table can not be reported.", + ) + s3: Optional[S3] = Field() @cached_property diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/delta_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/delta_lake_utils.py index 076323f48f..30e20a31cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/delta_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/delta_lake_utils.py @@ -20,12 +20,20 @@ def read_delta_table( opts = { "AWS_ACCESS_KEY_ID": creds.get("aws_access_key_id", ""), "AWS_SECRET_ACCESS_KEY": creds.get("aws_secret_access_key", ""), + # Allow http connections, this is required for minio + "AWS_STORAGE_ALLOW_HTTP": "true", } + if delta_lake_config.s3.aws_config.aws_region: + opts["AWS_REGION"] = delta_lake_config.s3.aws_config.aws_region if delta_lake_config.s3.aws_config.aws_endpoint_url: opts[ "AWS_ENDPOINT_URL" ] = delta_lake_config.s3.aws_config.aws_endpoint_url - delta_table = DeltaTable(path, storage_options=opts) + delta_table = DeltaTable( + path, + storage_options=opts, + without_files=not delta_lake_config.require_files, + ) except PyDeltaTableError as e: if "Not a Delta table" not in str(e): diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 57732aafdf..bb15be842b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -36,6 +36,7 @@ from datahub.ingestion.source.delta_lake.delta_lake_utils import ( from datahub.ingestion.source.delta_lake.report import DeltaLakeSourceReport from datahub.ingestion.source.s3.data_lake_utils import ContainerWUCreator from datahub.ingestion.source.schema_inference.csv_tsv import tableschema_type_map +from datahub.metadata.com.linkedin.pegasus2avro.common import Status from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.com.linkedin.pegasus2avro.schema import ( @@ -161,10 +162,10 @@ class DeltaLakeSource(Source): ) operation_custom_properties = dict() - for key, val in hist.items(): + for key, val in sorted(hist.items()): if val is not None: if isinstance(val, dict): - for k, v in val.items(): + for k, v in sorted(val.items()): if v is not None: operation_custom_properties[f"{key}_{k}"] = str(v) else: @@ -224,7 +225,7 @@ class DeltaLakeSource(Source): ) dataset_snapshot = DatasetSnapshot( urn=dataset_urn, - aspects=[], + aspects=[Status(removed=False)], ) customProperties = { @@ -235,6 +236,8 @@ class DeltaLakeSource(Source): "version": str(delta_table.version()), "location": self.source_config.complete_path, } + if not self.source_config.require_files: + del customProperties["number_of_files"] # always 0 dataset_properties = DatasetPropertiesClass( description=delta_table.metadata().description, diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index d9dff773ed..3c05420f3c 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -1,10 +1,14 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -15,11 +19,8 @@ "version": "0", "location": "s3://my-test-bucket/delta_tables/sales" }, - "externalUrl": null, "name": "my_table", - "qualifiedName": null, "description": "my table description", - "uri": null, "tags": [] } }, @@ -30,19 +31,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -52,11 +46,8 @@ "fields": [ { "fieldPath": "customer", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -64,19 +55,13 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false }, { "fieldPath": "day", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -84,19 +69,13 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false }, { "fieldPath": "month", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -104,19 +83,13 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false }, { "fieldPath": "sale_id", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -124,19 +97,13 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false }, { "fieldPath": "total_cost", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -144,19 +111,13 @@ }, "nativeDataType": "float", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false }, { "fieldPath": "year", - "jsonPath": null, "nullable": true, "description": "{}", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -164,35 +125,23 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, "isPartOfKey": false, - "isPartitioningKey": false, - "jsonProps": null + "isPartitioningKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -201,10 +150,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { @@ -218,17 +164,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -237,17 +178,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -256,17 +192,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -275,10 +206,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { @@ -292,17 +220,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -311,17 +234,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -330,17 +248,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -349,17 +262,12 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -368,29 +276,21 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655664815399}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655664815399}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "delta-lake-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "delta-lake-test" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index a4c1ca47c0..423e489564 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -448,7 +453,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831476907}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831476907}", "contentType": "application/json" }, "systemMetadata": { @@ -462,7 +467,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477701}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831477701}", "contentType": "application/json" }, "systemMetadata": { @@ -476,7 +481,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477726}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831477726}", "contentType": "application/json" }, "systemMetadata": { @@ -490,7 +495,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477745}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831477745}", "contentType": "application/json" }, "systemMetadata": { @@ -504,170 +509,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477768}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "allow_table.json" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "number_of_files": "3", - "partition_columns": "[]", - "table_creation_time": "1655664813952", - "id": "eca9d2a0-4ce6-4ace-a732-75fda0157fb8", - "version": "0", - "location": "tests/integration/delta_lake/test_data/delta_tables" - }, - "name": "my_table", - "description": "my table description", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_table", - "platform": "urn:li:dataPlatform:delta-lake", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "day", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "month", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "sale_id", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "total_cost", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "year", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "allow_table.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:a282913be26fceff334523c2be119df1\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "allow_table.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655664815399}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831477768}", "contentType": "application/json" }, "systemMetadata": { @@ -680,6 +522,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -787,7 +634,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649166}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831649166}", "contentType": "application/json" }, "systemMetadata": { @@ -801,7 +648,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649715}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831649715}", "contentType": "application/json" }, "systemMetadata": { @@ -815,7 +662,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649731}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831649731}", "contentType": "application/json" }, "systemMetadata": { @@ -829,7 +676,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649754}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831649754}", "contentType": "application/json" }, "systemMetadata": { @@ -843,7 +690,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649788}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831649788}", "contentType": "application/json" }, "systemMetadata": { @@ -856,6 +703,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1034,7 +886,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831865396}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831865396}", "contentType": "application/json" }, "systemMetadata": { @@ -1048,7 +900,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866337}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831866337}", "contentType": "application/json" }, "systemMetadata": { @@ -1062,7 +914,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866398}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831866398}", "contentType": "application/json" }, "systemMetadata": { @@ -1076,7 +928,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866447}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831866447}", "contentType": "application/json" }, "systemMetadata": { @@ -1090,7 +942,175 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866541}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831866541}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "number_of_files": "3", + "partition_columns": "[]", + "table_creation_time": "1655664813952", + "id": "eca9d2a0-4ce6-4ace-a732-75fda0157fb8", + "version": "0", + "location": "tests/integration/delta_lake/test_data/delta_tables" + }, + "name": "my_table", + "description": "my table description", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "my_table", + "platform": "urn:li:dataPlatform:delta-lake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "day", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "month", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "sale_id", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "total_cost", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "year", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:a282913be26fceff334523c2be119df1\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655664815399}", "contentType": "application/json" }, "systemMetadata": { @@ -1098,4 +1118,4 @@ "runId": "allow_table.json" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 57c6737b39..9487a67257 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -448,7 +453,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831476907}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831476907}", "contentType": "application/json" }, "systemMetadata": { @@ -462,7 +467,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477701}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831477701}", "contentType": "application/json" }, "systemMetadata": { @@ -476,7 +481,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477726}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831477726}", "contentType": "application/json" }, "systemMetadata": { @@ -490,7 +495,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477745}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831477745}", "contentType": "application/json" }, "systemMetadata": { @@ -504,170 +509,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477768}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "inner_table.json" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "number_of_files": "3", - "partition_columns": "[]", - "table_creation_time": "1655664813952", - "id": "eca9d2a0-4ce6-4ace-a732-75fda0157fb8", - "version": "0", - "location": "tests/integration/delta_lake/test_data/delta_tables" - }, - "name": "my_table", - "description": "my table description", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_table", - "platform": "urn:li:dataPlatform:delta-lake", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "day", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "month", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "sale_id", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "total_cost", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - }, - { - "fieldPath": "year", - "nullable": true, - "description": "{}", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false, - "isPartitioningKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "inner_table.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:ad4b596846e8e010114b1ec82b324fab\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "inner_table.json" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655664815399}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831477768}", "contentType": "application/json" }, "systemMetadata": { @@ -680,6 +522,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -787,7 +634,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649166}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831649166}", "contentType": "application/json" }, "systemMetadata": { @@ -801,7 +648,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649715}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831649715}", "contentType": "application/json" }, "systemMetadata": { @@ -815,7 +662,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649731}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831649731}", "contentType": "application/json" }, "systemMetadata": { @@ -829,7 +676,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649754}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831649754}", "contentType": "application/json" }, "systemMetadata": { @@ -843,7 +690,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831649788}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831649788}", "contentType": "application/json" }, "systemMetadata": { @@ -856,6 +703,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1034,7 +886,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831865396}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831865396}", "contentType": "application/json" }, "systemMetadata": { @@ -1048,7 +900,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866337}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831866337}", "contentType": "application/json" }, "systemMetadata": { @@ -1062,7 +914,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866398}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831866398}", "contentType": "application/json" }, "systemMetadata": { @@ -1076,7 +928,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866447}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831866447}", "contentType": "application/json" }, "systemMetadata": { @@ -1090,7 +942,175 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831866541}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831866541}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "number_of_files": "3", + "partition_columns": "[]", + "table_creation_time": "1655664813952", + "id": "eca9d2a0-4ce6-4ace-a732-75fda0157fb8", + "version": "0", + "location": "tests/integration/delta_lake/test_data/delta_tables" + }, + "name": "my_table", + "description": "my table description", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "my_table", + "platform": "urn:li:dataPlatform:delta-lake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "day", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "month", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "sale_id", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "total_cost", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "year", + "nullable": true, + "description": "{}", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:ad4b596846e8e010114b1ec82b324fab\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"CONVERT\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655664815399}", "contentType": "application/json" }, "systemMetadata": { @@ -1098,4 +1118,4 @@ "runId": "inner_table.json" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index 7f68474be1..538b7ffc3e 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -168,7 +173,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831476907}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\"}, \"lastUpdatedTimestamp\": 1655831476907}", "contentType": "application/json" }, "systemMetadata": { @@ -182,7 +187,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"0\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477701}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"0\"}, \"lastUpdatedTimestamp\": 1655831477701}", "contentType": "application/json" }, "systemMetadata": { @@ -196,7 +201,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"1\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477726}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"1\"}, \"lastUpdatedTimestamp\": 1655831477726}", "contentType": "application/json" }, "systemMetadata": { @@ -210,7 +215,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"2\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477745}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"2\"}, \"lastUpdatedTimestamp\": 1655831477745}", "contentType": "application/json" }, "systemMetadata": { @@ -224,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477768}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831477768}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index 89597c63ab..470bda2d1d 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -4,10 +4,14 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "number_of_files": "5", "partition_columns": "['foo', 'bar']", "table_creation_time": "1655831476360", "id": "628d06df-ecb0-4314-a97e-75d8872db7c3", @@ -448,7 +452,7 @@ "changeType": "UPSERT", "aspectName": "operation", "aspect": { - "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"readVersion\": \"3\", \"isolationLevel\": \"Serializable\", \"isBlindAppend\": \"True\", \"engineInfo\": \"local Delta-Standalone/0.4.0\"}, \"lastUpdatedTimestamp\": 1655831477768}", + "value": "{\"timestampMillis\": 1615443388097, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"operationType\": \"CUSTOM\", \"customOperationType\": \"WRITE\", \"customProperties\": {\"engineInfo\": \"local Delta-Standalone/0.4.0\", \"isBlindAppend\": \"True\", \"isolationLevel\": \"Serializable\", \"readVersion\": \"3\"}, \"lastUpdatedTimestamp\": 1655831477768}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/delta_lake/sources/local/single_table.json b/metadata-ingestion/tests/integration/delta_lake/sources/local/single_table.json index 27fa25e09e..53d93369fc 100644 --- a/metadata-ingestion/tests/integration/delta_lake/sources/local/single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/sources/local/single_table.json @@ -2,6 +2,7 @@ "type": "delta-lake", "config": { "env": "UAT", - "base_path": "tests/integration/delta_lake/test_data/delta_tables/my_table_basic" + "base_path": "tests/integration/delta_lake/test_data/delta_tables/my_table_basic", + "require_files":"False" } }