diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index a6393aa9d0..d65d17f223 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1,3 +1,4 @@ +import datetime import json import logging from collections import defaultdict @@ -895,6 +896,12 @@ class GlueSource(StatefulIngestionSourceBase): ) -> Iterable[MetadataWorkUnit]: domain_urn = self._gen_domain_urn(database["Name"]) database_container_key = self.gen_database_key(database["Name"]) + parameters = database.get("Parameters", {}) + if database.get("LocationUri") is not None: + parameters["LocationUri"] = database["LocationUri"] + if database.get("CreateTime") is not None: + create_time: datetime.datetime = database["CreateTime"] + parameters["CreateTime"] = create_time.strftime("%B %-d, %Y at %H:%M:%S") yield from gen_containers( container_key=database_container_key, name=database["Name"], @@ -904,6 +911,7 @@ class GlueSource(StatefulIngestionSourceBase): qualified_name=self.get_glue_arn( account_id=database["CatalogId"], database=database["Name"] ), + extra_properties=parameters, ) def add_table_to_database_container( diff --git a/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json index f3d4812b79..992ec338de 100644 --- a/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json @@ -10,7 +10,8 @@ "platform": "glue", "instance": "delta_platform_instance", "env": "PROD", - "database": "delta-database" + "database": "delta-database", + "CreateTime": "June 9, 2021 at 14:14:19" }, "name": "delta-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" diff --git a/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json index 015daaa271..b8e3445eea 100644 --- a/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json @@ -10,7 +10,8 @@ "platform": "glue", "instance": "delta_platform_instance", "env": "PROD", - "database": "delta-database" + "database": "delta-database", + "CreateTime": "June 9, 2021 at 14:14:19" }, "name": "delta-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_golden.json index 7460e21e88..f180185f67 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_golden.json @@ -9,7 +9,11 @@ "customProperties": { "platform": "glue", "env": "PROD", - "database": "flights-database" + "database": "flights-database", + "param1": "value1", + "param2": "value2", + "LocationUri": "s3://test-bucket/test-prefix", + "CreateTime": "June 9, 2021 at 14:14:19" }, "name": "flights-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database" @@ -288,7 +292,8 @@ "customProperties": { "platform": "glue", "env": "PROD", - "database": "test-database" + "database": "test-database", + "CreateTime": "June 1, 2021 at 14:55:02" }, "name": "test-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database" diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json index f0d506cd59..4b64ee1bf0 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json @@ -10,7 +10,11 @@ "platform": "glue", "instance": "some_instance_name", "env": "PROD", - "database": "flights-database" + "database": "flights-database", + "param1": "value1", + "param2": "value2", + "LocationUri": "s3://test-bucket/test-prefix", + "CreateTime": "June 9, 2021 at 14:14:19" }, "name": "flights-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database" @@ -235,6 +239,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -292,7 +297,8 @@ "platform": "glue", "instance": "some_instance_name", "env": "PROD", - "database": "test-database" + "database": "test-database", + "CreateTime": "June 1, 2021 at 14:55:02" }, "name": "test-database", "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database" @@ -474,6 +480,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -660,6 +667,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" diff --git a/metadata-ingestion/tests/unit/test_glue_source_stubs.py b/metadata-ingestion/tests/unit/test_glue_source_stubs.py index c971001f97..80d16b9390 100644 --- a/metadata-ingestion/tests/unit/test_glue_source_stubs.py +++ b/metadata-ingestion/tests/unit/test_glue_source_stubs.py @@ -61,6 +61,8 @@ get_databases_response = { } ], "CatalogId": "123412341234", + "LocationUri": "s3://test-bucket/test-prefix", + "Parameters": {"param1": "value1", "param2": "value2"}, }, { "Name": "test-database",