diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java index 073727ea83..f4f17df8bc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java @@ -128,6 +128,10 @@ public class ContainerMapper { if (gmsProperties.hasCustomProperties()) { propertiesResult.setCustomProperties(CustomPropertiesMapper.map(gmsProperties.getCustomProperties(), entityUrn)); } + if (gmsProperties.hasQualifiedName()) { + propertiesResult.setQualifiedName(gmsProperties.getQualifiedName().toString()); + } + return propertiesResult; } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 3625feb7d5..0d1a1178c0 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1974,6 +1974,11 @@ type ContainerProperties { Native platform URL of the Container """ externalUrl: String + + """ + Fully-qualified name of the Container + """ + qualifiedName: String } """ diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index ad2df41a1b..708aa996ed 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -159,6 +159,7 @@ def gen_containers( owner_urn: Optional[str] = None, external_url: Optional[str] = None, tags: Optional[List[str]] = None, + qualified_name: Optional[str] = None, ) -> Iterable[MetadataWorkUnit]: container_urn = make_container_urn( guid=container_key.guid(), @@ -173,6 +174,7 @@ def gen_containers( description=description, customProperties=container_key.dict(exclude_none=True, by_alias=True), externalUrl=external_url, + qualifiedName=qualified_name, ), ) wu = MetadataWorkUnit(id=f"container-info-{name}-{container_urn}", mcp=mcp) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 238e6160fd..89d2bfd0c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -291,6 +291,14 @@ class GlueSource(StatefulIngestionSourceBase): run_id=self.ctx.run_id, ) + def get_glue_arn( + self, account_id: str, database: str, table: Optional[str] = None + ) -> str: + prefix = f"arn:aws:glue:{self.source_config.aws_region}:{account_id}" + if table: + return f"{prefix}:table/{database}/{table}" + return f"{prefix}:database/{database}" + @classmethod def create(cls, config_dict, ctx): config = GlueSourceConfig.parse_obj(config_dict) @@ -925,6 +933,9 @@ class GlueSource(StatefulIngestionSourceBase): sub_types=["Database"], domain_urn=domain_urn, description=database.get("Description"), + qualified_name=self.get_glue_arn( + account_id=database["CatalogId"], database=database["Name"] + ), ) for wu in container_workunits: @@ -1124,6 +1135,11 @@ class GlueSource(StatefulIngestionSourceBase): }, uri=table.get("Location"), tags=[], + qualifiedName=self.get_glue_arn( + account_id=table["CatalogId"], + database=table["DatabaseName"], + table=table["Name"], + ), ) def get_s3_tags() -> Optional[GlobalTagsClass]: diff --git a/metadata-ingestion/tests/unit/glue/glue_deleted_actor_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_deleted_actor_mces_golden.json index 34a884f401..98b0525b6d 100644 --- a/metadata-ingestion/tests/unit/glue/glue_deleted_actor_mces_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_deleted_actor_mces_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"test-database\"}, \"name\": \"test-database\"}", + "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"test-database\"}, \"name\": \"test-database\", \"qualifiedName\": \"arn:aws:glue:eu-east-1:123412341234:database/test-database\"}", "contentType": "application/json" }, "systemMetadata": { @@ -74,6 +74,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:eu-east-1:795586375822:table/test-database/test_jsons_markers", "tags": [] } }, @@ -252,6 +253,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:eu-east-1:795586375822:table/test-database/test_parquet", "tags": [] } }, diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_golden.json index 9d65962c13..f27d702770 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"flights-database\"}, \"name\": \"flights-database\"}", + "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"flights-database\"}, \"name\": \"flights-database\", \"qualifiedName\": \"arn:aws:glue:us-west-2:123412341234:database/flights-database\"}", "contentType": "application/json" } }, @@ -63,6 +63,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:table/flights-database/avro", "tags": [] } }, @@ -254,7 +255,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"test-database\"}, \"name\": \"test-database\"}", + "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"PROD\", \"database\": \"test-database\"}, \"name\": \"test-database\", \"qualifiedName\": \"arn:aws:glue:us-west-2:123412341234:database/test-database\"}", "contentType": "application/json" } }, @@ -311,6 +312,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:795586375822:table/test-database/test_jsons_markers", "tags": [] } }, @@ -489,6 +491,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:795586375822:table/test-database/test_parquet", "tags": [] } }, diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json index 1fc3759f0d..a35bae1d2e 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"some_instance_name\", \"database\": \"flights-database\"}, \"name\": \"flights-database\"}", + "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"some_instance_name\", \"database\": \"flights-database\"}, \"name\": \"flights-database\", \"qualifiedName\": \"arn:aws:glue:us-west-2:123412341234:database/flights-database\"}", "contentType": "application/json" } }, @@ -63,6 +63,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:table/flights-database/avro", "tags": [] } }, @@ -255,7 +256,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"some_instance_name\", \"database\": \"test-database\"}, \"name\": \"test-database\"}", + "value": "{\"customProperties\": {\"platform\": \"glue\", \"instance\": \"some_instance_name\", \"database\": \"test-database\"}, \"name\": \"test-database\", \"qualifiedName\": \"arn:aws:glue:us-west-2:123412341234:database/test-database\"}", "contentType": "application/json" } }, @@ -312,6 +313,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:795586375822:table/test-database/test_jsons_markers", "tags": [] } }, @@ -491,6 +493,7 @@ "SortColumns": "[]", "StoredAsSubDirectories": "False" }, + "qualifiedName": "arn:aws:glue:us-west-2:795586375822:table/test-database/test_parquet", "tags": [] } }, diff --git a/metadata-ingestion/tests/unit/test_glue_source_stubs.py b/metadata-ingestion/tests/unit/test_glue_source_stubs.py index bca9bc90c8..6165659655 100644 --- a/metadata-ingestion/tests/unit/test_glue_source_stubs.py +++ b/metadata-ingestion/tests/unit/test_glue_source_stubs.py @@ -34,8 +34,10 @@ get_databases_response = { }, ] } -databases_1 = {"flights-database": {"Name": "flights-database"}} -databases_2 = {"test-database": {"Name": "test-database"}} +databases_1 = { + "flights-database": {"Name": "flights-database", "CatalogId": "123412341234"} +} +databases_2 = {"test-database": {"Name": "test-database", "CatalogId": "123412341234"}} tables_1 = [ { "Name": "avro",