feat(ingest/glue): database parameters extraction (#10665)

This commit is contained in:
skrydal 2024-06-11 20:50:46 +02:00 committed by GitHub
parent 46dbb10940
commit b9e71a61b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 31 additions and 6 deletions

View File

@ -1,3 +1,4 @@
import datetime
import json import json
import logging import logging
from collections import defaultdict from collections import defaultdict
@ -895,6 +896,12 @@ class GlueSource(StatefulIngestionSourceBase):
) -> Iterable[MetadataWorkUnit]: ) -> Iterable[MetadataWorkUnit]:
domain_urn = self._gen_domain_urn(database["Name"]) domain_urn = self._gen_domain_urn(database["Name"])
database_container_key = self.gen_database_key(database["Name"]) database_container_key = self.gen_database_key(database["Name"])
# Build the extra properties attached to the database container.
# Copy the Glue "Parameters" map (also tolerating an explicit None) so the
# keys added below never leak back into the caller's `database` dict.
parameters = dict(database.get("Parameters") or {})
if database.get("LocationUri") is not None:
    parameters["LocationUri"] = database["LocationUri"]
if database.get("CreateTime") is not None:
    create_time: datetime.datetime = database["CreateTime"]
    # Renders e.g. "June 9, 2021 at 14:14:19". The unpadded day comes from
    # `.day` instead of the "%-d" directive, which is a platform-specific
    # strftime extension (unsupported on Windows).
    parameters["CreateTime"] = (
        f"{create_time.strftime('%B')} {create_time.day}, "
        f"{create_time.strftime('%Y at %H:%M:%S')}"
    )
yield from gen_containers( yield from gen_containers(
container_key=database_container_key, container_key=database_container_key,
name=database["Name"], name=database["Name"],
@ -904,6 +911,7 @@ class GlueSource(StatefulIngestionSourceBase):
qualified_name=self.get_glue_arn( qualified_name=self.get_glue_arn(
account_id=database["CatalogId"], database=database["Name"] account_id=database["CatalogId"], database=database["Name"]
), ),
extra_properties=parameters,
) )
def add_table_to_database_container( def add_table_to_database_container(

View File

@ -10,7 +10,8 @@
"platform": "glue", "platform": "glue",
"instance": "delta_platform_instance", "instance": "delta_platform_instance",
"env": "PROD", "env": "PROD",
"database": "delta-database" "database": "delta-database",
"CreateTime": "June 9, 2021 at 14:14:19"
}, },
"name": "delta-database", "name": "delta-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database"

View File

@ -10,7 +10,8 @@
"platform": "glue", "platform": "glue",
"instance": "delta_platform_instance", "instance": "delta_platform_instance",
"env": "PROD", "env": "PROD",
"database": "delta-database" "database": "delta-database",
"CreateTime": "June 9, 2021 at 14:14:19"
}, },
"name": "delta-database", "name": "delta-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database"

View File

@ -9,7 +9,11 @@
"customProperties": { "customProperties": {
"platform": "glue", "platform": "glue",
"env": "PROD", "env": "PROD",
"database": "flights-database" "database": "flights-database",
"param1": "value1",
"param2": "value2",
"LocationUri": "s3://test-bucket/test-prefix",
"CreateTime": "June 9, 2021 at 14:14:19"
}, },
"name": "flights-database", "name": "flights-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database"
@ -288,7 +292,8 @@
"customProperties": { "customProperties": {
"platform": "glue", "platform": "glue",
"env": "PROD", "env": "PROD",
"database": "test-database" "database": "test-database",
"CreateTime": "June 1, 2021 at 14:55:02"
}, },
"name": "test-database", "name": "test-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database"

View File

@ -10,7 +10,11 @@
"platform": "glue", "platform": "glue",
"instance": "some_instance_name", "instance": "some_instance_name",
"env": "PROD", "env": "PROD",
"database": "flights-database" "database": "flights-database",
"param1": "value1",
"param2": "value2",
"LocationUri": "s3://test-bucket/test-prefix",
"CreateTime": "June 9, 2021 at 14:14:19"
}, },
"name": "flights-database", "name": "flights-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database"
@ -235,6 +239,7 @@
"type": "DATAOWNER" "type": "DATAOWNER"
} }
], ],
"ownerTypes": {},
"lastModified": { "lastModified": {
"time": 0, "time": 0,
"actor": "urn:li:corpuser:unknown" "actor": "urn:li:corpuser:unknown"
@ -292,7 +297,8 @@
"platform": "glue", "platform": "glue",
"instance": "some_instance_name", "instance": "some_instance_name",
"env": "PROD", "env": "PROD",
"database": "test-database" "database": "test-database",
"CreateTime": "June 1, 2021 at 14:55:02"
}, },
"name": "test-database", "name": "test-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database" "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database"
@ -474,6 +480,7 @@
"type": "DATAOWNER" "type": "DATAOWNER"
} }
], ],
"ownerTypes": {},
"lastModified": { "lastModified": {
"time": 0, "time": 0,
"actor": "urn:li:corpuser:unknown" "actor": "urn:li:corpuser:unknown"
@ -660,6 +667,7 @@
"type": "DATAOWNER" "type": "DATAOWNER"
} }
], ],
"ownerTypes": {},
"lastModified": { "lastModified": {
"time": 0, "time": 0,
"actor": "urn:li:corpuser:unknown" "actor": "urn:li:corpuser:unknown"

View File

@ -61,6 +61,8 @@ get_databases_response = {
} }
], ],
"CatalogId": "123412341234", "CatalogId": "123412341234",
"LocationUri": "s3://test-bucket/test-prefix",
"Parameters": {"param1": "value1", "param2": "value2"},
}, },
{ {
"Name": "test-database", "Name": "test-database",