feat(ingest/glue): database parameters extraction (#10665)

This commit is contained in:
skrydal 2024-06-11 20:50:46 +02:00 committed by GitHub
parent 46dbb10940
commit b9e71a61b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 31 additions and 6 deletions

View File

@ -1,3 +1,4 @@
import datetime
import json
import logging
from collections import defaultdict
@ -895,6 +896,12 @@ class GlueSource(StatefulIngestionSourceBase):
) -> Iterable[MetadataWorkUnit]:
domain_urn = self._gen_domain_urn(database["Name"])
database_container_key = self.gen_database_key(database["Name"])
parameters = database.get("Parameters", {})
if database.get("LocationUri") is not None:
parameters["LocationUri"] = database["LocationUri"]
if database.get("CreateTime") is not None:
create_time: datetime.datetime = database["CreateTime"]
parameters["CreateTime"] = create_time.strftime("%B %-d, %Y at %H:%M:%S")
yield from gen_containers(
container_key=database_container_key,
name=database["Name"],
@ -904,6 +911,7 @@ class GlueSource(StatefulIngestionSourceBase):
qualified_name=self.get_glue_arn(
account_id=database["CatalogId"], database=database["Name"]
),
extra_properties=parameters,
)
def add_table_to_database_container(

View File

@ -10,7 +10,8 @@
"platform": "glue",
"instance": "delta_platform_instance",
"env": "PROD",
"database": "delta-database"
"database": "delta-database",
"CreateTime": "June 9, 2021 at 14:14:19"
},
"name": "delta-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database"

View File

@ -10,7 +10,8 @@
"platform": "glue",
"instance": "delta_platform_instance",
"env": "PROD",
"database": "delta-database"
"database": "delta-database",
"CreateTime": "June 9, 2021 at 14:14:19"
},
"name": "delta-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database"

View File

@ -9,7 +9,11 @@
"customProperties": {
"platform": "glue",
"env": "PROD",
"database": "flights-database"
"database": "flights-database",
"param1": "value1",
"param2": "value2",
"LocationUri": "s3://test-bucket/test-prefix",
"CreateTime": "June 9, 2021 at 14:14:19"
},
"name": "flights-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database"
@ -288,7 +292,8 @@
"customProperties": {
"platform": "glue",
"env": "PROD",
"database": "test-database"
"database": "test-database",
"CreateTime": "June 1, 2021 at 14:55:02"
},
"name": "test-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database"

View File

@ -10,7 +10,11 @@
"platform": "glue",
"instance": "some_instance_name",
"env": "PROD",
"database": "flights-database"
"database": "flights-database",
"param1": "value1",
"param2": "value2",
"LocationUri": "s3://test-bucket/test-prefix",
"CreateTime": "June 9, 2021 at 14:14:19"
},
"name": "flights-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/flights-database"
@ -235,6 +239,7 @@
"type": "DATAOWNER"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
@ -292,7 +297,8 @@
"platform": "glue",
"instance": "some_instance_name",
"env": "PROD",
"database": "test-database"
"database": "test-database",
"CreateTime": "June 1, 2021 at 14:55:02"
},
"name": "test-database",
"qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/test-database"
@ -474,6 +480,7 @@
"type": "DATAOWNER"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
@ -660,6 +667,7 @@
"type": "DATAOWNER"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"

View File

@ -61,6 +61,8 @@ get_databases_response = {
}
],
"CatalogId": "123412341234",
"LocationUri": "s3://test-bucket/test-prefix",
"Parameters": {"param1": "value1", "param2": "value2"},
},
{
"Name": "test-database",