mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-20 06:58:18 +00:00
Issue-2538: Add Iceberg type for Glue Table + Location Entity (#3210)
This commit is contained in:
parent
b4118c7863
commit
6ee31eb21f
2
.gitignore
vendored
2
.gitignore
vendored
@ -67,6 +67,8 @@ openmetadata-ui/src/main/resources/ui/src/test
|
||||
|
||||
#tests
|
||||
.coverage
|
||||
/ingestion/coverage.xml
|
||||
/ingestion/junit/*
|
||||
|
||||
#vscode
|
||||
*/.vscode/*
|
||||
|
@ -16,7 +16,7 @@
|
||||
"javaType": "org.openmetadata.catalog.type.LocationType",
|
||||
"description": "This schema defines the type used for describing different types of Location.",
|
||||
"type": "string",
|
||||
"enum": ["Bucket", "Prefix", "Database", "Table"],
|
||||
"enum": ["Bucket", "Prefix", "Database", "Table", "Iceberg"],
|
||||
"javaEnums": [
|
||||
{
|
||||
"name": "Bucket"
|
||||
@ -29,6 +29,9 @@
|
||||
},
|
||||
{
|
||||
"name": "Table"
|
||||
},
|
||||
{
|
||||
"name": "Iceberg"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
"javaType": "org.openmetadata.catalog.type.TableType",
|
||||
"description": "This schema defines the type used for describing different types of tables.",
|
||||
"type": "string",
|
||||
"enum": ["Regular", "External", "View", "SecureView", "MaterializedView"],
|
||||
"enum": ["Regular", "External", "View", "SecureView", "MaterializedView", "Iceberg"],
|
||||
"javaEnums": [
|
||||
{
|
||||
"name": "Regular"
|
||||
@ -26,6 +26,9 @@
|
||||
},
|
||||
{
|
||||
"name": "MaterializedView"
|
||||
},
|
||||
{
|
||||
"name": "Iceberg"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -20,6 +20,30 @@
|
||||
"ordinalPosition": 2
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "marketing",
|
||||
"description": "Marketing data",
|
||||
"tableType": "Iceberg",
|
||||
"columns": [
|
||||
{
|
||||
"name": "ad_id",
|
||||
"dataType": "NUMERIC",
|
||||
"dataTypeDisplay": "bigint",
|
||||
"description": "Ad ID",
|
||||
"ordinalPosition": 1
|
||||
},
|
||||
{
|
||||
"name": "campaign_id",
|
||||
"dataType": "NUMERIC",
|
||||
"dataTypeDisplay": "bigint",
|
||||
"description": "campaign ID",
|
||||
"ordinalPosition": 1
|
||||
}
|
||||
],
|
||||
"Parameters": {
|
||||
"table_type": "ICEBERG"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@ -152,18 +152,31 @@ class GlueSource(Source[Entity]):
|
||||
service=EntityReference(id=self.service.id, type="databaseService"),
|
||||
)
|
||||
fqn = f"{self.config.service_name}.{self.database_name}.{table['Name']}"
|
||||
parameters = table.get("Parameters")
|
||||
location_type = LocationType.Table
|
||||
if parameters:
|
||||
# iceberg tables need to pass a key/value pair in the DDL `'table_type'='ICEBERG'`
|
||||
# https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html
|
||||
location_type = (
|
||||
location_type
|
||||
if parameters.get("table_type") != "ICEBERG"
|
||||
else LocationType.Iceberg
|
||||
)
|
||||
|
||||
self.dataset_name = fqn
|
||||
table_columns = self.get_columns(table["StorageDescriptor"])
|
||||
location_entity = Location(
|
||||
name=table["StorageDescriptor"]["Location"],
|
||||
locationType=LocationType.Table,
|
||||
locationType=location_type,
|
||||
service=EntityReference(
|
||||
id=self.storage_service.id, type="storageService"
|
||||
),
|
||||
)
|
||||
|
||||
table_type: TableType = TableType.Regular
|
||||
if table["TableType"] == "EXTERNAL_TABLE":
|
||||
if location_type == LocationType.Iceberg:
|
||||
table_type = TableType.Iceberg
|
||||
elif table["TableType"] == "EXTERNAL_TABLE":
|
||||
table_type = TableType.External
|
||||
elif table["TableType"] == "VIRTUAL_VIEW":
|
||||
table_type = TableType.View
|
||||
|
@ -331,12 +331,21 @@ class SampleDataSource(Source[Entity]):
|
||||
)
|
||||
for table in self.glue_tables["tables"]:
|
||||
table["id"] = uuid.uuid4()
|
||||
parameters = table.get("Parameters")
|
||||
table = {key: val for key, val in table.items() if key != "Parameters"}
|
||||
table_metadata = Table(**table)
|
||||
location_type = LocationType.Table
|
||||
if parameters:
|
||||
location_type = (
|
||||
location_type
|
||||
if parameters.get("table_type") != "ICEBERG"
|
||||
else LocationType.Iceberg
|
||||
)
|
||||
location_metadata = Location(
|
||||
id=uuid.uuid4(),
|
||||
name="s3://glue_bucket/dwh/schema/" + table["name"],
|
||||
description=table["description"],
|
||||
locationType=LocationType.Table,
|
||||
locationType=location_type,
|
||||
service=EntityReference(
|
||||
id=self.glue_storage_service.id, type="storageService"
|
||||
),
|
||||
|
@ -254,7 +254,7 @@ class ColumnTypeParser:
|
||||
}
|
||||
elif ColumnTypeParser._FIXED_STRING.match(s):
|
||||
m = ColumnTypeParser._FIXED_STRING.match(s)
|
||||
return {"type": "STRING", "dataTypeDisplay": s}
|
||||
return {"dataType": "STRING", "dataTypeDisplay": s}
|
||||
elif ColumnTypeParser._FIXED_DECIMAL.match(s):
|
||||
m = ColumnTypeParser._FIXED_DECIMAL.match(s)
|
||||
if m.group(2) is not None: # type: ignore
|
||||
|
Loading…
x
Reference in New Issue
Block a user