mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-04 06:33:10 +00:00
fix: DBX profiler struct support + nested column retrieval for profiler (#17267)
This commit is contained in:
parent
4bb6d7ec1c
commit
2a854f90e1
@ -17,7 +17,6 @@ from typing import List
|
|||||||
|
|
||||||
from pyhive.sqlalchemy_hive import HiveCompiler
|
from pyhive.sqlalchemy_hive import HiveCompiler
|
||||||
from sqlalchemy import Column, inspect
|
from sqlalchemy import Column, inspect
|
||||||
from sqlalchemy.sql import column
|
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import Column as OMColumn
|
from metadata.generated.schema.entity.data.table import Column as OMColumn
|
||||||
from metadata.generated.schema.entity.data.table import ColumnName, DataType, TableData
|
from metadata.generated.schema.entity.data.table import ColumnName, DataType, TableData
|
||||||
@ -61,20 +60,26 @@ class DatabricksProfilerInterface(SQAProfilerInterface):
|
|||||||
columns_list = []
|
columns_list = []
|
||||||
for col in columns:
|
for col in columns:
|
||||||
if col.dataType != DataType.STRUCT:
|
if col.dataType != DataType.STRUCT:
|
||||||
col.name = ColumnName(f"{parent}.{col.name.root}")
|
# For DBX struct we need to quote the column name as `a`.`b`.`c`
|
||||||
col = build_orm_col(
|
# otherwise the driver will quote it as `a.b.c`
|
||||||
idx=1, col=col, table_service_type=DatabaseServiceType.Databricks
|
col_name = ".".join([f"`{part}`" for part in parent.split(".")])
|
||||||
|
col.name = ColumnName(f"{col_name}.`{col.name.root}`")
|
||||||
|
# Set `_quote` to False to avoid quoting the column name again when compiled
|
||||||
|
sqa_col = build_orm_col(
|
||||||
|
idx=1,
|
||||||
|
col=col,
|
||||||
|
table_service_type=DatabaseServiceType.Databricks,
|
||||||
|
_quote=False,
|
||||||
)
|
)
|
||||||
col._set_parent( # pylint: disable=protected-access
|
sqa_col._set_parent( # pylint: disable=protected-access
|
||||||
self.table.__table__
|
self.table.__table__
|
||||||
)
|
)
|
||||||
|
columns_list.append(sqa_col)
|
||||||
columns_list.append(column(col.label(col.name.replace(".", "_"))))
|
|
||||||
else:
|
else:
|
||||||
col = self._get_struct_columns(
|
cols = self._get_struct_columns(
|
||||||
col.children, f"{parent}.{col.name.root}"
|
col.children, f"{parent}.{col.name.root}"
|
||||||
)
|
)
|
||||||
columns_list.extend(col)
|
columns_list.extend(cols)
|
||||||
return columns_list
|
return columns_list
|
||||||
|
|
||||||
def get_columns(self) -> Column:
|
def get_columns(self) -> Column:
|
||||||
@ -86,7 +91,7 @@ class DatabricksProfilerInterface(SQAProfilerInterface):
|
|||||||
self._get_struct_columns(column_obj.children, column_obj.name.root)
|
self._get_struct_columns(column_obj.children, column_obj.name.root)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
col = build_orm_col(idx, column, DatabaseServiceType.Databricks)
|
col = build_orm_col(idx, column_obj, DatabaseServiceType.Databricks)
|
||||||
col._set_parent( # pylint: disable=protected-access
|
col._set_parent( # pylint: disable=protected-access
|
||||||
self.table.__table__
|
self.table.__table__
|
||||||
)
|
)
|
||||||
|
@ -64,7 +64,9 @@ def check_if_should_quote_column_name(table_service_type) -> Optional[bool]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def build_orm_col(idx: int, col: Column, table_service_type) -> sqlalchemy.Column:
|
def build_orm_col(
|
||||||
|
idx: int, col: Column, table_service_type, *, _quote=None
|
||||||
|
) -> sqlalchemy.Column:
|
||||||
"""
|
"""
|
||||||
Cook the ORM column from our metadata instance
|
Cook the ORM column from our metadata instance
|
||||||
information.
|
information.
|
||||||
@ -76,14 +78,20 @@ def build_orm_col(idx: int, col: Column, table_service_type) -> sqlalchemy.Colum
|
|||||||
As this is only used for INSERT/UPDATE/DELETE,
|
As this is only used for INSERT/UPDATE/DELETE,
|
||||||
there is no impact for our read-only purposes.
|
there is no impact for our read-only purposes.
|
||||||
"""
|
"""
|
||||||
|
if _quote is not None:
|
||||||
|
quote = _quote
|
||||||
|
else:
|
||||||
|
quote = check_if_should_quote_column_name(
|
||||||
|
table_service_type
|
||||||
|
) or check_snowflake_case_sensitive(table_service_type, col.name.root)
|
||||||
|
|
||||||
return sqlalchemy.Column(
|
return sqlalchemy.Column(
|
||||||
name=str(col.name.root),
|
name=str(col.name.root),
|
||||||
type_=converter_registry[table_service_type]().map_types(
|
type_=converter_registry[table_service_type]().map_types(
|
||||||
col, table_service_type
|
col, table_service_type
|
||||||
),
|
),
|
||||||
primary_key=not bool(idx), # The first col seen is used as PK
|
primary_key=not bool(idx), # The first col seen is used as PK
|
||||||
quote=check_if_should_quote_column_name(table_service_type)
|
quote=quote,
|
||||||
or check_snowflake_case_sensitive(table_service_type, col.name.root),
|
|
||||||
key=str(
|
key=str(
|
||||||
col.name.root
|
col.name.root
|
||||||
).lower(), # Add lowercase column name as key for snowflake case sensitive columns
|
).lower(), # Add lowercase column name as key for snowflake case sensitive columns
|
||||||
|
@ -181,7 +181,11 @@ public class FullyQualifiedName {
|
|||||||
public static String getTableFQN(String columnFQN) {
|
public static String getTableFQN(String columnFQN) {
|
||||||
// Split columnFQN of format databaseServiceName.databaseName.tableName.columnName
|
// Split columnFQN of format databaseServiceName.databaseName.tableName.columnName
|
||||||
String[] split = split(columnFQN);
|
String[] split = split(columnFQN);
|
||||||
if (split.length != 5) {
|
// column FQN for struct columns are of format
|
||||||
|
// service.database.schema.table.column.child1.child2
|
||||||
|
// and not service.database.schema.table."column.child1.child2" so split length should be 5 or
|
||||||
|
// more
|
||||||
|
if (split.length < 5) {
|
||||||
throw new IllegalArgumentException("Invalid fully qualified column name " + columnFQN);
|
throw new IllegalArgumentException("Invalid fully qualified column name " + columnFQN);
|
||||||
}
|
}
|
||||||
// Return table FQN of format databaseService.tableName
|
// Return table FQN of format databaseService.tableName
|
||||||
|
Loading…
x
Reference in New Issue
Block a user