diff --git a/ingestion/src/metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py b/ingestion/src/metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py index 90f2c98879a..60e3f7c7cc3 100644 --- a/ingestion/src/metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py +++ b/ingestion/src/metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py @@ -17,7 +17,6 @@ from typing import List from pyhive.sqlalchemy_hive import HiveCompiler from sqlalchemy import Column, inspect -from sqlalchemy.sql import column from metadata.generated.schema.entity.data.table import Column as OMColumn from metadata.generated.schema.entity.data.table import ColumnName, DataType, TableData @@ -61,20 +60,26 @@ class DatabricksProfilerInterface(SQAProfilerInterface): columns_list = [] for col in columns: if col.dataType != DataType.STRUCT: - col.name = ColumnName(f"{parent}.{col.name.root}") - col = build_orm_col( - idx=1, col=col, table_service_type=DatabaseServiceType.Databricks + # For DBX struct we need to quote the column name as `a`.`b`.`c` + # otherwise the driver will quote it as `a.b.c` + col_name = ".".join([f"`{part}`" for part in parent.split(".")]) + col.name = ColumnName(f"{col_name}.`{col.name.root}`") + # Set `_quote` to False to avoid quoting the column name again when compiled + sqa_col = build_orm_col( + idx=1, + col=col, + table_service_type=DatabaseServiceType.Databricks, + _quote=False, ) - col._set_parent( # pylint: disable=protected-access + sqa_col._set_parent( # pylint: disable=protected-access self.table.__table__ ) - - columns_list.append(column(col.label(col.name.replace(".", "_")))) + columns_list.append(sqa_col) else: - col = self._get_struct_columns( + cols = self._get_struct_columns( col.children, f"{parent}.{col.name.root}" ) - columns_list.extend(col) + columns_list.extend(cols) return columns_list def get_columns(self) -> Column: @@ -86,7 +91,7 @@ class DatabricksProfilerInterface(SQAProfilerInterface): self._get_struct_columns(column_obj.children, column_obj.name.root) ) else: - col = build_orm_col(idx, column, DatabaseServiceType.Databricks) + col = build_orm_col(idx, column_obj, DatabaseServiceType.Databricks) col._set_parent( # pylint: disable=protected-access self.table.__table__ ) diff --git a/ingestion/src/metadata/profiler/orm/converter/base.py b/ingestion/src/metadata/profiler/orm/converter/base.py index 2a58c60003a..865cf7c1992 100644 --- a/ingestion/src/metadata/profiler/orm/converter/base.py +++ b/ingestion/src/metadata/profiler/orm/converter/base.py @@ -64,7 +64,9 @@ def check_if_should_quote_column_name(table_service_type) -> Optional[bool]: return None -def build_orm_col(idx: int, col: Column, table_service_type) -> sqlalchemy.Column: +def build_orm_col( + idx: int, col: Column, table_service_type, *, _quote=None +) -> sqlalchemy.Column: """ Cook the ORM column from our metadata instance information. @@ -76,14 +78,20 @@ def build_orm_col(idx: int, col: Column, table_service_type) -> sqlalchemy.Colum As this is only used for INSERT/UPDATE/DELETE, there is no impact for our read-only purposes. """ + if _quote is not None: + quote = _quote + else: + quote = check_if_should_quote_column_name( + table_service_type + ) or check_snowflake_case_sensitive(table_service_type, col.name.root) + return sqlalchemy.Column( name=str(col.name.root), type_=converter_registry[table_service_type]().map_types( col, table_service_type ), primary_key=not bool(idx), # The first col seen is used as PK - quote=check_if_should_quote_column_name(table_service_type) - or check_snowflake_case_sensitive(table_service_type, col.name.root), + quote=quote, key=str( col.name.root ).lower(), # Add lowercase column name as key for snowflake case sensitive columns diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/FullyQualifiedName.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/FullyQualifiedName.java index 448bf828357..17f33aa8130 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/FullyQualifiedName.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/FullyQualifiedName.java @@ -181,7 +181,11 @@ public class FullyQualifiedName { public static String getTableFQN(String columnFQN) { // Split columnFQN of format databaseServiceName.databaseName.tableName.columnName String[] split = split(columnFQN); - if (split.length != 5) { + // column FQN for struct columns are of format + // service.database.schema.table.column.child1.child2 + // and not service.database.schema.table."column.child1.child2" so split length should be 5 or + // more + if (split.length < 5) { throw new IllegalArgumentException("Invalid fully qualified column name " + columnFQN); } // Return table FQN of format databaseService.tableName