diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/metadata.py b/ingestion/src/metadata/ingestion/source/database/databricks/metadata.py index 3bc7b99f6d0..5166ca1f56d 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/metadata.py @@ -113,11 +113,6 @@ _type_map.update( ) -def format_schema_name(schema): - # Adds back quotes(``) if hyphen(-) in schema name - return f"`{schema}`" if "-" in schema else schema - - # This method is from hive dialect originally but # is overridden to optimize DESCRIBE query execution def _get_table_columns(self, connection, table_name, schema, db_name): @@ -158,7 +153,6 @@ def _get_table_columns(self, connection, table_name, schema, db_name): def _get_column_rows(self, connection, table_name, schema, db_name): # get columns and strip whitespace - schema = format_schema_name(schema=schema) table_columns = _get_table_columns( # pylint: disable=protected-access self, connection, table_name, schema, db_name ) @@ -212,11 +206,10 @@ def get_columns(self, connection, table_name, schema=None, **kw): "system_data_type": raw_col_type, } if col_type in {"array", "struct", "map"}: - col_name = f"`{col_name}`" if "." in col_name else col_name try: rows = dict( connection.execute( - f"DESCRIBE TABLE {kw.get('db_name')}.{schema}.{table_name} {col_name}" + f"DESCRIBE TABLE `{kw.get('db_name')}`.`{schema}`.`{table_name}` `{col_name}`" ).fetchall() ) col_info["system_data_type"] = rows["data_type"] @@ -393,8 +386,7 @@ def get_table_type(self, connection, database, schema, table): database_name=database, schema_name=schema, table_name=table ) else: - schema = format_schema_name(schema=schema) - query = f"DESCRIBE TABLE EXTENDED {schema}.{table}" + query = f"DESCRIBE TABLE EXTENDED `{schema}`.`{table}`" rows = get_table_comment_result( self, connection=connection, @@ -761,7 +753,6 @@ class DatabricksSource(ExternalTableLineageMixin, CommonDbSourceService, MultiDB ) -> str: description = None try: - schema_name = format_schema_name(schema=schema_name) query = DATABRICKS_GET_TABLE_COMMENTS.format( database_name=self.context.get().database, schema_name=schema_name, @@ -816,9 +807,7 @@ class DatabricksSource(ExternalTableLineageMixin, CommonDbSourceService, MultiDB try: query = DATABRICKS_GET_TABLE_COMMENTS.format( database_name=self.context.get().database, - schema_name=format_schema_name( - schema=self.context.get().database_schema - ), + schema_name=self.context.get().database_schema, table_name=table_name, ) result = self.inspector.dialect.get_table_comment_result( diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/queries.py b/ingestion/src/metadata/ingestion/source/database/databricks/queries.py index ca057716609..732dc79ad68 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/queries.py @@ -25,27 +25,27 @@ DATABRICKS_VIEW_DEFINITIONS = textwrap.dedent( ) DATABRICKS_GET_TABLE_COMMENTS = ( - "DESCRIBE TABLE EXTENDED {database_name}.{schema_name}.{table_name}" + "DESCRIBE TABLE EXTENDED `{database_name}`.`{schema_name}`.`{table_name}`" ) DATABRICKS_GET_CATALOGS = "SHOW CATALOGS" DATABRICKS_GET_CATALOGS_TAGS = textwrap.dedent( - """SELECT * FROM {database_name}.information_schema.catalog_tags;""" + """SELECT * FROM `{database_name}`.information_schema.catalog_tags;""" ) DATABRICKS_GET_SCHEMA_TAGS = textwrap.dedent( """ SELECT * - FROM {database_name}.information_schema.schema_tags""" + FROM `{database_name}`.information_schema.schema_tags""" ) DATABRICKS_GET_TABLE_TAGS = textwrap.dedent( """ SELECT * - FROM {database_name}.information_schema.table_tags + FROM `{database_name}`.information_schema.table_tags """ ) @@ -53,8 +53,8 @@ DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent( """ SELECT * - FROM {database_name}.information_schema.column_tags + FROM `{database_name}`.information_schema.column_tags """ ) -DATABRICKS_DDL = "SHOW CREATE TABLE {table_name}" +DATABRICKS_DDL = "SHOW CREATE TABLE `{table_name}`" diff --git a/ingestion/tests/unit/test_databricks_schema.py b/ingestion/tests/unit/test_databricks_schema.py deleted file mode 100644 index a5d9ed6b112..00000000000 --- a/ingestion/tests/unit/test_databricks_schema.py +++ /dev/null @@ -1,21 +0,0 @@ -import pytest - -from metadata.ingestion.source.database.databricks.metadata import format_schema_name - - -@pytest.mark.parametrize( - "input_schema, expected_schema", - [ - ("test_schema-name", "`test_schema-name`"), - ("test_schema_name", "test_schema_name"), - ("schema-with-hyphen", "`schema-with-hyphen`"), - ("schema_with_underscore", "schema_with_underscore"), - ("validSchema", "validSchema"), - ], -) -def test_schema_name_sanitization(input_schema, expected_schema): - """ - Test sanitization of schema names by adding backticks only around hyphenated names. - """ - sanitized_schema = format_schema_name(input_schema) - assert sanitized_schema == expected_schema