Fix #19633: Fix databricks schema not found (#19646)

This commit is contained in:
Mayur Singal 2025-02-04 11:42:11 +05:30 committed by GitHub
parent 636a83514d
commit 208c40be09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 41 deletions

View File

@@ -113,11 +113,6 @@ _type_map.update(
) )
def format_schema_name(schema):
# Adds back quotes(``) if hyphen(-) in schema name
return f"`{schema}`" if "-" in schema else schema
# This method is from hive dialect originally but # This method is from hive dialect originally but
# is overridden to optimize DESCRIBE query execution # is overridden to optimize DESCRIBE query execution
def _get_table_columns(self, connection, table_name, schema, db_name): def _get_table_columns(self, connection, table_name, schema, db_name):
@@ -158,7 +153,6 @@ def _get_table_columns(self, connection, table_name, schema, db_name):
def _get_column_rows(self, connection, table_name, schema, db_name): def _get_column_rows(self, connection, table_name, schema, db_name):
# get columns and strip whitespace # get columns and strip whitespace
schema = format_schema_name(schema=schema)
table_columns = _get_table_columns( # pylint: disable=protected-access table_columns = _get_table_columns( # pylint: disable=protected-access
self, connection, table_name, schema, db_name self, connection, table_name, schema, db_name
) )
@@ -212,11 +206,10 @@ def get_columns(self, connection, table_name, schema=None, **kw):
"system_data_type": raw_col_type, "system_data_type": raw_col_type,
} }
if col_type in {"array", "struct", "map"}: if col_type in {"array", "struct", "map"}:
col_name = f"`{col_name}`" if "." in col_name else col_name
try: try:
rows = dict( rows = dict(
connection.execute( connection.execute(
f"DESCRIBE TABLE {kw.get('db_name')}.{schema}.{table_name} {col_name}" f"DESCRIBE TABLE `{kw.get('db_name')}`.`{schema}`.`{table_name}` `{col_name}`"
).fetchall() ).fetchall()
) )
col_info["system_data_type"] = rows["data_type"] col_info["system_data_type"] = rows["data_type"]
@@ -393,8 +386,7 @@ def get_table_type(self, connection, database, schema, table):
database_name=database, schema_name=schema, table_name=table database_name=database, schema_name=schema, table_name=table
) )
else: else:
schema = format_schema_name(schema=schema) query = f"DESCRIBE TABLE EXTENDED `{schema}`.`{table}`"
query = f"DESCRIBE TABLE EXTENDED {schema}.{table}"
rows = get_table_comment_result( rows = get_table_comment_result(
self, self,
connection=connection, connection=connection,
@@ -761,7 +753,6 @@ class DatabricksSource(ExternalTableLineageMixin, CommonDbSourceService, MultiDB
) -> str: ) -> str:
description = None description = None
try: try:
schema_name = format_schema_name(schema=schema_name)
query = DATABRICKS_GET_TABLE_COMMENTS.format( query = DATABRICKS_GET_TABLE_COMMENTS.format(
database_name=self.context.get().database, database_name=self.context.get().database,
schema_name=schema_name, schema_name=schema_name,
@@ -816,9 +807,7 @@ class DatabricksSource(ExternalTableLineageMixin, CommonDbSourceService, MultiDB
try: try:
query = DATABRICKS_GET_TABLE_COMMENTS.format( query = DATABRICKS_GET_TABLE_COMMENTS.format(
database_name=self.context.get().database, database_name=self.context.get().database,
schema_name=format_schema_name( schema_name=self.context.get().database_schema,
schema=self.context.get().database_schema
),
table_name=table_name, table_name=table_name,
) )
result = self.inspector.dialect.get_table_comment_result( result = self.inspector.dialect.get_table_comment_result(

View File

@@ -25,27 +25,27 @@ DATABRICKS_VIEW_DEFINITIONS = textwrap.dedent(
) )
DATABRICKS_GET_TABLE_COMMENTS = ( DATABRICKS_GET_TABLE_COMMENTS = (
"DESCRIBE TABLE EXTENDED {database_name}.{schema_name}.{table_name}" "DESCRIBE TABLE EXTENDED `{database_name}`.`{schema_name}`.`{table_name}`"
) )
DATABRICKS_GET_CATALOGS = "SHOW CATALOGS" DATABRICKS_GET_CATALOGS = "SHOW CATALOGS"
DATABRICKS_GET_CATALOGS_TAGS = textwrap.dedent( DATABRICKS_GET_CATALOGS_TAGS = textwrap.dedent(
"""SELECT * FROM {database_name}.information_schema.catalog_tags;""" """SELECT * FROM `{database_name}`.information_schema.catalog_tags;"""
) )
DATABRICKS_GET_SCHEMA_TAGS = textwrap.dedent( DATABRICKS_GET_SCHEMA_TAGS = textwrap.dedent(
""" """
SELECT SELECT
* *
FROM {database_name}.information_schema.schema_tags""" FROM `{database_name}`.information_schema.schema_tags"""
) )
DATABRICKS_GET_TABLE_TAGS = textwrap.dedent( DATABRICKS_GET_TABLE_TAGS = textwrap.dedent(
""" """
SELECT SELECT
* *
FROM {database_name}.information_schema.table_tags FROM `{database_name}`.information_schema.table_tags
""" """
) )
@@ -53,8 +53,8 @@ DATABRICKS_GET_COLUMN_TAGS = textwrap.dedent(
""" """
SELECT SELECT
* *
FROM {database_name}.information_schema.column_tags FROM `{database_name}`.information_schema.column_tags
""" """
) )
DATABRICKS_DDL = "SHOW CREATE TABLE {table_name}" DATABRICKS_DDL = "SHOW CREATE TABLE `{table_name}`"

View File

@ -1,21 +0,0 @@
import pytest
from metadata.ingestion.source.database.databricks.metadata import format_schema_name
@pytest.mark.parametrize(
"input_schema, expected_schema",
[
("test_schema-name", "`test_schema-name`"),
("test_schema_name", "test_schema_name"),
("schema-with-hyphen", "`schema-with-hyphen`"),
("schema_with_underscore", "schema_with_underscore"),
("validSchema", "validSchema"),
],
)
def test_schema_name_sanitization(input_schema, expected_schema):
"""
Test sanitization of schema names by adding backticks only around hyphenated names.
"""
sanitized_schema = format_schema_name(input_schema)
assert sanitized_schema == expected_schema