Fix #19856 - Set the db in mysql/mariadb for metric computations (#19994)

* use db for mysql/mariadb
* format
2025-11-04 04:29:13 +00:00 · 2025-02-26 20:30:42 +01:00 · 2025-02-26 20:30:42 +01:00 · 603d61eaa2
commit 603d61eaa2
parent 6b7a9fe76c
6 changed files with 26 additions and 13 deletions
--- a/ingestion/src/metadata/mixins/sqalchemy/sqa_mixin.py
+++ b/ingestion/src/metadata/mixins/sqalchemy/sqa_mixin.py
@@ -24,6 +24,12 @@ from metadata.generated.schema.entity.data.table import Table
 from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
    DatabricksConnection,
 )
+from metadata.generated.schema.entity.services.connections.database.mariaDBConnection import (
+    MariaDBConnection,
+)
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+    MysqlConnection,
+)
 from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
    SnowflakeType,
 )
@@ -79,21 +85,26 @@ class SQAInterfaceMixin(Root):
            )

    def set_catalog(self, session) -> None:
-        """Set catalog for the session. Right now only databricks and unity catalog requires it
+        """Set the catalog or database for the session.

        Args:
            session (Session): sqa session object
        """
-        if not isinstance(
+        if isinstance(
            self.service_connection_config,
            (UnityCatalogConnection, DatabricksConnection),
        ):
-            return
-        bind = session.get_bind()
-        bind.execute(
-            "USE CATALOG %(catalog)s;",
-            {"catalog": self.service_connection_config.catalog},
-        ).first()
+            session.get_bind().execute(
+                "USE CATALOG %(catalog)s;",
+                {"catalog": self.service_connection_config.catalog},
+            ).first()
+
+        if isinstance(
+            self.service_connection_config, (MysqlConnection, MariaDBConnection)
+        ):
+            session.get_bind().execute(
+                f"USE {self.table_entity.databaseSchema.name};",
+            )

    def close(self):
        """close session"""
--- a/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py
+++ b/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py
@@ -297,7 +297,7 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
            column: the column to compute the metrics against
            metrics: list of metrics to compute
        Returns:
-            dictionnary of results
+            dictionary of results
        """

        if not metrics:
--- a/ingestion/src/metadata/profiler/metrics/window/first_quartile.py
+++ b/ingestion/src/metadata/profiler/metrics/window/first_quartile.py
@@ -88,7 +88,7 @@ class FirstQuartile(StaticMetric, PercentilMixin):
            except MemoryError:
                logger.error(
                    f"Unable to compute Median for {self.col.name} due to memory constraints."
-                    f"We recommend using a smaller sample size or partitionning."
+                    f"We recommend using a smaller sample size or partitioning."
                )
                return None
            # check if nan
--- a/ingestion/src/metadata/profiler/metrics/window/median.py
+++ b/ingestion/src/metadata/profiler/metrics/window/median.py
@@ -87,7 +87,7 @@ class Median(StaticMetric, PercentilMixin):
            except MemoryError:
                logger.error(
                    f"Unable to compute Median for {self.col.name} due to memory constraints."
-                    f"We recommend using a smaller sample size or partitionning."
+                    f"We recommend using a smaller sample size or partitioning."
                )
                return None
            try:
--- a/ingestion/src/metadata/profiler/metrics/window/third_quartile.py
+++ b/ingestion/src/metadata/profiler/metrics/window/third_quartile.py
@@ -88,7 +88,7 @@ class ThirdQuartile(StaticMetric, PercentilMixin):
            except MemoryError:
                logger.error(
                    f"Unable to compute Median for {self.col.name} due to memory constraints."
-                    f"We recommend using a smaller sample size or partitionning."
+                    f"We recommend using a smaller sample size or partitioning."
                )
                return None
            # check if nan
--- a/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py
+++ b/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py
@@ -51,7 +51,9 @@ class AzureSQLSampler(SQASampler):

    def get_sample_query(self, *, column=None) -> Query:
        """get query for sample data"""
-        rnd = self._base_sample_query(column).cte(f"{self.get_sampler_table_name()}_rnd")
+        rnd = self._base_sample_query(column).cte(
+            f"{self.get_sampler_table_name()}_rnd"
+        )
        query = self.client.query(rnd)
        return query.cte(f"{self.get_sampler_table_name()}_sample")