Fix #19856 - Set the db in mysql/mariadb for metric computations (#19994)

* use db for mysql/mariadb

* format
This commit is contained in:
Pere Miquel Brull 2025-02-26 20:30:42 +01:00 committed by GitHub
parent 6b7a9fe76c
commit 603d61eaa2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 26 additions and 13 deletions

View File

@ -24,6 +24,12 @@ from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
)
from metadata.generated.schema.entity.services.connections.database.mariaDBConnection import (
MariaDBConnection,
)
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
MysqlConnection,
)
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
SnowflakeType,
)
@ -79,21 +85,26 @@ class SQAInterfaceMixin(Root):
)
def set_catalog(self, session) -> None:
"""Set catalog for the session. Right now only databricks and unity catalog requires it
"""Set the catalog or database for the session.
Args:
session (Session): sqa session object
"""
if not isinstance(
if isinstance(
self.service_connection_config,
(UnityCatalogConnection, DatabricksConnection),
):
return
bind = session.get_bind()
bind.execute(
"USE CATALOG %(catalog)s;",
{"catalog": self.service_connection_config.catalog},
).first()
session.get_bind().execute(
"USE CATALOG %(catalog)s;",
{"catalog": self.service_connection_config.catalog},
).first()
if isinstance(
self.service_connection_config, (MysqlConnection, MariaDBConnection)
):
session.get_bind().execute(
f"USE {self.table_entity.databaseSchema.name};",
)
def close(self):
"""close session"""

View File

@ -297,7 +297,7 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
column: the column to compute the metrics against
metrics: list of metrics to compute
Returns:
dictionnary of results
dictionary of results
"""
if not metrics:

View File

@ -88,7 +88,7 @@ class FirstQuartile(StaticMetric, PercentilMixin):
except MemoryError:
logger.error(
f"Unable to compute Median for {self.col.name} due to memory constraints."
f"We recommend using a smaller sample size or partitionning."
f"We recommend using a smaller sample size or partitioning."
)
return None
# check if nan

View File

@ -87,7 +87,7 @@ class Median(StaticMetric, PercentilMixin):
except MemoryError:
logger.error(
f"Unable to compute Median for {self.col.name} due to memory constraints."
f"We recommend using a smaller sample size or partitionning."
f"We recommend using a smaller sample size or partitioning."
)
return None
try:

View File

@ -88,7 +88,7 @@ class ThirdQuartile(StaticMetric, PercentilMixin):
except MemoryError:
logger.error(
f"Unable to compute Median for {self.col.name} due to memory constraints."
f"We recommend using a smaller sample size or partitionning."
f"We recommend using a smaller sample size or partitioning."
)
return None
# check if nan

View File

@ -51,7 +51,9 @@ class AzureSQLSampler(SQASampler):
def get_sample_query(self, *, column=None) -> Query:
"""get query for sample data"""
rnd = self._base_sample_query(column).cte(f"{self.get_sampler_table_name()}_rnd")
rnd = self._base_sample_query(column).cte(
f"{self.get_sampler_table_name()}_rnd"
)
query = self.client.query(rnd)
return query.cte(f"{self.get_sampler_table_name()}_sample")