From 016a840b2f5eb6f9bd46463f3e6d7ab17e9300d3 Mon Sep 17 00:00:00 2001 From: Imri Paran Date: Tue, 29 Oct 2024 18:33:36 +0100 Subject: [PATCH] MINOR Fix snowflake profiler by using case-insensitive strings (#18438) * use snowflake system metrics computer instead of source * reverted pylint * use case-insensitive strings equality for snowflake filters --- .../database/snowflake/profiler/profiler.py | 9 ++++--- .../database/snowflake/profiler/system.py | 24 +++++++++++++++---- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/profiler.py b/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/profiler.py index e028bb2a229..f68f18f85c1 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/profiler.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/profiler.py @@ -13,15 +13,14 @@ Profiler for Snowflake """ from metadata.ingestion.source.database.snowflake.profiler.system import ( - SnowflakeSystemMetricsSource, + SnowflakeSystemMetricsComputer, ) from metadata.profiler.interface.sqlalchemy.snowflake.profiler_interface import ( SnowflakeProfilerInterface, ) +from metadata.profiler.metrics.system.system import SystemMetricsComputer class SnowflakeProfiler(SnowflakeProfilerInterface): - def initialize_system_metrics_computer( - self, **kwargs - ) -> SnowflakeSystemMetricsSource: - return SnowflakeSystemMetricsSource(session=self.session) + def initialize_system_metrics_computer(self, **kwargs) -> SystemMetricsComputer: + return SnowflakeSystemMetricsComputer(session=self.session) diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py b/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py index fa92017db4d..c986ac0beed 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py @@ -17,7 +17,9 @@ from metadata.profiler.metrics.system.system import ( CacheProvider, EmptySystemMetricsSource, SQASessionProvider, + SystemMetricsComputer, ) +from metadata.utils.collections import CaseInsensitiveString from metadata.utils.logger import profiler_logger from metadata.utils.lru_cache import LRU_CACHE_SIZE, LRUCache from metadata.utils.profiler_utils import get_identifiers_from_string @@ -222,7 +224,7 @@ def get_snowflake_system_queries( """ try: - logger.debug(f"Trying to parse query [{query_log_entry.query_id}]") + logger.debug(f"Parsing snowflake query [{query_log_entry.query_id}]") identifier = _parse_query(query_log_entry.query_text) if not identifier: raise RuntimeError("Could not identify the table from the query.") @@ -358,9 +360,17 @@ class SnowflakeSystemMetricsSource( } for q in query_results if getattr(q, rows_affected_field) > 0 - and q.database_name == db - and q.schema_name == schema - and q.table_name == table + # snowflake SQL identifiers are case insensitive. All identifiers are stored in upper case. + and ( + CaseInsensitiveString(db), + CaseInsensitiveString(schema), + CaseInsensitiveString(table), + ) + == ( + q.database_name, + q.schema_name, + q.table_name, + ) ] ) @@ -387,3 +397,9 @@ class SnowflakeSystemMetricsSource( for row in queries ] return [result for result in results if result is not None] + + +class SnowflakeSystemMetricsComputer( + SystemMetricsComputer, SnowflakeSystemMetricsSource +): + pass