MINOR Fix snowflake profiler by using case-insensitive strings (#18438)

* use snowflake system metrics computer instead of source

* reverted pylint

* use case-insensitive strings equality for snowflake filters
This commit is contained in:
Imri Paran 2024-10-29 18:33:36 +01:00 committed by GitHub
parent ed7b1c7b95
commit 016a840b2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 9 deletions

View File

@ -13,15 +13,14 @@
Profiler for Snowflake
"""
from metadata.ingestion.source.database.snowflake.profiler.system import (
SnowflakeSystemMetricsSource,
SnowflakeSystemMetricsComputer,
)
from metadata.profiler.interface.sqlalchemy.snowflake.profiler_interface import (
SnowflakeProfilerInterface,
)
from metadata.profiler.metrics.system.system import SystemMetricsComputer
class SnowflakeProfiler(SnowflakeProfilerInterface):
def initialize_system_metrics_computer(
self, **kwargs
) -> SnowflakeSystemMetricsSource:
return SnowflakeSystemMetricsSource(session=self.session)
def initialize_system_metrics_computer(self, **kwargs) -> SystemMetricsComputer:
return SnowflakeSystemMetricsComputer(session=self.session)

View File

@ -17,7 +17,9 @@ from metadata.profiler.metrics.system.system import (
CacheProvider,
EmptySystemMetricsSource,
SQASessionProvider,
SystemMetricsComputer,
)
from metadata.utils.collections import CaseInsensitiveString
from metadata.utils.logger import profiler_logger
from metadata.utils.lru_cache import LRU_CACHE_SIZE, LRUCache
from metadata.utils.profiler_utils import get_identifiers_from_string
@ -222,7 +224,7 @@ def get_snowflake_system_queries(
"""
try:
logger.debug(f"Trying to parse query [{query_log_entry.query_id}]")
logger.debug(f"Parsing snowflake query [{query_log_entry.query_id}]")
identifier = _parse_query(query_log_entry.query_text)
if not identifier:
raise RuntimeError("Could not identify the table from the query.")
@ -358,9 +360,17 @@ class SnowflakeSystemMetricsSource(
}
for q in query_results
if getattr(q, rows_affected_field) > 0
and q.database_name == db
and q.schema_name == schema
and q.table_name == table
# snowflake SQL identifiers are case insensitive. All identifiers are stored in upper case.
and (
CaseInsensitiveString(db),
CaseInsensitiveString(schema),
CaseInsensitiveString(table),
)
== (
q.database_name,
q.schema_name,
q.table_name,
)
]
)
@ -387,3 +397,9 @@ class SnowflakeSystemMetricsSource(
for row in queries
]
return [result for result in results if result is not None]
class SnowflakeSystemMetricsComputer(
SystemMetricsComputer, SnowflakeSystemMetricsSource
):
pass