fix(snowflake): avoid reporting warnings/info for sys tables (#11114)

This commit is contained in:
Harshal Sheth 2024-08-07 14:04:18 -07:00 committed by GitHub
parent c226883097
commit a25df8e6a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 8 deletions

View File

@ -47,7 +47,7 @@ from datahub.utilities.type_annotations import get_class_from_annotation
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_MAX_CONTEXT_STRING_LENGTH = 300 _MAX_CONTEXT_STRING_LENGTH = 1000
class SourceCapability(Enum): class SourceCapability(Enum):

View File

@ -440,7 +440,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
yield from self._process_tag(tag) yield from self._process_tag(tag)
if not snowflake_schema.views and not snowflake_schema.tables: if not snowflake_schema.views and not snowflake_schema.tables:
self.structured_reporter.warning( self.structured_reporter.info(
title="No tables/views found in schema", title="No tables/views found in schema",
message="If tables exist, please grant REFERENCES or SELECT permissions on them.", message="If tables exist, please grant REFERENCES or SELECT permissions on them.",
context=f"{db_name}.{schema_name}", context=f"{db_name}.{schema_name}",

View File

@ -127,6 +127,8 @@ class SnowflakeFilter:
SnowflakeObjectDomain.MATERIALIZED_VIEW, SnowflakeObjectDomain.MATERIALIZED_VIEW,
): ):
return False return False
if _is_sys_table(dataset_name):
return False
if len(dataset_params) != 3: if len(dataset_params) != 3:
self.structured_reporter.info( self.structured_reporter.info(
@ -176,6 +178,11 @@ def _combine_identifier_parts(
return f"{db_name}.{schema_name}.{table_name}" return f"{db_name}.{schema_name}.{table_name}"
def _is_sys_table(table_name: str) -> bool:
# Often will look like `SYS$_UNPIVOT_VIEW1737` or `sys$_pivot_view19`.
return table_name.lower().startswith("sys$")
# Qualified Object names from snowflake audit logs have quotes for snowflake quoted identifiers, # Qualified Object names from snowflake audit logs have quotes for snowflake quoted identifiers,
# For example "test-database"."test-schema".test_table # For example "test-database"."test-schema".test_table
# whereas we generate urns without quotes even for quoted identifiers for backward compatibility # whereas we generate urns without quotes even for quoted identifiers for backward compatibility
@ -186,12 +193,13 @@ def _cleanup_qualified_name(
) -> str: ) -> str:
name_parts = qualified_name.split(".") name_parts = qualified_name.split(".")
if len(name_parts) != 3: if len(name_parts) != 3:
structured_reporter.info( if not _is_sys_table(qualified_name):
title="Unexpected dataset pattern", structured_reporter.info(
message="We failed to parse a Snowflake qualified name into its constituent parts. " title="Unexpected dataset pattern",
"DB/schema/table filtering may not work as expected on these entities.", message="We failed to parse a Snowflake qualified name into its constituent parts. "
context=f"{qualified_name} has {len(name_parts)} parts", "DB/schema/table filtering may not work as expected on these entities.",
) context=f"{qualified_name} has {len(name_parts)} parts",
)
return qualified_name.replace('"', "") return qualified_name.replace('"', "")
return _combine_identifier_parts( return _combine_identifier_parts(
db_name=name_parts[0].strip('"'), db_name=name_parts[0].strip('"'),