From f847c1cf102f1166a8e95aa2e799b07d3ae55e81 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Fri, 17 Nov 2023 11:29:27 +0100 Subject: [PATCH] Fix #14012 - Snowflake procedure with empty definition (#14014) * Fix #14012 - Snowflake procedure with empty definition * Add debugs --- .../source/database/snowflake/metadata.py | 30 +++++++++++++++++++ .../source/database/snowflake/models.py | 2 +- .../source/database/snowflake/queries.py | 4 +++ .../database/stored_procedures_mixin.py | 6 +++- .../connectors/database/snowflake/index.md | 8 +++++ 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py b/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py index 901b86c3b97..b65542cf1d9 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py @@ -13,6 +13,7 @@ Snowflake source module """ import json import traceback +import urllib from typing import Dict, Iterable, List, Optional, Tuple import sqlparse @@ -58,6 +59,7 @@ from metadata.ingestion.source.database.snowflake.models import ( SnowflakeStoredProcedure, ) from metadata.ingestion.source.database.snowflake.queries import ( + SNOWFLAKE_DESC_STORED_PROCEDURE, SNOWFLAKE_FETCH_ALL_TAGS, SNOWFLAKE_GET_CLUSTER_KEY, SNOWFLAKE_GET_CURRENT_ACCOUNT, @@ -515,8 +517,36 @@ class SnowflakeSource( ).all() for row in results: stored_procedure = SnowflakeStoredProcedure.parse_obj(dict(row)) + if stored_procedure.definition is None: + logger.debug( + f"Missing ownership permissions on procedure {stored_procedure.name}." + " Trying to fetch description via DESCRIBE." + ) + stored_procedure.definition = self.describe_procedure_definition( + stored_procedure + ) yield stored_procedure + def describe_procedure_definition( + self, stored_procedure: SnowflakeStoredProcedure + ) -> str: + """ + We can only get the SP definition via the INFORMATION_SCHEMA.PROCEDURES if the + user has OWNERSHIP grants, which will not always be the case. + + Then, if the procedure is created with `EXECUTE AS CALLER`, we can still try to + get the definition with a DESCRIBE. + """ + res = self.engine.execute( + SNOWFLAKE_DESC_STORED_PROCEDURE.format( + database_name=self.context.database.name.__root__, + schema_name=self.context.database_schema.name.__root__, + procedure_name=stored_procedure.name, + procedure_signature=urllib.parse.unquote(stored_procedure.signature), + ) + ) + return dict(res.all()).get("body", "") + def yield_stored_procedure( self, stored_procedure: SnowflakeStoredProcedure ) -> Iterable[Either[CreateStoredProcedureRequest]]: diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/models.py b/ingestion/src/metadata/ingestion/source/database/snowflake/models.py index 5324fb37fdd..dce2b10dc59 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/models.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/models.py @@ -35,7 +35,7 @@ class SnowflakeStoredProcedure(BaseModel): name: str = Field(..., alias="NAME") owner: Optional[str] = Field(..., alias="OWNER") language: str = Field(..., alias="LANGUAGE") - definition: str = Field(..., alias="DEFINITION") + definition: str = Field(None, alias="DEFINITION") signature: Optional[str] = Field( ..., alias="SIGNATURE", description="Used to build the source URL" ) diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py index 7b962e75f10..4dd000b7e9c 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py @@ -173,6 +173,10 @@ WHERE PROCEDURE_CATALOG = '{database_name}' """ ) +SNOWFLAKE_DESC_STORED_PROCEDURE = ( + "DESC PROCEDURE {database_name}.{schema_name}.{procedure_name}{procedure_signature}" +) + SNOWFLAKE_GET_STORED_PROCEDURE_QUERIES = textwrap.dedent( """ WITH SP_HISTORY AS ( diff --git a/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py b/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py index 6293ab3cb4c..97bcee33286 100644 --- a/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py +++ b/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py @@ -125,6 +125,10 @@ class StoredProcedureMixin(ABC): def is_lineage_query(query_type: str, query_text: str) -> bool: """Check if it's worth it to parse the query for lineage""" + logger.debug( + f"Validating query lineage for type [{query_type}] and text [{query_text}]" + ) + if query_type in ("MERGE", "UPDATE", "CREATE_TABLE_AS_SELECT"): return True @@ -139,7 +143,6 @@ class StoredProcedureMixin(ABC): self, query_by_procedure: QueryByProcedure, procedure: StoredProcedure ) -> Iterable[Either[AddLineageRequest]]: """Add procedure lineage from its query""" - self.context.stored_procedure_query_lineage = False if self.is_lineage_query( query_type=query_by_procedure.query_type, @@ -201,6 +204,7 @@ class StoredProcedureMixin(ABC): queries_dict = self.get_stored_procedure_queries_dict() # Then for each procedure, iterate over all its queries for procedure in self.context.stored_procedures: + logger.debug(f"Processing Lineage for [{procedure.name}]") for query_by_procedure in ( queries_dict.get(procedure.name.__root__.lower()) or [] ): diff --git a/openmetadata-docs/content/v1.2.x/connectors/database/snowflake/index.md b/openmetadata-docs/content/v1.2.x/connectors/database/snowflake/index.md index 433a0855b89..9bc09172606 100644 --- a/openmetadata-docs/content/v1.2.x/connectors/database/snowflake/index.md +++ b/openmetadata-docs/content/v1.2.x/connectors/database/snowflake/index.md @@ -89,6 +89,14 @@ GRANT IMPORTED PRIVILEGES ON ALL SCHEMAS IN DATABASE SNOWFLAKE TO ROLE NEW_ROLE; You can find more information about the `account_usage` schema [here](https://docs.snowflake.com/en/sql-reference/account-usage). +Regarding Stored Procedures: +1. Snowflake only allows the grant of `USAGE` or `OWNERSHIP` +2. A user can only see the definition of the procedure in 2 situations: + 1. If it has the `OWNERSHIP` grant, + 2. If it has the `USAGE` grant and the procedure is created with `EXECUTE AS CALLER`. + +Make sure to add the `GRANT ON PROCEDURE () to NEW_ROLE`, e.g., `GRANT USAGE ON PROCEDURE CLEAN_DATA(varchar, varchar) to NEW_ROLE`. + ## Metadata Ingestion {% partial