From 9dbfea2fcaf2b3ec5b77b2289f3de2af2eeccee2 Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Thu, 23 Feb 2023 15:40:48 +0530 Subject: [PATCH] Add Database & Schema Description: Snowflake (#10276) --- .../source/database/common_db_source.py | 14 ++++++++ .../source/database/snowflake/metadata.py | 33 +++++++++++++++++++ .../source/database/snowflake/queries.py | 14 ++++++++ 3 files changed, 61 insertions(+) diff --git a/ingestion/src/metadata/ingestion/source/database/common_db_source.py b/ingestion/src/metadata/ingestion/source/database/common_db_source.py index 8834b2e9583..aef549701f2 100644 --- a/ingestion/src/metadata/ingestion/source/database/common_db_source.py +++ b/ingestion/src/metadata/ingestion/source/database/common_db_source.py @@ -139,6 +139,18 @@ class CommonDbSourceService( self.inspector = inspect(self.engine) yield database_name + def get_database_description(self, database_name: str) -> Optional[str]: + """ + Method to fetch the database description + by default there will be no database description + """ + + def get_schema_description(self, schema_name: str) -> Optional[str]: + """ + Method to fetch the schema description + by default there will be no schema description + """ + def yield_database(self, database_name: str) -> Iterable[CreateDatabaseRequest]: """ From topology. @@ -148,6 +160,7 @@ class CommonDbSourceService( yield CreateDatabaseRequest( name=database_name, service=self.context.database_service.fullyQualifiedName, + description=self.get_database_description(database_name), ) def get_raw_database_schema_names(self) -> Iterable[str]: @@ -174,6 +187,7 @@ class CommonDbSourceService( yield CreateDatabaseSchemaRequest( name=schema_name, database=self.context.database.fullyQualifiedName, + description=self.get_schema_description(schema_name), ) @staticmethod diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py b/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py index 63c3ce0c3ca..ac6b590ab85 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py @@ -45,6 +45,8 @@ from metadata.ingestion.source.database.snowflake.queries import ( SNOWFLAKE_FETCH_ALL_TAGS, SNOWFLAKE_GET_CLUSTER_KEY, SNOWFLAKE_GET_COMMENTS, + SNOWFLAKE_GET_DATABASE_COMMENTS, + SNOWFLAKE_GET_SCHEMA_COMMENTS, SNOWFLAKE_GET_TABLE_NAMES, SNOWFLAKE_GET_VIEW_NAMES, SNOWFLAKE_SESSION_TAG_QUERY, @@ -148,6 +150,8 @@ class SnowflakeSource(CommonDbSourceService): def __init__(self, config, metadata_config): self.partition_details = {} super().__init__(config, metadata_config) + self.schema_desc_map = {} + self.database_desc_map = {} @classmethod def create(cls, config_dict, metadata_config: OpenMetadataConnection): @@ -179,12 +183,39 @@ class SnowflakeSource(CommonDbSourceService): f"{row.TABLE_SCHEMA}.{row.TABLE_NAME}" ] = row.CLUSTERING_KEY + def set_schema_description_map(self) -> None: + results = self.engine.execute(SNOWFLAKE_GET_SCHEMA_COMMENTS).all() + for row in results: + self.schema_desc_map[(row.DATABASE_NAME, row.SCHEMA_NAME)] = row.COMMENT + + def set_database_description_map(self) -> None: + if not self.database_desc_map: + results = self.engine.execute(SNOWFLAKE_GET_DATABASE_COMMENTS).all() + for row in results: + self.database_desc_map[row.DATABASE_NAME] = row.COMMENT + + def get_schema_description(self, schema_name: str) -> Optional[str]: + """ + Method to fetch the schema description + """ + return self.schema_desc_map.get( + (self.context.database.name.__root__, schema_name) + ) + + def get_database_description(self, database_name: str) -> Optional[str]: + """ + Method to fetch the database description + """ + return self.database_desc_map.get(database_name) + def get_database_names(self) -> Iterable[str]: configured_db = self.config.serviceConnection.__root__.config.database if configured_db: self.set_inspector(configured_db) self.set_session_query_tag() self.set_partition_details() + self.set_schema_description_map() + self.set_database_description_map() yield configured_db else: results = self.connection.execute("SHOW DATABASES") @@ -211,6 +242,8 @@ class SnowflakeSource(CommonDbSourceService): self.set_inspector(database_name=new_database) self.set_session_query_tag() self.set_partition_details() + self.set_schema_description_map() + self.set_database_description_map() yield new_database except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py index a45c37c8dcc..488a44f5149 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py @@ -73,3 +73,17 @@ SNOWFLAKE_GET_CLUSTER_KEY = """ where TABLE_TYPE = 'BASE TABLE' and CLUSTERING_KEY is not null """ + + +SNOWFLAKE_GET_SCHEMA_COMMENTS = """ +SELECT + catalog_name DATABASE_NAME, + SCHEMA_NAME, + COMMENT +FROM information_schema.schemata +""" + + +SNOWFLAKE_GET_DATABASE_COMMENTS = """ +select DATABASE_NAME,COMMENT from information_schema.databases +"""