From 00289bd85f0d2bd6605d28eb5a818fe6b1beeda7 Mon Sep 17 00:00:00 2001 From: Keith Sirmons Date: Wed, 3 May 2023 04:36:33 -0500 Subject: [PATCH] Fixes#11189: Implement Impala and hive get_view_definition (#11237) * updated metadata to work with the impala query engine. Uses the describe function to grab column names, data types, and comments. * added the ordinalPosition data point into the Column constructor. * renamed variable to better describe its usage. * updated profile errors. Hive connections now comment columns by default. * removed print statements * Cleaned up code by pulling check into its own function * Updated median function to return null when it is being used for first and third quartiles. * updated metadata to work with the impala query engine. Uses the describe function to grab column names, data types, and comments. * added the ordinalPosition data point into the Column constructor. * renamed variable to better describe its usage. * updated profile errors. Hive connections now comment columns by default. * removed print statements * Cleaned up code by pulling check into its own function * Updated median function to return null when it is being used for first and third quartiles. * removed print statements and ran make py_format * updated to fix some pylint errors. imported Dialects to remove string compare to "impala" engine * moved huge comment into function docstring. This comment shows us the sql to get quartiles in Impala * added cast to decimal for column when running average in mean.py * fixed lint error * fixed ui ordering of precision and scale. Precision should be ordered in front of scale since the precision is set first in decimal data types * Added get_view_definition to hive and impala connectors. 
--------- Co-authored-by: Chirag Madlani <12962843+chirag-madlani@users.noreply.github.com> --- .../source/database/hive/metadata.py | 30 ++++++++++++++++++- .../source/database/impala/metadata.py | 14 +++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metadata.py b/ingestion/src/metadata/ingestion/source/database/hive/metadata.py index e71d504d355..6897b10aa4e 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metadata.py @@ -300,12 +300,38 @@ def get_impala_columns( return column_info +# pylint: disable=unused-argument +@reflection.cache +def get_view_definition(self, connection, view_name, schema=None, **kw): + """ + Gets the view definition + """ + full_view_name = f"`{view_name}`" if not schema else f"`{schema}`.`{view_name}`" + res = connection.execute(f"SHOW CREATE TABLE {full_view_name}").fetchall() + if res: + return "\n".join(i[0] for i in res) + return None + + +# pylint: disable=unused-argument +@reflection.cache +def get_impala_view_definition(self, connection, view_name, schema=None, **kw): + """ + Gets the view definition + """ + full_view_name = f"`{view_name}`" if not schema else f"`{schema}`.`{view_name}`" + res = connection.execute(f"SHOW CREATE VIEW {full_view_name}").fetchall() + if res: + return "\n".join(i[0] for i in res) + return None + + HiveDialect.get_columns = get_columns HiveDialect.get_table_comment = get_table_comment ImpalaDialect.get_columns = get_impala_columns ImpalaDialect.get_table_comment = get_impala_table_comment - +ImpalaDialect.get_view_definition = get_impala_view_definition HIVE_VERSION_WITH_VIEW_SUPPORT = "2.2.0" @@ -342,6 +368,7 @@ class HiveSource(CommonDbSourceService): ImpalaDialect.get_view_names = get_impala_view_names ImpalaDialect.get_table_comment = get_impala_table_comment ImpalaDialect.get_columns = get_impala_columns + 
ImpalaDialect.get_view_definition = get_impala_view_definition else: result = dict(self.engine.execute("SELECT VERSION()").fetchone()) @@ -351,6 +378,7 @@ class HiveSource(CommonDbSourceService): ): HiveDialect.get_table_names = get_table_names HiveDialect.get_view_names = get_view_names + HiveDialect.get_view_definition = get_view_definition else: HiveDialect.get_table_names = get_table_names_older_versions HiveDialect.get_view_names = get_view_names_older_versions diff --git a/ingestion/src/metadata/ingestion/source/database/impala/metadata.py b/ingestion/src/metadata/ingestion/source/database/impala/metadata.py index 400a12332c2..dc94ed7b4ba 100644 --- a/ingestion/src/metadata/ingestion/source/database/impala/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/impala/metadata.py @@ -162,6 +162,19 @@ def get_columns( return column_info +# pylint: disable=unused-argument +@reflection.cache +def get_view_definition(self, connection, view_name, schema=None, **kw): + """ + Gets the view definition + """ + full_view_name = f"`{view_name}`" if not schema else f"`{schema}`.`{view_name}`" + res = connection.execute(f"SHOW CREATE VIEW {full_view_name}").fetchall() + if res: + return "\n".join(i[0] for i in res) + return None + + class ImpalaSource(CommonDbSourceService): """ Implements the necessary methods to extract @@ -183,3 +196,4 @@ class ImpalaSource(CommonDbSourceService): ImpalaDialect.get_view_names = get_view_names ImpalaDialect.get_table_comment = get_table_comment ImpalaDialect.get_columns = get_columns + ImpalaDialect.get_view_definition = get_view_definition