Fixes #11189: Implement Impala and Hive get_view_definition (#11237)

* updated metadata to work with the impala query engine.
Uses the describe function to grab column names, data types, and comments.

* added the ordinalPosition data point into the Column constructor.

* renamed variable to better describe its usage.

* updated profile errors.
Hive connections now comment columns by default.

* removed print statements

* Cleaned up code by pulling check into its own function

* Updated median function to return null when it is being used for first and third quartiles.

* updated metadata to work with the impala query engine.
Uses the describe function to grab column names, data types, and comments.

* added the ordinalPosition data point into the Column constructor.

* renamed variable to better describe its usage.

* updated profile errors.
Hive connections now comment columns by default.

* removed print statements

* Cleaned up code by pulling check into its own function

* Updated median function to return null when it is being used for first and third quartiles.

* removed print statements and ran make py_format

* updated to fix some pylint errors.
imported Dialects to remove string compare to "impala" engine

* moved huge comment into function docstring.
This comment shows us the sql to get quartiles in Impala

* added cast to decimal for column when running average in mean.py

* fixed lint error

* fixed ui ordering of precision and scale.
Precision should be ordered in front of scale since the precision is set first in decimal data types

* Added get_view_definition to hive and impala connectors.

---------

Co-authored-by: Chirag Madlani <12962843+chirag-madlani@users.noreply.github.com>
This commit is contained in:
Keith Sirmons 2023-05-03 04:36:33 -05:00 committed by GitHub
parent 3299f6bda2
commit 00289bd85f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 1 deletions

View File

@ -300,12 +300,38 @@ def get_impala_columns(
return column_info
# pylint: disable=unused-argument
@reflection.cache
def get_view_definition(self, connection, view_name, schema=None, **kw):
    """
    Return the DDL text reported for a view, or None when nothing is returned.

    Runs ``SHOW CREATE TABLE`` against the back-quoted view name (prefixed
    with the back-quoted schema when one is given) and joins the first column
    of every returned row with newlines.

    ``self`` and ``**kw`` are accepted but not used by this function.
    """
    # Qualify the name with the schema only when a schema was supplied.
    qualified_name = f"`{schema}`.`{view_name}`" if schema else f"`{view_name}`"
    rows = connection.execute(f"SHOW CREATE TABLE {qualified_name}").fetchall()
    if not rows:
        return None
    # Each row contributes its first column as one line of the definition.
    return "\n".join(row[0] for row in rows)
# pylint: disable=unused-argument
@reflection.cache
def get_impala_view_definition(self, connection, view_name, schema=None, **kw):
    """
    Return the text produced by ``SHOW CREATE VIEW`` for a view.

    The view name is wrapped in back-quotes and, when ``schema`` is truthy,
    prefixed with the back-quoted schema. The first column of every returned
    row is joined with newlines. None is returned when no rows come back.

    ``self`` and ``**kw`` are accepted but not used by this function.
    """
    if schema:
        target = f"`{schema}`.`{view_name}`"
    else:
        target = f"`{view_name}`"
    statement = f"SHOW CREATE VIEW {target}"
    rows = connection.execute(statement).fetchall()
    return "\n".join(row[0] for row in rows) if rows else None
# Assign reflection functions onto the dialect classes so that calls to these
# dialect methods are served by the functions referenced here.
HiveDialect.get_columns = get_columns
HiveDialect.get_table_comment = get_table_comment
ImpalaDialect.get_columns = get_impala_columns
ImpalaDialect.get_table_comment = get_impala_table_comment
ImpalaDialect.get_view_definition = get_impala_view_definition
# NOTE(review): presumably the minimum Hive version that is compared with the
# server's reported version when choosing table/view reflection helpers — confirm.
HIVE_VERSION_WITH_VIEW_SUPPORT = "2.2.0"
@ -342,6 +368,7 @@ class HiveSource(CommonDbSourceService):
ImpalaDialect.get_view_names = get_impala_view_names
ImpalaDialect.get_table_comment = get_impala_table_comment
ImpalaDialect.get_columns = get_impala_columns
ImpalaDialect.get_view_definition = get_impala_view_definition
else:
result = dict(self.engine.execute("SELECT VERSION()").fetchone())
@ -351,6 +378,7 @@ class HiveSource(CommonDbSourceService):
):
HiveDialect.get_table_names = get_table_names
HiveDialect.get_view_names = get_view_names
HiveDialect.get_view_definition = get_view_definition
else:
HiveDialect.get_table_names = get_table_names_older_versions
HiveDialect.get_view_names = get_view_names_older_versions

View File

@ -162,6 +162,19 @@ def get_columns(
return column_info
# pylint: disable=unused-argument
@reflection.cache
def get_view_definition(self, connection, view_name, schema=None, **kw):
    """
    Return the definition text reported by ``SHOW CREATE VIEW``.

    The statement targets the back-quoted view name, qualified by the
    back-quoted schema when one is provided. The first column of each
    returned row is joined with newlines; None is returned if there are
    no rows.

    ``self`` and ``**kw`` are accepted but not used by this function.
    """
    qualified_name = f"`{schema}`.`{view_name}`" if schema else f"`{view_name}`"
    rows = connection.execute(f"SHOW CREATE VIEW {qualified_name}").fetchall()
    if not rows:
        return None
    definition_lines = [row[0] for row in rows]
    return "\n".join(definition_lines)
class ImpalaSource(CommonDbSourceService):
"""
Implements the necessary methods to extract
@ -183,3 +196,4 @@ class ImpalaSource(CommonDbSourceService):
ImpalaDialect.get_view_names = get_view_names
ImpalaDialect.get_table_comment = get_table_comment
ImpalaDialect.get_columns = get_columns
ImpalaDialect.get_view_definition = get_view_definition