mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-03 03:59:12 +00:00
* Deprecate impala scheme from Hive * Add migrations
This commit is contained in:
parent
c930427390
commit
1b7c5e3233
@ -21,3 +21,7 @@ CREATE TABLE IF NOT EXISTS data_product_entity (
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (fqnHash)
|
||||
);
|
||||
|
||||
UPDATE dbservice_entity
|
||||
SET json = JSON_REPLACE(json, '$.connection.config.scheme', 'hive')
|
||||
WHERE JSON_EXTRACT(json, '$.connection.config.scheme') IN ('impala', 'impala4');
|
||||
|
||||
@ -21,3 +21,7 @@ CREATE TABLE IF NOT EXISTS data_product_entity (
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (fqnHash)
|
||||
);
|
||||
|
||||
update dbservice_entity
|
||||
set json = jsonb_set(json::jsonb, '{connection,config,scheme}', '"hive"')
|
||||
where json#>>'{connection,config,scheme}' in ('impala', 'impala4');
|
||||
|
||||
@ -15,7 +15,6 @@ Hive source methods.
|
||||
import re
|
||||
from typing import Tuple
|
||||
|
||||
from impala.sqlalchemy import ImpalaDialect
|
||||
from pyhive.sqlalchemy_hive import HiveDialect, _type_map
|
||||
from sqlalchemy import types, util
|
||||
from sqlalchemy.engine import reflection
|
||||
@ -32,7 +31,6 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
from metadata.ingestion.api.source import InvalidSourceException
|
||||
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
||||
from metadata.ingestion.source.database.hive.queries import HIVE_GET_COMMENTS
|
||||
from metadata.profiler.orm.registry import Dialects
|
||||
|
||||
complex_data_types = ["struct", "map", "array", "union"]
|
||||
|
||||
@ -182,124 +180,6 @@ def get_table_comment( # pylint: disable=unused-argument
|
||||
return {"text": None}
|
||||
|
||||
|
||||
def get_impala_table_or_view_names(connection, schema=None, target_type="table"):
|
||||
"""
|
||||
Depending on the targetType returns either the Views or Tables
|
||||
since they share the same method for getting their names.
|
||||
"""
|
||||
query = "show tables"
|
||||
if schema:
|
||||
query += " IN " + schema
|
||||
|
||||
cursor = connection.execute(query)
|
||||
results = cursor.fetchall()
|
||||
tables_and_views = [result[0] for result in results]
|
||||
|
||||
retvalue = []
|
||||
|
||||
for table_view in tables_and_views:
|
||||
query = f"describe formatted `{schema}`.`{table_view}`"
|
||||
cursor = connection.execute(query)
|
||||
results = cursor.fetchall()
|
||||
|
||||
for result in list(results):
|
||||
data = result
|
||||
if data[0].strip() == "Table Type:":
|
||||
if target_type.lower() in data[1].lower():
|
||||
retvalue.append(table_view)
|
||||
return retvalue
|
||||
|
||||
|
||||
def get_impala_view_names(
|
||||
self, connection, schema=None, **kw
|
||||
): # pylint: disable=unused-argument
|
||||
results = get_impala_table_or_view_names(connection, schema, "view")
|
||||
return results
|
||||
|
||||
|
||||
def get_impala_table_names(
|
||||
self, connection, schema=None, **kw
|
||||
): # pylint: disable=unused-argument
|
||||
results = get_impala_table_or_view_names(connection, schema, "table")
|
||||
return results
|
||||
|
||||
|
||||
def get_impala_table_comment(
|
||||
self, connection, table_name, schema_name, **kw
|
||||
): # pylint: disable=unused-argument
|
||||
"""
|
||||
Gets the table comment from the describe formatted query result under the Table Parameters section.
|
||||
"""
|
||||
full_table_name = (
|
||||
f"{schema_name}.{table_name}" if schema_name is not None else table_name
|
||||
)
|
||||
split_name = full_table_name.split(".")
|
||||
query = f"describe formatted `{split_name[0]}`.`{split_name[1]}`"
|
||||
cursor = connection.execute(query)
|
||||
results = cursor.fetchall()
|
||||
|
||||
found_table_parameters = False
|
||||
try:
|
||||
for result in list(results):
|
||||
data = result
|
||||
if not found_table_parameters and data[0].strip() == "Table Parameters:":
|
||||
found_table_parameters = True
|
||||
if found_table_parameters:
|
||||
coltext = data[1].strip() if data[1] is not None else ""
|
||||
if coltext == "comment":
|
||||
return {"text": data[2]}
|
||||
except Exception:
|
||||
return {"text": None}
|
||||
return {"text": None}
|
||||
|
||||
|
||||
def get_impala_columns(
|
||||
self, connection, table_name, schema=None, **kwargs
|
||||
): # pylint: disable=unused-argument
|
||||
# pylint: disable=too-many-locals
|
||||
"""
|
||||
Extracted from the Impala Dialect. We'll tune the implementation.
|
||||
|
||||
By default, this gives us the column name as `table.column`. We just
|
||||
want to get `column`.
|
||||
"""
|
||||
full_table_name = f"{schema}.{table_name}" if schema is not None else table_name
|
||||
split_name = full_table_name.split(".")
|
||||
query = f"DESCRIBE `{split_name[0]}`.`{split_name[1]}`"
|
||||
describe_table_rows = connection.execute(query)
|
||||
column_info = []
|
||||
ordinal_pos = 0
|
||||
for col in describe_table_rows:
|
||||
ordinal_pos = ordinal_pos + 1
|
||||
col_raw = col[1]
|
||||
attype = re.sub(r"\(.*\)", "", col[1])
|
||||
col_type = re.search(r"^\w+", col[1]).group(0)
|
||||
try:
|
||||
coltype = _type_map[col_type]
|
||||
except KeyError:
|
||||
util.warn(f"Did not recognize type '{col_raw}' of column '{col[0]}'")
|
||||
coltype = types.NullType
|
||||
charlen = re.search(r"\(([\d,]+)\)", col_raw.lower())
|
||||
if charlen:
|
||||
charlen = charlen.group(1)
|
||||
if attype == "decimal":
|
||||
prec, scale = charlen.split(",")
|
||||
args = (int(prec), int(scale))
|
||||
else:
|
||||
args = (int(charlen),)
|
||||
coltype = coltype(*args)
|
||||
add_column = {
|
||||
"name": col[0],
|
||||
"type": coltype,
|
||||
"comment": col[2],
|
||||
"nullable": True,
|
||||
"autoincrement": False,
|
||||
"ordinalPosition": ordinal_pos,
|
||||
}
|
||||
column_info.append(add_column)
|
||||
return column_info
|
||||
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
@reflection.cache
|
||||
def get_view_definition(self, connection, view_name, schema=None, **kw):
|
||||
@ -313,25 +193,9 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):
|
||||
return None
|
||||
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
@reflection.cache
|
||||
def get_impala_view_definition(self, connection, view_name, schema=None, **kw):
|
||||
"""
|
||||
Gets the view definition
|
||||
"""
|
||||
full_view_name = f"`{view_name}`" if not schema else f"`{schema}`.`{view_name}`"
|
||||
res = connection.execute(f"SHOW CREATE VIEW {full_view_name}").fetchall()
|
||||
if res:
|
||||
return "\n".join(i[0] for i in res)
|
||||
return None
|
||||
|
||||
|
||||
HiveDialect.get_columns = get_columns
|
||||
HiveDialect.get_table_comment = get_table_comment
|
||||
|
||||
ImpalaDialect.get_columns = get_impala_columns
|
||||
ImpalaDialect.get_table_comment = get_impala_table_comment
|
||||
ImpalaDialect.get_view_definition = get_impala_view_definition
|
||||
|
||||
HIVE_VERSION_WITH_VIEW_SUPPORT = "2.2.0"
|
||||
|
||||
@ -363,22 +227,15 @@ class HiveSource(CommonDbSourceService):
|
||||
Fetching views in hive server with query "SHOW VIEWS" was possible
|
||||
only after hive 2.2.0 version
|
||||
"""
|
||||
if self.engine.driver == Dialects.Impala:
|
||||
ImpalaDialect.get_table_names = get_impala_table_names
|
||||
ImpalaDialect.get_view_names = get_impala_view_names
|
||||
ImpalaDialect.get_table_comment = get_impala_table_comment
|
||||
ImpalaDialect.get_columns = get_impala_columns
|
||||
ImpalaDialect.get_view_definition = get_impala_view_definition
|
||||
else:
|
||||
result = dict(self.engine.execute("SELECT VERSION()").fetchone())
|
||||
result = dict(self.engine.execute("SELECT VERSION()").fetchone())
|
||||
|
||||
version = result.get("_c0", "").split()
|
||||
if version and self._parse_version(version[0]) >= self._parse_version(
|
||||
HIVE_VERSION_WITH_VIEW_SUPPORT
|
||||
):
|
||||
HiveDialect.get_table_names = get_table_names
|
||||
HiveDialect.get_view_names = get_view_names
|
||||
HiveDialect.get_view_definition = get_view_definition
|
||||
else:
|
||||
HiveDialect.get_table_names = get_table_names_older_versions
|
||||
HiveDialect.get_view_names = get_view_names_older_versions
|
||||
version = result.get("_c0", "").split()
|
||||
if version and self._parse_version(version[0]) >= self._parse_version(
|
||||
HIVE_VERSION_WITH_VIEW_SUPPORT
|
||||
):
|
||||
HiveDialect.get_table_names = get_table_names
|
||||
HiveDialect.get_view_names = get_view_names
|
||||
HiveDialect.get_view_definition = get_view_definition
|
||||
else:
|
||||
HiveDialect.get_table_names = get_table_names_older_versions
|
||||
HiveDialect.get_view_names = get_view_names_older_versions
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
"hiveScheme": {
|
||||
"description": "SQLAlchemy driver scheme options.",
|
||||
"type": "string",
|
||||
"enum": ["hive", "hive+http", "hive+https", "impala", "impala4"],
|
||||
"enum": ["hive", "hive+http", "hive+https"],
|
||||
"default": "hive"
|
||||
}
|
||||
},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user