def get_table_names(self, connection, schema=None, **kw):
    """Return the table names reported by ``SHOW TABLES``.

    The result set has a different shape depending on the server that
    answers the query, so the table name is picked per row based on the
    number of columns in that row.

    Args:
        self: Dialect instance; its ``identifier_preparer`` is used to quote
            the schema name.
        connection: Object whose ``execute(query)`` returns an iterable of
            row sequences.
        schema: Optional schema name. When truthy, the listing is restricted
            with ``IN <quoted schema>``.
        **kw: Accepted for signature compatibility; not used here.

    Returns:
        A list of table names, in the order the rows were returned.
    """
    query = "SHOW TABLES"
    if schema:
        query += " IN " + self.identifier_preparer.quote_identifier(schema)

    # Spark thrift server returns 3 columns (schema, table, is_temporary), so
    # the name is in the second column; Hive returns a single column holding
    # the name.
    return [
        row[1] if len(row) > 1 else row[0]
        for row in connection.execute(query)
    ]