From b55a46f4de1b2336a50dbb653eb68fccec38d0d2 Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Fri, 14 Oct 2022 10:09:31 +0530 Subject: [PATCH] Fix #5492: Fix Trino Comments (#8113) --- .../src/metadata/examples/workflows/trino.yaml | 4 +--- .../metadata/ingestion/source/database/trino.py | 15 +++++---------- ingestion/src/metadata/utils/sql_queries.py | 16 ---------------- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/ingestion/src/metadata/examples/workflows/trino.yaml b/ingestion/src/metadata/examples/workflows/trino.yaml index c6706f80c29..c937ec3c505 100644 --- a/ingestion/src/metadata/examples/workflows/trino.yaml +++ b/ingestion/src/metadata/examples/workflows/trino.yaml @@ -12,11 +12,9 @@ source: connectionArguments: {} sourceConfig: config: - enableDataProfiler: true - generateSampleData: false tableFilterPattern: includes: - - customer.* + - customer.* sink: type: metadata-rest config: {} diff --git a/ingestion/src/metadata/ingestion/source/database/trino.py b/ingestion/src/metadata/ingestion/source/database/trino.py index 112e0f18d63..020ed4297dc 100644 --- a/ingestion/src/metadata/ingestion/source/database/trino.py +++ b/ingestion/src/metadata/ingestion/source/database/trino.py @@ -14,7 +14,6 @@ Trino source implementation. import logging import re import sys -from textwrap import dedent from typing import Any, Dict, Iterable, List, Optional, Tuple import click @@ -36,7 +35,6 @@ from metadata.generated.schema.metadataIngestion.workflow import ( from metadata.ingestion.api.source import InvalidSourceException from metadata.ingestion.source.database.common_db_source import CommonDbSourceService from metadata.utils.logger import ingestion_logger -from metadata.utils.sql_queries import TRINO_GET_COLUMNS logger = ingestion_logger() ROW_DATA_TYPE = "row" @@ -100,19 +98,16 @@ def _get_columns( ) -> List[Dict[str, Any]]: # pylint: disable=protected-access schema = schema or self._get_default_schema_name(connection) - query = dedent(TRINO_GET_COLUMNS).strip() + query = f"SHOW COLUMNS FROM {schema}.{table_name}" + res = connection.execute(sql.text(query), schema=schema, table=table_name) columns = [] - for record in res: - col_type = datatype.parse_sqltype(record.data_type) + col_type = datatype.parse_sqltype(record.Type) column = dict( - name=record.column_name, - type=col_type, - nullable=record.is_nullable == "YES", - default=record.column_default, + name=record.Column, type=col_type, nullable=True, comment=record.Comment ) - type_str = record.data_type.strip().lower() + type_str = record.Type.strip().lower() type_name, type_opts = get_type_name_and_opts(type_str) if type_opts and type_name == ROW_DATA_TYPE: column["raw_data_type"] = parse_row_data_type(type_str) diff --git a/ingestion/src/metadata/utils/sql_queries.py b/ingestion/src/metadata/utils/sql_queries.py index 048e699b3f6..26224f48912 100644 --- a/ingestion/src/metadata/utils/sql_queries.py +++ b/ingestion/src/metadata/utils/sql_queries.py @@ -447,22 +447,6 @@ WHERE creation_time BETWEEN "{start_time}" AND "{end_time}" """ ) - -TRINO_GET_COLUMNS = textwrap.dedent( - """ - SELECT - "column_name", - "data_type", - "column_default", - UPPER("is_nullable") AS "is_nullable" - FROM "information_schema"."columns" - WHERE "table_schema" = :schema - AND "table_name" = :table - ORDER BY "ordinal_position" ASC -""" -) - - POSTGRES_SQL_STATEMENT = textwrap.dedent( """ SELECT