fix(ingest): bigquery-beta - Additional fixes for Bigquery beta (#6051)

parent 60928757e0
commit 32b8bef92e
@@ -150,7 +150,7 @@ redshift_common = {
     "sqlalchemy-redshift",
     "psycopg2-binary",
     "GeoAlchemy2",
-    "sqllineage==1.3.5",
+    "sqllineage==1.3.6",
     *path_spec_common,
 }

@@ -216,18 +216,18 @@ plugins: Dict[str, Set[str]] = {
         "gql>=3.3.0",
         "gql[requests]>=3.3.0",
     },
-    "great-expectations": sql_common | {"sqllineage==1.3.5"},
+    "great-expectations": sql_common | {"sqllineage==1.3.6"},
     # Source plugins
     # PyAthena is pinned with exact version because we use private method in PyAthena
     "athena": sql_common | {"PyAthena[SQLAlchemy]==2.4.1"},
     "azure-ad": set(),
     "bigquery": sql_common
     | bigquery_common
-    | {"sqlalchemy-bigquery>=1.4.1", "sqllineage==1.3.5", "sqlparse"},
+    | {"sqlalchemy-bigquery>=1.4.1", "sqllineage==1.3.6", "sqlparse"},
     "bigquery-usage": bigquery_common | usage_common | {"cachetools"},
     "bigquery-beta": sql_common
     | bigquery_common
-    | {"sqllineage==1.3.5", "sql_metadata"},
+    | {"sqllineage==1.3.6", "sql_metadata"},
     "clickhouse": sql_common | {"clickhouse-sqlalchemy==0.1.8"},
     "clickhouse-usage": sql_common
     | usage_common

@@ -269,9 +269,9 @@ plugins: Dict[str, Set[str]] = {
     "looker": looker_common,
     # lkml>=1.1.2 is required to support the sql_preamble expression in LookML
     "lookml": looker_common
-    | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.5", "GitPython>2"},
-    "metabase": {"requests", "sqllineage==1.3.5"},
-    "mode": {"requests", "sqllineage==1.3.5", "tenacity>=8.0.1"},
+    | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.6", "GitPython>2"},
+    "metabase": {"requests", "sqllineage==1.3.6"},
+    "mode": {"requests", "sqllineage==1.3.6", "tenacity>=8.0.1"},
     "mongodb": {"pymongo[srv]>=3.11", "packaging"},
     "mssql": sql_common | {"sqlalchemy-pytds>=0.3"},
     "mssql-odbc": sql_common | {"pyodbc"},

@@ -284,7 +284,7 @@ plugins: Dict[str, Set[str]] = {
     "presto-on-hive": sql_common
     | {"psycopg2-binary", "acryl-pyhive[hive]>=0.6.12", "pymysql>=1.0.2"},
     "pulsar": {"requests"},
-    "redash": {"redash-toolbelt", "sql-metadata", "sqllineage==1.3.5"},
+    "redash": {"redash-toolbelt", "sql-metadata", "sqllineage==1.3.6"},
     "redshift": sql_common | redshift_common,
     "redshift-usage": sql_common | usage_common | redshift_common,
     "s3": {*s3_base, *data_lake_profiling},

@@ -571,9 +571,10 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
         view.columns = self.get_columns_for_table(conn, table_identifier)

         lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]] = None
-        lineage_info = self.lineage_extractor.get_upstream_lineage_info(
-            table_identifier, self.platform
-        )
+        if self.config.include_table_lineage:
+            lineage_info = self.lineage_extractor.get_upstream_lineage_info(
+                table_identifier, self.platform
+            )

         view_workunits = self.gen_view_dataset_workunits(
             view, project_id, dataset_name, lineage_info

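Illustrative sketch (not part of the diff): the gating introduced above, shown in isolation. The flag name include_table_lineage and the extractor call are taken from the hunk; the helper function itself is hypothetical.

from typing import Dict, Optional, Tuple


def maybe_get_view_lineage(config, lineage_extractor, table_identifier, platform):
    # View lineage is now only extracted when the source config enables it;
    # otherwise the view workunits are generated with lineage_info left as None.
    lineage_info: Optional[Tuple[object, Dict[str, str]]] = None
    if config.include_table_lineage:
        lineage_info = lineage_extractor.get_upstream_lineage_info(
            table_identifier, platform
        )
    return lineage_info
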
@@ -15,7 +15,7 @@ class BigQueryV2Report(SQLSourceReport):
     num_total_lineage_entries: Optional[int] = None
     num_skipped_lineage_entries_missing_data: Optional[int] = None
     num_skipped_lineage_entries_not_allowed: Optional[int] = None
-    num_skipped_lineage_entries_sql_parser_failure: Optional[int] = None
+    num_lineage_entries_sql_parser_failure: Optional[int] = None
     num_skipped_lineage_entries_other: Optional[int] = None
     num_total_log_entries: Optional[int] = None
     num_parsed_log_entires: Optional[int] = None

@@ -362,7 +362,7 @@ timestamp < "{end_time}"
         self.report.num_skipped_lineage_entries_missing_data = 0
         self.report.num_skipped_lineage_entries_not_allowed = 0
         self.report.num_skipped_lineage_entries_other = 0
-        self.report.num_skipped_lineage_entries_sql_parser_failure = 0
+        self.report.num_lineage_entries_sql_parser_failure = 0
         for e in entries:
             self.report.num_total_lineage_entries += 1
             if e.destinationTable is None or not (

@@ -400,10 +400,10 @@ timestamp < "{end_time}"
                     map(lambda x: x.split(".")[-1], parser.get_tables())
                 )
             except Exception as ex:
-                logger.warning(
-                    f"Sql Parser failed on query: {e.query}. It will be skipped from lineage. The error was {ex}"
+                logger.debug(
+                    f"Sql Parser failed on query: {e.query}. It won't cause any issue except table/view lineage can't be detected reliably. The error was {ex}."
                 )
-                self.report.num_skipped_lineage_entries_sql_parser_failure += 1
+                self.report.num_lineage_entries_sql_parser_failure += 1
                 continue
             curr_lineage_str = lineage_map[destination_table_str]
             new_lineage_str = set()

@@ -13,7 +13,10 @@ from datahub.ingestion.api.common import WorkUnit
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config
 from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report
-from datahub.ingestion.source.bigquery_v2.bigquery_schema import BigqueryTable
+from datahub.ingestion.source.bigquery_v2.bigquery_schema import (
+    BigqueryColumn,
+    BigqueryTable,
+)
 from datahub.ingestion.source.ge_data_profiler import (
     DatahubGEProfiler,
     GEProfilerRequest,

@@ -78,39 +81,57 @@ class BigqueryProfiler:
         partition = table.max_partition_id
         if partition:
             partition_where_clause: str
-            logger.debug(f"{table} is partitioned and partition column is {partition}")
-            try:
-                (
-                    partition_datetime,
-                    upper_bound_partition_datetime,
-                ) = self.get_partition_range_from_partition_id(
-                    partition, partition_datetime
-                )
-            except ValueError as e:
-                logger.error(
-                    f"Unable to get partition range for partition id: {partition} it failed with exception {e}"
-                )
-                self.report.invalid_partition_ids[f"{schema}.{table}"] = partition
-                return None, None

-            if table.time_partitioning.type_ in ("DAY", "MONTH", "YEAR"):
-                partition_where_clause = "{column_name} BETWEEN DATE('{partition_id}') AND DATE('{upper_bound_partition_id}')".format(
-                    column_name=table.time_partitioning.field,
-                    partition_id=partition_datetime,
-                    upper_bound_partition_id=upper_bound_partition_datetime,
-                )
-            elif table.time_partitioning.type_ in ("HOUR"):
-                partition_where_clause = "{column_name} BETWEEN '{partition_id}' AND '{upper_bound_partition_id}'".format(
-                    column_name=table.time_partitioning.field,
-                    partition_id=partition_datetime,
-                    upper_bound_partition_id=upper_bound_partition_datetime,
-                )
-            else:
-                logger.warning(
-                    f"Not supported partition type {table.time_partitioning.type_}"
-                )
-                return None, None
+            if not table.time_partitioning:
+                partition_column: Optional[BigqueryColumn] = None
+                for column in table.columns:
+                    if column.is_partition_column:
+                        partition_column = column
+                        break
+                if partition_column:
+                    partition_where_clause = f"{partition_column.name} >= {partition}"
+                else:
+                    logger.warning(
+                        f"Partitioned table {table.name} without partiton column"
+                    )
+                    return None, None
+            else:
+                logger.debug(
+                    f"{table.name} is partitioned and partition column is {partition}"
+                )
+                try:
+                    (
+                        partition_datetime,
+                        upper_bound_partition_datetime,
+                    ) = self.get_partition_range_from_partition_id(
+                        partition, partition_datetime
+                    )
+                except ValueError as e:
+                    logger.error(
+                        f"Unable to get partition range for partition id: {partition} it failed with exception {e}"
+                    )
+                    self.report.invalid_partition_ids[
+                        f"{schema}.{table.name}"
+                    ] = partition
+                    return None, None
+
+                if table.time_partitioning.type_ in ("DAY", "MONTH", "YEAR"):
+                    partition_where_clause = "{column_name} BETWEEN DATE('{partition_id}') AND DATE('{upper_bound_partition_id}')".format(
+                        column_name=table.time_partitioning.field,
+                        partition_id=partition_datetime,
+                        upper_bound_partition_id=upper_bound_partition_datetime,
+                    )
+                elif table.time_partitioning.type_ in ("HOUR"):
+                    partition_where_clause = "{column_name} BETWEEN '{partition_id}' AND '{upper_bound_partition_id}'".format(
+                        column_name=table.time_partitioning.field,
+                        partition_id=partition_datetime,
+                        upper_bound_partition_id=upper_bound_partition_datetime,
+                    )
+                else:
+                    logger.warning(
+                        f"Not supported partition type {table.time_partitioning.type_}"
+                    )
+                    return None, None
         custom_sql = """
 SELECT
     *

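For reference, the profiling filters the branches above generate look roughly like this; the column names and partition id below are invented for illustration, while the placeholders are the variables from the hunk:

# DAY/MONTH/YEAR time-partitioned table (field "ts"):
#   ts BETWEEN DATE('{partition_datetime}') AND DATE('{upper_bound_partition_datetime}')
# HOUR time-partitioned table:
#   ts BETWEEN '{partition_datetime}' AND '{upper_bound_partition_datetime}'
# Partitioned table without time_partitioning metadata (the branch added by this
# commit), partition column "shard_id", max_partition_id "20220930":
#   shard_id >= 20220930
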
@@ -546,10 +546,25 @@ class LookerView:
     def _get_sql_info(cls, sql: str, sql_parser_path: str) -> SQLInfo:
         parser_cls = cls._import_sql_parser_cls(sql_parser_path)

-        parser_instance: SQLParser = parser_cls(sql)
-
-        sql_table_names: List[str] = parser_instance.get_tables()
-        column_names: List[str] = parser_instance.get_columns()
+        try:
+            parser_instance: SQLParser = parser_cls(sql)
+        except Exception as e:
+            logger.warning(f"Sql parser failed on {sql} with {e}")
+            return SQLInfo(table_names=[], column_names=[])
+
+        sql_table_names: List[str]
+        try:
+            sql_table_names = parser_instance.get_tables()
+        except Exception as e:
+            logger.warning(f"Sql parser failed on {sql} with {e}")
+            sql_table_names = []
+
+        try:
+            column_names: List[str] = parser_instance.get_columns()
+        except Exception as e:
+            logger.warning(f"Sql parser failed on {sql} with {e}")
+            column_names = []
+
         logger.debug(f"Column names parsed = {column_names}")
         # Drop table names with # in them
         sql_table_names = [t for t in sql_table_names if "#" not in t]

@@ -390,7 +390,11 @@ class RedashSource(Source):
     def _get_sql_table_names(cls, sql: str, sql_parser_path: str) -> List[str]:
         parser_cls = cls._import_sql_parser_cls(sql_parser_path)

-        sql_table_names: List[str] = parser_cls(sql).get_tables()
+        try:
+            sql_table_names: List[str] = parser_cls(sql).get_tables()
+        except Exception as e:
+            logger.warning(f"Sql parser failed on {sql} with {e}")
+            return []

         # Remove quotes from table names
         sql_table_names = [t.replace('"', "") for t in sql_table_names]

@@ -642,7 +642,12 @@ class DataHubValidationAction(ValidationAction):
                         query=query,
                         customProperties=batchSpecProperties,
                     )
-                    tables = DefaultSQLParser(query).get_tables()
+                    try:
+                        tables = DefaultSQLParser(query).get_tables()
+                    except Exception as e:
+                        logger.warning(f"Sql parser failed on {query} with {e}")
+                        tables = []
+
                     if len(set(tables)) != 1:
                         warn(
                             "DataHubValidationAction does not support cross dataset assertions."

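The LookML, Redash, and Great Expectations hunks above all apply the same defensive pattern around SQL parsing. A minimal stand-alone sketch of that pattern (not code from the commit; parser_factory stands in for any SQLParser implementation):

import logging
from typing import Callable, List

logger = logging.getLogger(__name__)


def safe_get_tables(parser_factory: Callable[[str], "SQLParser"], sql: str) -> List[str]:
    # Parse defensively: a parser failure is logged and treated as
    # "no tables found" instead of aborting the whole ingestion run.
    try:
        return parser_factory(sql).get_tables()
    except Exception as e:
        logger.warning(f"Sql parser failed on {sql} with {e}")
        return []
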
@@ -8,6 +8,8 @@ from typing import Dict, List, Optional, Set
 from sqllineage.core.holders import Column, SQLLineageHolder
 from sqllineage.exceptions import SQLLineageException

+from datahub.utilities.sql_parser_base import SQLParser, SqlParserException
+
 with contextlib.suppress(ImportError):
     import sqlparse
     from networkx import DiGraph

@@ -17,7 +19,7 @@ with contextlib.suppress(ImportError):
 logger = logging.getLogger(__name__)


-class SqlLineageSQLParserImpl:
+class SqlLineageSQLParserImpl(SQLParser):
     _DATE_SWAP_TOKEN = "__d_a_t_e"
     _HOUR_SWAP_TOKEN = "__h_o_u_r"
     _TIMESTAMP_SWAP_TOKEN = "__t_i_m_e_s_t_a_m_p"

@@ -27,6 +29,7 @@ class SqlLineageSQLParserImpl:
     _MYVIEW_LOOKER_TOKEN = "my_view.SQL_TABLE_NAME"

     def __init__(self, sql_query: str) -> None:
+        super().__init__(sql_query)
         original_sql_query = sql_query

         # SqlLineageParser makes mistakes on lateral flatten queries, use the prefix

@@ -97,7 +100,9 @@ class SqlLineageSQLParserImpl:
             ]
             self._sql_holder = SQLLineageHolder.of(*self._stmt_holders)
         except SQLLineageException as e:
-            logger.error(f"SQL lineage analyzer error '{e}' for query: '{self._sql}")
+            raise SqlParserException(
+                f"SQL lineage analyzer error '{e}' for query: '{self._sql}"
+            ) from e

     def get_tables(self) -> List[str]:
         result: List[str] = []

@@ -123,8 +128,7 @@ class SqlLineageSQLParserImpl:

     def get_columns(self) -> List[str]:
         if self._sql_holder is None:
-            logger.error("sql holder not present so cannot get columns")
-            return []
+            raise SqlParserException("sql holder not present so cannot get columns")
         graph: DiGraph = self._sql_holder.graph  # For mypy attribute checking
         column_nodes = [n for n in graph.nodes if isinstance(n, Column)]
         column_graph = graph.subgraph(column_nodes)

@@ -4,30 +4,17 @@ import multiprocessing
 import re
 import sys
 import traceback
-from abc import ABCMeta, abstractmethod
 from multiprocessing import Process, Queue
 from typing import List, Optional, Tuple, Type

 from datahub.utilities.sql_lineage_parser_impl import SqlLineageSQLParserImpl
+from datahub.utilities.sql_parser_base import SQLParser

 with contextlib.suppress(ImportError):
     from sql_metadata import Parser as MetadataSQLParser

 logger = logging.getLogger(__name__)


-class SQLParser(metaclass=ABCMeta):
-    def __init__(self, sql_query: str) -> None:
-        self._sql_query = sql_query
-
-    @abstractmethod
-    def get_tables(self) -> List[str]:
-        pass
-
-    @abstractmethod
-    def get_columns(self) -> List[str]:
-        pass
-
-
 class MetadataSQLSQLParser(SQLParser):
     _DATE_SWAP_TOKEN = "__d_a_t_e"

@@ -104,7 +91,7 @@ def sql_lineage_parser_impl_func_wrapper(
         exc_info = sys.exc_info()
         exc_msg: str = str(exc_info[1]) + "".join(traceback.format_tb(exc_info[2]))
         exception_details = (exc_info[0], exc_msg)
-        logger.error(exc_msg)
+        logger.debug(exc_msg)
     finally:
         queue.put((tables, columns, exception_details))

metadata-ingestion/src/datahub/utilities/sql_parser_base.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+from abc import ABCMeta, abstractmethod
+from typing import List
+
+
+class SqlParserException(Exception):
+    """Raised when sql parser fails"""
+
+    pass
+
+
+class SQLParser(metaclass=ABCMeta):
+    def __init__(self, sql_query: str) -> None:
+        self._sql_query = sql_query
+
+    @abstractmethod
+    def get_tables(self) -> List[str]:
+        pass
+
+    @abstractmethod
+    def get_columns(self) -> List[str]:
+        pass
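A minimal usage sketch (not part of the commit) of the new base module: concrete parsers subclass SQLParser, and callers catch SqlParserException, which SqlLineageSQLParserImpl now raises instead of logging and returning empty results. TrivialParser and the sample query are hypothetical.

from typing import List

from datahub.utilities.sql_parser_base import SQLParser, SqlParserException


class TrivialParser(SQLParser):
    # Hypothetical parser, for illustration only.
    def get_tables(self) -> List[str]:
        if not self._sql_query.strip():
            raise SqlParserException("empty query")
        return [self._sql_query.split()[-1]]

    def get_columns(self) -> List[str]:
        return []


try:
    tables = TrivialParser("SELECT 1 FROM my_table").get_tables()
except SqlParserException:
    tables = []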