mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-11 15:28:00 +00:00
fix: improve query and minimize snflk <-> OM backand and forth (#24228)
This commit is contained in:
parent
4e398d003b
commit
f3ef29a117
@ -94,10 +94,9 @@ from metadata.ingestion.source.database.snowflake.queries import (
|
|||||||
SNOWFLAKE_GET_DATABASE_COMMENTS,
|
SNOWFLAKE_GET_DATABASE_COMMENTS,
|
||||||
SNOWFLAKE_GET_DATABASES,
|
SNOWFLAKE_GET_DATABASES,
|
||||||
SNOWFLAKE_GET_EXTERNAL_LOCATIONS,
|
SNOWFLAKE_GET_EXTERNAL_LOCATIONS,
|
||||||
SNOWFLAKE_GET_FUNCTIONS,
|
|
||||||
SNOWFLAKE_GET_ORGANIZATION_NAME,
|
SNOWFLAKE_GET_ORGANIZATION_NAME,
|
||||||
SNOWFLAKE_GET_SCHEMA_COMMENTS,
|
SNOWFLAKE_GET_SCHEMA_COMMENTS,
|
||||||
SNOWFLAKE_GET_STORED_PROCEDURES,
|
SNOWFLAKE_GET_STORED_PROCEDURES_AND_FUNCTIONS,
|
||||||
SNOWFLAKE_GET_STREAM,
|
SNOWFLAKE_GET_STREAM,
|
||||||
SNOWFLAKE_LIFE_CYCLE_QUERY,
|
SNOWFLAKE_LIFE_CYCLE_QUERY,
|
||||||
SNOWFLAKE_SESSION_TAG_QUERY,
|
SNOWFLAKE_SESSION_TAG_QUERY,
|
||||||
@ -558,18 +557,17 @@ class SnowflakeSource(
|
|||||||
def _get_table_names_and_types(
|
def _get_table_names_and_types(
|
||||||
self, schema_name: str, table_type: TableType = TableType.Regular
|
self, schema_name: str, table_type: TableType = TableType.Regular
|
||||||
) -> List[TableNameAndType]:
|
) -> List[TableNameAndType]:
|
||||||
table_type_to_params_map = {
|
|
||||||
TableType.Regular: {},
|
|
||||||
TableType.External: {"external_tables": True},
|
|
||||||
TableType.Transient: {"include_transient_tables": True},
|
|
||||||
TableType.Dynamic: {"dynamic_tables": True},
|
|
||||||
}
|
|
||||||
|
|
||||||
snowflake_tables = self.inspector.get_table_names(
|
snowflake_tables = self.inspector.get_table_names(
|
||||||
schema=schema_name,
|
schema=schema_name,
|
||||||
incremental=self.incremental,
|
incremental=self.incremental,
|
||||||
account_usage=self.service_connection.accountUsageSchema,
|
account_usage=self.service_connection.accountUsageSchema,
|
||||||
**table_type_to_params_map[table_type],
|
include_views=self.source_config.includeViews,
|
||||||
|
**(
|
||||||
|
{"include_transient_tables": True}
|
||||||
|
if self.service_connection.includeTransientTables
|
||||||
|
else {}
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.context.get_global().deleted_tables.extend(
|
self.context.get_global().deleted_tables.extend(
|
||||||
@ -587,7 +585,7 @@ class SnowflakeSource(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
TableNameAndType(name=table.name, type_=table_type)
|
TableNameAndType(name=table.name, type_=table.type_)
|
||||||
for table in snowflake_tables.get_not_deleted()
|
for table in snowflake_tables.get_not_deleted()
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -631,21 +629,6 @@ class SnowflakeSource(
|
|||||||
"""
|
"""
|
||||||
table_list = self._get_table_names_and_types(schema_name)
|
table_list = self._get_table_names_and_types(schema_name)
|
||||||
|
|
||||||
table_list.extend(
|
|
||||||
self._get_table_names_and_types(schema_name, table_type=TableType.External)
|
|
||||||
)
|
|
||||||
|
|
||||||
table_list.extend(
|
|
||||||
self._get_table_names_and_types(schema_name, table_type=TableType.Dynamic)
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.service_connection.includeTransientTables:
|
|
||||||
table_list.extend(
|
|
||||||
self._get_table_names_and_types(
|
|
||||||
schema_name, table_type=TableType.Transient
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.service_connection.includeStreams:
|
if self.service_connection.includeStreams:
|
||||||
table_list.extend(self._get_stream_names_and_types(schema_name))
|
table_list.extend(self._get_stream_names_and_types(schema_name))
|
||||||
|
|
||||||
@ -742,39 +725,6 @@ class SnowflakeSource(
|
|||||||
logger.error(f"Unable to get procedure source url: {exc}")
|
logger.error(f"Unable to get procedure source url: {exc}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_view_names_and_types(
|
|
||||||
self, schema_name: str, materialized_views: bool = False
|
|
||||||
) -> List[TableNameAndType]:
|
|
||||||
table_type = (
|
|
||||||
TableType.MaterializedView if materialized_views else TableType.View
|
|
||||||
)
|
|
||||||
|
|
||||||
snowflake_views = self.inspector.get_view_names(
|
|
||||||
schema=schema_name,
|
|
||||||
incremental=self.incremental,
|
|
||||||
account_usage=self.service_connection.accountUsageSchema,
|
|
||||||
materialized_views=materialized_views,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.context.get_global().deleted_tables.extend(
|
|
||||||
[
|
|
||||||
fqn.build(
|
|
||||||
metadata=self.metadata,
|
|
||||||
entity_type=Table,
|
|
||||||
service_name=self.context.get().database_service,
|
|
||||||
database_name=self.context.get().database,
|
|
||||||
schema_name=schema_name,
|
|
||||||
table_name=view.name,
|
|
||||||
)
|
|
||||||
for view in snowflake_views.get_deleted()
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
return [
|
|
||||||
TableNameAndType(name=view.name, type_=table_type)
|
|
||||||
for view in snowflake_views.get_not_deleted()
|
|
||||||
]
|
|
||||||
|
|
||||||
def query_view_names_and_types(
|
def query_view_names_and_types(
|
||||||
self, schema_name: str
|
self, schema_name: str
|
||||||
) -> Iterable[TableNameAndType]:
|
) -> Iterable[TableNameAndType]:
|
||||||
@ -786,12 +736,7 @@ class SnowflakeSource(
|
|||||||
This is useful for sources where we need fine-grained
|
This is useful for sources where we need fine-grained
|
||||||
logic on how to handle table types, e.g., material views,...
|
logic on how to handle table types, e.g., material views,...
|
||||||
"""
|
"""
|
||||||
views = self._get_view_names_and_types(schema_name)
|
return []
|
||||||
views.extend(
|
|
||||||
self._get_view_names_and_types(schema_name, materialized_views=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
return views
|
|
||||||
|
|
||||||
def _get_stored_procedures_internal(
|
def _get_stored_procedures_internal(
|
||||||
self, query: str
|
self, query: str
|
||||||
@ -823,9 +768,8 @@ class SnowflakeSource(
|
|||||||
"""List Snowflake stored procedures"""
|
"""List Snowflake stored procedures"""
|
||||||
if self.source_config.includeStoredProcedures:
|
if self.source_config.includeStoredProcedures:
|
||||||
yield from self._get_stored_procedures_internal(
|
yield from self._get_stored_procedures_internal(
|
||||||
SNOWFLAKE_GET_STORED_PROCEDURES
|
SNOWFLAKE_GET_STORED_PROCEDURES_AND_FUNCTIONS
|
||||||
)
|
)
|
||||||
yield from self._get_stored_procedures_internal(SNOWFLAKE_GET_FUNCTIONS)
|
|
||||||
|
|
||||||
def describe_procedure_definition(
|
def describe_procedure_definition(
|
||||||
self, stored_procedure: SnowflakeStoredProcedure
|
self, stored_procedure: SnowflakeStoredProcedure
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from sqlalchemy import text
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.storedProcedure import Language
|
from metadata.generated.schema.entity.data.storedProcedure import Language
|
||||||
|
from metadata.generated.schema.entity.data.table import TableType
|
||||||
from metadata.ingestion.source.database.snowflake.queries import (
|
from metadata.ingestion.source.database.snowflake.queries import (
|
||||||
SNOWFLAKE_QUERY_LOG_QUERY,
|
SNOWFLAKE_QUERY_LOG_QUERY,
|
||||||
)
|
)
|
||||||
@ -92,6 +93,7 @@ class SnowflakeTable(BaseModel):
|
|||||||
|
|
||||||
name: str
|
name: str
|
||||||
deleted: Optional[datetime] = None
|
deleted: Optional[datetime] = None
|
||||||
|
type_: Optional[TableType] = None
|
||||||
|
|
||||||
|
|
||||||
class SnowflakeTableList(BaseModel):
|
class SnowflakeTableList(BaseModel):
|
||||||
|
|||||||
@ -14,6 +14,33 @@ SQL Queries used during ingestion
|
|||||||
|
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
SNOWFLAKE_GET_TABLE_NAMES = """
|
||||||
|
select TABLE_NAME, NULL, TABLE_TYPE from information_schema.tables
|
||||||
|
where TABLE_SCHEMA = '{schema}'
|
||||||
|
AND COALESCE(IS_TRANSIENT, 'NO') != '{is_transient}'
|
||||||
|
AND {include_views}
|
||||||
|
"""
|
||||||
|
|
||||||
|
SNOWFLAKE_INCREMENTAL_GET_TABLE_NAMES = """
|
||||||
|
select TABLE_NAME, DELETED, TABLE_TYPE
|
||||||
|
from (
|
||||||
|
select
|
||||||
|
TABLE_NAME,
|
||||||
|
DELETED,
|
||||||
|
TABLE_TYPE,
|
||||||
|
ROW_NUMBER() over (
|
||||||
|
partition by TABLE_NAME order by LAST_DDL desc
|
||||||
|
) as ROW_NUMBER
|
||||||
|
from {account_usage}.tables
|
||||||
|
where TABLE_CATALOG = '{database}'
|
||||||
|
and TABLE_SCHEMA = '{schema}'
|
||||||
|
and COALESCE(IS_TRANSIENT, 'NO') != '{is_transient}'
|
||||||
|
and DATE_PART(epoch_millisecond, LAST_DDL) >= '{date}'
|
||||||
|
and {include_views}
|
||||||
|
)
|
||||||
|
where ROW_NUMBER = 1
|
||||||
|
"""
|
||||||
|
|
||||||
SNOWFLAKE_SQL_STATEMENT = textwrap.dedent(
|
SNOWFLAKE_SQL_STATEMENT = textwrap.dedent(
|
||||||
"""
|
"""
|
||||||
SELECT
|
SELECT
|
||||||
@ -317,7 +344,7 @@ and table_catalog = '{database_name}'
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
SNOWFLAKE_GET_STORED_PROCEDURES = textwrap.dedent(
|
SNOWFLAKE_GET_STORED_PROCEDURES_AND_FUNCTIONS = textwrap.dedent(
|
||||||
"""
|
"""
|
||||||
SELECT
|
SELECT
|
||||||
PROCEDURE_NAME AS name,
|
PROCEDURE_NAME AS name,
|
||||||
@ -331,11 +358,9 @@ FROM {account_usage}.PROCEDURES
|
|||||||
WHERE PROCEDURE_CATALOG = '{database_name}'
|
WHERE PROCEDURE_CATALOG = '{database_name}'
|
||||||
AND PROCEDURE_SCHEMA = '{schema_name}'
|
AND PROCEDURE_SCHEMA = '{schema_name}'
|
||||||
AND DELETED IS NULL
|
AND DELETED IS NULL
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
SNOWFLAKE_GET_FUNCTIONS = textwrap.dedent(
|
UNION ALL
|
||||||
"""
|
|
||||||
SELECT
|
SELECT
|
||||||
FUNCTION_NAME AS name,
|
FUNCTION_NAME AS name,
|
||||||
FUNCTION_OWNER AS owner,
|
FUNCTION_OWNER AS owner,
|
||||||
|
|||||||
@ -24,6 +24,7 @@ from sqlalchemy.engine import reflection
|
|||||||
from sqlalchemy.sql import text
|
from sqlalchemy.sql import text
|
||||||
from sqlalchemy.types import FLOAT
|
from sqlalchemy.types import FLOAT
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import TableType
|
||||||
from metadata.ingestion.source.database.incremental_metadata_extraction import (
|
from metadata.ingestion.source.database.incremental_metadata_extraction import (
|
||||||
IncrementalConfig,
|
IncrementalConfig,
|
||||||
)
|
)
|
||||||
@ -33,25 +34,19 @@ from metadata.ingestion.source.database.snowflake.models import (
|
|||||||
)
|
)
|
||||||
from metadata.ingestion.source.database.snowflake.queries import (
|
from metadata.ingestion.source.database.snowflake.queries import (
|
||||||
SNOWFLAKE_GET_COMMENTS,
|
SNOWFLAKE_GET_COMMENTS,
|
||||||
SNOWFLAKE_GET_DYNAMIC_TABLE_NAMES,
|
|
||||||
SNOWFLAKE_GET_EXTERNAL_TABLE_NAMES,
|
|
||||||
SNOWFLAKE_GET_MVIEW_NAMES,
|
SNOWFLAKE_GET_MVIEW_NAMES,
|
||||||
SNOWFLAKE_GET_SCHEMA_COLUMNS,
|
SNOWFLAKE_GET_SCHEMA_COLUMNS,
|
||||||
SNOWFLAKE_GET_STREAM_DEFINITION,
|
SNOWFLAKE_GET_STREAM_DEFINITION,
|
||||||
SNOWFLAKE_GET_STREAM_NAMES,
|
SNOWFLAKE_GET_STREAM_NAMES,
|
||||||
SNOWFLAKE_GET_TABLE_DDL,
|
SNOWFLAKE_GET_TABLE_DDL,
|
||||||
SNOWFLAKE_GET_TRANSIENT_NAMES,
|
SNOWFLAKE_GET_TABLE_NAMES,
|
||||||
SNOWFLAKE_GET_VIEW_DDL,
|
SNOWFLAKE_GET_VIEW_DDL,
|
||||||
SNOWFLAKE_GET_VIEW_DEFINITION,
|
SNOWFLAKE_GET_VIEW_DEFINITION,
|
||||||
SNOWFLAKE_GET_VIEW_NAMES,
|
SNOWFLAKE_GET_VIEW_NAMES,
|
||||||
SNOWFLAKE_GET_WITHOUT_TRANSIENT_TABLE_NAMES,
|
|
||||||
SNOWFLAKE_INCREMENTAL_GET_DYNAMIC_TABLE_NAMES,
|
|
||||||
SNOWFLAKE_INCREMENTAL_GET_EXTERNAL_TABLE_NAMES,
|
|
||||||
SNOWFLAKE_INCREMENTAL_GET_MVIEW_NAMES,
|
SNOWFLAKE_INCREMENTAL_GET_MVIEW_NAMES,
|
||||||
SNOWFLAKE_INCREMENTAL_GET_STREAM_NAMES,
|
SNOWFLAKE_INCREMENTAL_GET_STREAM_NAMES,
|
||||||
SNOWFLAKE_INCREMENTAL_GET_TRANSIENT_NAMES,
|
SNOWFLAKE_INCREMENTAL_GET_TABLE_NAMES,
|
||||||
SNOWFLAKE_INCREMENTAL_GET_VIEW_NAMES,
|
SNOWFLAKE_INCREMENTAL_GET_VIEW_NAMES,
|
||||||
SNOWFLAKE_INCREMENTAL_GET_WITHOUT_TRANSIENT_TABLE_NAMES,
|
|
||||||
)
|
)
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.sqlalchemy_utils import (
|
from metadata.utils.sqlalchemy_utils import (
|
||||||
@ -67,16 +62,10 @@ QueryMap = Dict[str, Query]
|
|||||||
|
|
||||||
TABLE_QUERY_MAPS = {
|
TABLE_QUERY_MAPS = {
|
||||||
"full": {
|
"full": {
|
||||||
"default": SNOWFLAKE_GET_WITHOUT_TRANSIENT_TABLE_NAMES,
|
"default": SNOWFLAKE_GET_TABLE_NAMES,
|
||||||
"transient_tables": SNOWFLAKE_GET_TRANSIENT_NAMES,
|
|
||||||
"external_tables": SNOWFLAKE_GET_EXTERNAL_TABLE_NAMES,
|
|
||||||
"dynamic_tables": SNOWFLAKE_GET_DYNAMIC_TABLE_NAMES,
|
|
||||||
},
|
},
|
||||||
"incremental": {
|
"incremental": {
|
||||||
"default": SNOWFLAKE_INCREMENTAL_GET_WITHOUT_TRANSIENT_TABLE_NAMES,
|
"default": SNOWFLAKE_INCREMENTAL_GET_TABLE_NAMES,
|
||||||
"transient_tables": SNOWFLAKE_INCREMENTAL_GET_TRANSIENT_NAMES,
|
|
||||||
"external_tables": SNOWFLAKE_INCREMENTAL_GET_EXTERNAL_TABLE_NAMES,
|
|
||||||
"dynamic_tables": SNOWFLAKE_INCREMENTAL_GET_DYNAMIC_TABLE_NAMES,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,9 +183,15 @@ def _get_query_parameters(
|
|||||||
schema: str,
|
schema: str,
|
||||||
incremental: Optional[IncrementalConfig],
|
incremental: Optional[IncrementalConfig],
|
||||||
account_usage: Optional[str] = None,
|
account_usage: Optional[str] = None,
|
||||||
|
include_transient_tables: Optional[bool] = False,
|
||||||
|
include_views: Optional[bool] = False,
|
||||||
):
|
):
|
||||||
"""Returns the proper query parameters depending if the extraction is Incremental or Full"""
|
"""Returns the proper query parameters depending if the extraction is Incremental or Full"""
|
||||||
parameters = {"schema": fqn.unquote_name(schema)}
|
parameters = {
|
||||||
|
"schema": fqn.unquote_name(schema),
|
||||||
|
"is_transient": "YES" if include_transient_tables else "NO",
|
||||||
|
"include_views": "TRUE" if include_views else "TABLE_TYPE != 'VIEW'",
|
||||||
|
}
|
||||||
|
|
||||||
if incremental and incremental.enabled:
|
if incremental and incremental.enabled:
|
||||||
database, _ = self._current_database_schema(connection) # pylint: disable=W0212
|
database, _ = self._current_database_schema(connection) # pylint: disable=W0212
|
||||||
@ -217,30 +212,43 @@ def get_table_names(self, connection, schema: str, **kw):
|
|||||||
|
|
||||||
queries = _get_query_map(incremental, TABLE_QUERY_MAPS)
|
queries = _get_query_map(incremental, TABLE_QUERY_MAPS)
|
||||||
parameters = _get_query_parameters(
|
parameters = _get_query_parameters(
|
||||||
self, connection, schema, incremental, account_usage
|
self,
|
||||||
|
connection,
|
||||||
|
schema,
|
||||||
|
incremental,
|
||||||
|
account_usage,
|
||||||
|
include_transient_tables=kw.get("include_transient_tables", False),
|
||||||
|
include_views=kw.get("include_views", False),
|
||||||
)
|
)
|
||||||
|
|
||||||
query = queries["default"]
|
query = queries["default"]
|
||||||
|
|
||||||
if kw.get("include_transient_tables"):
|
|
||||||
query = queries["transient_tables"]
|
|
||||||
|
|
||||||
if kw.get("external_tables"):
|
|
||||||
query = queries["external_tables"]
|
|
||||||
|
|
||||||
if kw.get("dynamic_tables"):
|
|
||||||
query = queries["dynamic_tables"]
|
|
||||||
|
|
||||||
cursor = connection.execute(query.format(**parameters))
|
cursor = connection.execute(query.format(**parameters))
|
||||||
result = SnowflakeTableList(
|
result = SnowflakeTableList(
|
||||||
tables=[
|
tables=[
|
||||||
SnowflakeTable(name=self.normalize_name(row[0]), deleted=row[1])
|
SnowflakeTable(
|
||||||
|
name=self.normalize_name(row[0]),
|
||||||
|
deleted=row[1],
|
||||||
|
type_=_get_table_type(row[2] if row[2] else "BASE TABLE"),
|
||||||
|
)
|
||||||
for row in cursor
|
for row in cursor
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _get_table_type(table_type: str) -> TableType:
|
||||||
|
table_type_map = {
|
||||||
|
"BASE TABLE": TableType.Regular,
|
||||||
|
"VIEW": TableType.View,
|
||||||
|
"MATERIALIZED VIEW": TableType.MaterializedView,
|
||||||
|
"EXTERNAL TABLE": TableType.External,
|
||||||
|
"TRANSIENT TABLE": TableType.Transient,
|
||||||
|
"DYNAMIC TABLE": TableType.Dynamic,
|
||||||
|
}
|
||||||
|
return table_type_map.get(table_type, TableType.Regular)
|
||||||
|
|
||||||
|
|
||||||
def get_view_names(self, connection, schema, **kw):
|
def get_view_names(self, connection, schema, **kw):
|
||||||
incremental = kw.get("incremental")
|
incremental = kw.get("incremental")
|
||||||
account_usage = kw.get("account_usage")
|
account_usage = kw.get("account_usage")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user