MINOR: Lineage Improvements (#20446)

This commit is contained in:
Mayur Singal 2025-03-27 11:57:23 +05:30 committed by GitHub
parent 7a860e51f9
commit 766d0caebc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 17 additions and 3 deletions

View File

@ -105,6 +105,7 @@ class Dialect(Enum):
SQLITE = "sqlite" SQLITE = "sqlite"
TERADATA = "teradata" TERADATA = "teradata"
TSQL = "tsql" TSQL = "tsql"
MARIADB = "mariadb"
MAP_CONNECTION_TYPE_DIALECT: Dict[str, Dialect] = { MAP_CONNECTION_TYPE_DIALECT: Dict[str, Dialect] = {
@ -126,7 +127,7 @@ MAP_CONNECTION_TYPE_DIALECT: Dict[str, Dialect] = {
str(MssqlType.Mssql.value): Dialect.TSQL, str(MssqlType.Mssql.value): Dialect.TSQL,
str(AzureSQLType.AzureSQL.value): Dialect.TSQL, str(AzureSQLType.AzureSQL.value): Dialect.TSQL,
str(TeradataType.Teradata.value): Dialect.TERADATA, str(TeradataType.Teradata.value): Dialect.TERADATA,
- str(MariaDBType.MariaDB.value): Dialect.MYSQL,
+ str(MariaDBType.MariaDB.value): Dialect.MARIADB,
str(SingleStoreType.SingleStore.value): Dialect.MYSQL, str(SingleStoreType.SingleStore.value): Dialect.MYSQL,
} }

View File

@ -54,6 +54,7 @@ from metadata.utils.lru_cache import LRU_CACHE_SIZE, LRUCache
logger = utils_logger() logger = utils_logger()
DEFAULT_SCHEMA_NAME = "<default>" DEFAULT_SCHEMA_NAME = "<default>"
CUTOFF_NODES = 20
def get_column_fqn(table_entity: Table, column: str) -> Optional[str]: def get_column_fqn(table_entity: Table, column: str) -> Optional[str]:
@ -880,8 +881,9 @@ def _get_paths_from_subtree(subtree: DiGraph) -> List[List[Any]]:
# Find all simple paths from each root to each leaf # Find all simple paths from each root to each leaf
for root in root_nodes: for root in root_nodes:
logger.debug(f"Processing root node {root}")
for leaf in leaf_nodes: for leaf in leaf_nodes:
- paths.extend(nx.all_simple_paths(subtree, root, leaf))
+ paths.extend(nx.all_simple_paths(subtree, root, leaf, cutoff=CUTOFF_NODES))
return paths return paths
@ -903,6 +905,9 @@ def get_lineage_by_graph(
if graph is None: if graph is None:
return return
logger.info(
f"Processing graph with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges"
)
# Get all weakly connected components # Get all weakly connected components
components = list(nx.weakly_connected_components(graph)) components = list(nx.weakly_connected_components(graph))

View File

@ -56,6 +56,8 @@ logger = ingestion_logger()
CHUNK_SIZE = 200 CHUNK_SIZE = 200
THREAD_TIMEOUT = 600
class LineageSource(QueryParserSource, ABC): class LineageSource(QueryParserSource, ABC):
""" """
@ -164,7 +166,11 @@ class LineageSource(QueryParserSource, ABC):
for i, future in enumerate(futures): for i, future in enumerate(futures):
if future.done(): if future.done():
- future.result()
+ try:
future.result(timeout=THREAD_TIMEOUT)
except Exception as e:
logger.debug(f"Error in future: {e}")
logger.debug(traceback.format_exc())
futures.pop(i) futures.pop(i)
time.sleep(0.01) time.sleep(0.01)
@ -257,6 +263,7 @@ class LineageSource(QueryParserSource, ABC):
Based on the query logs, prepare the lineage Based on the query logs, prepare the lineage
and send it to the sink and send it to the sink
""" """
logger.info("Processing Query Lineage")
connection_type = str(self.service_connection.type.value) connection_type = str(self.service_connection.type.value)
self.dialect = ConnectionTypeDialectMapper.dialect_of(connection_type) self.dialect = ConnectionTypeDialectMapper.dialect_of(connection_type)
producer_fn = self.get_table_query producer_fn = self.get_table_query

View File

@ -49,6 +49,7 @@ class SnowflakeLineageSource(
OR (QUERY_TYPE = 'INSERT' and query_text ILIKE '%%insert%%into%%select%%') OR (QUERY_TYPE = 'INSERT' and query_text ILIKE '%%insert%%into%%select%%')
OR (QUERY_TYPE = 'ALTER' and query_text ILIKE '%%alter%%table%%swap%%') OR (QUERY_TYPE = 'ALTER' and query_text ILIKE '%%alter%%table%%swap%%')
OR (QUERY_TYPE = 'CREATE_TABLE' and query_text ILIKE '%%clone%%') OR (QUERY_TYPE = 'CREATE_TABLE' and query_text ILIKE '%%clone%%')
OR (QUERY_TYPE = 'CREATE_VIEW' and query_text ILIKE '%%create%%temporary%%view%%')
) )
""" """