MINOR: Fix unity catalog lineage - handle errors (#19791)

* MINOR: Fix unity catalog lineage - hanle errors

* lint
This commit is contained in:
Mayur Singal 2025-02-14 10:45:31 +05:30 committed by GitHub
parent 584816fc33
commit c8e24e5f3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,6 +11,7 @@
""" """
Databricks Unity Catalog Lineage Source Module Databricks Unity Catalog Lineage Source Module
""" """
import traceback
from typing import Iterable, Optional from typing import Iterable, Optional
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
@ -117,23 +118,16 @@ class UnitycatalogLineageSource(Source):
) )
return None return None
def _iter(self, *_, **__) -> Iterable[Either[AddLineageRequest]]: def _handle_upstream_table(
""" self,
Based on the query logs, prepare the lineage table_streams: LineageTableStreams,
and send it to the sink table: Table,
""" databricks_table_fqn: str,
) -> Iterable[Either[AddLineageRequest]]:
for database in self.metadata.list_all_entities(
entity=Database, params={"service": self.config.serviceName}
):
for table in self.metadata.list_all_entities(
entity=Table, params={"database": database.fullyQualifiedName.root}
):
databricks_table_fqn = f"{table.database.name}.{table.databaseSchema.name}.{table.name.root}"
table_streams: LineageTableStreams = self.client.get_table_lineage(
databricks_table_fqn
)
for upstream_table in table_streams.upstream_tables: for upstream_table in table_streams.upstream_tables:
try:
if not upstream_table.name:
continue
from_entity_fqn = fqn.build( from_entity_fqn = fqn.build(
metadata=self.metadata, metadata=self.metadata,
entity_type=Table, entity_type=Table,
@ -164,6 +158,33 @@ class UnitycatalogLineageSource(Source):
) )
), ),
) )
except Exception:
logger.debug(
"Error while processing lineage for "
f"{upstream_table.catalog_name}.{upstream_table.schema_name}.{upstream_table.name}"
f" -> {databricks_table_fqn}"
)
logger.debug(traceback.format_exc())
def _iter(self, *_, **__) -> Iterable[Either[AddLineageRequest]]:
"""
Based on the query logs, prepare the lineage
and send it to the sink
"""
for database in self.metadata.list_all_entities(
entity=Database, params={"service": self.config.serviceName}
):
for table in self.metadata.list_all_entities(
entity=Table, params={"database": database.fullyQualifiedName.root}
):
databricks_table_fqn = f"{table.database.name}.{table.databaseSchema.name}.{table.name.root}"
table_streams: LineageTableStreams = self.client.get_table_lineage(
databricks_table_fqn
)
yield from self._handle_upstream_table(
table_streams, table, databricks_table_fqn
)
def test_connection(self) -> None: def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection) test_connection_fn = get_test_connection_fn(self.service_connection)