MINOR: Fix unity catalog lineage - handle errors (#19791)

* MINOR: Fix unity catalog lineage - hanle errors

* lint
This commit is contained in:
Mayur Singal 2025-02-14 10:45:31 +05:30 committed by GitHub
parent 584816fc33
commit c8e24e5f3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,6 +11,7 @@
"""
Databricks Unity Catalog Lineage Source Module
"""
import traceback
from typing import Iterable, Optional
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
@ -117,23 +118,16 @@ class UnitycatalogLineageSource(Source):
)
return None
def _iter(self, *_, **__) -> Iterable[Either[AddLineageRequest]]:
"""
Based on the query logs, prepare the lineage
and send it to the sink
"""
for database in self.metadata.list_all_entities(
entity=Database, params={"service": self.config.serviceName}
):
for table in self.metadata.list_all_entities(
entity=Table, params={"database": database.fullyQualifiedName.root}
):
databricks_table_fqn = f"{table.database.name}.{table.databaseSchema.name}.{table.name.root}"
table_streams: LineageTableStreams = self.client.get_table_lineage(
databricks_table_fqn
)
def _handle_upstream_table(
self,
table_streams: LineageTableStreams,
table: Table,
databricks_table_fqn: str,
) -> Iterable[Either[AddLineageRequest]]:
for upstream_table in table_streams.upstream_tables:
try:
if not upstream_table.name:
continue
from_entity_fqn = fqn.build(
metadata=self.metadata,
entity_type=Table,
@ -164,6 +158,33 @@ class UnitycatalogLineageSource(Source):
)
),
)
except Exception:
logger.debug(
"Error while processing lineage for "
f"{upstream_table.catalog_name}.{upstream_table.schema_name}.{upstream_table.name}"
f" -> {databricks_table_fqn}"
)
logger.debug(traceback.format_exc())
def _iter(self, *_, **__) -> Iterable[Either[AddLineageRequest]]:
"""
Based on the query logs, prepare the lineage
and send it to the sink
"""
for database in self.metadata.list_all_entities(
entity=Database, params={"service": self.config.serviceName}
):
for table in self.metadata.list_all_entities(
entity=Table, params={"database": database.fullyQualifiedName.root}
):
databricks_table_fqn = f"{table.database.name}.{table.databaseSchema.name}.{table.name.root}"
table_streams: LineageTableStreams = self.client.get_table_lineage(
databricks_table_fqn
)
yield from self._handle_upstream_table(
table_streams, table, databricks_table_fqn
)
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)