mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 10:39:30 +00:00 
			
		
		
		
	feat: Unity Catalog Lineage Enhancement: External Location Support (#23790)
This commit is contained in:
		
							parent
							
								
									63336dd98c
								
							
						
					
					
						commit
						4708c2b64f
					
				| @ -41,7 +41,7 @@ from metadata.ingestion.source.database.unitycatalog.models import ( | |||||||
| from metadata.utils.logger import ingestion_logger | from metadata.utils.logger import ingestion_logger | ||||||
| 
 | 
 | ||||||
| logger = ingestion_logger() | logger = ingestion_logger() | ||||||
| TABLE_LINEAGE_PATH = "/lineage-tracking/table-lineage/get" | TABLE_LINEAGE_PATH = "/lineage-tracking/table-lineage" | ||||||
| COLUMN_LINEAGE_PATH = "/lineage-tracking/column-lineage/get" | COLUMN_LINEAGE_PATH = "/lineage-tracking/column-lineage/get" | ||||||
| TABLES_PATH = "/unity-catalog/tables" | TABLES_PATH = "/unity-catalog/tables" | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -15,6 +15,7 @@ import traceback | |||||||
| from typing import Iterable, Optional | from typing import Iterable, Optional | ||||||
| 
 | 
 | ||||||
| from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest | from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest | ||||||
|  | from metadata.generated.schema.entity.data.container import ContainerDataModel | ||||||
| from metadata.generated.schema.entity.data.database import Database | from metadata.generated.schema.entity.data.database import Database | ||||||
| from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema | from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema | ||||||
| from metadata.generated.schema.entity.data.table import Table | from metadata.generated.schema.entity.data.table import Table | ||||||
| @ -90,6 +91,41 @@ class UnitycatalogLineageSource(Source): | |||||||
|             ) |             ) | ||||||
|         return cls(config, metadata) |         return cls(config, metadata) | ||||||
| 
 | 
 | ||||||
|  |     def _get_data_model_column_fqn( | ||||||
|  |         self, data_model_entity: ContainerDataModel, column: str | ||||||
|  |     ) -> Optional[str]: | ||||||
|  |         if not data_model_entity: | ||||||
|  |             return None | ||||||
|  |         for entity_column in data_model_entity.columns: | ||||||
|  |             if entity_column.displayName.lower() == column.lower(): | ||||||
|  |                 return entity_column.fullyQualifiedName.root | ||||||
|  |         return None | ||||||
|  | 
 | ||||||
|  |     def _get_container_column_lineage( | ||||||
|  |         self, data_model_entity: ContainerDataModel, table_entity: Table | ||||||
|  |     ) -> Optional[LineageDetails]: | ||||||
|  |         try: | ||||||
|  |             column_lineage = [] | ||||||
|  |             for column in table_entity.columns: | ||||||
|  |                 from_column = self._get_data_model_column_fqn( | ||||||
|  |                     data_model_entity=data_model_entity, column=column.name.root | ||||||
|  |                 ) | ||||||
|  |                 to_column = column.fullyQualifiedName.root | ||||||
|  |                 if from_column and to_column: | ||||||
|  |                     column_lineage.append( | ||||||
|  |                         ColumnLineage(fromColumns=[from_column], toColumn=to_column) | ||||||
|  |                     ) | ||||||
|  |             if column_lineage: | ||||||
|  |                 return LineageDetails( | ||||||
|  |                     columnsLineage=column_lineage, | ||||||
|  |                     source=LineageSource.ExternalTableLineage, | ||||||
|  |                 ) | ||||||
|  |             return None | ||||||
|  |         except Exception as exc: | ||||||
|  |             logger.debug(f"Error computing container column lineage: {exc}") | ||||||
|  |             logger.debug(traceback.format_exc()) | ||||||
|  |             return None | ||||||
|  | 
 | ||||||
|     def _get_lineage_details( |     def _get_lineage_details( | ||||||
|         self, from_table: Table, to_table: Table, databricks_table_fqn: str |         self, from_table: Table, to_table: Table, databricks_table_fqn: str | ||||||
|     ) -> Optional[LineageDetails]: |     ) -> Optional[LineageDetails]: | ||||||
| @ -124,16 +160,87 @@ class UnitycatalogLineageSource(Source): | |||||||
|             logger.debug(traceback.format_exc()) |             logger.debug(traceback.format_exc()) | ||||||
|             return None |             return None | ||||||
| 
 | 
 | ||||||
|  |     def _handle_external_location_lineage( | ||||||
|  |         self, file_info, table: Table, is_upstream: bool | ||||||
|  |     ) -> Iterable[Either[AddLineageRequest]]: | ||||||
|  |         try: | ||||||
|  |             if not file_info.storage_location: | ||||||
|  |                 logger.debug("No storage location found in fileInfo") | ||||||
|  |                 return | ||||||
|  | 
 | ||||||
|  |             location_entity = self.metadata.es_search_container_by_path( | ||||||
|  |                 full_path=file_info.storage_location, fields="dataModel" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |             if location_entity and location_entity[0]: | ||||||
|  |                 lineage_details = None | ||||||
|  |                 if location_entity[0].dataModel: | ||||||
|  |                     lineage_details = self._get_container_column_lineage( | ||||||
|  |                         location_entity[0].dataModel, table | ||||||
|  |                     ) | ||||||
|  | 
 | ||||||
|  |                 if is_upstream: | ||||||
|  |                     yield Either( | ||||||
|  |                         left=None, | ||||||
|  |                         right=AddLineageRequest( | ||||||
|  |                             edge=EntitiesEdge( | ||||||
|  |                                 fromEntity=EntityReference( | ||||||
|  |                                     id=location_entity[0].id, | ||||||
|  |                                     type="container", | ||||||
|  |                                 ), | ||||||
|  |                                 toEntity=EntityReference( | ||||||
|  |                                     id=table.id, | ||||||
|  |                                     type="table", | ||||||
|  |                                 ), | ||||||
|  |                                 lineageDetails=lineage_details, | ||||||
|  |                             ) | ||||||
|  |                         ), | ||||||
|  |                     ) | ||||||
|  |                 else: | ||||||
|  |                     yield Either( | ||||||
|  |                         left=None, | ||||||
|  |                         right=AddLineageRequest( | ||||||
|  |                             edge=EntitiesEdge( | ||||||
|  |                                 fromEntity=EntityReference( | ||||||
|  |                                     id=table.id, | ||||||
|  |                                     type="table", | ||||||
|  |                                 ), | ||||||
|  |                                 toEntity=EntityReference( | ||||||
|  |                                     id=location_entity[0].id, | ||||||
|  |                                     type="container", | ||||||
|  |                                 ), | ||||||
|  |                                 lineageDetails=lineage_details, | ||||||
|  |                             ) | ||||||
|  |                         ), | ||||||
|  |                     ) | ||||||
|  |             else: | ||||||
|  |                 logger.debug( | ||||||
|  |                     f"Unable to find container for external location: {file_info.storage_location}" | ||||||
|  |                 ) | ||||||
|  |         except Exception as exc: | ||||||
|  |             logger.debug( | ||||||
|  |                 f"Error while processing external location lineage for {file_info.storage_location}: {exc}" | ||||||
|  |             ) | ||||||
|  |             logger.debug(traceback.format_exc()) | ||||||
|  | 
 | ||||||
|     def _handle_upstream_table( |     def _handle_upstream_table( | ||||||
|         self, |         self, | ||||||
|         table_streams: LineageTableStreams, |         table_streams: LineageTableStreams, | ||||||
|         table: Table, |         table: Table, | ||||||
|         databricks_table_fqn: str, |         databricks_table_fqn: str, | ||||||
|     ) -> Iterable[Either[AddLineageRequest]]: |     ) -> Iterable[Either[AddLineageRequest]]: | ||||||
|         for upstream_table in table_streams.upstream_tables: |         for upstream_entity in table_streams.upstreams: | ||||||
|             try: |             try: | ||||||
|                 if not upstream_table.name: |                 if upstream_entity.fileInfo: | ||||||
|  |                     yield from self._handle_external_location_lineage( | ||||||
|  |                         upstream_entity.fileInfo, table, is_upstream=True | ||||||
|  |                     ) | ||||||
|                     continue |                     continue | ||||||
|  | 
 | ||||||
|  |                 if not upstream_entity.tableInfo or not upstream_entity.tableInfo.name: | ||||||
|  |                     continue | ||||||
|  | 
 | ||||||
|  |                 upstream_table = upstream_entity.tableInfo | ||||||
|                 from_entity_fqn = fqn.build( |                 from_entity_fqn = fqn.build( | ||||||
|                     metadata=self.metadata, |                     metadata=self.metadata, | ||||||
|                     entity_type=Table, |                     entity_type=Table, | ||||||
| @ -172,9 +279,69 @@ class UnitycatalogLineageSource(Source): | |||||||
|                     ) |                     ) | ||||||
|             except Exception: |             except Exception: | ||||||
|                 logger.debug( |                 logger.debug( | ||||||
|                     "Error while processing lineage for " |                     "Error while processing upstream lineage for " | ||||||
|                     f"{upstream_table.catalog_name}.{upstream_table.schema_name}.{upstream_table.name}" |                     f"{databricks_table_fqn}" | ||||||
|                     f" -> {databricks_table_fqn}" |                 ) | ||||||
|  |                 logger.debug(traceback.format_exc()) | ||||||
|  | 
 | ||||||
|  |     def _handle_downstream_table( | ||||||
|  |         self, | ||||||
|  |         table_streams: LineageTableStreams, | ||||||
|  |         table: Table, | ||||||
|  |         databricks_table_fqn: str, | ||||||
|  |     ) -> Iterable[Either[AddLineageRequest]]: | ||||||
|  |         for downstream_entity in table_streams.downstreams: | ||||||
|  |             try: | ||||||
|  |                 if downstream_entity.fileInfo: | ||||||
|  |                     yield from self._handle_external_location_lineage( | ||||||
|  |                         downstream_entity.fileInfo, table, is_upstream=False | ||||||
|  |                     ) | ||||||
|  |                     continue | ||||||
|  | 
 | ||||||
|  |                 if ( | ||||||
|  |                     not downstream_entity.tableInfo | ||||||
|  |                     or not downstream_entity.tableInfo.name | ||||||
|  |                 ): | ||||||
|  |                     continue | ||||||
|  | 
 | ||||||
|  |                 downstream_table = downstream_entity.tableInfo | ||||||
|  |                 to_entity_fqn = fqn.build( | ||||||
|  |                     metadata=self.metadata, | ||||||
|  |                     entity_type=Table, | ||||||
|  |                     database_name=downstream_table.catalog_name, | ||||||
|  |                     schema_name=downstream_table.schema_name, | ||||||
|  |                     table_name=downstream_table.name, | ||||||
|  |                     service_name=self.config.serviceName, | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |                 to_entity = self.metadata.get_by_name(entity=Table, fqn=to_entity_fqn) | ||||||
|  |                 if to_entity: | ||||||
|  |                     downstream_table_fqn = f"{downstream_table.catalog_name}.{downstream_table.schema_name}.{downstream_table.name}" | ||||||
|  |                     lineage_details = self._get_lineage_details( | ||||||
|  |                         from_table=table, | ||||||
|  |                         to_table=to_entity, | ||||||
|  |                         databricks_table_fqn=downstream_table_fqn, | ||||||
|  |                     ) | ||||||
|  |                     yield Either( | ||||||
|  |                         left=None, | ||||||
|  |                         right=AddLineageRequest( | ||||||
|  |                             edge=EntitiesEdge( | ||||||
|  |                                 fromEntity=EntityReference(id=table.id, type="table"), | ||||||
|  |                                 toEntity=EntityReference(id=to_entity.id, type="table"), | ||||||
|  |                                 lineageDetails=lineage_details, | ||||||
|  |                             ) | ||||||
|  |                         ), | ||||||
|  |                     ) | ||||||
|  |                 else: | ||||||
|  |                     logger.debug( | ||||||
|  |                         f"Unable to find downstream entity for " | ||||||
|  |                         f"{databricks_table_fqn} -> " | ||||||
|  |                         f"{downstream_table.catalog_name}.{downstream_table.schema_name}.{downstream_table.name}" | ||||||
|  |                     ) | ||||||
|  |             except Exception: | ||||||
|  |                 logger.debug( | ||||||
|  |                     "Error while processing downstream lineage for " | ||||||
|  |                     f"{databricks_table_fqn}" | ||||||
|                 ) |                 ) | ||||||
|                 logger.debug(traceback.format_exc()) |                 logger.debug(traceback.format_exc()) | ||||||
| 
 | 
 | ||||||
| @ -225,10 +392,17 @@ class UnitycatalogLineageSource(Source): | |||||||
|                     table_streams: LineageTableStreams = self.client.get_table_lineage( |                     table_streams: LineageTableStreams = self.client.get_table_lineage( | ||||||
|                         databricks_table_fqn |                         databricks_table_fqn | ||||||
|                     ) |                     ) | ||||||
|  | 
 | ||||||
|  |                     # Process upstream lineage | ||||||
|                     yield from self._handle_upstream_table( |                     yield from self._handle_upstream_table( | ||||||
|                         table_streams, table, databricks_table_fqn |                         table_streams, table, databricks_table_fqn | ||||||
|                     ) |                     ) | ||||||
| 
 | 
 | ||||||
|  |                     # Process downstream lineage | ||||||
|  |                     yield from self._handle_downstream_table( | ||||||
|  |                         table_streams, table, databricks_table_fqn | ||||||
|  |                     ) | ||||||
|  | 
 | ||||||
|     def test_connection(self) -> None: |     def test_connection(self) -> None: | ||||||
|         test_connection_common( |         test_connection_common( | ||||||
|             self.metadata, self.connection_obj, self.service_connection |             self.metadata, self.connection_obj, self.service_connection | ||||||
|  | |||||||
| @ -22,6 +22,8 @@ class DatabricksTable(BaseModel): | |||||||
|     name: Optional[str] = None |     name: Optional[str] = None | ||||||
|     catalog_name: Optional[str] = None |     catalog_name: Optional[str] = None | ||||||
|     schema_name: Optional[str] = None |     schema_name: Optional[str] = None | ||||||
|  |     table_type: Optional[str] = None | ||||||
|  |     lineage_timestamp: Optional[str] = None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DatabricksColumn(BaseModel): | class DatabricksColumn(BaseModel): | ||||||
| @ -31,9 +33,23 @@ class DatabricksColumn(BaseModel): | |||||||
|     table_name: Optional[str] = None |     table_name: Optional[str] = None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class FileInfo(BaseModel): | ||||||
|  |     path: Optional[str] = None | ||||||
|  |     has_permission: Optional[bool] = None | ||||||
|  |     securable_name: Optional[str] = None | ||||||
|  |     storage_location: Optional[str] = None | ||||||
|  |     securable_type: Optional[str] = None | ||||||
|  |     lineage_timestamp: Optional[str] = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class LineageEntity(BaseModel): | ||||||
|  |     tableInfo: Optional[DatabricksTable] = None | ||||||
|  |     fileInfo: Optional[FileInfo] = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class LineageTableStreams(BaseModel): | class LineageTableStreams(BaseModel): | ||||||
|     upstream_tables: Optional[List[DatabricksTable]] = [] |     upstreams: Optional[List[LineageEntity]] = [] | ||||||
|     downstream_tables: Optional[List[DatabricksTable]] = [] |     downstreams: Optional[List[LineageEntity]] = [] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class LineageColumnStreams(BaseModel): | class LineageColumnStreams(BaseModel): | ||||||
|  | |||||||
| @ -356,6 +356,7 @@ class S3Source(StorageServiceSource): | |||||||
|                     if entry |                     if entry | ||||||
|                     and entry.get("Key") |                     and entry.get("Key") | ||||||
|                     and len(entry.get("Key").split("/")) > total_depth |                     and len(entry.get("Key").split("/")) > total_depth | ||||||
|  |                     and "/_delta_log/" not in entry.get("Key") | ||||||
|                 } |                 } | ||||||
|                 for key in candidate_keys: |                 for key in candidate_keys: | ||||||
|                     metadata_entry_copy = deepcopy(metadata_entry) |                     metadata_entry_copy = deepcopy(metadata_entry) | ||||||
| @ -469,7 +470,10 @@ class S3Source(StorageServiceSource): | |||||||
|         candidate_keys = [ |         candidate_keys = [ | ||||||
|             entry["Key"] |             entry["Key"] | ||||||
|             for entry in response |             for entry in response | ||||||
|             if entry and entry.get("Key") and not entry.get("Key").endswith("/") |             if entry | ||||||
|  |             and entry.get("Key") | ||||||
|  |             and not entry.get("Key").endswith("/") | ||||||
|  |             and "/_delta_log/" not in entry.get("Key") | ||||||
|         ] |         ] | ||||||
|         for key in candidate_keys: |         for key in candidate_keys: | ||||||
|             if self.is_valid_unstructured_file(metadata_entry.unstructuredFormats, key): |             if self.is_valid_unstructured_file(metadata_entry.unstructuredFormats, key): | ||||||
| @ -678,7 +682,10 @@ class S3Source(StorageServiceSource): | |||||||
|                 candidate_keys = [ |                 candidate_keys = [ | ||||||
|                     entry["Key"] |                     entry["Key"] | ||||||
|                     for entry in response[S3_CLIENT_ROOT_RESPONSE] |                     for entry in response[S3_CLIENT_ROOT_RESPONSE] | ||||||
|                     if entry and entry.get("Key") and not entry.get("Key").endswith("/") |                     if entry | ||||||
|  |                     and entry.get("Key") | ||||||
|  |                     and not entry.get("Key").endswith("/") | ||||||
|  |                     and "/_delta_log/" not in entry.get("Key") | ||||||
|                 ] |                 ] | ||||||
|                 # pick a random key out of the candidates if any were returned |                 # pick a random key out of the candidates if any were returned | ||||||
|                 if candidate_keys: |                 if candidate_keys: | ||||||
|  | |||||||
| @ -0,0 +1,513 @@ | |||||||
|  | #  Copyright 2025 Collate | ||||||
|  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | ||||||
|  | #  you may not use this file except in compliance with the License. | ||||||
|  | #  You may obtain a copy of the License at | ||||||
|  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | ||||||
|  | #  Unless required by applicable law or agreed to in writing, software | ||||||
|  | #  distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | #  See the License for the specific language governing permissions and | ||||||
|  | #  limitations under the License. | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | Test Unity Catalog lineage functionality | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from unittest import TestCase | ||||||
|  | from unittest.mock import Mock, patch | ||||||
|  | from uuid import uuid4 | ||||||
|  | 
 | ||||||
|  | from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest | ||||||
|  | from metadata.generated.schema.entity.data.container import ( | ||||||
|  |     Container, | ||||||
|  |     ContainerDataModel, | ||||||
|  | ) | ||||||
|  | from metadata.generated.schema.entity.data.table import ( | ||||||
|  |     Column, | ||||||
|  |     ColumnName, | ||||||
|  |     DataType, | ||||||
|  |     Table, | ||||||
|  | ) | ||||||
|  | from metadata.generated.schema.metadataIngestion.workflow import ( | ||||||
|  |     Source as WorkflowSource, | ||||||
|  | ) | ||||||
|  | from metadata.generated.schema.type.basic import EntityName, FullyQualifiedEntityName | ||||||
|  | from metadata.generated.schema.type.entityLineage import Source as LineageSource | ||||||
|  | from metadata.generated.schema.type.entityReference import EntityReference | ||||||
|  | from metadata.ingestion.api.models import Either | ||||||
|  | from metadata.ingestion.source.database.unitycatalog.lineage import ( | ||||||
|  |     UnitycatalogLineageSource, | ||||||
|  | ) | ||||||
|  | from metadata.ingestion.source.database.unitycatalog.models import ( | ||||||
|  |     DatabricksTable, | ||||||
|  |     FileInfo, | ||||||
|  |     LineageEntity, | ||||||
|  |     LineageTableStreams, | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | mock_unitycatalog_lineage_config = { | ||||||
|  |     "source": { | ||||||
|  |         "type": "unitycatalog-lineage", | ||||||
|  |         "serviceName": "local_unitycatalog", | ||||||
|  |         "serviceConnection": { | ||||||
|  |             "config": { | ||||||
|  |                 "type": "UnityCatalog", | ||||||
|  |                 "catalog": "demo-test-cat", | ||||||
|  |                 "databaseSchema": "test-schema", | ||||||
|  |                 "authType": {"token": "test_token"}, | ||||||
|  |                 "hostPort": "localhost:443", | ||||||
|  |                 "httpPath": "/sql/1.0/warehouses/test", | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         "sourceConfig": {"config": {"type": "DatabaseLineage"}}, | ||||||
|  |     }, | ||||||
|  |     "sink": {"type": "metadata-rest", "config": {}}, | ||||||
|  |     "workflowConfig": { | ||||||
|  |         "openMetadataServerConfig": { | ||||||
|  |             "hostPort": "http://localhost:8585/api", | ||||||
|  |             "authProvider": "openmetadata", | ||||||
|  |             "securityConfig": {"jwtToken": "test_token"}, | ||||||
|  |         } | ||||||
|  |     }, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class TestUnityCatalogLineage(TestCase): | ||||||
|  |     """ | ||||||
|  |     Unity Catalog lineage unit tests | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def setUp(self, mock_metadata, mock_test_connection): | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  |         self.mock_metadata = mock_metadata | ||||||
|  |         self.config = WorkflowSource.model_validate( | ||||||
|  |             mock_unitycatalog_lineage_config["source"] | ||||||
|  |         ) | ||||||
|  |         self.lineage_source = UnitycatalogLineageSource(self.config, self.mock_metadata) | ||||||
|  | 
 | ||||||
|  |     def test_lineage_table_streams_with_table_info(self): | ||||||
|  |         """Test LineageTableStreams model with tableInfo""" | ||||||
|  |         upstream_table = DatabricksTable( | ||||||
|  |             name="source_table", | ||||||
|  |             catalog_name="demo-test-cat", | ||||||
|  |             schema_name="test-schema", | ||||||
|  |             table_type="TABLE", | ||||||
|  |             lineage_timestamp="2025-10-08 11:16:26.0", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         downstream_table = DatabricksTable( | ||||||
|  |             name="target_table", | ||||||
|  |             catalog_name="demo-test-cat", | ||||||
|  |             schema_name="test-schema", | ||||||
|  |             table_type="TABLE", | ||||||
|  |             lineage_timestamp="2025-10-08 11:16:26.0", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         lineage_streams = LineageTableStreams( | ||||||
|  |             upstreams=[LineageEntity(tableInfo=upstream_table)], | ||||||
|  |             downstreams=[LineageEntity(tableInfo=downstream_table)], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(lineage_streams.upstreams), 1) | ||||||
|  |         self.assertEqual(len(lineage_streams.downstreams), 1) | ||||||
|  |         self.assertEqual(lineage_streams.upstreams[0].tableInfo.name, "source_table") | ||||||
|  |         self.assertEqual(lineage_streams.downstreams[0].tableInfo.name, "target_table") | ||||||
|  |         self.assertIsNone(lineage_streams.upstreams[0].fileInfo) | ||||||
|  |         self.assertIsNone(lineage_streams.downstreams[0].fileInfo) | ||||||
|  | 
 | ||||||
|  |     def test_lineage_table_streams_with_file_info(self): | ||||||
|  |         """Test LineageTableStreams model with fileInfo""" | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             has_permission=True, | ||||||
|  |             securable_name="test_location", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |             lineage_timestamp="2025-10-08 10:37:42.0", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         lineage_streams = LineageTableStreams( | ||||||
|  |             upstreams=[LineageEntity(fileInfo=file_info)], downstreams=[] | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(lineage_streams.upstreams), 1) | ||||||
|  |         self.assertEqual( | ||||||
|  |             lineage_streams.upstreams[0].fileInfo.path, "s3://bucket/path/file.parquet" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             lineage_streams.upstreams[0].fileInfo.storage_location, "s3://bucket/path" | ||||||
|  |         ) | ||||||
|  |         self.assertIsNone(lineage_streams.upstreams[0].tableInfo) | ||||||
|  | 
 | ||||||
|  |     def test_lineage_table_streams_mixed(self): | ||||||
|  |         """Test LineageTableStreams with both tableInfo and fileInfo""" | ||||||
|  |         table_info = DatabricksTable( | ||||||
|  |             name="table1", | ||||||
|  |             catalog_name="demo-test-cat", | ||||||
|  |             schema_name="test-schema", | ||||||
|  |             table_type="TABLE", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         lineage_streams = LineageTableStreams( | ||||||
|  |             upstreams=[ | ||||||
|  |                 LineageEntity(tableInfo=table_info), | ||||||
|  |                 LineageEntity(fileInfo=file_info), | ||||||
|  |             ], | ||||||
|  |             downstreams=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(lineage_streams.upstreams), 2) | ||||||
|  |         self.assertIsNotNone(lineage_streams.upstreams[0].tableInfo) | ||||||
|  |         self.assertIsNone(lineage_streams.upstreams[0].fileInfo) | ||||||
|  |         self.assertIsNone(lineage_streams.upstreams[1].tableInfo) | ||||||
|  |         self.assertIsNotNone(lineage_streams.upstreams[1].fileInfo) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_get_data_model_column_fqn(self, mock_metadata, mock_test_connection): | ||||||
|  |         """Test _get_data_model_column_fqn method""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         data_model = ContainerDataModel( | ||||||
|  |             columns=[ | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="id"), | ||||||
|  |                     displayName="id", | ||||||
|  |                     dataType=DataType.INT, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.container.id" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="name"), | ||||||
|  |                     displayName="name", | ||||||
|  |                     dataType=DataType.STRING, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.container.name" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |             ] | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         result = lineage_source._get_data_model_column_fqn(data_model, "id") | ||||||
|  |         self.assertEqual(result, "service.container.id") | ||||||
|  | 
 | ||||||
|  |         result = lineage_source._get_data_model_column_fqn(data_model, "name") | ||||||
|  |         self.assertEqual(result, "service.container.name") | ||||||
|  | 
 | ||||||
|  |         result = lineage_source._get_data_model_column_fqn(data_model, "nonexistent") | ||||||
|  |         self.assertIsNone(result) | ||||||
|  | 
 | ||||||
|  |         result = lineage_source._get_data_model_column_fqn(None, "id") | ||||||
|  |         self.assertIsNone(result) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_get_container_column_lineage(self, mock_metadata, mock_test_connection): | ||||||
|  |         """Test _get_container_column_lineage method""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         data_model = ContainerDataModel( | ||||||
|  |             columns=[ | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="id"), | ||||||
|  |                     displayName="id", | ||||||
|  |                     dataType=DataType.INT, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.container.id" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="name"), | ||||||
|  |                     displayName="name", | ||||||
|  |                     dataType=DataType.STRING, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.container.name" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |             ] | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_entity = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_table"), | ||||||
|  |             columns=[ | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="id"), | ||||||
|  |                     dataType=DataType.INT, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.db.schema.test_table.id" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |                 Column( | ||||||
|  |                     name=ColumnName(root="name"), | ||||||
|  |                     dataType=DataType.STRING, | ||||||
|  |                     fullyQualifiedName=FullyQualifiedEntityName( | ||||||
|  |                         root="service.db.schema.test_table.name" | ||||||
|  |                     ), | ||||||
|  |                 ), | ||||||
|  |             ], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         result = lineage_source._get_container_column_lineage(data_model, table_entity) | ||||||
|  | 
 | ||||||
|  |         self.assertIsNotNone(result) | ||||||
|  |         self.assertEqual(len(result.columnsLineage), 2) | ||||||
|  |         self.assertEqual(result.source, LineageSource.ExternalTableLineage) | ||||||
|  |         self.assertEqual( | ||||||
|  |             result.columnsLineage[0].fromColumns[0].root, "service.container.id" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             result.columnsLineage[0].toColumn.root, "service.db.schema.test_table.id" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             result.columnsLineage[1].fromColumns[0].root, "service.container.name" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             result.columnsLineage[1].toColumn.root, "service.db.schema.test_table.name" | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_handle_external_location_lineage_upstream( | ||||||
|  |         self, mock_metadata, mock_test_connection | ||||||
|  |     ): | ||||||
|  |         """Test _handle_external_location_lineage for upstream""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_entity = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         container_entity = Container( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_container"), | ||||||
|  |             service=EntityReference(id=uuid4(), type="storageService"), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         mock_metadata.es_search_container_by_path.return_value = [container_entity] | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         results = list( | ||||||
|  |             lineage_source._handle_external_location_lineage( | ||||||
|  |                 file_info, table_entity, is_upstream=True | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertIsInstance(results[0], Either) | ||||||
|  |         self.assertIsInstance(results[0].right, AddLineageRequest) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.id, container_entity.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.type, "container") | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.id, table_entity.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.type, "table") | ||||||
|  | 
 | ||||||
|  |         mock_metadata.es_search_container_by_path.assert_called_once_with( | ||||||
|  |             full_path="s3://bucket/path", fields="dataModel" | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_handle_external_location_lineage_downstream( | ||||||
|  |         self, mock_metadata, mock_test_connection | ||||||
|  |     ): | ||||||
|  |         """Test _handle_external_location_lineage for downstream""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_entity = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         container_entity = Container( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_container"), | ||||||
|  |             service=EntityReference(id=uuid4(), type="storageService"), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         mock_metadata.es_search_container_by_path.return_value = [container_entity] | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         results = list( | ||||||
|  |             lineage_source._handle_external_location_lineage( | ||||||
|  |                 file_info, table_entity, is_upstream=False | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertIsInstance(results[0], Either) | ||||||
|  |         self.assertIsInstance(results[0].right, AddLineageRequest) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.id, table_entity.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.type, "table") | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.id, container_entity.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.type, "container") | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_handle_external_location_lineage_no_container( | ||||||
|  |         self, mock_metadata, mock_test_connection | ||||||
|  |     ): | ||||||
|  |         """Test _handle_external_location_lineage when container is not found""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_entity = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         mock_metadata.es_search_container_by_path.return_value = [] | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         results = list( | ||||||
|  |             lineage_source._handle_external_location_lineage( | ||||||
|  |                 file_info, table_entity, is_upstream=True | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_handle_upstream_table_with_table_info( | ||||||
|  |         self, mock_metadata, mock_test_connection | ||||||
|  |     ): | ||||||
|  |         """Test _handle_upstream_table with tableInfo""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         upstream_table_info = DatabricksTable( | ||||||
|  |             name="source_table", | ||||||
|  |             catalog_name="demo-test-cat", | ||||||
|  |             schema_name="test-schema", | ||||||
|  |             table_type="TABLE", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_streams = LineageTableStreams( | ||||||
|  |             upstreams=[LineageEntity(tableInfo=upstream_table_info)], downstreams=[] | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         current_table = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="current_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         upstream_table = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="source_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         mock_metadata.get_by_name.return_value = upstream_table | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  |         lineage_source.client = Mock() | ||||||
|  |         lineage_source.client.get_column_lineage.return_value = Mock(upstream_cols=[]) | ||||||
|  | 
 | ||||||
|  |         results = list( | ||||||
|  |             lineage_source._handle_upstream_table( | ||||||
|  |                 table_streams, current_table, "demo-test-cat.test-schema.current_table" | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertIsInstance(results[0], Either) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.id, upstream_table.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.id, current_table.id) | ||||||
|  | 
 | ||||||
|  |     @patch( | ||||||
|  |         "metadata.ingestion.source.database.unitycatalog.lineage.UnitycatalogLineageSource.test_connection" | ||||||
|  |     ) | ||||||
|  |     @patch("metadata.ingestion.ometa.ometa_api.OpenMetadata") | ||||||
|  |     def test_handle_upstream_table_with_file_info( | ||||||
|  |         self, mock_metadata, mock_test_connection | ||||||
|  |     ): | ||||||
|  |         """Test _handle_upstream_table with fileInfo""" | ||||||
|  |         mock_test_connection.return_value = None | ||||||
|  | 
 | ||||||
|  |         file_info = FileInfo( | ||||||
|  |             path="s3://bucket/path/file.parquet", | ||||||
|  |             storage_location="s3://bucket/path", | ||||||
|  |             securable_type="EXTERNAL_LOCATION", | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         table_streams = LineageTableStreams( | ||||||
|  |             upstreams=[LineageEntity(fileInfo=file_info)], downstreams=[] | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         current_table = Table( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="current_table"), | ||||||
|  |             columns=[], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         container_entity = Container( | ||||||
|  |             id=uuid4(), | ||||||
|  |             name=EntityName(root="test_container"), | ||||||
|  |             service=EntityReference(id=uuid4(), type="storageService"), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         mock_metadata.es_search_container_by_path.return_value = [container_entity] | ||||||
|  | 
 | ||||||
|  |         lineage_source = UnitycatalogLineageSource(self.config, mock_metadata) | ||||||
|  | 
 | ||||||
|  |         results = list( | ||||||
|  |             lineage_source._handle_upstream_table( | ||||||
|  |                 table_streams, current_table, "demo-test-cat.test-schema.current_table" | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertIsInstance(results[0], Either) | ||||||
|  |         self.assertEqual(results[0].right.edge.fromEntity.id, container_entity.id) | ||||||
|  |         self.assertEqual(results[0].right.edge.toEntity.id, current_table.id) | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Mayur Singal
						Mayur Singal