#  Copyright 2025 Collate
#  Licensed under the Collate Community License, Version 1.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Databricks Pipeline utils tests
"""

import json
import uuid
from pathlib import Path
from unittest import TestCase
from unittest.mock import patch

from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.pipeline import (
    Pipeline,
    PipelineStatus,
    Task,
    TaskStatus,
)
from metadata.generated.schema.entity.data.table import Column, Table
from metadata.generated.schema.entity.services.pipelineService import (
    PipelineConnection,
    PipelineService,
    PipelineServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    OpenMetadataWorkflowConfig,
)
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
from metadata.generated.schema.type.entityLineage import (
    ColumnLineage,
    EntitiesEdge,
    LineageDetails,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.databrickspipeline.metadata import (
    DatabrickspipelineSource,
)
from metadata.ingestion.source.pipeline.databrickspipeline.models import (
    DataBrickPipelineDetails,
)
from metadata.utils.logger import log_ansi_encoded_string

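# Mocked Databricks API payloads: the job list and the run history, loaded from local JSON fixtures.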
mock_file_path = (
    Path(__file__).parent.parent.parent
    / "resources/datasets/databricks_pipeline_resource.json"
)
with open(mock_file_path) as file:
    mock_data: dict = json.load(file)

mock_file_path = (
    Path(__file__).parent.parent.parent
    / "resources/datasets/databricks_pipeline_history.json"
)
with open(mock_file_path) as file:
    mock_run_data: dict = json.load(file)

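# Minimal ingestion workflow config used to build the DatabrickspipelineSource below;
# the token and JWT are dummy values used only for these tests.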
mock_databricks_config = {
    "source": {
        "type": "DatabricksPipeline",
        "serviceName": "DatabricksPipeline",
        "serviceConnection": {
            "config": {
                "type": "DatabricksPipeline",
                "token": "random_token",
                "hostPort": "localhost:443",
                "connectionTimeout": 120,
                "connectionArguments": {
                    "http_path": "sql/1.0/endpoints/path",
                },
            }
        },
        "sourceConfig": {"config": {"type": "PipelineMetadata"}},
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {
                "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGc"
                "iOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE"
                "2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXB"
                "iEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fN"
                "r3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3u"
                "d-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            },
        }
    },
}

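# Service and pipeline entities mirroring the mocked job; their names seed the source context in __init__.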
MOCK_PIPELINE_SERVICE = PipelineService(
    id="85811038-099a-11ed-861d-0242ac120002",
    name="databricks_pipeline_test",
    fullyQualifiedName=FullyQualifiedEntityName("databricks_pipeline_test"),
    connection=PipelineConnection(),
    serviceType=PipelineServiceType.DatabricksPipeline,
)

MOCK_PIPELINE = Pipeline(
    id="2aaa012e-099a-11ed-861d-0242ac120002",
    name="11223344",
    fullyQualifiedName="databricks_pipeline_test.11223344",
    displayName="OpenMetadata Databricks Workflow",
    tasks=[
        Task(
            name="Orders_Ingest",
            description="Ingests order data",
            sourceUrl="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
            downstreamTasks=[],
            taskType="SINGLE_TASK",
        ),
        Task(
            name="Match",
            description="Matches orders with user sessions",
            sourceUrl="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
            downstreamTasks=["Orders_Ingested", "Sessionize"],
            taskType="SINGLE_TASK",
        ),
        Task(
            name="Sessionize",
            description="Extracts session data from events",
            sourceUrl="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
            downstreamTasks=[],
            taskType="SINGLE_TASK",
        ),
    ],
    service=EntityReference(
        id="85811038-099a-11ed-861d-0242ac120002", type="pipelineService"
    ),
)

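# CreatePipelineRequest expected from yield_pipeline for the first mocked job.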
EXPECTED_CREATED_PIPELINES = CreatePipelineRequest(
    name="11223344",
    displayName="OpenMetadata Databricks Workflow",
    description="This job contain multiple tasks that are required to produce the weekly shark sightings report.",
    tasks=[
        Task(
            name="Sessionize",
            description="Extracts session data from events",
            sourceUrl="https://localhost:443/#job/11223344",
            downstreamTasks=[],
            taskType="SINGLE_TASK",
        ),
        Task(
            name="Orders_Ingest",
            description="Ingests order data",
            sourceUrl="https://localhost:443/#job/11223344",
            downstreamTasks=[],
            taskType="SINGLE_TASK",
        ),
        Task(
            name="Matched_Changed",
            description="Matches orders with user sessions",
            sourceUrl="https://localhost:443/#job/11223344",
            downstreamTasks=["Orders_Ingest", "Sessionize", "Sessionize_duplicated"],
            taskType="SINGLE_TASK",
        ),
    ],
    scheduleInterval="20 30 * * * ?",
    service=FullyQualifiedEntityName(root="databricks_pipeline_test"),
)

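# Pipeline and task statuses expected from yield_pipeline_status for the mocked run history.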
EXPECTED_PIPELINE_STATUS = [
    OMetaPipelineStatus(
        pipeline_fqn="databricks_pipeline_test.11223344",
        pipeline_status=PipelineStatus(
            timestamp=1625060460483,
            executionStatus="Successful",
            taskStatus=[
                TaskStatus(
                    name="Orders_Ingest",
                    executionStatus="Successful",
                    startTime=1625060460483,
                    endTime=1625060863413,
                    logLink="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
                ),
                TaskStatus(
                    name="Match",
                    executionStatus="Successful",
                    startTime=1625060460483,
                    endTime=1625060863413,
                    logLink="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
                ),
                TaskStatus(
                    name="Sessionize",
                    executionStatus="Successful",
                    startTime=1625060460483,
                    endTime=1625060863413,
                    logLink="https://my-workspace.cloud.databricks.com/#job/11223344/run/123",
                ),
            ],
        ),
    ),
]

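# The mocked jobs parsed into the model objects that get_pipelines_list should return.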
PIPELINE_LIST = [DataBrickPipelineDetails(**data) for data in mock_data]

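# Lineage request expected when the mocked client reports table_1 -> table_2 with a single column mapping.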
EXPECTED_PIPELINE_LINEAGE = AddLineageRequest(
    edge=EntitiesEdge(
        fromEntity=EntityReference(
            id="cced5342-12e8-45fb-b50a-918529d43ed1", type="table"
        ),
        toEntity=EntityReference(
            id="6f5ad342-12e8-45fb-b50a-918529d43ed1", type="table"
        ),
        lineageDetails=LineageDetails(
            columnsLineage=[
                ColumnLineage(
                    fromColumns=[
                        FullyQualifiedEntityName(
                            root="local_table.dev.table_1.column_1"
                        )
                    ],
                    toColumn=FullyQualifiedEntityName(
                        root="local_table.dev.table_2.column_2"
                    ),
                )
            ],
            pipeline=EntityReference(
                id="1fa49082-a32c-4e71-ba4a-6a111b489ed6",
                type="pipeline",
            ),
            source="PipelineLineage",
        ),
    )
)


class DatabricksPipelineTests(TestCase):
    """
    Implements the necessary methods to test
    the Databricks Pipeline ingestion source
    """

    maxDiff = None

    @patch(
        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.test_connection"
    )
    def __init__(self, methodName, test_connection) -> None:
        super().__init__(methodName)
        log_ansi_encoded_string(message="init")
        test_connection.return_value = False
        config = OpenMetadataWorkflowConfig.model_validate(mock_databricks_config)

        self.databricks = DatabrickspipelineSource.create(
            mock_databricks_config["source"],
            config.workflowConfig.openMetadataServerConfig,
        )
        self.databricks.context.get().__dict__["pipeline"] = MOCK_PIPELINE.name.root
        self.databricks.context.get().__dict__[
            "pipeline_service"
        ] = MOCK_PIPELINE_SERVICE.name.root
        self.databricks.metadata = OpenMetadata(
            config.workflowConfig.openMetadataServerConfig
        )

    @patch(
        "metadata.ingestion.source.database.databricks.client.DatabricksClient.list_jobs"
    )
    # @patch(
    #     "metadata.ingestion.source.database.databricks.client.DatabricksClient.get_job_runs"
    # )
    def test_get_pipelines_list(self, list_jobs):
        list_jobs.return_value = mock_data
        results = list(self.databricks.get_pipelines_list())
        self.assertEqual(PIPELINE_LIST, results)

    def test_yield_pipeline(self):
        pipelines = list(self.databricks.yield_pipeline(PIPELINE_LIST[0]))[0].right
        self.assertEqual(pipelines, EXPECTED_CREATED_PIPELINES)

    @patch(
        "metadata.ingestion.source.database.databricks.client.DatabricksClient.get_job_runs"
    )
    def test_yield_pipeline_status(self, get_job_runs):
        get_job_runs.return_value = mock_run_data
        pipeline_status = [
            either.right
            for either in self.databricks.yield_pipeline_status(
                DataBrickPipelineDetails(**mock_data[0])
            )
        ]
        self.assertEqual(pipeline_status, EXPECTED_PIPELINE_STATUS)
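
    # Exercises yield_pipeline_lineage_details twice: once with a mocked
    # column-level mapping and once with table-level lineage only.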
    def test_databricks_pipeline_lineage(self):
        self.databricks.context.get().__dict__["pipeline"] = "11223344"
        self.databricks.context.get().__dict__[
            "pipeline_service"
        ] = "databricks_pipeline_test"
        mock_pipeline = Pipeline(
            id=uuid.uuid4(),
            name="11223344",
            fullyQualifiedName="databricks_pipeline_test.11223344",
            service=EntityReference(id=uuid.uuid4(), type="pipelineService"),
        )

        # Create source and target tables
        mock_source_table = Table(
            id="cced5342-12e8-45fb-b50a-918529d43ed1",
            name="table_1",
            fullyQualifiedName="local_table.dev.table_1",
            database=EntityReference(id=uuid.uuid4(), type="database"),
            columns=[
                Column(
                    name="column_1",
                    fullyQualifiedName="local_table.dev.table_1.column_1",
                    dataType="VARCHAR",
                )
            ],
            databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
        )

        mock_target_table = Table(
            id="6f5ad342-12e8-45fb-b50a-918529d43ed1",
            name="table_2",
            fullyQualifiedName="local_table.dev.table_2",
            database=EntityReference(id=uuid.uuid4(), type="database"),
            columns=[
                Column(
                    name="column_2",
                    fullyQualifiedName="local_table.dev.table_2.column_2",
                    dataType="VARCHAR",
                )
            ],
            databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
        )

        with patch.object(self.databricks.metadata, "get_by_name") as mock_get_by_name:

            def get_by_name_side_effect(entity, fqn):
                if entity == Pipeline:
                    if fqn == "databricks_pipeline_test.11223344":
                        return mock_pipeline
                elif entity == Table:
                    if "table_1" in fqn:
                        return mock_source_table
                    elif "table_2" in fqn:
                        return mock_target_table
                return None

            mock_get_by_name.side_effect = get_by_name_side_effect

            with patch.object(
                self.databricks.client, "get_table_lineage"
            ) as mock_get_table_lineage:
                mock_get_table_lineage.return_value = [
                    {
                        "source_table_full_name": "local_table.dev.table_1",
                        "target_table_full_name": "local_table.dev.table_2",
                    }
                ]
                with patch.object(
                    self.databricks.client, "get_column_lineage"
                ) as mock_get_column_lineage:
                    mock_get_column_lineage.return_value = [
                        ("column_1", "column_2"),
                        ("column_3", "column_4"),
                    ]
                    lineage_details = list(
                        self.databricks.yield_pipeline_lineage_details(
                            DataBrickPipelineDetails(**mock_data[0])
                        )
                    )[0].right
                    self.assertEqual(
                        lineage_details.edge.fromEntity.id,
                        EXPECTED_PIPELINE_LINEAGE.edge.fromEntity.id,
                    )
                    self.assertEqual(
                        lineage_details.edge.toEntity.id,
                        EXPECTED_PIPELINE_LINEAGE.edge.toEntity.id,
                    )
                    self.assertEqual(
                        lineage_details.edge.lineageDetails.columnsLineage,
                        EXPECTED_PIPELINE_LINEAGE.edge.lineageDetails.columnsLineage,
                    )

        with patch.object(self.databricks.metadata, "get_by_name") as mock_get_by_name:

            def get_by_name_side_effect(entity, fqn):
                if entity == Pipeline:
                    if fqn == "databricks_pipeline_test.11223344":
                        return mock_pipeline
                elif entity == Table:
                    if "table_1" in fqn:
                        return mock_source_table
                    elif "table_2" in fqn:
                        return mock_target_table
                return None

            mock_get_by_name.side_effect = get_by_name_side_effect

            with patch.object(
                self.databricks.client, "get_table_lineage"
            ) as mock_get_table_lineage:
                mock_get_table_lineage.return_value = [
                    {
                        "source_table_full_name": "local_table.dev.table_1",
                        "target_table_full_name": "local_table.dev.table_2",
                    }
                ]
                with patch.object(
                    self.databricks.client, "get_column_lineage"
                ) as mock_get_column_lineage:
                    mock_get_column_lineage.return_value = []  # No column lineage
                    lineage_details = list(
                        self.databricks.yield_pipeline_lineage_details(
                            DataBrickPipelineDetails(**mock_data[0])
                        )
                    )[0].right
                    self.assertEqual(
                        lineage_details.edge.fromEntity.id,
                        EXPECTED_PIPELINE_LINEAGE.edge.fromEntity.id,
                    )
                    self.assertEqual(
                        lineage_details.edge.toEntity.id,
                        EXPECTED_PIPELINE_LINEAGE.edge.toEntity.id,
                    )
                    self.assertEqual(
                        lineage_details.edge.lineageDetails.columnsLineage,
                        [],
                    )