mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-30 18:26:58 +00:00 
			
		
		
		
	bugfix(fivetran/unity): move UnityCatalogConnectionConfig import to avoid circular deps with ge_profiler (#14956)
This commit is contained in:
		
							parent
							
								
									b6ff38d1c3
								
							
						
					
					
						commit
						2a4f57791b
					
				| @ -29,9 +29,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import ( | |||||||
| from datahub.ingestion.source.state.stateful_ingestion_base import ( | from datahub.ingestion.source.state.stateful_ingestion_base import ( | ||||||
|     StatefulIngestionConfigBase, |     StatefulIngestionConfigBase, | ||||||
| ) | ) | ||||||
| from datahub.ingestion.source.unity.config import ( | from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig | ||||||
|     UnityCatalogConnectionConfig, |  | ||||||
| ) |  | ||||||
| from datahub.utilities.lossy_collections import LossyList | from datahub.utilities.lossy_collections import LossyList | ||||||
| from datahub.utilities.perf_timer import PerfTimer | from datahub.utilities.perf_timer import PerfTimer | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -2,7 +2,6 @@ import logging | |||||||
| import os | import os | ||||||
| from datetime import datetime, timedelta, timezone | from datetime import datetime, timedelta, timezone | ||||||
| from typing import Any, Dict, List, Optional, Union | from typing import Any, Dict, List, Optional, Union | ||||||
| from urllib.parse import urlparse |  | ||||||
| 
 | 
 | ||||||
| import pydantic | import pydantic | ||||||
| from pydantic import Field | from pydantic import Field | ||||||
| @ -20,10 +19,8 @@ from datahub.configuration.source_common import ( | |||||||
| ) | ) | ||||||
| from datahub.configuration.validate_field_removal import pydantic_removed_field | from datahub.configuration.validate_field_removal import pydantic_removed_field | ||||||
| from datahub.configuration.validate_field_rename import pydantic_renamed_field | from datahub.configuration.validate_field_rename import pydantic_renamed_field | ||||||
| from datahub.ingestion.source.ge_data_profiler import DATABRICKS |  | ||||||
| from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig | from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig | ||||||
| from datahub.ingestion.source.sql.sql_config import SQLCommonConfig | from datahub.ingestion.source.sql.sql_config import SQLCommonConfig | ||||||
| from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri |  | ||||||
| from datahub.ingestion.source.state.stale_entity_removal_handler import ( | from datahub.ingestion.source.state.stale_entity_removal_handler import ( | ||||||
|     StatefulStaleMetadataRemovalConfig, |     StatefulStaleMetadataRemovalConfig, | ||||||
| ) | ) | ||||||
| @ -31,6 +28,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( | |||||||
|     StatefulIngestionConfigBase, |     StatefulIngestionConfigBase, | ||||||
|     StatefulProfilingConfigMixin, |     StatefulProfilingConfigMixin, | ||||||
| ) | ) | ||||||
|  | from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig | ||||||
| from datahub.ingestion.source.usage.usage_common import BaseUsageConfig | from datahub.ingestion.source.usage.usage_common import BaseUsageConfig | ||||||
| from datahub.ingestion.source_config.operation_config import ( | from datahub.ingestion.source_config.operation_config import ( | ||||||
|     OperationConfig, |     OperationConfig, | ||||||
| @ -132,55 +130,6 @@ class UnityCatalogGEProfilerConfig(UnityCatalogProfilerConfig, GEProfilingConfig | |||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
class UnityCatalogConnectionConfig(ConfigModel):
    """
    Configuration for connecting to Databricks Unity Catalog.
    Contains only connection-related fields that can be reused across different sources.
    """

    # SQLAlchemy dialect scheme for the Databricks connector.
    scheme: str = DATABRICKS
    token: str = pydantic.Field(description="Databricks personal access token")
    workspace_url: str = pydantic.Field(
        description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
    )
    warehouse_id: Optional[str] = pydantic.Field(
        default=None,
        description=(
            "SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
            "Required for the following features that need SQL access: "
            "1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
            "2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
            "3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
            "4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
            "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
        ),
    )

    # Pydantic deep-copies field defaults, so a mutable {} default is safe here.
    extra_client_options: Dict[str, Any] = Field(
        default={},
        description="Additional options to pass to Databricks SQLAlchemy client.",
    )

    # Fix: removed a no-op __init__ override that only forwarded **data to
    # super().__init__ — it added nothing over the inherited constructor.

    def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
        """Build a SQLAlchemy URI for this workspace.

        Authenticates as the literal user "token" with the personal access
        token as the password; *database* (if given) is passed both as the
        URI database and as the ``catalog`` option.

        NOTE(review): when warehouse_id is None the http_path becomes
        ".../warehouses/None" — callers appear responsible for ensuring a
        warehouse id is set before requesting a URI; confirm upstream.
        """
        uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
        if database:
            uri_opts["catalog"] = database
        return make_sqlalchemy_uri(
            scheme=self.scheme,
            username="token",
            password=self.token,
            at=urlparse(self.workspace_url).netloc,
            db=database,
            uri_opts=uri_opts,
        )

    def get_options(self) -> dict:
        """Return extra keyword options for the Databricks SQLAlchemy client."""
        return self.extra_client_options
| 
 |  | ||||||
| 
 |  | ||||||
| class UnityCatalogSourceConfig( | class UnityCatalogSourceConfig( | ||||||
|     UnityCatalogConnectionConfig, |     UnityCatalogConnectionConfig, | ||||||
|     SQLCommonConfig, |     SQLCommonConfig, | ||||||
|  | |||||||
| @ -0,0 +1,61 @@ | |||||||
|  | """Databricks Unity Catalog connection configuration.""" | ||||||
|  | 
 | ||||||
|  | from typing import Any, Dict, Optional | ||||||
|  | from urllib.parse import urlparse | ||||||
|  | 
 | ||||||
|  | import pydantic | ||||||
|  | from pydantic import Field | ||||||
|  | 
 | ||||||
|  | from datahub.configuration.common import ConfigModel | ||||||
|  | from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri | ||||||
|  | 
 | ||||||
|  | DATABRICKS = "databricks" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class UnityCatalogConnectionConfig(ConfigModel):
    """
    Configuration for connecting to Databricks Unity Catalog.
    Contains only connection-related fields that can be reused across different sources.
    """

    # SQLAlchemy dialect scheme for the Databricks connector.
    scheme: str = DATABRICKS
    token: str = pydantic.Field(description="Databricks personal access token")
    workspace_url: str = pydantic.Field(
        description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
    )
    warehouse_id: Optional[str] = pydantic.Field(
        default=None,
        description=(
            "SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
            "Required for the following features that need SQL access: "
            "1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
            "2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
            "3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
            "4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
            "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
        ),
    )

    # Pydantic deep-copies field defaults, so a mutable {} default is safe here.
    extra_client_options: Dict[str, Any] = Field(
        default={},
        description="Additional options to pass to Databricks SQLAlchemy client.",
    )

    # Fix: removed a no-op __init__ override that only forwarded **data to
    # super().__init__ — it added nothing over the inherited constructor.

    def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
        """Build a SQLAlchemy URI for this workspace.

        Authenticates as the literal user "token" with the personal access
        token as the password; *database* (if given) is passed both as the
        URI database and as the ``catalog`` option.

        NOTE(review): when warehouse_id is None the http_path becomes
        ".../warehouses/None" — callers appear responsible for ensuring a
        warehouse id is set before requesting a URI; confirm upstream.
        """
        uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
        if database:
            uri_opts["catalog"] = database
        return make_sqlalchemy_uri(
            scheme=self.scheme,
            username="token",
            password=self.token,
            at=urlparse(self.workspace_url).netloc,
            db=database,
            uri_opts=uri_opts,
        )

    def get_options(self) -> dict:
        """Return extra keyword options for the Databricks SQLAlchemy client."""
        return self.extra_client_options
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anush Kumar
						Anush Kumar