# datahub/metadata-ingestion/tests/unit/test_unity_catalog_source.py
# (Python source; listed by the file viewer as 144 lines, 5.9 KiB)

from unittest.mock import patch
import pytest
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig
from datahub.ingestion.source.unity.source import UnityCatalogSource
class TestUnityCatalogSource:
    """Tests for how UnityCatalogSource handles ``databricks_api_page_size``.

    The proxy classes are replaced with mocks in every test that builds a
    source, so the assertions only inspect the arguments the source hands to
    ``UnityCatalogApiProxy`` (or to ``UnityCatalogConnectionTest``).

    Note on mock argument order: stacked ``@patch`` decorators are applied
    bottom-up, so the ``HiveMetastoreProxy`` mock is injected first and the
    ``UnityCatalogApiProxy`` mock second.
    """

    # Settings shared by every config built through ``_build_config``.
    _BASE_CONFIG = {
        "token": "test_token",
        "workspace_url": "https://test.databricks.com",
        "warehouse_id": "test_warehouse",
        "include_hive_metastore": False,
    }

    @classmethod
    def _build_config(cls, **overrides):
        """Parse the shared base settings with ``overrides`` layered on top."""
        # A fresh dict is built per call so the class-level base is never mutated.
        return UnityCatalogSourceConfig.parse_obj({**cls._BASE_CONFIG, **overrides})

    @staticmethod
    def _create_source(config):
        """Create a UnityCatalogSource for ``config`` in a throwaway context."""
        ctx = PipelineContext(run_id="test_run")
        return UnityCatalogSource.create(config, ctx)

    @pytest.fixture
    def minimal_config(self):
        """Create a minimal config for testing."""
        return self._build_config()

    @pytest.fixture
    def config_with_page_size(self):
        """Create a config with custom page size."""
        return self._build_config(databricks_api_page_size=75)

    @patch("datahub.ingestion.source.unity.source.UnityCatalogApiProxy")
    @patch("datahub.ingestion.source.unity.source.HiveMetastoreProxy")
    def test_source_constructor_passes_default_page_size_to_proxy(
        self, mock_hive_proxy, mock_unity_proxy, minimal_config
    ):
        """Test that UnityCatalogSource passes default databricks_api_page_size to proxy."""
        source = self._create_source(minimal_config)

        # Verify proxy was created with correct parameters including page size
        mock_unity_proxy.assert_called_once_with(
            minimal_config.workspace_url,
            minimal_config.token,
            minimal_config.warehouse_id,
            report=source.report,
            hive_metastore_proxy=source.hive_metastore_proxy,
            lineage_data_source=minimal_config.lineage_data_source,
            databricks_api_page_size=0,  # Default value
        )

    @patch("datahub.ingestion.source.unity.source.UnityCatalogApiProxy")
    @patch("datahub.ingestion.source.unity.source.HiveMetastoreProxy")
    def test_source_constructor_passes_custom_page_size_to_proxy(
        self, mock_hive_proxy, mock_unity_proxy, config_with_page_size
    ):
        """Test that UnityCatalogSource passes custom databricks_api_page_size to proxy."""
        source = self._create_source(config_with_page_size)

        # Verify proxy was created with correct parameters including custom page size
        mock_unity_proxy.assert_called_once_with(
            config_with_page_size.workspace_url,
            config_with_page_size.token,
            config_with_page_size.warehouse_id,
            report=source.report,
            hive_metastore_proxy=source.hive_metastore_proxy,
            lineage_data_source=config_with_page_size.lineage_data_source,
            databricks_api_page_size=75,  # Custom value
        )

    @patch("datahub.ingestion.source.unity.source.UnityCatalogApiProxy")
    @patch("datahub.ingestion.source.unity.source.HiveMetastoreProxy")
    def test_source_config_page_size_available_to_source(
        self, mock_hive_proxy, mock_unity_proxy, config_with_page_size
    ):
        """Test that UnityCatalogSource has access to databricks_api_page_size config."""
        source = self._create_source(config_with_page_size)

        # Verify the source has access to the configuration value
        assert source.config.databricks_api_page_size == 75

    @patch("datahub.ingestion.source.unity.source.UnityCatalogApiProxy")
    @patch("datahub.ingestion.source.unity.source.HiveMetastoreProxy")
    def test_source_with_hive_metastore_disabled(
        self, mock_hive_proxy, mock_unity_proxy
    ):
        """Test that UnityCatalogSource works with hive metastore disabled."""
        # The base settings already set include_hive_metastore to False.
        config = self._build_config(databricks_api_page_size=200)
        source = self._create_source(config)

        # Verify proxy was created with correct page size even when hive metastore is disabled
        mock_unity_proxy.assert_called_once_with(
            config.workspace_url,
            config.token,
            config.warehouse_id,
            report=source.report,
            hive_metastore_proxy=None,  # Should be None when disabled
            lineage_data_source=config.lineage_data_source,
            databricks_api_page_size=200,
        )

    def test_test_connection_with_page_size_config(self):
        """Test that test_connection properly handles databricks_api_page_size."""
        # Built as a raw dict (not via _build_config): test_connection takes the
        # unparsed config, and include_hive_metastore is intentionally left unset.
        config_dict = {
            "token": "test_token",
            "workspace_url": "https://test.databricks.com",
            "warehouse_id": "test_warehouse",
            "databricks_api_page_size": 300,
        }

        with patch(
            "datahub.ingestion.source.unity.source.UnityCatalogConnectionTest"
        ) as mock_connection_test:
            mock_connection_test.return_value.get_connection_test.return_value = (
                "test_report"
            )

            result = UnityCatalogSource.test_connection(config_dict)

            # Verify connection test was created with correct config
            assert result == "test_report"
            mock_connection_test.assert_called_once()

            # Get the config that was passed to UnityCatalogConnectionTest
            # (first positional argument of the single recorded call).
            connection_test_config = mock_connection_test.call_args[0][0]
            assert connection_test_config.databricks_api_page_size == 300