mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-02 19:58:59 +00:00
75 lines
3.0 KiB
Python
75 lines
3.0 KiB
Python
from unittest.mock import Mock
|
|
|
|
import pytest
|
|
|
|
from datahub.ingestion.source.dremio.dremio_config import DremioSourceConfig
|
|
from datahub.ingestion.source.dremio.dremio_entities import DremioCatalog, DremioDataset
|
|
from datahub.ingestion.source.dremio.dremio_source import DremioSource
|
|
|
|
|
|
class TestDremioIteratorIntegration:
|
|
@pytest.fixture
|
|
def mock_config(self):
|
|
return DremioSourceConfig(
|
|
hostname="test-host",
|
|
port=9047,
|
|
tls=False,
|
|
username="test-user",
|
|
password="test-password",
|
|
)
|
|
|
|
@pytest.fixture
|
|
def mock_dremio_source(self, mock_config, monkeypatch):
|
|
mock_session = Mock()
|
|
monkeypatch.setattr("requests.Session", Mock(return_value=mock_session))
|
|
mock_session.post.return_value.json.return_value = {"token": "dummy-token"}
|
|
mock_session.post.return_value.status_code = 200
|
|
|
|
mock_ctx = Mock()
|
|
mock_ctx.run_id = "test-run-id"
|
|
source = DremioSource(mock_config, mock_ctx)
|
|
|
|
source.dremio_catalog = Mock(spec=DremioCatalog)
|
|
source.dremio_catalog.dremio_api = Mock()
|
|
|
|
return source
|
|
|
|
def test_source_uses_iterators_by_default(self, mock_dremio_source):
|
|
"""Test that source uses iterators by default"""
|
|
mock_dataset = Mock(spec=DremioDataset)
|
|
mock_dataset.path = ["test", "path"]
|
|
mock_dataset.resource_name = "test_table"
|
|
|
|
mock_dremio_source.dremio_catalog.get_datasets.return_value = iter(
|
|
[mock_dataset]
|
|
)
|
|
mock_dremio_source.dremio_catalog.get_containers.return_value = []
|
|
mock_dremio_source.dremio_catalog.get_glossary_terms.return_value = iter([])
|
|
mock_dremio_source.dremio_catalog.get_sources.return_value = []
|
|
mock_dremio_source.config.source_mappings = []
|
|
mock_dremio_source.process_dataset = Mock(return_value=iter([]))
|
|
list(mock_dremio_source.get_workunits_internal())
|
|
|
|
mock_dremio_source.dremio_catalog.get_datasets.assert_called_once()
|
|
mock_dremio_source.dremio_catalog.get_glossary_terms.assert_called_once()
|
|
|
|
def test_iterator_handles_exceptions_gracefully(self, mock_dremio_source):
|
|
"""Test that iterator handles exceptions without crashing"""
|
|
mock_dataset = Mock(spec=DremioDataset)
|
|
mock_dataset.path = ["test", "path"]
|
|
mock_dataset.resource_name = "test_table"
|
|
|
|
mock_dremio_source.dremio_catalog.get_datasets.return_value = iter(
|
|
[mock_dataset]
|
|
)
|
|
mock_dremio_source.dremio_catalog.get_containers.return_value = []
|
|
mock_dremio_source.dremio_catalog.get_glossary_terms.return_value = iter([])
|
|
mock_dremio_source.dremio_catalog.get_sources.return_value = []
|
|
mock_dremio_source.config.source_mappings = []
|
|
mock_dremio_source.process_dataset = Mock(
|
|
side_effect=Exception("Processing error")
|
|
)
|
|
list(mock_dremio_source.get_workunits_internal())
|
|
|
|
assert mock_dremio_source.report.num_datasets_failed > 0
|