# datahub/metadata-ingestion/tests/unit/dremio/test_dremio_iterator_integration.py
#
# 75 lines
# 3.0 KiB
# Python

from unittest.mock import Mock
import pytest
from datahub.ingestion.source.dremio.dremio_config import DremioSourceConfig
from datahub.ingestion.source.dremio.dremio_entities import DremioCatalog, DremioDataset
from datahub.ingestion.source.dremio.dremio_source import DremioSource
class TestDremioIteratorIntegration:
    """Exercise ``DremioSource.get_workunits_internal`` against a mocked catalog."""

    @pytest.fixture
    def mock_config(self):
        """Return a username/password ``DremioSourceConfig`` for a non-TLS host."""
        connection_settings = {
            "hostname": "test-host",
            "port": 9047,
            "tls": False,
            "username": "test-user",
            "password": "test-password",
        }
        return DremioSourceConfig(**connection_settings)

    @pytest.fixture
    def mock_dremio_source(self, mock_config, monkeypatch):
        """Build a ``DremioSource`` whose HTTP session and catalog are mocks."""
        # Canned reply handed back for any POST made through the fake session.
        login_response = Mock(status_code=200)
        login_response.json.return_value = {"token": "dummy-token"}
        fake_session = Mock()
        fake_session.post.return_value = login_response
        # While the fixture is active, requests.Session() yields the fake.
        monkeypatch.setattr("requests.Session", Mock(return_value=fake_session))

        pipeline_ctx = Mock(run_id="test-run-id")
        dremio_source = DremioSource(mock_config, pipeline_ctx)

        # Replace the catalog after construction with a spec'd mock.
        catalog = Mock(spec=DremioCatalog)
        catalog.dremio_api = Mock()
        dremio_source.dremio_catalog = catalog
        return dremio_source

    @staticmethod
    def _single_dataset():
        """Return a ``DremioDataset``-spec'd mock with a path and resource name."""
        dataset = Mock(spec=DremioDataset)
        dataset.path = ["test", "path"]
        dataset.resource_name = "test_table"
        return dataset

    @staticmethod
    def _stub_catalog(source, dataset):
        """Make the mocked catalog serve only ``dataset`` and no other entities."""
        catalog = source.dremio_catalog
        # Datasets and glossary terms are served as one-shot iterators.
        catalog.get_datasets.return_value = iter([dataset])
        catalog.get_glossary_terms.return_value = iter([])
        catalog.get_containers.return_value = []
        catalog.get_sources.return_value = []
        source.config.source_mappings = []

    def test_source_uses_iterators_by_default(self, mock_dremio_source) -> None:
        """A full pass asks the catalog once each for datasets and glossary terms."""
        source = mock_dremio_source
        self._stub_catalog(source, self._single_dataset())
        # Dataset processing is stubbed to produce no workunits.
        source.process_dataset = Mock(return_value=iter([]))

        # Exhaust the generator so every stage actually runs.
        for _ in source.get_workunits_internal():
            pass

        source.dremio_catalog.get_datasets.assert_called_once()
        source.dremio_catalog.get_glossary_terms.assert_called_once()

    def test_iterator_handles_exceptions_gracefully(self, mock_dremio_source) -> None:
        """A ``process_dataset`` error is counted in the report, not propagated."""
        source = mock_dremio_source
        self._stub_catalog(source, self._single_dataset())
        # Every attempt to process a dataset raises.
        source.process_dataset = Mock(side_effect=Exception("Processing error"))

        # Draining must complete; an escaping exception would fail the test here.
        for _ in source.get_workunits_internal():
            pass

        assert source.report.num_datasets_failed > 0