datahub/metadata-ingestion/tests/unit/dremio/test_dremio_source_map.py

141 lines
4.5 KiB
Python
Raw Permalink Normal View History

from typing import List
from unittest.mock import MagicMock
from datahub.ingestion.source.dremio.dremio_config import DremioSourceMapping
from datahub.ingestion.source.dremio.dremio_entities import DremioSourceContainer
from datahub.ingestion.source.dremio.dremio_source import (
DremioSourceMapEntry,
build_dremio_source_map,
)
def test_build_source_map_simple():
    """Check the source map built when every container has one config mapping.

    Two containers ("source1" of Dremio type S3, "source2" of Dremio type
    REDSHIFT) are paired with two mappings carrying the same source names.
    The test expects each resulting entry to carry the platform and env
    given in its mapping, and expects the container whose ``database_name``
    is ``None`` to yield an entry with an empty-string ``database_name``.
    """
    containers: List[DremioSourceContainer] = [
        DremioSourceContainer(
            container_name="source1",
            location_id="xxx",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="S3",
            root_path="/",
            database_name=None,
        ),
        DremioSourceContainer(
            container_name="source2",
            location_id="yyy",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="REDSHIFT",
            root_path="/",
            database_name="redshiftdb",
        ),
    ]
    mappings: List[DremioSourceMapping] = [
        DremioSourceMapping(source_name="source1", platform="S3", env="PROD"),
        DremioSourceMapping(source_name="source2", platform="redshift", env="DEV"),
    ]

    expected = {
        "source1": DremioSourceMapEntry(
            source_name="source1",
            platform="S3",
            env="PROD",
            dremio_source_category="file_object_storage",
            root_path="/",
            database_name="",
        ),
        "source2": DremioSourceMapEntry(
            source_name="source2",
            platform="redshift",
            env="DEV",
            dremio_source_category="database",
            root_path="/",
            database_name="redshiftdb",
        ),
    }

    actual = build_dremio_source_map(containers, mappings)

    assert actual == expected
def test_build_source_map_same_platform_multiple_sources():
    """Check the source map when mappings and containers do not line up 1:1.

    Scenarios covered by the fixtures below:

    * "source2" appears in two mappings (env DEV, then env PROD); the test
      expects the resulting entry to carry env "DEV".
    * "source3" (Dremio type REDSHIFT) has no mapping; the test expects an
      entry with platform "redshift" and ``env=None``.
    * "Source4" (Dremio type NEWSOURCE) has no mapping; the test expects the
      entry under the lower-cased key "source4", keeping ``source_name``
      "Source4", with platform "newsource" and category "unknown".
    """
    containers: List[DremioSourceContainer] = [
        DremioSourceContainer(
            container_name="source1",
            location_id="xxx",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="S3",
            root_path="/",
            database_name=None,
        ),
        DremioSourceContainer(
            container_name="source2",
            location_id="yyy",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="REDSHIFT",
            root_path="/",
            database_name="redshiftdb",
        ),
        DremioSourceContainer(
            container_name="source3",
            location_id="tt",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="REDSHIFT",
            root_path="/",
            database_name="redshiftproddb",
        ),
        DremioSourceContainer(
            container_name="Source4",
            location_id="zz",
            path=[],
            api_operations=MagicMock(),  # type:ignore
            dremio_source_type="NEWSOURCE",
            root_path="/",
            database_name="somedb",
        ),
    ]
    mappings: List[DremioSourceMapping] = [
        DremioSourceMapping(source_name="source1", platform="S3", env="PROD"),
        DremioSourceMapping(source_name="source2", platform="redshift", env="DEV"),
        DremioSourceMapping(source_name="source2", platform="redshift", env="PROD"),
    ]

    expected = {
        "source1": DremioSourceMapEntry(
            source_name="source1",
            platform="S3",
            env="PROD",
            dremio_source_category="file_object_storage",
            root_path="/",
            database_name="",
        ),
        "source2": DremioSourceMapEntry(
            source_name="source2",
            platform="redshift",
            env="DEV",
            dremio_source_category="database",
            root_path="/",
            database_name="redshiftdb",
        ),
        "source3": DremioSourceMapEntry(
            source_name="source3",
            platform="redshift",
            env=None,
            dremio_source_category="database",
            root_path="/",
            database_name="redshiftproddb",
        ),
        "source4": DremioSourceMapEntry(
            source_name="Source4",
            platform="newsource",
            env=None,
            dremio_source_category="unknown",
            root_path="/",
            database_name="somedb",
        ),
    }

    actual = build_dremio_source_map(containers, mappings)

    assert actual == expected