# Mirror of https://github.com/datahub-project/datahub.git
# (synced 2025-07-31 21:36:08 +00:00; 214 lines, 7.3 KiB, Python)
from os import PathLike
|
|
from typing import Any, Dict, Optional, Union
|
|
|
|
import pytest
|
|
import requests_mock
|
|
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
from tests.test_helpers import mce_helpers
|
|
|
|
|
|
class DbtTestConfig:
    """Settings for one dbt ingestion run exercised by the integration test.

    The constructor derives every path the run needs and assembles the
    ``source_config`` / ``sink_config`` dictionaries that are handed to the
    ingestion pipeline.

    Args:
        run_id: Identifier stored on the instance as ``run_id``.
        dbt_metadata_uri_prefix: Location (local directory or URL) that is
            prefixed, with a ``/`` separator, to ``dbt_manifest.json``,
            ``dbt_catalog.json`` and ``dbt_sources.json``.
        test_resources_dir: Directory that contains the golden file.
        tmp_path: Directory the output file is written to.
        output_file: Name of the output file inside ``tmp_path``.
        golden_file: Name of the golden file inside ``test_resources_dir``.
        source_config_modifiers: Optional top-level overrides/additions for
            the default source config (shallow merge; later keys win).
        sink_config_modifiers: Optional top-level overrides/additions for the
            default sink config (shallow merge; later keys win).
    """

    def __init__(
        self,
        run_id: str,
        dbt_metadata_uri_prefix: Union[str, PathLike],
        test_resources_dir: Union[str, PathLike],
        tmp_path: Union[str, PathLike],
        output_file: Union[str, PathLike],
        golden_file: Union[str, PathLike],
        source_config_modifiers: Optional[Dict[str, Any]] = None,
        sink_config_modifiers: Optional[Dict[str, Any]] = None,
    ):
        # None (rather than {}) is the default to avoid a shared mutable
        # default argument.
        if source_config_modifiers is None:
            source_config_modifiers = {}

        if sink_config_modifiers is None:
            sink_config_modifiers = {}

        self.run_id = run_id

        # Plain "/" joining is deliberate: the prefix may be a URL, not only
        # a filesystem path.
        self.manifest_path = f"{dbt_metadata_uri_prefix}/dbt_manifest.json"
        self.catalog_path = f"{dbt_metadata_uri_prefix}/dbt_catalog.json"
        self.sources_path = f"{dbt_metadata_uri_prefix}/dbt_sources.json"
        self.target_platform = "postgres"

        self.output_path = f"{tmp_path}/{output_file}"

        self.golden_path = f"{test_resources_dir}/{golden_file}"

        # Defaults first, caller-supplied modifiers second so that the
        # modifiers override any default with the same top-level key.
        self.source_config = {
            "manifest_path": self.manifest_path,
            "catalog_path": self.catalog_path,
            "sources_path": self.sources_path,
            "target_platform": self.target_platform,
            "enable_meta_mapping": False,
            "write_semantics": "OVERRIDE",
            "meta_mapping": {
                "business_owner": {
                    "match": ".*",
                    "operation": "add_owner",
                    "config": {"owner_type": "user"},
                },
                "has_pii": {
                    "match": True,
                    "operation": "add_tag",
                    "config": {"tag": "has_pii_test"},
                },
                "int_property": {
                    "match": 1,
                    "operation": "add_tag",
                    "config": {"tag": "int_meta_property"},
                },
                "double_property": {
                    "match": 2.5,
                    "operation": "add_term",
                    "config": {"term": "double_meta_property"},
                },
                "data_governance.team_owner": {
                    "match": "Finance",
                    "operation": "add_term",
                    "config": {"term": "Finance_test"},
                },
            },
            **source_config_modifiers,
        }

        self.sink_config = {
            "filename": self.output_path,
            **sink_config_modifiers,
        }
|
|
|
|
|
|
@pytest.mark.integration
@requests_mock.Mocker(kw="req_mock")
def test_dbt_ingest(pytestconfig, tmp_path, mock_time, **kwargs):
    """Run the dbt source in several configurations and diff against goldens.

    For every ``DbtTestConfig`` variant a pipeline (dbt source -> file sink)
    is created and run, and the file it wrote is compared with that variant's
    golden file via ``mce_helpers.check_golden_file``.

    Args:
        pytestconfig: pytest config fixture; ``rootpath`` locates the test
            resources and it is forwarded to the golden-file check.
        tmp_path: pytest temporary directory that receives the output files.
        mock_time: Fixture requested but not referenced in the body.
            NOTE(review): presumably pins the clock so output is
            deterministic; it is defined outside this file, confirm there.
        **kwargs: Carries the ``req_mock`` mocker injected by the
            ``requests_mock.Mocker(kw="req_mock")`` decorator.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/dbt"

    # Expose the local dbt artifacts under a fake remote host so the
    # "external metadata files" variant can fetch them over (mocked) HTTP.
    req_mock = kwargs["req_mock"]
    for artifact in ("dbt_manifest.json", "dbt_catalog.json", "dbt_sources.json"):
        # Explicit UTF-8 so the JSON fixtures are read the same on every platform.
        with open(test_resources_dir / artifact, "r", encoding="utf-8") as f:
            req_mock.get(f"http://some-external-repo/{artifact}", text=f.read())

    config_variants = [
        DbtTestConfig(
            "dbt-test-with-schemas",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_with_schemas_mces.json",
            "dbt_with_schemas_mces_golden.json",
            source_config_modifiers={
                "load_schemas": True,
                "disable_dbt_node_creation": True,
                "enable_meta_mapping": True,
            },
        ),
        DbtTestConfig(
            "dbt-test-with-external-metadata-files",
            "http://some-external-repo",
            test_resources_dir,
            tmp_path,
            "dbt_with_external_metadata_files_mces.json",
            "dbt_with_external_metadata_files_mces_golden.json",
            source_config_modifiers={
                "load_schemas": True,
                "disable_dbt_node_creation": True,
            },
        ),
        DbtTestConfig(
            "dbt-test-without-schemas",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_without_schemas_mces.json",
            "dbt_without_schemas_mces_golden.json",
            source_config_modifiers={
                "load_schemas": False,
                "disable_dbt_node_creation": True,
            },
        ),
        DbtTestConfig(
            "dbt-test-without-schemas-with-filter",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_without_schemas_with_filter_mces.json",
            "dbt_without_schemas_with_filter_mces_golden.json",
            source_config_modifiers={
                "load_schemas": False,
                "node_name_pattern": {
                    "deny": ["source.sample_dbt.pagila.payment_p2020_06"]
                },
                "disable_dbt_node_creation": True,
            },
        ),
        DbtTestConfig(
            "dbt-test-with-schemas-dbt-enabled",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_enabled_with_schemas_mces.json",
            "dbt_enabled_with_schemas_mces_golden.json",
            source_config_modifiers={"load_schemas": True, "enable_meta_mapping": True},
        ),
        DbtTestConfig(
            "dbt-test-without-schemas-dbt-enabled",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_enabled_without_schemas_mces.json",
            "dbt_enabled_without_schemas_mces_golden.json",
            source_config_modifiers={"load_schemas": False},
        ),
        DbtTestConfig(
            "dbt-test-without-schemas-with-filter-dbt-enabled",
            test_resources_dir,
            test_resources_dir,
            tmp_path,
            "dbt_enabled_without_schemas_with_filter_mces.json",
            "dbt_enabled_without_schemas_with_filter_mces_golden.json",
            source_config_modifiers={
                "load_schemas": False,
                "node_name_pattern": {
                    "deny": ["source.sample_dbt.pagila.payment_p2020_06"]
                },
            },
        ),
    ]

    for config in config_variants:
        # test manifest, catalog, sources are generated from https://github.com/kevinhu/sample-dbt
        pipeline = Pipeline.create(
            {
                "run_id": config.run_id,
                "source": {"type": "dbt", "config": config.source_config},
                "sink": {
                    "type": "file",
                    "config": config.sink_config,
                },
            }
        )
        pipeline.run()
        # Fail the test if the run recorded failures instead of only diffing output.
        pipeline.raise_from_status()

        mce_helpers.check_golden_file(
            pytestconfig,
            output_path=config.output_path,
            golden_path=config.golden_path,
        )
|