mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-28 02:17:53 +00:00
feat(ci): add pytest hooks for updating golden files (#12581)
This commit is contained in:
parent
fe173faf84
commit
7472c535d3
2
.github/workflows/metadata-ingestion.yml
vendored
2
.github/workflows/metadata-ingestion.yml
vendored
@ -27,7 +27,7 @@ concurrency:
|
||||
jobs:
|
||||
metadata-ingestion:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 40
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
DATAHUB_TELEMETRY_ENABLED: false
|
||||
# TODO: Enable this once the test is fixed.
|
||||
|
||||
@ -1,15 +1,14 @@
|
||||
import pathlib
|
||||
import site
|
||||
|
||||
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
|
||||
load_golden_flags,
|
||||
pytest_addoption,
|
||||
)
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--update-golden-files",
|
||||
action="store_true",
|
||||
default=False,
|
||||
)
|
||||
|
||||
|
||||
# The integration tests run Airflow, with our plugin, in a subprocess.
|
||||
# To get more accurate coverage, we need to ensure that the coverage
|
||||
# library is available in the subprocess.
|
||||
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
|
||||
coverage_startup_code = "import coverage; coverage.process_startup()"
|
||||
site_packages_dir = pathlib.Path(site.getsitepackages()[0])
|
||||
|
||||
@ -26,7 +26,6 @@ from datahub_airflow_plugin._airflow_shims import (
|
||||
HAS_AIRFLOW_LISTENER_API,
|
||||
HAS_AIRFLOW_STANDALONE_CMD,
|
||||
)
|
||||
from tests.utils import PytestConfig
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
@ -346,18 +345,13 @@ def _run_airflow(
|
||||
|
||||
|
||||
def check_golden_file(
|
||||
pytestconfig: PytestConfig,
|
||||
output_path: pathlib.Path,
|
||||
golden_path: pathlib.Path,
|
||||
ignore_paths: Sequence[str] = (),
|
||||
) -> None:
|
||||
update_golden = pytestconfig.getoption("--update-golden-files")
|
||||
|
||||
assert_metadata_files_equal(
|
||||
output_path=output_path,
|
||||
golden_path=golden_path,
|
||||
update_golden=update_golden,
|
||||
copy_output=False,
|
||||
ignore_paths=ignore_paths,
|
||||
ignore_order=True,
|
||||
)
|
||||
@ -434,7 +428,6 @@ test_cases = [
|
||||
],
|
||||
)
|
||||
def test_airflow_plugin(
|
||||
pytestconfig: PytestConfig,
|
||||
tmp_path: pathlib.Path,
|
||||
golden_filename: str,
|
||||
test_case: DagTestCase,
|
||||
@ -497,7 +490,6 @@ def test_airflow_plugin(
|
||||
_sanitize_output_file(airflow_instance.metadata_file)
|
||||
|
||||
check_golden_file(
|
||||
pytestconfig=pytestconfig,
|
||||
output_path=airflow_instance.metadata_file,
|
||||
golden_path=golden_path,
|
||||
ignore_paths=[
|
||||
@ -512,7 +504,6 @@ def test_airflow_plugin(
|
||||
if test_case.multiple_connections:
|
||||
_sanitize_output_file(airflow_instance.metadata_file2)
|
||||
check_golden_file(
|
||||
pytestconfig=pytestconfig,
|
||||
output_path=airflow_instance.metadata_file2,
|
||||
golden_path=golden_path,
|
||||
ignore_paths=[
|
||||
|
||||
@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
|
||||
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
|
||||
}
|
||||
task lintFix(type: Exec, dependsOn: installDev) {
|
||||
commandLine 'bash', '-x', '-c',
|
||||
"source ${venv_name}/bin/activate && " +
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"ruff check --fix src/ tests/ examples/ && " +
|
||||
"ruff format src/ tests/ examples/ "
|
||||
}
|
||||
@ -71,9 +71,9 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
|
||||
outputs.dir("${venv_name}")
|
||||
outputs.file(sentinel_file)
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
|
||||
"touch ${sentinel_file}"
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
|
||||
"touch ${sentinel_file}"
|
||||
}
|
||||
|
||||
task testQuick(type: Exec, dependsOn: installDevTest) {
|
||||
|
||||
@ -0,0 +1,4 @@
|
||||
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
|
||||
load_golden_flags,
|
||||
pytest_addoption,
|
||||
)
|
||||
@ -1,21 +0,0 @@
|
||||
import pathlib
|
||||
import site
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from _pytest.config import Parser
|
||||
|
||||
|
||||
def pytest_addoption(parser: "Parser") -> None:
|
||||
parser.addoption(
|
||||
"--update-golden-files",
|
||||
action="store_true",
|
||||
default=False,
|
||||
)
|
||||
|
||||
|
||||
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
|
||||
coverage_startup_code = "import coverage; coverage.process_startup()"
|
||||
site_packages_dir = pathlib.Path(site.getsitepackages()[0])
|
||||
pth_file_path = site_packages_dir / "datahub_coverage_startup.pth"
|
||||
pth_file_path.write_text(coverage_startup_code)
|
||||
File diff suppressed because it is too large
Load Diff
@ -23,10 +23,10 @@ from dagster._core.definitions.repository_definition import (
|
||||
)
|
||||
from dagster._core.definitions.resource_definition import ResourceDefinition
|
||||
from freezegun import freeze_time
|
||||
from utils.utils import PytestConfig, check_golden_file
|
||||
|
||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||
from datahub.ingestion.graph.client import DatahubClientConfig
|
||||
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
|
||||
from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
|
||||
from datahub_dagster_plugin.sensors.datahub_sensors import (
|
||||
DatahubSensors,
|
||||
@ -96,7 +96,7 @@ TEST_UUIDS = ["uuid_{}".format(i) for i in range(10000)]
|
||||
@patch.object(uuid, "uuid4", side_effect=TEST_UUIDS)
|
||||
@patch("datahub_dagster_plugin.sensors.datahub_sensors.DataHubGraph", autospec=True)
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
|
||||
def test_emit_metadata(mock_emit: Mock, mock_uuid: Mock) -> None:
|
||||
mock_emitter = Mock()
|
||||
mock_emit.return_value = mock_emitter
|
||||
|
||||
@ -168,8 +168,7 @@ def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
|
||||
json_object = json.dumps(mcpws, indent=2)
|
||||
f.write(json_object)
|
||||
|
||||
check_golden_file(
|
||||
pytestconfig=pytestconfig,
|
||||
assert_metadata_files_equal(
|
||||
output_path=pathlib.Path(f"{tmp_path}/test_emit_metadata_mcps.json"),
|
||||
golden_path=pathlib.Path(
|
||||
"tests/unit/golden/golden_test_emit_metadata_mcps.json"
|
||||
|
||||
@ -1,2 +0,0 @@
|
||||
def test_dummy():
|
||||
pass
|
||||
@ -1,30 +0,0 @@
|
||||
import pathlib
|
||||
from typing import Sequence
|
||||
|
||||
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
|
||||
|
||||
try:
|
||||
from pytest import Config as PytestConfig # type: ignore[attr-defined]
|
||||
except ImportError:
|
||||
# Support for pytest 6.x.
|
||||
from _pytest.config import Config as PytestConfig # type: ignore
|
||||
|
||||
__all__ = ["PytestConfig"]
|
||||
|
||||
|
||||
def check_golden_file(
|
||||
pytestconfig: PytestConfig,
|
||||
output_path: pathlib.Path,
|
||||
golden_path: pathlib.Path,
|
||||
ignore_paths: Sequence[str] = (),
|
||||
) -> None:
|
||||
update_golden = pytestconfig.getoption("--update-golden-files")
|
||||
|
||||
assert_metadata_files_equal(
|
||||
output_path=output_path,
|
||||
golden_path=golden_path,
|
||||
update_golden=update_golden,
|
||||
copy_output=False,
|
||||
ignore_paths=ignore_paths,
|
||||
ignore_order=True,
|
||||
)
|
||||
@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
|
||||
"mypy --show-traceback --show-error-codes src/ tests/"
|
||||
}
|
||||
task lintFix(type: Exec, dependsOn: installDev) {
|
||||
commandLine 'bash', '-x', '-c',
|
||||
"source ${venv_name}/bin/activate && " +
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"ruff check --fix src/ tests/ && " +
|
||||
"ruff format src/ tests/ "
|
||||
}
|
||||
|
||||
@ -1 +1,5 @@
|
||||
from datahub.testing.docker_utils import docker_compose_runner # noqa: F401
|
||||
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
|
||||
load_golden_flags,
|
||||
pytest_addoption,
|
||||
)
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import os
|
||||
import shutil
|
||||
from typing import List
|
||||
from unittest import mock
|
||||
@ -23,10 +22,6 @@ except Exception:
|
||||
use_gx_folder = False
|
||||
|
||||
|
||||
def should_update_golden_file() -> bool:
|
||||
return bool(os.getenv("DATAHUB_GOLDEN_FILE_UPDATE", False))
|
||||
|
||||
|
||||
FROZEN_TIME = "2021-12-28 12:00:00"
|
||||
|
||||
|
||||
@ -84,7 +79,5 @@ def test_ge_ingest(
|
||||
assert_metadata_files_equal(
|
||||
output_path=tmp_path / "ge_mcps.json",
|
||||
golden_path=test_resources_dir / golden_json,
|
||||
copy_output=False,
|
||||
update_golden=should_update_golden_file(),
|
||||
ignore_paths=[],
|
||||
)
|
||||
|
||||
@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
|
||||
def sentinel_file = "${venv_name}/.build_install_package_sentinel"
|
||||
inputs.file file('setup.py')
|
||||
outputs.file(sentinel_file)
|
||||
commandLine 'bash', '-x', '-c',
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"${pip_install_command} -e . ${extra_pip_requirements} &&" +
|
||||
"touch ${sentinel_file}"
|
||||
@ -45,7 +45,7 @@ task installDev(type: Exec, dependsOn: [install]) {
|
||||
def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
|
||||
inputs.file file('setup.py')
|
||||
outputs.file("${sentinel_file}")
|
||||
commandLine 'bash', '-x', '-c',
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"${pip_install_command} -e .[dev] ${extra_pip_requirements} && " +
|
||||
"touch ${sentinel_file}"
|
||||
@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
|
||||
"mypy --show-traceback --show-error-codes src/ tests/"
|
||||
}
|
||||
task lintFix(type: Exec, dependsOn: installDev) {
|
||||
commandLine 'bash', '-x', '-c',
|
||||
"source ${venv_name}/bin/activate && " +
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"ruff check --fix src/ tests/ && " +
|
||||
"ruff format src/ tests/ "
|
||||
}
|
||||
@ -70,8 +70,10 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
|
||||
inputs.file file('setup.py')
|
||||
outputs.dir("${venv_name}")
|
||||
outputs.file("${sentinel_file}")
|
||||
commandLine 'bash', '-x', '-c',
|
||||
"${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}"
|
||||
commandLine 'bash', '-c',
|
||||
"source ${venv_name}/bin/activate && set -x && " +
|
||||
"${pip_install_command} -e .[dev,integration-tests] && " +
|
||||
"touch ${sentinel_file}"
|
||||
}
|
||||
|
||||
task testQuick(type: Exec, dependsOn: installDevTest) {
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
@ -8,11 +7,10 @@ import deepdiff
|
||||
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
|
||||
from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
|
||||
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
|
||||
from datahub.testing.pytest_hooks import get_golden_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
|
||||
|
||||
|
||||
def assert_sql_result_with_resolver(
|
||||
sql: str,
|
||||
@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
|
||||
allow_table_error: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
settings = get_golden_settings()
|
||||
|
||||
# HACK: Our BigQuery source overwrites this value and doesn't undo it.
|
||||
# As such, we need to handle that here.
|
||||
BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
|
||||
@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
|
||||
)
|
||||
|
||||
txt = res.json(indent=4)
|
||||
if UPDATE_FILES:
|
||||
if settings.update_golden:
|
||||
expected_file.write_text(txt)
|
||||
return
|
||||
|
||||
if not expected_file.exists():
|
||||
expected_file.write_text(txt)
|
||||
raise AssertionError(
|
||||
f"Expected file {expected_file} does not exist. "
|
||||
"Created it with the expected output. Please verify it."
|
||||
f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
|
||||
)
|
||||
|
||||
expected = SqlParsingResult.parse_raw(expected_file.read_text())
|
||||
|
||||
@ -16,6 +16,7 @@ from deepdiff import DeepDiff
|
||||
from datahub.ingestion.sink.file import write_metadata_file
|
||||
from datahub.ingestion.source.file import read_metadata_file
|
||||
from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
|
||||
from datahub.testing.pytest_hooks import get_golden_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
|
||||
def assert_metadata_files_equal(
|
||||
output_path: Union[str, os.PathLike],
|
||||
golden_path: Union[str, os.PathLike],
|
||||
update_golden: bool,
|
||||
copy_output: bool,
|
||||
ignore_paths: Sequence[str] = (),
|
||||
ignore_paths_v2: Sequence[str] = (),
|
||||
ignore_order: bool = True,
|
||||
) -> None:
|
||||
settings = get_golden_settings()
|
||||
|
||||
golden_exists = os.path.isfile(golden_path)
|
||||
|
||||
if copy_output:
|
||||
if settings.copy_output:
|
||||
shutil.copyfile(str(output_path), str(golden_path) + ".output")
|
||||
logger.info(f"Copied output file to {golden_path}.output")
|
||||
|
||||
if not update_golden and not golden_exists:
|
||||
if not settings.update_golden and not golden_exists:
|
||||
raise FileNotFoundError(
|
||||
"Golden file does not exist. Please run with the --update-golden-files option to create."
|
||||
)
|
||||
|
||||
output = load_json_file(output_path)
|
||||
|
||||
if update_golden and not golden_exists:
|
||||
if settings.update_golden and not golden_exists:
|
||||
shutil.copyfile(str(output_path), str(golden_path))
|
||||
return
|
||||
else:
|
||||
@ -87,7 +88,7 @@ def assert_metadata_files_equal(
|
||||
ignore_paths = (*ignore_paths, *default_exclude_paths)
|
||||
|
||||
diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
|
||||
if diff and update_golden:
|
||||
if diff and settings.update_golden:
|
||||
if isinstance(diff, MCPDiff) and diff.is_delta_valid:
|
||||
logger.info(f"Applying delta to golden file {golden_path}")
|
||||
diff.apply_delta(golden)
|
||||
|
||||
56
metadata-ingestion/src/datahub/testing/pytest_hooks.py
Normal file
56
metadata-ingestion/src/datahub/testing/pytest_hooks.py
Normal file
@ -0,0 +1,56 @@
|
||||
import dataclasses
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
__all__ = [
|
||||
"load_golden_flags",
|
||||
"get_golden_settings",
|
||||
"pytest_addoption",
|
||||
"GoldenFileSettings",
|
||||
]
|
||||
|
||||
|
||||
@dataclasses.dataclass
class GoldenFileSettings:
    """Golden-file behaviour selected on the pytest command line.

    One instance is built per test session by the ``load_golden_flags``
    fixture and handed out through ``get_golden_settings``.
    """

    # Value of the ``--update-golden-files`` flag.
    update_golden: bool
    # Value of the ``--copy-output-files`` flag.
    copy_output: bool
|
||||
|
||||
|
||||
# Flipped to True by pytest_addoption once the golden-file flags are registered.
_registered: bool = False
# Filled in by the load_golden_flags fixture; stays None until that fixture runs.
_settings: Optional[GoldenFileSettings] = None
|
||||
|
||||
|
||||
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the golden-file command line flags with pytest.

    Intended to be imported into a ``conftest.py`` so pytest picks it up as
    the ``pytest_addoption`` hook. Besides adding the flags, it records that
    registration happened so ``get_golden_settings`` can report a missing
    conftest import.

    Args:
        parser: The pytest option parser the flags are added to.
    """
    global _registered

    parser.addoption(
        "--update-golden-files",
        action="store_true",
        default=False,
        help="Create missing golden files and update existing ones from test output.",
    )

    # TODO: Deprecate and remove this flag.
    parser.addoption(
        "--copy-output-files",
        action="store_true",
        default=False,
        help="Copy each output file next to its golden file with a '.output' suffix.",
    )

    _registered = True
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
def load_golden_flags(pytestconfig: pytest.Config) -> None:
    """Capture the golden-file flags for the test session.

    Declared session-scoped and autouse, so a conftest only needs to import
    it. It reads ``--update-golden-files`` and ``--copy-output-files`` from
    the pytest config and stores them in the module-level ``_settings`` that
    ``get_golden_settings`` returns.

    Args:
        pytestconfig: The built-in pytest config fixture.
    """
    global _settings
    _settings = GoldenFileSettings(
        update_golden=pytestconfig.getoption("--update-golden-files"),
        copy_output=pytestconfig.getoption("--copy-output-files"),
    )
|
||||
|
||||
|
||||
def get_golden_settings() -> GoldenFileSettings:
    """Return the golden-file settings captured for the current pytest session.

    Returns:
        The ``GoldenFileSettings`` stored by the ``load_golden_flags`` fixture.

    Raises:
        ValueError: If this module's ``pytest_addoption`` hook never ran
            (``_registered`` is still false), or if ``load_golden_flags`` has
            not populated ``_settings`` yet.
    """
    if not _registered:
        # The previous message named a non-existent ``register_golden_flags``
        # helper; point at the hook that actually sets ``_registered``.
        raise ValueError(
            "Golden files aren't set up properly. "
            "Import pytest_addoption from datahub.testing.pytest_hooks in your conftest."
        )
    if _settings is None:
        raise ValueError(
            "Golden files aren't set up properly. "
            "Ensure load_golden_flags is imported in your conftest."
        )
    return _settings
|
||||
@ -22,6 +22,10 @@ os.environ["DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES"] = "1"
|
||||
|
||||
# We need our imports to go below the os.environ updates, since the mere act
|
||||
# of importing some datahub modules will load env variables.
|
||||
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
|
||||
load_golden_flags,
|
||||
pytest_addoption,
|
||||
)
|
||||
from tests.test_helpers.docker_helpers import ( # noqa: F401,E402
|
||||
docker_compose_command,
|
||||
docker_compose_runner,
|
||||
@ -54,15 +58,6 @@ def mock_time(monkeypatch):
|
||||
yield
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--update-golden-files",
|
||||
action="store_true",
|
||||
default=False,
|
||||
)
|
||||
parser.addoption("--copy-output-files", action="store_true", default=False)
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(
|
||||
config: pytest.Config, items: List[pytest.Item]
|
||||
) -> None:
|
||||
|
||||
@ -73,9 +73,7 @@ procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()]
|
||||
|
||||
@pytest.mark.parametrize("procedure_sql_file", procedure_sqls)
|
||||
@pytest.mark.integration
|
||||
def test_stored_procedure_lineage(
|
||||
pytestconfig: pytest.Config, procedure_sql_file: str
|
||||
) -> None:
|
||||
def test_stored_procedure_lineage(procedure_sql_file: str) -> None:
|
||||
sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file
|
||||
procedure_code = sql_file_path.read_text()
|
||||
|
||||
@ -105,7 +103,6 @@ def test_stored_procedure_lineage(
|
||||
)
|
||||
)
|
||||
mce_helpers.check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=(
|
||||
PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json")
|
||||
|
||||
@ -85,13 +85,14 @@ def check_golden_file(
|
||||
ignore_paths_v2: Sequence[str] = (),
|
||||
ignore_order: bool = True,
|
||||
) -> None:
|
||||
update_golden = pytestconfig.getoption("--update-golden-files")
|
||||
copy_output = pytestconfig.getoption("--copy-output-files")
|
||||
# TODO: Remove the pytestconfig parameter since it's redundant.
|
||||
# Or more straightforward - we can remove the `check_golden_file` method
|
||||
# and use assert_metadata_files_equal directly. Maybe call it "check_golden_metadata"?
|
||||
# In a lot of cases, the output_path is also just annoying - our pytest setup
|
||||
# should be responsible for figuring out where to put the temp file.
|
||||
assert_metadata_files_equal(
|
||||
output_path=output_path,
|
||||
golden_path=golden_path,
|
||||
update_golden=update_golden,
|
||||
copy_output=copy_output,
|
||||
ignore_paths=ignore_paths,
|
||||
ignore_paths_v2=ignore_paths_v2,
|
||||
ignore_order=ignore_order,
|
||||
@ -99,7 +100,6 @@ def check_golden_file(
|
||||
|
||||
|
||||
def check_goldens_stream(
|
||||
pytestconfig: pytest.Config,
|
||||
outputs: List,
|
||||
golden_path: Union[str, os.PathLike],
|
||||
ignore_paths: Sequence[str] = (),
|
||||
@ -108,8 +108,7 @@ def check_goldens_stream(
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
write_metadata_file(pathlib.Path(f.name), outputs)
|
||||
|
||||
check_golden_file(
|
||||
pytestconfig=pytestconfig,
|
||||
assert_metadata_files_equal(
|
||||
output_path=f.name,
|
||||
golden_path=golden_path,
|
||||
ignore_paths=ignore_paths,
|
||||
|
||||
@ -1,16 +1,11 @@
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
from datahub.sdk._entity import Entity
|
||||
from tests.test_helpers import mce_helpers
|
||||
|
||||
|
||||
def assert_entity_golden(
|
||||
pytestconfig: pytest.Config, entity: Entity, golden_path: pathlib.Path
|
||||
) -> None:
|
||||
def assert_entity_golden(entity: Entity, golden_path: pathlib.Path) -> None:
|
||||
mce_helpers.check_goldens_stream(
|
||||
pytestconfig=pytestconfig,
|
||||
outputs=entity._as_mcps(),
|
||||
golden_path=golden_path,
|
||||
ignore_order=False,
|
||||
|
||||
@ -32,7 +32,6 @@ def test_structuredproperties_load(pytestconfig: pytest.Config) -> None:
|
||||
mcps.extend(property.generate_mcps())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
mcps,
|
||||
golden_path=RESOURCE_DIR / "example_structured_properties_golden.json",
|
||||
)
|
||||
|
||||
@ -20,7 +20,7 @@ from tests.test_helpers.sdk_v2_helpers import assert_entity_golden
|
||||
_GOLDEN_DIR = pathlib.Path(__file__).parent / "container_golden"
|
||||
|
||||
|
||||
def test_container_basic(pytestconfig: pytest.Config) -> None:
|
||||
def test_container_basic() -> None:
|
||||
db_key = DatabaseKey(
|
||||
platform="bigquery",
|
||||
database="my_bq_project",
|
||||
@ -60,12 +60,10 @@ def test_container_basic(pytestconfig: pytest.Config) -> None:
|
||||
# This should fail. Eventually we should make it suggest calling set_owners instead.
|
||||
c.owners = [] # type: ignore
|
||||
|
||||
assert_entity_golden(
|
||||
pytestconfig, c, _GOLDEN_DIR / "test_container_basic_golden.json"
|
||||
)
|
||||
assert_entity_golden(c, _GOLDEN_DIR / "test_container_basic_golden.json")
|
||||
|
||||
|
||||
def test_container_complex(pytestconfig: pytest.Config) -> None:
|
||||
def test_container_complex() -> None:
|
||||
schema_key = SchemaKey(
|
||||
platform="snowflake",
|
||||
instance="my_instance",
|
||||
@ -75,7 +73,7 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
|
||||
created = datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
|
||||
updated = datetime(2025, 1, 9, 3, 4, 6, tzinfo=timezone.utc)
|
||||
|
||||
d = Container(
|
||||
c = Container(
|
||||
schema_key,
|
||||
display_name="MY_SCHEMA",
|
||||
qualified_name="MY_DB.MY_SCHEMA",
|
||||
@ -100,19 +98,19 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
|
||||
],
|
||||
domain=DomainUrn("Marketing"),
|
||||
)
|
||||
assert d.platform_instance is not None
|
||||
assert c.platform_instance is not None
|
||||
assert (
|
||||
str(d.platform_instance)
|
||||
str(c.platform_instance)
|
||||
== "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)"
|
||||
)
|
||||
assert d.subtype == "Schema"
|
||||
assert d.description == "test"
|
||||
assert d.display_name == "MY_SCHEMA"
|
||||
assert d.qualified_name == "MY_DB.MY_SCHEMA"
|
||||
assert d.external_url == "https://example.com"
|
||||
assert d.created == created
|
||||
assert d.last_modified == updated
|
||||
assert d.custom_properties == {
|
||||
assert c.subtype == "Schema"
|
||||
assert c.description == "test"
|
||||
assert c.display_name == "MY_SCHEMA"
|
||||
assert c.qualified_name == "MY_DB.MY_SCHEMA"
|
||||
assert c.external_url == "https://example.com"
|
||||
assert c.created == created
|
||||
assert c.last_modified == updated
|
||||
assert c.custom_properties == {
|
||||
"platform": "snowflake",
|
||||
"instance": "my_instance",
|
||||
"database": "MY_DB",
|
||||
@ -122,14 +120,12 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
|
||||
}
|
||||
|
||||
# Check standard aspects.
|
||||
assert d.domain == DomainUrn("Marketing")
|
||||
assert d.tags is not None
|
||||
assert len(d.tags) == 2
|
||||
assert d.terms is not None
|
||||
assert len(d.terms) == 1
|
||||
assert d.owners is not None
|
||||
assert len(d.owners) == 1
|
||||
assert c.domain == DomainUrn("Marketing")
|
||||
assert c.tags is not None
|
||||
assert len(c.tags) == 2
|
||||
assert c.terms is not None
|
||||
assert len(c.terms) == 1
|
||||
assert c.owners is not None
|
||||
assert len(c.owners) == 1
|
||||
|
||||
assert_entity_golden(
|
||||
pytestconfig, d, _GOLDEN_DIR / "test_container_complex_golden.json"
|
||||
)
|
||||
assert_entity_golden(c, _GOLDEN_DIR / "test_container_complex_golden.json")
|
||||
|
||||
@ -65,9 +65,7 @@ def test_dataset_basic(pytestconfig: pytest.Config) -> None:
|
||||
# This should fail. Eventually we should make it suggest calling set_owners instead.
|
||||
d.owners = [] # type: ignore
|
||||
|
||||
assert_entity_golden(
|
||||
pytestconfig, d, _GOLDEN_DIR / "test_dataset_basic_golden.json"
|
||||
)
|
||||
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_basic_golden.json")
|
||||
|
||||
|
||||
def _build_complex_dataset() -> Dataset:
|
||||
@ -161,17 +159,13 @@ def _build_complex_dataset() -> Dataset:
|
||||
return d
|
||||
|
||||
|
||||
def test_dataset_complex(pytestconfig: pytest.Config) -> None:
|
||||
def test_dataset_complex() -> None:
|
||||
d = _build_complex_dataset()
|
||||
assert_entity_golden(
|
||||
pytestconfig, d, _GOLDEN_DIR / "test_dataset_complex_golden.json"
|
||||
)
|
||||
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_complex_golden.json")
|
||||
|
||||
|
||||
def test_dataset_ingestion(pytestconfig: pytest.Config) -> None:
|
||||
def test_dataset_ingestion() -> None:
|
||||
with change_default_attribution(KnownAttribution.INGESTION):
|
||||
d = _build_complex_dataset()
|
||||
|
||||
assert_entity_golden(
|
||||
pytestconfig, d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json"
|
||||
)
|
||||
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json")
|
||||
|
||||
@ -35,7 +35,6 @@ def assert_client_golden(
|
||||
) -> None:
|
||||
mcps = client._graph.emit_mcps.call_args[0][0] # type: ignore
|
||||
mce_helpers.check_goldens_stream(
|
||||
pytestconfig=pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=golden_path,
|
||||
ignore_order=False,
|
||||
|
||||
@ -62,7 +62,6 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_basic_lineage.json",
|
||||
)
|
||||
@ -86,7 +85,7 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
|
||||
def test_overlapping_inserts() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -114,14 +113,13 @@ def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_overlapping_inserts.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_temp_table(pytestconfig: pytest.Config) -> None:
|
||||
def test_temp_table() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -173,14 +171,13 @@ def test_temp_table(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_temp_table.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
|
||||
def test_multistep_temp_table() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -235,14 +232,13 @@ def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
|
||||
== 4
|
||||
)
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_multistep_temp_table.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> None:
|
||||
def test_overlapping_inserts_from_temp_tables() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -311,14 +307,13 @@ def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> No
|
||||
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_overlapping_inserts_from_temp_tables.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
|
||||
def test_aggregate_operations() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=False,
|
||||
@ -360,14 +355,13 @@ def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_aggregate_operations.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_view_lineage(pytestconfig: pytest.Config) -> None:
|
||||
def test_view_lineage() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -398,14 +392,13 @@ def test_view_lineage(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_view_lineage.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
|
||||
def test_known_lineage_mapping() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -429,14 +422,13 @@ def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_known_lineage_mapping.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
|
||||
def test_column_lineage_deduplication() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -467,14 +459,13 @@ def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
|
||||
# which came later and hence has higher precedence.
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_column_lineage_deduplication.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
|
||||
def test_add_known_query_lineage() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -512,14 +503,13 @@ def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_add_known_query_lineage.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_table_rename(pytestconfig: pytest.Config) -> None:
|
||||
def test_table_rename() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -570,14 +560,13 @@ def test_table_rename(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_table_rename.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
|
||||
def test_table_rename_with_temp() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -630,14 +619,13 @@ def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_table_rename_with_temp.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_table_swap(pytestconfig: pytest.Config) -> None:
|
||||
def test_table_swap() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="snowflake",
|
||||
generate_lineage=True,
|
||||
@ -717,14 +705,13 @@ def test_table_swap(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_table_swap.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
|
||||
def test_table_swap_with_temp() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="snowflake",
|
||||
generate_lineage=True,
|
||||
@ -887,14 +874,13 @@ def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_table_swap_with_temp.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
|
||||
def test_create_table_query_mcps() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="bigquery",
|
||||
generate_lineage=True,
|
||||
@ -914,16 +900,13 @@ def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_create_table_query_mcps.json",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_table_lineage_via_temp_table_disordered_add(
|
||||
pytestconfig: pytest.Config,
|
||||
) -> None:
|
||||
def test_table_lineage_via_temp_table_disordered_add() -> None:
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
generate_lineage=True,
|
||||
@ -949,7 +932,6 @@ def test_table_lineage_via_temp_table_disordered_add(
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR
|
||||
/ "test_table_lineage_via_temp_table_disordered_add.json",
|
||||
@ -957,7 +939,7 @@ def test_table_lineage_via_temp_table_disordered_add(
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_basic_usage(pytestconfig: pytest.Config) -> None:
|
||||
def test_basic_usage() -> None:
|
||||
frozen_timestamp = parse_user_datetime(FROZEN_TIME)
|
||||
aggregator = SqlParsingAggregator(
|
||||
platform="redshift",
|
||||
@ -998,7 +980,6 @@ def test_basic_usage(pytestconfig: pytest.Config) -> None:
|
||||
mcps = list(aggregator.gen_metadata())
|
||||
|
||||
check_goldens_stream(
|
||||
pytestconfig,
|
||||
outputs=mcps,
|
||||
golden_path=RESOURCE_DIR / "test_basic_usage.json",
|
||||
)
|
||||
|
||||
@ -2,22 +2,11 @@ import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
import datahub.testing.check_sql_parser_result as checker
|
||||
from datahub.testing.check_sql_parser_result import assert_sql_result
|
||||
|
||||
RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def set_update_sql_parser(
|
||||
pytestconfig: pytest.Config, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
update_golden = pytestconfig.getoption("--update-golden-files")
|
||||
|
||||
if update_golden:
|
||||
monkeypatch.setattr(checker, "UPDATE_FILES", True)
|
||||
|
||||
|
||||
def test_invalid_sql():
|
||||
assert_sql_result(
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user