feat(ci): add pytest hooks for updating golden files (#12581)

This commit is contained in:
Harshal Sheth 2025-02-12 15:32:31 -08:00 committed by GitHub
parent fe173faf84
commit 7472c535d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 592 additions and 653 deletions

View File

@ -27,7 +27,7 @@ concurrency:
jobs:
metadata-ingestion:
runs-on: ubuntu-latest
timeout-minutes: 40
timeout-minutes: 60
env:
DATAHUB_TELEMETRY_ENABLED: false
# TODO: Enable this once the test is fixed.

View File

@ -1,15 +1,14 @@
import pathlib
import site
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)
def pytest_addoption(parser):
    """Register the --update-golden-files flag (off by default) with pytest."""
    parser.addoption("--update-golden-files", action="store_true", default=False)
# The integration tests run Airflow, with our plugin, in a subprocess.
# To get more accurate coverage, we need to ensure that the coverage
# library is available in the subprocess.
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
coverage_startup_code = "import coverage; coverage.process_startup()"
site_packages_dir = pathlib.Path(site.getsitepackages()[0])

View File

@ -26,7 +26,6 @@ from datahub_airflow_plugin._airflow_shims import (
HAS_AIRFLOW_LISTENER_API,
HAS_AIRFLOW_STANDALONE_CMD,
)
from tests.utils import PytestConfig
pytestmark = pytest.mark.integration
@ -346,18 +345,13 @@ def _run_airflow(
def check_golden_file(
pytestconfig: PytestConfig,
output_path: pathlib.Path,
golden_path: pathlib.Path,
ignore_paths: Sequence[str] = (),
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
assert_metadata_files_equal(
output_path=output_path,
golden_path=golden_path,
update_golden=update_golden,
copy_output=False,
ignore_paths=ignore_paths,
ignore_order=True,
)
@ -434,7 +428,6 @@ test_cases = [
],
)
def test_airflow_plugin(
pytestconfig: PytestConfig,
tmp_path: pathlib.Path,
golden_filename: str,
test_case: DagTestCase,
@ -497,7 +490,6 @@ def test_airflow_plugin(
_sanitize_output_file(airflow_instance.metadata_file)
check_golden_file(
pytestconfig=pytestconfig,
output_path=airflow_instance.metadata_file,
golden_path=golden_path,
ignore_paths=[
@ -512,7 +504,6 @@ def test_airflow_plugin(
if test_case.multiple_connections:
_sanitize_output_file(airflow_instance.metadata_file2)
check_golden_file(
pytestconfig=pytestconfig,
output_path=airflow_instance.metadata_file2,
golden_path=golden_path,
ignore_paths=[

View File

@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ examples/ && " +
"ruff format src/ tests/ examples/ "
}
@ -71,9 +71,9 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
outputs.dir("${venv_name}")
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}
task testQuick(type: Exec, dependsOn: installDevTest) {

View File

@ -0,0 +1,4 @@
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)

View File

@ -1,21 +0,0 @@
import pathlib
import site
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from _pytest.config import Parser
def pytest_addoption(parser: "Parser") -> None:
    """Expose the --update-golden-files boolean flag to pytest (default: off)."""
    flag_kwargs = {"action": "store_true", "default": False}
    parser.addoption("--update-golden-files", **flag_kwargs)
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
coverage_startup_code = "import coverage; coverage.process_startup()"
site_packages_dir = pathlib.Path(site.getsitepackages()[0])
pth_file_path = site_packages_dir / "datahub_coverage_startup.pth"
pth_file_path.write_text(coverage_startup_code)

View File

@ -23,10 +23,10 @@ from dagster._core.definitions.repository_definition import (
)
from dagster._core.definitions.resource_definition import ResourceDefinition
from freezegun import freeze_time
from utils.utils import PytestConfig, check_golden_file
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubSensors,
@ -96,7 +96,7 @@ TEST_UUIDS = ["uuid_{}".format(i) for i in range(10000)]
@patch.object(uuid, "uuid4", side_effect=TEST_UUIDS)
@patch("datahub_dagster_plugin.sensors.datahub_sensors.DataHubGraph", autospec=True)
@freeze_time(FROZEN_TIME)
def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
def test_emit_metadata(mock_emit: Mock, mock_uuid: Mock) -> None:
mock_emitter = Mock()
mock_emit.return_value = mock_emitter
@ -168,8 +168,7 @@ def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
json_object = json.dumps(mcpws, indent=2)
f.write(json_object)
check_golden_file(
pytestconfig=pytestconfig,
assert_metadata_files_equal(
output_path=pathlib.Path(f"{tmp_path}/test_emit_metadata_mcps.json"),
golden_path=pathlib.Path(
"tests/unit/golden/golden_test_emit_metadata_mcps.json"

View File

@ -1,2 +0,0 @@
def test_dummy():
    """No-op placeholder test."""

View File

@ -1,30 +0,0 @@
import pathlib
from typing import Sequence
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
try:
from pytest import Config as PytestConfig # type: ignore[attr-defined]
except ImportError:
# Support for pytest 6.x.
from _pytest.config import Config as PytestConfig # type: ignore
__all__ = ["PytestConfig"]
def check_golden_file(
    pytestconfig: PytestConfig,
    output_path: pathlib.Path,
    golden_path: pathlib.Path,
    ignore_paths: Sequence[str] = (),
) -> None:
    """Compare *output_path* against *golden_path*.

    Honors pytest's --update-golden-files flag: when set, the golden file is
    rewritten instead of compared. Comparison ignores ordering and any paths
    listed in *ignore_paths*.
    """
    should_update = pytestconfig.getoption("--update-golden-files")
    assert_metadata_files_equal(
        output_path=output_path,
        golden_path=golden_path,
        ignore_paths=ignore_paths,
        ignore_order=True,
        update_golden=should_update,
        copy_output=False,
    )

View File

@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ && " +
"ruff format src/ tests/ "
}

View File

@ -1 +1,5 @@
from datahub.testing.docker_utils import docker_compose_runner # noqa: F401
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)

View File

@ -1,4 +1,3 @@
import os
import shutil
from typing import List
from unittest import mock
@ -23,10 +22,6 @@ except Exception:
use_gx_folder = False
def should_update_golden_file() -> bool:
    """Whether golden files should be regenerated instead of asserted against.

    Controlled by the DATAHUB_GOLDEN_FILE_UPDATE environment variable.
    Unset, empty, "0", "false", and "no" (any casing, surrounding whitespace
    ignored) disable updating; any other value enables it.

    Note: the previous ``bool(os.getenv(..., False))`` check treated *any*
    non-empty string — including "false" and "0" — as truthy, which silently
    rewrote golden files when users tried to disable the flag.
    """
    value = os.getenv("DATAHUB_GOLDEN_FILE_UPDATE", "")
    return value.strip().lower() not in ("", "0", "false", "no")
FROZEN_TIME = "2021-12-28 12:00:00"
@ -84,7 +79,5 @@ def test_ge_ingest(
assert_metadata_files_equal(
output_path=tmp_path / "ge_mcps.json",
golden_path=test_resources_dir / golden_json,
copy_output=False,
update_golden=should_update_golden_file(),
ignore_paths=[],
)

View File

@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
def sentinel_file = "${venv_name}/.build_install_package_sentinel"
inputs.file file('setup.py')
outputs.file(sentinel_file)
commandLine 'bash', '-x', '-c',
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e . ${extra_pip_requirements} &&" +
"touch ${sentinel_file}"
@ -45,7 +45,7 @@ task installDev(type: Exec, dependsOn: [install]) {
def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
inputs.file file('setup.py')
outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c',
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ && " +
"ruff format src/ tests/ "
}
@ -70,8 +70,10 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
inputs.file file('setup.py')
outputs.dir("${venv_name}")
outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c',
"${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}"
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] && " +
"touch ${sentinel_file}"
}
task testQuick(type: Exec, dependsOn: installDevTest) {

View File

@ -1,5 +1,4 @@
import logging
import os
import pathlib
from typing import Any, Dict, Optional
@ -8,11 +7,10 @@ import deepdiff
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
from datahub.testing.pytest_hooks import get_golden_settings
logger = logging.getLogger(__name__)
UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
def assert_sql_result_with_resolver(
sql: str,
@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
allow_table_error: bool = False,
**kwargs: Any,
) -> None:
settings = get_golden_settings()
# HACK: Our BigQuery source overwrites this value and doesn't undo it.
# As such, we need to handle that here.
BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
)
txt = res.json(indent=4)
if UPDATE_FILES:
if settings.update_golden:
expected_file.write_text(txt)
return
if not expected_file.exists():
expected_file.write_text(txt)
raise AssertionError(
f"Expected file {expected_file} does not exist. "
"Created it with the expected output. Please verify it."
f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
)
expected = SqlParsingResult.parse_raw(expected_file.read_text())

View File

@ -16,6 +16,7 @@ from deepdiff import DeepDiff
from datahub.ingestion.sink.file import write_metadata_file
from datahub.ingestion.source.file import read_metadata_file
from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
from datahub.testing.pytest_hooks import get_golden_settings
logger = logging.getLogger(__name__)
@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
def assert_metadata_files_equal(
output_path: Union[str, os.PathLike],
golden_path: Union[str, os.PathLike],
update_golden: bool,
copy_output: bool,
ignore_paths: Sequence[str] = (),
ignore_paths_v2: Sequence[str] = (),
ignore_order: bool = True,
) -> None:
settings = get_golden_settings()
golden_exists = os.path.isfile(golden_path)
if copy_output:
if settings.copy_output:
shutil.copyfile(str(output_path), str(golden_path) + ".output")
logger.info(f"Copied output file to {golden_path}.output")
if not update_golden and not golden_exists:
if not settings.update_golden and not golden_exists:
raise FileNotFoundError(
"Golden file does not exist. Please run with the --update-golden-files option to create."
)
output = load_json_file(output_path)
if update_golden and not golden_exists:
if settings.update_golden and not golden_exists:
shutil.copyfile(str(output_path), str(golden_path))
return
else:
@ -87,7 +88,7 @@ def assert_metadata_files_equal(
ignore_paths = (*ignore_paths, *default_exclude_paths)
diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
if diff and update_golden:
if diff and settings.update_golden:
if isinstance(diff, MCPDiff) and diff.is_delta_valid:
logger.info(f"Applying delta to golden file {golden_path}")
diff.apply_delta(golden)

View File

@ -0,0 +1,56 @@
import dataclasses
from typing import Optional
import pytest
__all__ = [
"load_golden_flags",
"get_golden_settings",
"pytest_addoption",
"GoldenFileSettings",
]
@dataclasses.dataclass
class GoldenFileSettings:
    """Flags controlling golden-file comparisons.

    update_golden: rewrite golden files instead of asserting against them
        (from --update-golden-files).
    copy_output: also copy test output alongside the golden file
        (from --copy-output-files).
    """

    update_golden: bool
    copy_output: bool
# Module-level plugin state, mutated by pytest_addoption and the
# load_golden_flags fixture defined below.
_registered: bool = False  # set to True once pytest_addoption has run
_settings: Optional[GoldenFileSettings] = None  # populated by load_golden_flags
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the golden-file command line flags with pytest.

    Importing this into a conftest.py wires it up as a pytest hook; it also
    records that registration happened so get_golden_settings can detect
    misconfiguration.
    """
    parser.addoption("--update-golden-files", action="store_true", default=False)
    # TODO: Deprecate and remove this flag.
    parser.addoption("--copy-output-files", action="store_true", default=False)

    global _registered
    _registered = True
@pytest.fixture(scope="session", autouse=True)
def load_golden_flags(pytestconfig: pytest.Config) -> None:
    """Session-scoped autouse fixture: capture golden-file flags into module state."""
    update = pytestconfig.getoption("--update-golden-files")
    copy = pytestconfig.getoption("--copy-output-files")

    global _settings
    _settings = GoldenFileSettings(update_golden=update, copy_output=copy)
def get_golden_settings() -> GoldenFileSettings:
    """Return the golden-file settings captured by the load_golden_flags fixture.

    Raises:
        ValueError: if pytest_addoption was never registered, or if the
            load_golden_flags fixture has not run (both indicate the hooks
            were not imported into a conftest.py).
    """
    if not _registered:
        # The old message told users to call a nonexistent
        # "register_golden_flags" from a "pytest_addoptions" method; the
        # actual wiring is importing pytest_addoption into a conftest.
        raise ValueError(
            "Golden files aren't set up properly. "
            "Import pytest_addoption from this module in your conftest."
        )
    if _settings is None:
        raise ValueError(
            "Golden files aren't set up properly. Ensure load_golden_flags is imported in your conftest."
        )
    return _settings

View File

@ -22,6 +22,10 @@ os.environ["DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES"] = "1"
# We need our imports to go below the os.environ updates, since mere act
# of importing some datahub modules will load env variables.
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)
from tests.test_helpers.docker_helpers import ( # noqa: F401,E402
docker_compose_command,
docker_compose_runner,
@ -54,15 +58,6 @@ def mock_time(monkeypatch):
yield
def pytest_addoption(parser):
parser.addoption(
"--update-golden-files",
action="store_true",
default=False,
)
parser.addoption("--copy-output-files", action="store_true", default=False)
def pytest_collection_modifyitems(
config: pytest.Config, items: List[pytest.Item]
) -> None:

View File

@ -73,9 +73,7 @@ procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()]
@pytest.mark.parametrize("procedure_sql_file", procedure_sqls)
@pytest.mark.integration
def test_stored_procedure_lineage(
pytestconfig: pytest.Config, procedure_sql_file: str
) -> None:
def test_stored_procedure_lineage(procedure_sql_file: str) -> None:
sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file
procedure_code = sql_file_path.read_text()
@ -105,7 +103,6 @@ def test_stored_procedure_lineage(
)
)
mce_helpers.check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=(
PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json")

View File

@ -85,13 +85,14 @@ def check_golden_file(
ignore_paths_v2: Sequence[str] = (),
ignore_order: bool = True,
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
copy_output = pytestconfig.getoption("--copy-output-files")
# TODO: Remove the pytestconfig parameter since it's redundant.
# Or more straightforward - we can remove the `check_golden_file` method
# and use assert_metadata_files_equal directly. Maybe call it "check_golden_metadata"?
# In a lot of cases, the output_path is also just annoying - our pytest setup
# should be responsible for figuring out where to put the temp file.
assert_metadata_files_equal(
output_path=output_path,
golden_path=golden_path,
update_golden=update_golden,
copy_output=copy_output,
ignore_paths=ignore_paths,
ignore_paths_v2=ignore_paths_v2,
ignore_order=ignore_order,
@ -99,7 +100,6 @@ def check_golden_file(
def check_goldens_stream(
pytestconfig: pytest.Config,
outputs: List,
golden_path: Union[str, os.PathLike],
ignore_paths: Sequence[str] = (),
@ -108,8 +108,7 @@ def check_goldens_stream(
with tempfile.NamedTemporaryFile() as f:
write_metadata_file(pathlib.Path(f.name), outputs)
check_golden_file(
pytestconfig=pytestconfig,
assert_metadata_files_equal(
output_path=f.name,
golden_path=golden_path,
ignore_paths=ignore_paths,

View File

@ -1,16 +1,11 @@
import pathlib
import pytest
from datahub.sdk._entity import Entity
from tests.test_helpers import mce_helpers
def assert_entity_golden(
pytestconfig: pytest.Config, entity: Entity, golden_path: pathlib.Path
) -> None:
def assert_entity_golden(entity: Entity, golden_path: pathlib.Path) -> None:
mce_helpers.check_goldens_stream(
pytestconfig=pytestconfig,
outputs=entity._as_mcps(),
golden_path=golden_path,
ignore_order=False,

View File

@ -32,7 +32,6 @@ def test_structuredproperties_load(pytestconfig: pytest.Config) -> None:
mcps.extend(property.generate_mcps())
check_goldens_stream(
pytestconfig,
mcps,
golden_path=RESOURCE_DIR / "example_structured_properties_golden.json",
)

View File

@ -20,7 +20,7 @@ from tests.test_helpers.sdk_v2_helpers import assert_entity_golden
_GOLDEN_DIR = pathlib.Path(__file__).parent / "container_golden"
def test_container_basic(pytestconfig: pytest.Config) -> None:
def test_container_basic() -> None:
db_key = DatabaseKey(
platform="bigquery",
database="my_bq_project",
@ -60,12 +60,10 @@ def test_container_basic(pytestconfig: pytest.Config) -> None:
# This should fail. Eventually we should make it suggest calling set_owners instead.
c.owners = [] # type: ignore
assert_entity_golden(
pytestconfig, c, _GOLDEN_DIR / "test_container_basic_golden.json"
)
assert_entity_golden(c, _GOLDEN_DIR / "test_container_basic_golden.json")
def test_container_complex(pytestconfig: pytest.Config) -> None:
def test_container_complex() -> None:
schema_key = SchemaKey(
platform="snowflake",
instance="my_instance",
@ -75,7 +73,7 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
created = datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
updated = datetime(2025, 1, 9, 3, 4, 6, tzinfo=timezone.utc)
d = Container(
c = Container(
schema_key,
display_name="MY_SCHEMA",
qualified_name="MY_DB.MY_SCHEMA",
@ -100,19 +98,19 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
],
domain=DomainUrn("Marketing"),
)
assert d.platform_instance is not None
assert c.platform_instance is not None
assert (
str(d.platform_instance)
str(c.platform_instance)
== "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)"
)
assert d.subtype == "Schema"
assert d.description == "test"
assert d.display_name == "MY_SCHEMA"
assert d.qualified_name == "MY_DB.MY_SCHEMA"
assert d.external_url == "https://example.com"
assert d.created == created
assert d.last_modified == updated
assert d.custom_properties == {
assert c.subtype == "Schema"
assert c.description == "test"
assert c.display_name == "MY_SCHEMA"
assert c.qualified_name == "MY_DB.MY_SCHEMA"
assert c.external_url == "https://example.com"
assert c.created == created
assert c.last_modified == updated
assert c.custom_properties == {
"platform": "snowflake",
"instance": "my_instance",
"database": "MY_DB",
@ -122,14 +120,12 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
}
# Check standard aspects.
assert d.domain == DomainUrn("Marketing")
assert d.tags is not None
assert len(d.tags) == 2
assert d.terms is not None
assert len(d.terms) == 1
assert d.owners is not None
assert len(d.owners) == 1
assert c.domain == DomainUrn("Marketing")
assert c.tags is not None
assert len(c.tags) == 2
assert c.terms is not None
assert len(c.terms) == 1
assert c.owners is not None
assert len(c.owners) == 1
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_container_complex_golden.json"
)
assert_entity_golden(c, _GOLDEN_DIR / "test_container_complex_golden.json")

View File

@ -65,9 +65,7 @@ def test_dataset_basic(pytestconfig: pytest.Config) -> None:
# This should fail. Eventually we should make it suggest calling set_owners instead.
d.owners = [] # type: ignore
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_basic_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_basic_golden.json")
def _build_complex_dataset() -> Dataset:
@ -161,17 +159,13 @@ def _build_complex_dataset() -> Dataset:
return d
def test_dataset_complex(pytestconfig: pytest.Config) -> None:
def test_dataset_complex() -> None:
d = _build_complex_dataset()
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_complex_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_complex_golden.json")
def test_dataset_ingestion(pytestconfig: pytest.Config) -> None:
def test_dataset_ingestion() -> None:
with change_default_attribution(KnownAttribution.INGESTION):
d = _build_complex_dataset()
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json")

View File

@ -35,7 +35,6 @@ def assert_client_golden(
) -> None:
mcps = client._graph.emit_mcps.call_args[0][0] # type: ignore
mce_helpers.check_goldens_stream(
pytestconfig=pytestconfig,
outputs=mcps,
golden_path=golden_path,
ignore_order=False,

View File

@ -62,7 +62,6 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_lineage.json",
)
@ -86,7 +85,7 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
@freeze_time(FROZEN_TIME)
def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
def test_overlapping_inserts() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -114,14 +113,13 @@ def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts.json",
)
@freeze_time(FROZEN_TIME)
def test_temp_table(pytestconfig: pytest.Config) -> None:
def test_temp_table() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -173,14 +171,13 @@ def test_temp_table(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_temp_table.json",
)
@freeze_time(FROZEN_TIME)
def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
def test_multistep_temp_table() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -235,14 +232,13 @@ def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
== 4
)
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_multistep_temp_table.json",
)
@freeze_time(FROZEN_TIME)
def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> None:
def test_overlapping_inserts_from_temp_tables() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -311,14 +307,13 @@ def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> No
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts_from_temp_tables.json",
)
@freeze_time(FROZEN_TIME)
def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
def test_aggregate_operations() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=False,
@ -360,14 +355,13 @@ def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_aggregate_operations.json",
)
@freeze_time(FROZEN_TIME)
def test_view_lineage(pytestconfig: pytest.Config) -> None:
def test_view_lineage() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -398,14 +392,13 @@ def test_view_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_view_lineage.json",
)
@freeze_time(FROZEN_TIME)
def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
def test_known_lineage_mapping() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -429,14 +422,13 @@ def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_known_lineage_mapping.json",
)
@freeze_time(FROZEN_TIME)
def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
def test_column_lineage_deduplication() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -467,14 +459,13 @@ def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
# which came later and hence has higher precedence.
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_column_lineage_deduplication.json",
)
@freeze_time(FROZEN_TIME)
def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
def test_add_known_query_lineage() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -512,14 +503,13 @@ def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_add_known_query_lineage.json",
)
@freeze_time(FROZEN_TIME)
def test_table_rename(pytestconfig: pytest.Config) -> None:
def test_table_rename() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -570,14 +560,13 @@ def test_table_rename(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename.json",
)
@freeze_time(FROZEN_TIME)
def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
def test_table_rename_with_temp() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -630,14 +619,13 @@ def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename_with_temp.json",
)
@freeze_time(FROZEN_TIME)
def test_table_swap(pytestconfig: pytest.Config) -> None:
def test_table_swap() -> None:
aggregator = SqlParsingAggregator(
platform="snowflake",
generate_lineage=True,
@ -717,14 +705,13 @@ def test_table_swap(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap.json",
)
@freeze_time(FROZEN_TIME)
def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
def test_table_swap_with_temp() -> None:
aggregator = SqlParsingAggregator(
platform="snowflake",
generate_lineage=True,
@ -887,14 +874,13 @@ def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap_with_temp.json",
)
@freeze_time(FROZEN_TIME)
def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
def test_create_table_query_mcps() -> None:
aggregator = SqlParsingAggregator(
platform="bigquery",
generate_lineage=True,
@ -914,16 +900,13 @@ def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_create_table_query_mcps.json",
)
@freeze_time(FROZEN_TIME)
def test_table_lineage_via_temp_table_disordered_add(
pytestconfig: pytest.Config,
) -> None:
def test_table_lineage_via_temp_table_disordered_add() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -949,7 +932,6 @@ def test_table_lineage_via_temp_table_disordered_add(
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR
/ "test_table_lineage_via_temp_table_disordered_add.json",
@ -957,7 +939,7 @@ def test_table_lineage_via_temp_table_disordered_add(
@freeze_time(FROZEN_TIME)
def test_basic_usage(pytestconfig: pytest.Config) -> None:
def test_basic_usage() -> None:
frozen_timestamp = parse_user_datetime(FROZEN_TIME)
aggregator = SqlParsingAggregator(
platform="redshift",
@ -998,7 +980,6 @@ def test_basic_usage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_usage.json",
)

View File

@ -2,22 +2,11 @@ import pathlib
import pytest
import datahub.testing.check_sql_parser_result as checker
from datahub.testing.check_sql_parser_result import assert_sql_result
RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens"
@pytest.fixture(autouse=True)
def set_update_sql_parser(
pytestconfig: pytest.Config, monkeypatch: pytest.MonkeyPatch
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
if update_golden:
monkeypatch.setattr(checker, "UPDATE_FILES", True)
def test_invalid_sql():
assert_sql_result(
"""