feat(ci): add pytest hooks for updating golden files (#12581)

This commit is contained in:
Harshal Sheth 2025-02-12 15:32:31 -08:00 committed by GitHub
parent fe173faf84
commit 7472c535d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 592 additions and 653 deletions

View File

@ -27,7 +27,7 @@ concurrency:
jobs:
metadata-ingestion:
runs-on: ubuntu-latest
timeout-minutes: 40
timeout-minutes: 60
env:
DATAHUB_TELEMETRY_ENABLED: false
# TODO: Enable this once the test is fixed.

View File

@ -1,15 +1,14 @@
import pathlib
import site
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)
def pytest_addoption(parser):
    """Register the --update-golden-files flag (off by default) with pytest."""
    parser.addoption("--update-golden-files", action="store_true", default=False)
# The integration tests run Airflow, with our plugin, in a subprocess.
# To get more accurate coverage, we need to ensure that the coverage
# library is available in the subprocess.
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
coverage_startup_code = "import coverage; coverage.process_startup()"
site_packages_dir = pathlib.Path(site.getsitepackages()[0])

View File

@ -26,7 +26,6 @@ from datahub_airflow_plugin._airflow_shims import (
HAS_AIRFLOW_LISTENER_API,
HAS_AIRFLOW_STANDALONE_CMD,
)
from tests.utils import PytestConfig
pytestmark = pytest.mark.integration
@ -346,18 +345,13 @@ def _run_airflow(
def check_golden_file(
pytestconfig: PytestConfig,
output_path: pathlib.Path,
golden_path: pathlib.Path,
ignore_paths: Sequence[str] = (),
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
assert_metadata_files_equal(
output_path=output_path,
golden_path=golden_path,
update_golden=update_golden,
copy_output=False,
ignore_paths=ignore_paths,
ignore_order=True,
)
@ -434,7 +428,6 @@ test_cases = [
],
)
def test_airflow_plugin(
pytestconfig: PytestConfig,
tmp_path: pathlib.Path,
golden_filename: str,
test_case: DagTestCase,
@ -497,7 +490,6 @@ def test_airflow_plugin(
_sanitize_output_file(airflow_instance.metadata_file)
check_golden_file(
pytestconfig=pytestconfig,
output_path=airflow_instance.metadata_file,
golden_path=golden_path,
ignore_paths=[
@ -512,7 +504,6 @@ def test_airflow_plugin(
if test_case.multiple_connections:
_sanitize_output_file(airflow_instance.metadata_file2)
check_golden_file(
pytestconfig=pytestconfig,
output_path=airflow_instance.metadata_file2,
golden_path=golden_path,
ignore_paths=[

View File

@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ examples/ && " +
"ruff format src/ tests/ examples/ "
}
@ -71,9 +71,9 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
outputs.dir("${venv_name}")
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}
task testQuick(type: Exec, dependsOn: installDevTest) {

View File

@ -0,0 +1,4 @@
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)

View File

@ -1,21 +0,0 @@
import pathlib
import site
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from _pytest.config import Parser
def pytest_addoption(parser: "Parser") -> None:
    """Expose the --update-golden-files boolean flag to pytest (default: off)."""
    flag_kwargs = {"action": "store_true", "default": False}
    parser.addoption("--update-golden-files", **flag_kwargs)
# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
coverage_startup_code = "import coverage; coverage.process_startup()"
site_packages_dir = pathlib.Path(site.getsitepackages()[0])
pth_file_path = site_packages_dir / "datahub_coverage_startup.pth"
pth_file_path.write_text(coverage_startup_code)

View File

@ -23,10 +23,10 @@ from dagster._core.definitions.repository_definition import (
)
from dagster._core.definitions.resource_definition import ResourceDefinition
from freezegun import freeze_time
from utils.utils import PytestConfig, check_golden_file
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubSensors,
@ -96,7 +96,7 @@ TEST_UUIDS = ["uuid_{}".format(i) for i in range(10000)]
@patch.object(uuid, "uuid4", side_effect=TEST_UUIDS)
@patch("datahub_dagster_plugin.sensors.datahub_sensors.DataHubGraph", autospec=True)
@freeze_time(FROZEN_TIME)
def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
def test_emit_metadata(mock_emit: Mock, mock_uuid: Mock) -> None:
mock_emitter = Mock()
mock_emit.return_value = mock_emitter
@ -168,8 +168,7 @@ def test_emit_metadata(mock_emit: Mock, pytestconfig: PytestConfig) -> None:
json_object = json.dumps(mcpws, indent=2)
f.write(json_object)
check_golden_file(
pytestconfig=pytestconfig,
assert_metadata_files_equal(
output_path=pathlib.Path(f"{tmp_path}/test_emit_metadata_mcps.json"),
golden_path=pathlib.Path(
"tests/unit/golden/golden_test_emit_metadata_mcps.json"

View File

@ -1,2 +0,0 @@
def test_dummy():
    """No-op placeholder test."""

View File

@ -1,30 +0,0 @@
import pathlib
from typing import Sequence
from datahub.testing.compare_metadata_json import assert_metadata_files_equal
try:
from pytest import Config as PytestConfig # type: ignore[attr-defined]
except ImportError:
# Support for pytest 6.x.
from _pytest.config import Config as PytestConfig # type: ignore
__all__ = ["PytestConfig"]
def check_golden_file(
    pytestconfig: PytestConfig,
    output_path: pathlib.Path,
    golden_path: pathlib.Path,
    ignore_paths: Sequence[str] = (),
) -> None:
    """Compare *output_path* against *golden_path*.

    Honors pytest's --update-golden-files flag: when set, the golden file is
    rewritten instead of compared. Comparison ignores ordering and any paths
    listed in *ignore_paths*.
    """
    should_update = pytestconfig.getoption("--update-golden-files")
    assert_metadata_files_equal(
        output_path=output_path,
        golden_path=golden_path,
        ignore_paths=ignore_paths,
        ignore_order=True,
        update_golden=should_update,
        copy_output=False,
    )

View File

@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ && " +
"ruff format src/ tests/ "
}

View File

@ -1 +1,5 @@
from datahub.testing.docker_utils import docker_compose_runner # noqa: F401
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)

View File

@ -1,4 +1,3 @@
import os
import shutil
from typing import List
from unittest import mock
@ -23,10 +22,6 @@ except Exception:
use_gx_folder = False
def should_update_golden_file() -> bool:
    """Whether golden files should be regenerated instead of asserted against.

    Controlled by the DATAHUB_GOLDEN_FILE_UPDATE environment variable.
    Unset, empty, "0", "false", and "no" (any casing, surrounding whitespace
    ignored) disable updating; any other value enables it.

    Note: the previous ``bool(os.getenv(..., False))`` check treated *any*
    non-empty string — including "false" and "0" — as truthy, which silently
    rewrote golden files when users tried to disable the flag.
    """
    value = os.getenv("DATAHUB_GOLDEN_FILE_UPDATE", "")
    return value.strip().lower() not in ("", "0", "false", "no")
FROZEN_TIME = "2021-12-28 12:00:00"
@ -84,7 +79,5 @@ def test_ge_ingest(
assert_metadata_files_equal(
output_path=tmp_path / "ge_mcps.json",
golden_path=test_resources_dir / golden_json,
copy_output=False,
update_golden=should_update_golden_file(),
ignore_paths=[],
)

View File

@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
def sentinel_file = "${venv_name}/.build_install_package_sentinel"
inputs.file file('setup.py')
outputs.file(sentinel_file)
commandLine 'bash', '-x', '-c',
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e . ${extra_pip_requirements} &&" +
"touch ${sentinel_file}"
@ -45,7 +45,7 @@ task installDev(type: Exec, dependsOn: [install]) {
def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
inputs.file file('setup.py')
outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c',
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
@ -59,8 +59,8 @@ task lint(type: Exec, dependsOn: installDev) {
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"ruff check --fix src/ tests/ && " +
"ruff format src/ tests/ "
}
@ -70,8 +70,10 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
inputs.file file('setup.py')
outputs.dir("${venv_name}")
outputs.file("${sentinel_file}")
commandLine 'bash', '-x', '-c',
"${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}"
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${pip_install_command} -e .[dev,integration-tests] && " +
"touch ${sentinel_file}"
}
task testQuick(type: Exec, dependsOn: installDevTest) {

View File

@ -1,5 +1,4 @@
import logging
import os
import pathlib
from typing import Any, Dict, Optional
@ -8,11 +7,10 @@ import deepdiff
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
from datahub.testing.pytest_hooks import get_golden_settings
logger = logging.getLogger(__name__)
UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
def assert_sql_result_with_resolver(
sql: str,
@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
allow_table_error: bool = False,
**kwargs: Any,
) -> None:
settings = get_golden_settings()
# HACK: Our BigQuery source overwrites this value and doesn't undo it.
# As such, we need to handle that here.
BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
)
txt = res.json(indent=4)
if UPDATE_FILES:
if settings.update_golden:
expected_file.write_text(txt)
return
if not expected_file.exists():
expected_file.write_text(txt)
raise AssertionError(
f"Expected file {expected_file} does not exist. "
"Created it with the expected output. Please verify it."
f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
)
expected = SqlParsingResult.parse_raw(expected_file.read_text())

View File

@ -16,6 +16,7 @@ from deepdiff import DeepDiff
from datahub.ingestion.sink.file import write_metadata_file
from datahub.ingestion.source.file import read_metadata_file
from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
from datahub.testing.pytest_hooks import get_golden_settings
logger = logging.getLogger(__name__)
@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
def assert_metadata_files_equal(
output_path: Union[str, os.PathLike],
golden_path: Union[str, os.PathLike],
update_golden: bool,
copy_output: bool,
ignore_paths: Sequence[str] = (),
ignore_paths_v2: Sequence[str] = (),
ignore_order: bool = True,
) -> None:
settings = get_golden_settings()
golden_exists = os.path.isfile(golden_path)
if copy_output:
if settings.copy_output:
shutil.copyfile(str(output_path), str(golden_path) + ".output")
logger.info(f"Copied output file to {golden_path}.output")
if not update_golden and not golden_exists:
if not settings.update_golden and not golden_exists:
raise FileNotFoundError(
"Golden file does not exist. Please run with the --update-golden-files option to create."
)
output = load_json_file(output_path)
if update_golden and not golden_exists:
if settings.update_golden and not golden_exists:
shutil.copyfile(str(output_path), str(golden_path))
return
else:
@ -87,7 +88,7 @@ def assert_metadata_files_equal(
ignore_paths = (*ignore_paths, *default_exclude_paths)
diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
if diff and update_golden:
if diff and settings.update_golden:
if isinstance(diff, MCPDiff) and diff.is_delta_valid:
logger.info(f"Applying delta to golden file {golden_path}")
diff.apply_delta(golden)

View File

@ -0,0 +1,56 @@
import dataclasses
from typing import Optional
import pytest
__all__ = [
"load_golden_flags",
"get_golden_settings",
"pytest_addoption",
"GoldenFileSettings",
]
@dataclasses.dataclass
class GoldenFileSettings:
    """Flags controlling golden-file comparisons.

    update_golden: rewrite golden files instead of asserting against them
        (from --update-golden-files).
    copy_output: also copy test output alongside the golden file
        (from --copy-output-files).
    """

    update_golden: bool
    copy_output: bool
# Module-level plugin state, mutated by pytest_addoption and the
# load_golden_flags fixture defined below.
_registered: bool = False  # set to True once pytest_addoption has run
_settings: Optional[GoldenFileSettings] = None  # populated by load_golden_flags
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the golden-file command line flags with pytest.

    Importing this into a conftest.py wires it up as a pytest hook; it also
    records that registration happened so get_golden_settings can detect
    misconfiguration.
    """
    parser.addoption("--update-golden-files", action="store_true", default=False)
    # TODO: Deprecate and remove this flag.
    parser.addoption("--copy-output-files", action="store_true", default=False)

    global _registered
    _registered = True
@pytest.fixture(scope="session", autouse=True)
def load_golden_flags(pytestconfig: pytest.Config) -> None:
    """Session-scoped autouse fixture: capture golden-file flags into module state."""
    update = pytestconfig.getoption("--update-golden-files")
    copy = pytestconfig.getoption("--copy-output-files")

    global _settings
    _settings = GoldenFileSettings(update_golden=update, copy_output=copy)
def get_golden_settings() -> GoldenFileSettings:
    """Return the golden-file settings captured by the load_golden_flags fixture.

    Raises:
        ValueError: if pytest_addoption was never registered, or if the
            load_golden_flags fixture has not run (both indicate the hooks
            were not imported into a conftest.py).
    """
    if not _registered:
        # The old message told users to call a nonexistent
        # "register_golden_flags" from a "pytest_addoptions" method; the
        # actual wiring is importing pytest_addoption into a conftest.
        raise ValueError(
            "Golden files aren't set up properly. "
            "Import pytest_addoption from this module in your conftest."
        )
    if _settings is None:
        raise ValueError(
            "Golden files aren't set up properly. Ensure load_golden_flags is imported in your conftest."
        )
    return _settings

View File

@ -22,6 +22,10 @@ os.environ["DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES"] = "1"
# We need our imports to go below the os.environ updates, since mere act
# of importing some datahub modules will load env variables.
from datahub.testing.pytest_hooks import ( # noqa: F401,E402
load_golden_flags,
pytest_addoption,
)
from tests.test_helpers.docker_helpers import ( # noqa: F401,E402
docker_compose_command,
docker_compose_runner,
@ -54,15 +58,6 @@ def mock_time(monkeypatch):
yield
def pytest_addoption(parser):
parser.addoption(
"--update-golden-files",
action="store_true",
default=False,
)
parser.addoption("--copy-output-files", action="store_true", default=False)
def pytest_collection_modifyitems(
config: pytest.Config, items: List[pytest.Item]
) -> None:

View File

@ -73,9 +73,7 @@ procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()]
@pytest.mark.parametrize("procedure_sql_file", procedure_sqls)
@pytest.mark.integration
def test_stored_procedure_lineage(
pytestconfig: pytest.Config, procedure_sql_file: str
) -> None:
def test_stored_procedure_lineage(procedure_sql_file: str) -> None:
sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file
procedure_code = sql_file_path.read_text()
@ -105,7 +103,6 @@ def test_stored_procedure_lineage(
)
)
mce_helpers.check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=(
PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json")

View File

@ -85,13 +85,14 @@ def check_golden_file(
ignore_paths_v2: Sequence[str] = (),
ignore_order: bool = True,
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
copy_output = pytestconfig.getoption("--copy-output-files")
# TODO: Remove the pytestconfig parameter since it's redundant.
# Or more straightforward - we can remove the `check_golden_file` method
# and use assert_metadata_files_equal directly. Maybe call it "check_golden_metadata"?
# In a lot of cases, the output_path is also just annoying - our pytest setup
# should be responsible for figuring out where to put the temp file.
assert_metadata_files_equal(
output_path=output_path,
golden_path=golden_path,
update_golden=update_golden,
copy_output=copy_output,
ignore_paths=ignore_paths,
ignore_paths_v2=ignore_paths_v2,
ignore_order=ignore_order,
@ -99,7 +100,6 @@ def check_golden_file(
def check_goldens_stream(
pytestconfig: pytest.Config,
outputs: List,
golden_path: Union[str, os.PathLike],
ignore_paths: Sequence[str] = (),
@ -108,8 +108,7 @@ def check_goldens_stream(
with tempfile.NamedTemporaryFile() as f:
write_metadata_file(pathlib.Path(f.name), outputs)
check_golden_file(
pytestconfig=pytestconfig,
assert_metadata_files_equal(
output_path=f.name,
golden_path=golden_path,
ignore_paths=ignore_paths,

View File

@ -1,16 +1,11 @@
import pathlib
import pytest
from datahub.sdk._entity import Entity
from tests.test_helpers import mce_helpers
def assert_entity_golden(
pytestconfig: pytest.Config, entity: Entity, golden_path: pathlib.Path
) -> None:
def assert_entity_golden(entity: Entity, golden_path: pathlib.Path) -> None:
mce_helpers.check_goldens_stream(
pytestconfig=pytestconfig,
outputs=entity._as_mcps(),
golden_path=golden_path,
ignore_order=False,

View File

@ -32,7 +32,6 @@ def test_structuredproperties_load(pytestconfig: pytest.Config) -> None:
mcps.extend(property.generate_mcps())
check_goldens_stream(
pytestconfig,
mcps,
golden_path=RESOURCE_DIR / "example_structured_properties_golden.json",
)

View File

@ -20,7 +20,7 @@ from tests.test_helpers.sdk_v2_helpers import assert_entity_golden
_GOLDEN_DIR = pathlib.Path(__file__).parent / "container_golden"
def test_container_basic(pytestconfig: pytest.Config) -> None:
def test_container_basic() -> None:
db_key = DatabaseKey(
platform="bigquery",
database="my_bq_project",
@ -60,12 +60,10 @@ def test_container_basic(pytestconfig: pytest.Config) -> None:
# This should fail. Eventually we should make it suggest calling set_owners instead.
c.owners = [] # type: ignore
assert_entity_golden(
pytestconfig, c, _GOLDEN_DIR / "test_container_basic_golden.json"
)
assert_entity_golden(c, _GOLDEN_DIR / "test_container_basic_golden.json")
def test_container_complex(pytestconfig: pytest.Config) -> None:
def test_container_complex() -> None:
schema_key = SchemaKey(
platform="snowflake",
instance="my_instance",
@ -75,7 +73,7 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
created = datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
updated = datetime(2025, 1, 9, 3, 4, 6, tzinfo=timezone.utc)
d = Container(
c = Container(
schema_key,
display_name="MY_SCHEMA",
qualified_name="MY_DB.MY_SCHEMA",
@ -100,19 +98,19 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
],
domain=DomainUrn("Marketing"),
)
assert d.platform_instance is not None
assert c.platform_instance is not None
assert (
str(d.platform_instance)
str(c.platform_instance)
== "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,my_instance)"
)
assert d.subtype == "Schema"
assert d.description == "test"
assert d.display_name == "MY_SCHEMA"
assert d.qualified_name == "MY_DB.MY_SCHEMA"
assert d.external_url == "https://example.com"
assert d.created == created
assert d.last_modified == updated
assert d.custom_properties == {
assert c.subtype == "Schema"
assert c.description == "test"
assert c.display_name == "MY_SCHEMA"
assert c.qualified_name == "MY_DB.MY_SCHEMA"
assert c.external_url == "https://example.com"
assert c.created == created
assert c.last_modified == updated
assert c.custom_properties == {
"platform": "snowflake",
"instance": "my_instance",
"database": "MY_DB",
@ -122,14 +120,12 @@ def test_container_complex(pytestconfig: pytest.Config) -> None:
}
# Check standard aspects.
assert d.domain == DomainUrn("Marketing")
assert d.tags is not None
assert len(d.tags) == 2
assert d.terms is not None
assert len(d.terms) == 1
assert d.owners is not None
assert len(d.owners) == 1
assert c.domain == DomainUrn("Marketing")
assert c.tags is not None
assert len(c.tags) == 2
assert c.terms is not None
assert len(c.terms) == 1
assert c.owners is not None
assert len(c.owners) == 1
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_container_complex_golden.json"
)
assert_entity_golden(c, _GOLDEN_DIR / "test_container_complex_golden.json")

View File

@ -65,9 +65,7 @@ def test_dataset_basic(pytestconfig: pytest.Config) -> None:
# This should fail. Eventually we should make it suggest calling set_owners instead.
d.owners = [] # type: ignore
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_basic_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_basic_golden.json")
def _build_complex_dataset() -> Dataset:
@ -161,17 +159,13 @@ def _build_complex_dataset() -> Dataset:
return d
def test_dataset_complex(pytestconfig: pytest.Config) -> None:
def test_dataset_complex() -> None:
d = _build_complex_dataset()
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_complex_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_complex_golden.json")
def test_dataset_ingestion(pytestconfig: pytest.Config) -> None:
def test_dataset_ingestion() -> None:
with change_default_attribution(KnownAttribution.INGESTION):
d = _build_complex_dataset()
assert_entity_golden(
pytestconfig, d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json"
)
assert_entity_golden(d, _GOLDEN_DIR / "test_dataset_ingestion_golden.json")

View File

@ -35,7 +35,6 @@ def assert_client_golden(
) -> None:
mcps = client._graph.emit_mcps.call_args[0][0] # type: ignore
mce_helpers.check_goldens_stream(
pytestconfig=pytestconfig,
outputs=mcps,
golden_path=golden_path,
ignore_order=False,

View File

@ -62,7 +62,6 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_lineage.json",
)
@ -86,7 +85,7 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
@freeze_time(FROZEN_TIME)
def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
def test_overlapping_inserts() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -114,14 +113,13 @@ def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts.json",
)
@freeze_time(FROZEN_TIME)
def test_temp_table(pytestconfig: pytest.Config) -> None:
def test_temp_table() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -173,14 +171,13 @@ def test_temp_table(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_temp_table.json",
)
@freeze_time(FROZEN_TIME)
def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
def test_multistep_temp_table() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -235,14 +232,13 @@ def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
== 4
)
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_multistep_temp_table.json",
)
@freeze_time(FROZEN_TIME)
def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> None:
def test_overlapping_inserts_from_temp_tables() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -311,14 +307,13 @@ def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> No
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts_from_temp_tables.json",
)
@freeze_time(FROZEN_TIME)
def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
def test_aggregate_operations() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=False,
@ -360,14 +355,13 @@ def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_aggregate_operations.json",
)
@freeze_time(FROZEN_TIME)
def test_view_lineage(pytestconfig: pytest.Config) -> None:
def test_view_lineage() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -398,14 +392,13 @@ def test_view_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_view_lineage.json",
)
@freeze_time(FROZEN_TIME)
def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
def test_known_lineage_mapping() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -429,14 +422,13 @@ def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_known_lineage_mapping.json",
)
@freeze_time(FROZEN_TIME)
def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
def test_column_lineage_deduplication() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -467,14 +459,13 @@ def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
# which came later and hence has higher precedence.
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_column_lineage_deduplication.json",
)
@freeze_time(FROZEN_TIME)
def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
def test_add_known_query_lineage() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -512,14 +503,13 @@ def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_add_known_query_lineage.json",
)
@freeze_time(FROZEN_TIME)
def test_table_rename(pytestconfig: pytest.Config) -> None:
def test_table_rename() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -570,14 +560,13 @@ def test_table_rename(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename.json",
)
@freeze_time(FROZEN_TIME)
def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
def test_table_rename_with_temp() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -630,14 +619,13 @@ def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename_with_temp.json",
)
@freeze_time(FROZEN_TIME)
def test_table_swap(pytestconfig: pytest.Config) -> None:
def test_table_swap() -> None:
aggregator = SqlParsingAggregator(
platform="snowflake",
generate_lineage=True,
@ -717,14 +705,13 @@ def test_table_swap(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap.json",
)
@freeze_time(FROZEN_TIME)
def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
def test_table_swap_with_temp() -> None:
aggregator = SqlParsingAggregator(
platform="snowflake",
generate_lineage=True,
@ -887,14 +874,13 @@ def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap_with_temp.json",
)
@freeze_time(FROZEN_TIME)
def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
def test_create_table_query_mcps() -> None:
aggregator = SqlParsingAggregator(
platform="bigquery",
generate_lineage=True,
@ -914,16 +900,13 @@ def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_create_table_query_mcps.json",
)
@freeze_time(FROZEN_TIME)
def test_table_lineage_via_temp_table_disordered_add(
pytestconfig: pytest.Config,
) -> None:
def test_table_lineage_via_temp_table_disordered_add() -> None:
aggregator = SqlParsingAggregator(
platform="redshift",
generate_lineage=True,
@ -949,7 +932,6 @@ def test_table_lineage_via_temp_table_disordered_add(
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR
/ "test_table_lineage_via_temp_table_disordered_add.json",
@ -957,7 +939,7 @@ def test_table_lineage_via_temp_table_disordered_add(
@freeze_time(FROZEN_TIME)
def test_basic_usage(pytestconfig: pytest.Config) -> None:
def test_basic_usage() -> None:
frozen_timestamp = parse_user_datetime(FROZEN_TIME)
aggregator = SqlParsingAggregator(
platform="redshift",
@ -998,7 +980,6 @@ def test_basic_usage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_usage.json",
)

View File

@ -2,22 +2,11 @@ import pathlib
import pytest
import datahub.testing.check_sql_parser_result as checker
from datahub.testing.check_sql_parser_result import assert_sql_result
RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens"
@pytest.fixture(autouse=True)
def set_update_sql_parser(
pytestconfig: pytest.Config, monkeypatch: pytest.MonkeyPatch
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
if update_golden:
monkeypatch.setattr(checker, "UPDATE_FILES", True)
def test_invalid_sql():
assert_sql_result(
"""