Mirror of https://github.com/datahub-project/datahub.git (synced 2025-11-09 16:03:31 +00:00)

chore(cli): drop support for python 3.7 (#9731)

commit 1498c36875 (parent f3cc4e068a)

.github/workflows/metadata-ingestion.yml (vendored)
@@ -31,7 +31,7 @@ jobs:
      # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
    strategy:
      matrix:
-        python-version: ["3.7", "3.10"]
+        python-version: ["3.8", "3.10"]
        command:
          [
            "testQuick",
@@ -40,7 +40,7 @@ jobs:
            "testIntegrationBatch2",
          ]
        include:
-          - python-version: "3.7"
+          - python-version: "3.8"
          - python-version: "3.10"
      fail-fast: false
    steps:
@@ -24,7 +24,7 @@ source venv/bin/activate # activate the environment
Once inside the virtual environment, install `datahub` using the following commands

```shell
-# Requires Python 3.7+
+# Requires Python 3.8+
python3 -m pip install --upgrade pip wheel setuptools
python3 -m pip install --upgrade acryl-datahub
# validate that the install was successful
@@ -10,8 +10,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- Neo4j 5.x, may require migration from 4.x
- Build requires JDK17 (Runtime Java 11)
- Build requires Docker Compose > 2.20
+- #9731 - The `acryl-datahub` CLI now requires Python 3.8+
- #9601 - The Unity Catalog (UC) ingestion source config `include_metastore` is now disabled by default. This change will affect the urns of all entities in the workspace.<br/>
  Entity Hierarchy with `include_metastore: true` (Old)

  ```
  - UC Metastore
    - Catalog
@@ -20,15 +22,18 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
  ```

  Entity Hierarchy with `include_metastore: false` (New)

  ```
  - Catalog
    - Schema
      - Table
  ```

  We recommend using `platform_instance` for differentiating across metastores.

  If stateful ingestion is enabled, running ingestion with the latest CLI version will perform all required cleanup. Otherwise, we recommend soft deleting all Databricks data via the DataHub CLI:
  `datahub delete --platform databricks --soft` and then reingesting with the latest CLI version.

- #9601 - The Unity Catalog (UC) ingestion source config `include_hive_metastore` is now enabled by default. This requires the config `warehouse_id` to be set. You can disable `include_hive_metastore` by setting it to `False` to avoid ingesting the legacy Hive metastore catalog in Databricks.

### Potential Downtime
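To make the `platform_instance` recommendation above concrete: a minimal, hypothetical recipe for ingesting one metastore, written as the kind of config dict that the ingestion tests later in this diff pass to `Pipeline.create`. The `unity-catalog` source type name and the `workspace_url`/`token` fields are assumptions for illustration; `include_metastore`, `include_hive_metastore`, `warehouse_id`, and `platform_instance` are the settings discussed above.

```python
# Sketch only: one recipe per metastore, using platform_instance to keep URNs distinct
# now that include_metastore defaults to False. Connection values are placeholders.
from datahub.ingestion.run.pipeline import Pipeline

config = {
    "run_id": "unity-catalog-metastore-a",
    "source": {
        "type": "unity-catalog",  # assumed source type name
        "config": {
            "workspace_url": "https://example.cloud.databricks.com",  # placeholder
            "token": "<databricks-personal-access-token>",  # placeholder
            "include_metastore": False,  # the new default described above
            "include_hive_metastore": True,  # new default; requires warehouse_id
            "warehouse_id": "<sql-warehouse-id>",
            "platform_instance": "metastore_a",  # differentiates metastores in URNs
        },
    },
    "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
}

if __name__ == "__main__":
    Pipeline.create(config).run()
```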
@@ -22,7 +22,7 @@ If you're interested in a managed version, [Acryl Data](https://www.acryldata.io
| Linux | [Docker for Linux](https://docs.docker.com/desktop/install/linux-install/) and [Docker Compose](https://docs.docker.com/compose/install/linux/) |

- **Launch the Docker engine** from command line or the desktop app.
-- Ensure you have **Python 3.7+** installed & configured. (Check using `python3 --version`).
+- Ensure you have **Python 3.8+** installed & configured. (Check using `python3 --version`).

:::note Docker Resource Allocation
@@ -18,16 +18,10 @@ _version: str = package_metadata["__version__"]
_self_pin = f"=={_version}" if not _version.endswith("dev0") else ""


rest_common = {"requests", "requests_file"}

base_requirements = {
    # Compatibility.
-    "dataclasses>=0.6; python_version < '3.7'",
    "mypy_extensions>=0.4.3",
    f"acryl-datahub[datahub-rest]{_self_pin}",
    # Actual dependencies.
    "pydantic>=1.5.1",
    "apache-airflow >= 2.0.2",
    *rest_common,
}

plugins: Dict[str, Set[str]] = {
@@ -42,9 +36,8 @@ plugins: Dict[str, Set[str]] = {
    },
    "plugin-v1": set(),
    "plugin-v2": {
-        # The v2 plugin requires Python 3.8+.
        f"acryl-datahub[sql-parser]{_self_pin}",
-        "openlineage-airflow==1.2.0; python_version >= '3.8'",
+        "openlineage-airflow==1.2.0",
    },
}

@@ -144,7 +137,6 @@ setuptools.setup(
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
@@ -161,7 +153,7 @@ setuptools.setup(
    ],
    # Package info.
    zip_safe=False,
-    python_requires=">=3.7",
+    python_requires=">=3.8",
    package_data={
        "datahub_airflow_plugin": ["py.typed"],
    },
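For context on the lines deleted above: suffixes such as `; python_version >= '3.8'` and `; python_version < '3.7'` are PEP 508 environment markers that pip evaluates against the running interpreter, and they become redundant once `python_requires=">=3.8"` rules out older interpreters entirely. A small sketch of how such a marker evaluates, using the third-party `packaging` library (illustrative only, not something this commit touches):

```python
# Evaluate a PEP 508 environment marker against the current interpreter.
# Requires `pip install packaging`; illustrative, not part of the commit.
from packaging.requirements import Requirement

req = Requirement("openlineage-airflow==1.2.0; python_version >= '3.8'")
print(req.name)               # openlineage-airflow
print(str(req.specifier))     # ==1.2.0
print(req.marker.evaluate())  # True on Python 3.8+, False on 3.7
```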
@@ -1,7 +1,6 @@
import datetime
import json
import os
-import sys
from contextlib import contextmanager
from typing import Iterator
from unittest import mock
@@ -318,8 +317,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
# Check that the right things were emitted.
assert mock_emitter.emit.call_count == 17 if capture_executions else 9

-# Running further checks based on python version because args only exists in python 3.8+
-if sys.version_info > (3, 8):
# TODO: Replace this with a golden file-based comparison.
assert mock_emitter.method_calls[0].args[0].aspectName == "dataFlowInfo"
assert (
mock_emitter.method_calls[0].args[0].entityUrn
@@ -344,9 +342,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
== "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
)

-assert (
-    mock_emitter.method_calls[4].args[0].aspectName == "dataJobInputOutput"
-)
+assert mock_emitter.method_calls[4].args[0].aspectName == "dataJobInputOutput"
assert (
mock_emitter.method_calls[4].args[0].entityUrn
== "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
@@ -17,7 +17,7 @@ def get_coverage_arg(test_name) {

task checkPythonVersion(type: Exec) {
  commandLine python_executable, '-c',
-    'import sys; assert (3, 11) > sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"'
+    'import sys; assert (3, 11) > sys.version_info >= (3, 8), f"Python version {sys.version_info[:2]} not allowed"'
}

task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
@@ -3,14 +3,16 @@
## Installing the CLI

Make sure you have installed DataHub CLI before following this guide.

```shell
-# Requires Python 3.7+
+# Requires Python 3.8+
python3 -m pip install --upgrade pip wheel setuptools
python3 -m pip install --upgrade acryl-datahub
# validate that the install was successful
datahub version
# If you see "command not found", try running this instead: python3 -m datahub version
```

Check out the [CLI Installation Guide](../docs/cli.md#installation) for more installation options and troubleshooting tips.

After that, install the required plugin for the ingestion.
@@ -18,10 +20,13 @@ After that, install the required plugin for the ingestion.
```shell
pip install 'acryl-datahub[datahub-rest]'  # install the required plugin
```

Check out the [alternative installation options](../docs/cli.md#alternate-installation-options) for more reference.

## Configuring a Recipe

Create a recipe.yml file that defines the source and sink for metadata, as shown below.

```yaml
# my_recipe.yml
source:
@@ -39,6 +44,7 @@ sink:
For more information and examples on configuring recipes, please refer to [Recipes](recipe_overview.md).

## Ingesting Metadata

You can run ingestion using `datahub ingest` like below.

```shell
@@ -48,6 +54,7 @@ datahub ingest -c <path_to_recipe_file.yml>
## Reference

Please refer to the following pages for advanced guides on CLI ingestion.

- [Reference for `datahub ingest` command](../docs/cli.md#ingest)
- [UI Ingestion Guide](../docs/ui-ingestion.md)
@@ -9,10 +9,10 @@ Also take a look at the guide to [adding a source](./adding-source.md).

### Requirements

-1. Python 3.7+ must be installed in your host environment.
+1. Python 3.8+ must be installed in your host environment.
2. Java 17 (gradle won't work with newer or older versions)
-4. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv`
-5. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel`
+3. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv`
+4. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel`

### Set up your Python environment
@@ -1,4 +1,3 @@
-import sys
from typing import Dict, Set

import setuptools
@@ -11,7 +10,6 @@ with open("./src/datahub/__init__.py") as fp:
base_requirements = {
    # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to a Airflow 2.1 dependency conflict.
    "typing_extensions>=3.7.4.3",
    "mypy_extensions>=0.4.3",
    # Actual dependencies.
    "typing-inspect",
    # pydantic 1.8.2 is incompatible with mypy 0.910.
@@ -48,9 +46,7 @@ framework_common = {
    "click-spinner",
    "requests_file",
    "jsonref",
-    # jsonschema drops python 3.7 support in v4.18.0
-    "jsonschema<=4.17.3; python_version < '3.8'",
-    "jsonschema; python_version >= '3.8'",
+    "jsonschema",
    "ruamel.yaml",
}

@@ -463,7 +459,7 @@ base_dev_requirements = {
    "black==22.12.0",
    "coverage>=5.1",
    "faker>=18.4.0",
-    "flake8>=3.8.3",  # DEPRECATION: Once we drop Python 3.7, we can pin to 6.x.
+    "flake8>=6.0.0",
    "flake8-tidy-imports>=4.3.0",
    "flake8-bugbear==23.3.12",
    "isort>=5.7.0",
@@ -489,9 +485,9 @@ base_dev_requirements = {
    "delta-lake",
    "druid",
    "elasticsearch",
-    "feast" if sys.version_info >= (3, 8) else None,
-    "iceberg" if sys.version_info >= (3, 8) else None,
-    "mlflow" if sys.version_info >= (3, 8) else None,
+    "feast",
+    "iceberg",
+    "mlflow",
    "json-schema",
    "ldap",
    "looker",
@@ -544,14 +540,14 @@ full_test_dev_requirements = {
    "clickhouse",
    "delta-lake",
    "druid",
-    "feast" if sys.version_info >= (3, 8) else None,
+    "feast",
    "hana",
    "hive",
-    "iceberg" if sys.version_info >= (3, 8) else None,
+    "iceberg",
    "kafka-connect",
    "ldap",
    "mongodb",
-    "mssql" if sys.version_info >= (3, 8) else None,
+    "mssql",
    "mysql",
    "mariadb",
    "redash",
@@ -699,7 +695,6 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
@@ -716,7 +711,7 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
    ],
    # Package info.
    zip_safe=False,
-    python_requires=">=3.7",
+    python_requires=">=3.8",
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="./src"),
    package_data={
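The `"feast" if sys.version_info >= (3, 8) else None` entries deleted above only existed so that 3.8-only plugins could be skipped on Python 3.7, which meant a `None` placeholder had to be tolerated or filtered somewhere downstream. A standalone sketch of that before/after pattern (the variable name and the `discard` step are illustrative, not the actual setup.py code):

```python
import sys

# Before the 3.8 floor: conditional members left None placeholders behind.
dev_plugins = {
    "feast" if sys.version_info >= (3, 8) else None,
    "mlflow" if sys.version_info >= (3, 8) else None,
    "mysql",
}
dev_plugins.discard(None)  # the placeholder had to be stripped on Python 3.7

# After the 3.8 floor: the same set can be written unconditionally.
dev_plugins = {"feast", "mlflow", "mysql"}
print(sorted(dev_plugins))
```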
@@ -16,16 +16,9 @@ def nice_version_name() -> str:
    return __version__


-if sys.version_info < (3, 7):
+if sys.version_info < (3, 8):
    warnings.warn(
-        "DataHub requires Python 3.7 or newer. "
-        "Please upgrade your Python version to continue using DataHub.",
-        FutureWarning,
-        stacklevel=2,
-    )
-elif sys.version_info < (3, 8):
-    warnings.warn(
-        "DataHub will require Python 3.8 or newer soon. "
+        "DataHub requires Python 3.8 or newer. "
        "Please upgrade your Python version to continue using DataHub.",
        FutureWarning,
        stacklevel=2,
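The guard above is also a handy ad hoc check before upgrading the CLI on an existing environment; a minimal standalone sketch (illustrative, not part of the commit):

```python
# Pre-upgrade sanity check mirroring the guard above (illustrative only).
import sys

if sys.version_info < (3, 8):
    raise SystemExit(
        f"Python {sys.version_info[0]}.{sys.version_info[1]} detected; "
        "the acryl-datahub CLI now requires Python 3.8 or newer."
    )
print("Python version is new enough for the acryl-datahub CLI.")
```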
@@ -2,11 +2,10 @@ import dataclasses
import json
import logging
import pprint
-import sys
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Optional

import humanfriendly
import pydantic
@@ -19,12 +18,6 @@ from datahub.utilities.lossy_collections import LossyList
logger = logging.getLogger(__name__)
LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]

-# The sort_dicts option was added in Python 3.8.
-if sys.version_info >= (3, 8):
-    PPRINT_OPTIONS = {"sort_dicts": False}
-else:
-    PPRINT_OPTIONS: Dict = {}


@runtime_checkable
class SupportsAsObj(Protocol):
@@ -32,14 +25,6 @@ class SupportsAsObj(Protocol):
        ...


-def _stacklevel_if_supported(level: int) -> dict:
-    # The logging module added support for stacklevel in Python 3.8.
-    if sys.version_info >= (3, 8):
-        return {"stacklevel": level}
-    else:
-        return {}


@dataclass
class Report(SupportsAsObj):
    @staticmethod
@@ -95,7 +80,7 @@ class Report(SupportsAsObj):
        }

    def as_string(self) -> str:
-        return pprint.pformat(self.as_obj(), width=150, **PPRINT_OPTIONS)
+        return pprint.pformat(self.as_obj(), width=150, sort_dicts=False)

    def as_json(self) -> str:
        return json.dumps(self.as_obj())
@@ -118,7 +103,7 @@ class ReportAttribute(BaseModel):
        return log_levels[self.severity]

    def log(self, msg: str) -> None:
-        logger.log(level=self.logger_sev, msg=msg, **_stacklevel_if_supported(3))
+        logger.log(level=self.logger_sev, msg=msg, stacklevel=3)


class EntityFilterReport(ReportAttribute):
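Both helpers removed above existed only because `pprint`'s `sort_dicts` argument and `logging`'s `stacklevel` argument were introduced in Python 3.8; with 3.8 as the floor they can be passed directly, as the new lines do. A self-contained sketch of the two arguments (illustrative, separate from the report classes):

```python
# Illustrates the two Python 3.8+ keyword arguments that made the shims unnecessary.
import logging
import pprint

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("report-demo")

data = {"zebra": 1, "apple": 2}
# sort_dicts=False preserves insertion order instead of sorting keys alphabetically.
print(pprint.pformat(data, width=150, sort_dicts=False))


def log_from_helper(msg: str) -> None:
    # stacklevel=2 attributes the record to this helper's caller, not the helper itself.
    logger.log(level=logging.INFO, msg=msg, stacklevel=2)


log_from_helper("emitted with the caller's file and line number")
```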
@@ -1,8 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("Feast is only supported on Python 3.8+")
-
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, Tuple, Union
@@ -1,8 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("Iceberg is only supported on Python 3.8+")
-
import json
import logging
import uuid
@@ -1,9 +1,3 @@
-import sys
-
-if sys.version_info < (3, 8):
-    raise ImportError("MLflow is only supported on Python 3.8+")
-
-
from dataclasses import dataclass
from typing import Any, Callable, Iterable, Optional, TypeVar, Union
@@ -1,7 +1,7 @@
from collections import Counter
from typing import Any, Counter as CounterType, Dict, Sequence, Tuple, Union

-from mypy_extensions import TypedDict
+from typing_extensions import TypedDict


class BasicSchemaDescription(TypedDict):
@@ -1,6 +1,3 @@
-import sys
-
import pytest
from freezegun import freeze_time

from datahub.ingestion.run.pipeline import Pipeline
@@ -8,10 +5,6 @@ from tests.test_helpers import mce_helpers

FROZEN_TIME = "2020-04-14 07:00:00"

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)
-

@freeze_time(FROZEN_TIME)
def test_feast_repository_ingest(pytestconfig, tmp_path, mock_time):
@@ -1,5 +1,4 @@
import subprocess
-import sys
from typing import Any, Dict, List
from unittest.mock import patch

@@ -15,13 +14,7 @@ from tests.test_helpers.state_helpers import (
    validate_all_providers_have_committed_successfully,
)

-pytestmark = [
-    pytest.mark.integration_batch_1,
-    # Skip tests if not on Python 3.8 or higher.
-    pytest.mark.skipif(
-        sys.version_info < (3, 8), reason="Requires python 3.8 or higher"
-    ),
-]
+pytestmark = pytest.mark.integration_batch_1
FROZEN_TIME = "2020-04-14 07:00:00"
GMS_PORT = 8080
GMS_SERVER = f"http://localhost:{GMS_PORT}"
@@ -1,27 +1,27 @@
-import sys
from pathlib import Path
from typing import Any, Dict, TypeVar

-if sys.version_info >= (3, 8):
from pathlib import Path
from typing import Any, Dict, TypeVar
import pytest
from mlflow import MlflowClient

import pytest
from mlflow import MlflowClient
from datahub.ingestion.run.pipeline import Pipeline
from tests.test_helpers import mce_helpers

from datahub.ingestion.run.pipeline import Pipeline
from tests.test_helpers import mce_helpers
T = TypeVar("T")

T = TypeVar("T")

@pytest.fixture
def tracking_uri(tmp_path: Path) -> str:
@pytest.fixture
def tracking_uri(tmp_path: Path) -> str:
return str(tmp_path / "mlruns")

@pytest.fixture
def sink_file_path(tmp_path: Path) -> str:

@pytest.fixture
def sink_file_path(tmp_path: Path) -> str:
return str(tmp_path / "mlflow_source_mcps.json")

@pytest.fixture
def pipeline_config(tracking_uri: str, sink_file_path: str) -> Dict[str, Any]:

@pytest.fixture
def pipeline_config(tracking_uri: str, sink_file_path: str) -> Dict[str, Any]:
source_type = "mlflow"
return {
"run_id": "mlflow-source-test",
@@ -39,8 +39,9 @@ if sys.version_info >= (3, 8):
},
}

@pytest.fixture
def generate_mlflow_data(tracking_uri: str) -> None:

@pytest.fixture
def generate_mlflow_data(tracking_uri: str) -> None:
client = MlflowClient(tracking_uri=tracking_uri)
experiment_name = "test-experiment"
run_name = "test-run"
@@ -80,13 +81,14 @@ if sys.version_info >= (3, 8):
stage="Archived",
)

def test_ingestion(

def test_ingestion(
pytestconfig,
mock_time,
sink_file_path,
pipeline_config,
generate_mlflow_data,
):
):
print(f"MCPs file path: {sink_file_path}")
golden_file_path = (
pytestconfig.rootpath / "tests/integration/mlflow/mlflow_mcps_golden.json"
@@ -1,6 +1,5 @@
import os
import subprocess
-import sys
import time

import pytest
@@ -9,10 +8,6 @@ from tests.test_helpers import mce_helpers
from tests.test_helpers.click_helpers import run_datahub_cmd
from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)
-

@pytest.fixture(scope="module")
def mssql_runner(docker_compose_runner, pytestconfig):
@@ -1,14 +1,11 @@
-import sys
import uuid
from decimal import Decimal
from typing import Any, Optional

import pytest
from pydantic import ValidationError

-if sys.version_info >= (3, 8):
from pyiceberg.schema import Schema
from pyiceberg.types import (
from pyiceberg.schema import Schema
from pyiceberg.types import (
BinaryType,
BooleanType,
DateType,
@@ -29,17 +26,17 @@ if sys.version_info >= (3, 8):
TimestamptzType,
TimeType,
UUIDType,
)
)

from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.iceberg.iceberg import (
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.iceberg.iceberg import (
IcebergProfiler,
IcebergSource,
IcebergSourceConfig,
)
from datahub.ingestion.source.iceberg.iceberg_common import IcebergCatalogConfig
from datahub.metadata.com.linkedin.pegasus2avro.schema import ArrayType, SchemaField
from datahub.metadata.schema_classes import (
)
from datahub.ingestion.source.iceberg.iceberg_common import IcebergCatalogConfig
from datahub.metadata.com.linkedin.pegasus2avro.schema import ArrayType, SchemaField
from datahub.metadata.schema_classes import (
ArrayTypeClass,
BooleanTypeClass,
BytesTypeClass,
@@ -49,13 +46,10 @@ if sys.version_info >= (3, 8):
RecordTypeClass,
StringTypeClass,
TimeTypeClass,
)
)

-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 8), reason="requires python 3.8 or higher"
-)

def with_iceberg_source() -> IcebergSource:
def with_iceberg_source() -> IcebergSource:
catalog: IcebergCatalogConfig = IcebergCatalogConfig(
name="test", type="rest", config={}
)
@@ -64,18 +58,20 @@ if sys.version_info >= (3, 8):
config=IcebergSourceConfig(catalog=catalog),
)

def with_iceberg_profiler() -> IcebergProfiler:

def with_iceberg_profiler() -> IcebergProfiler:
iceberg_source_instance = with_iceberg_source()
return IcebergProfiler(
iceberg_source_instance.report, iceberg_source_instance.config.profiling
)

def assert_field(

def assert_field(
schema_field: SchemaField,
expected_description: Optional[str],
expected_nullable: bool,
expected_type: Any,
) -> None:
) -> None:
assert (
schema_field.description == expected_description
), f"Field description '{schema_field.description}' is different from expected description '{expected_description}'"
@@ -86,14 +82,16 @@ if sys.version_info >= (3, 8):
schema_field.type.type, expected_type
), f"Field type {schema_field.type.type} is different from expected type {expected_type}"

def test_config_no_catalog():

def test_config_no_catalog():
"""
Test when no Iceberg catalog is provided.
"""
with pytest.raises(ValidationError, match="catalog"):
IcebergSourceConfig()  # type: ignore

def test_config_catalog_not_configured():

def test_config_catalog_not_configured():
"""
Test when an Iceberg catalog is provided, but not properly configured.
"""
@@ -106,13 +104,15 @@ if sys.version_info >= (3, 8):
with pytest.raises(ValidationError, match="type"):
IcebergCatalogConfig(conf={})  # type: ignore

def test_config_for_tests():

def test_config_for_tests():
"""
Test valid iceberg source that will be used in unit tests.
"""
with_iceberg_source()

@pytest.mark.parametrize(

@pytest.mark.parametrize(
"iceberg_type, expected_schema_field_type",
[
(BinaryType(), BytesTypeClass),
@@ -142,10 +142,10 @@ if sys.version_info >= (3, 8):
StringTypeClass,
),
],
)
def test_iceberg_primitive_type_to_schema_field(
)
def test_iceberg_primitive_type_to_schema_field(
iceberg_type: PrimitiveType, expected_schema_field_type: Any
) -> None:
) -> None:
"""
Test converting a primitive typed Iceberg field to a SchemaField
"""
@@ -159,9 +159,7 @@ if sys.version_info >= (3, 8):
),
]:
schema = Schema(column)
-schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-    schema
-)
+schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
assert (
len(schema_fields) == 1
), f"Expected 1 field, but got {len(schema_fields)}"
@@ -172,7 +170,8 @@ if sys.version_info >= (3, 8):
expected_schema_field_type,
)

@pytest.mark.parametrize(

@pytest.mark.parametrize(
"iceberg_type, expected_array_nested_type",
[
(BinaryType(), "bytes"),
@@ -202,10 +201,10 @@ if sys.version_info >= (3, 8):
"uuid",
),
],
)
def test_iceberg_list_to_schema_field(
)
def test_iceberg_list_to_schema_field(
iceberg_type: PrimitiveType, expected_array_nested_type: Any
) -> None:
) -> None:
"""
Test converting a list typed Iceberg field to an ArrayType SchemaField, including the list nested type.
"""
@@ -241,9 +240,7 @@ if sys.version_info >= (3, 8):
]:
iceberg_source_instance = with_iceberg_source()
schema = Schema(list_column)
-schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-    schema
-)
+schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
assert (
len(schema_fields) == 1
), f"Expected 1 field, but got {len(schema_fields)}"
@@ -258,7 +255,8 @@ if sys.version_info >= (3, 8):
expected_array_nested_type
], f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}"

@pytest.mark.parametrize(

@pytest.mark.parametrize(
"iceberg_type, expected_map_type",
[
(BinaryType(), BytesTypeClass),
@@ -288,10 +286,10 @@ if sys.version_info >= (3, 8):
StringTypeClass,
),
],
)
def test_iceberg_map_to_schema_field(
)
def test_iceberg_map_to_schema_field(
iceberg_type: PrimitiveType, expected_map_type: Any
) -> None:
) -> None:
"""
Test converting a map typed Iceberg field to a MapType SchemaField, where the key is the same type as the value.
"""
@@ -327,9 +325,7 @@ if sys.version_info >= (3, 8):
]:
iceberg_source_instance = with_iceberg_source()
schema = Schema(map_column)
-schema_fields = iceberg_source_instance._get_schema_fields_for_schema(
-    schema
-)
+schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
# Converting an Iceberg Map type will be done by creating an array of struct(key, value) records.
# The first field will be the array.
assert (
@@ -350,7 +346,8 @@ if sys.version_info >= (3, 8):
expected_map_type,
)

@pytest.mark.parametrize(

@pytest.mark.parametrize(
"iceberg_type, expected_schema_field_type",
[
(BinaryType(), BytesTypeClass),
@@ -380,10 +377,10 @@ if sys.version_info >= (3, 8):
StringTypeClass,
),
],
)
def test_iceberg_struct_to_schema_field(
)
def test_iceberg_struct_to_schema_field(
iceberg_type: PrimitiveType, expected_schema_field_type: Any
) -> None:
) -> None:
"""
Test converting a struct typed Iceberg field to a RecordType SchemaField.
"""
@@ -394,9 +391,7 @@ if sys.version_info >= (3, 8):
iceberg_source_instance = with_iceberg_source()
schema = Schema(struct_column)
schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
-assert (
-    len(schema_fields) == 2
-), f"Expected 2 fields, but got {len(schema_fields)}"
+assert len(schema_fields) == 2, f"Expected 2 fields, but got {len(schema_fields)}"
assert_field(
schema_fields[0], struct_column.doc, struct_column.optional, RecordTypeClass
)
@@ -404,7 +399,8 @@ if sys.version_info >= (3, 8):
schema_fields[1], field1.doc, field1.optional, expected_schema_field_type
)

@pytest.mark.parametrize(

@pytest.mark.parametrize(
"value_type, value, expected_value",
[
(BinaryType(), bytes([1, 2, 3, 4, 5]), "b'\\x01\\x02\\x03\\x04\\x05'"),
@@ -434,17 +430,18 @@ if sys.version_info >= (3, 8):
"00010203-0405-0607-0809-0a0b0c0d0e0f",
),
],
)
def test_iceberg_profiler_value_render(
)
def test_iceberg_profiler_value_render(
value_type: IcebergType, value: Any, expected_value: Optional[str]
) -> None:
) -> None:
iceberg_profiler_instance = with_iceberg_profiler()
assert (
iceberg_profiler_instance._render_value("a.dataset", value_type, value)
== expected_value
)

def test_avro_decimal_bytes_nullable() -> None:

def test_avro_decimal_bytes_nullable() -> None:
"""
The following test exposes a problem with decimal (bytes) not preserving extra attributes like _nullable. Decimal (fixed) and Boolean for example do.
NOTE: This bug was by-passed by mapping the Decimal type to fixed instead of bytes.
@@ -457,9 +454,7 @@ if sys.version_info >= (3, 8):
print(
f"Original avro schema string: {decimal_avro_schema_string}"
)
-print(
-    f"After avro parsing, _nullable attribute is missing: {decimal_avro_schema}"
-)
+print(f"After avro parsing, _nullable attribute is missing: {decimal_avro_schema}")

decimal_fixed_avro_schema_string = """{"type": "record", "name": "__struct_", "fields": [{"type": {"type": "fixed", "logicalType": "decimal", "precision": 3, "scale": 2, "native_data_type": "decimal(3, 2)", "_nullable": false, "name": "bogusName", "size": 16}, "name": "required_field", "doc": "required field documentation"}]}"""
decimal_fixed_avro_schema = avro.schema.parse(decimal_fixed_avro_schema_string)
@@ -1,42 +1,43 @@
-import sys
import datetime
from pathlib import Path
from typing import Any, TypeVar, Union

-if sys.version_info >= (3, 8):
import datetime
from pathlib import Path
from typing import Any, TypeVar, Union
import pytest
from mlflow import MlflowClient
from mlflow.entities.model_registry import RegisteredModel
from mlflow.entities.model_registry.model_version import ModelVersion
from mlflow.store.entities import PagedList

import pytest
from mlflow import MlflowClient
from mlflow.entities.model_registry import RegisteredModel
from mlflow.entities.model_registry.model_version import ModelVersion
from mlflow.store.entities import PagedList
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource

from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource
T = TypeVar("T")

T = TypeVar("T")

@pytest.fixture
def tracking_uri(tmp_path: Path) -> str:
@pytest.fixture
def tracking_uri(tmp_path: Path) -> str:
return str(tmp_path / "mlruns")

@pytest.fixture
def source(tracking_uri: str) -> MLflowSource:

@pytest.fixture
def source(tracking_uri: str) -> MLflowSource:
return MLflowSource(
ctx=PipelineContext(run_id="mlflow-source-test"),
config=MLflowConfig(tracking_uri=tracking_uri),
)

@pytest.fixture
def registered_model(source: MLflowSource) -> RegisteredModel:

@pytest.fixture
def registered_model(source: MLflowSource) -> RegisteredModel:
model_name = "abc"
return RegisteredModel(name=model_name)

@pytest.fixture
def model_version(

@pytest.fixture
def model_version(
source: MLflowSource,
registered_model: RegisteredModel,
) -> ModelVersion:
) -> ModelVersion:
version = "1"
return ModelVersion(
name=registered_model.name,
@@ -44,7 +45,8 @@ if sys.version_info >= (3, 8):
creation_timestamp=datetime.datetime.now(),
)

def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]:

def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]:
dummy_pages = dict(
page_1=PagedList(items=["a", "b"], token="page_2"),
page_2=PagedList(items=["c", "d"], token="page_3"),
@@ -61,7 +63,8 @@ if sys.version_info >= (3, 8):
)
return page_to_return

def test_stages(source):

def test_stages(source):
mlflow_registered_model_stages = {
"Production",
"Staging",
@@ -76,7 +79,8 @@ if sys.version_info >= (3, 8):
"mlflow_" + str(stage).lower() for stage in mlflow_registered_model_stages
}

def test_config_model_name_separator(source, model_version):

def test_config_model_name_separator(source, model_version):
name_version_sep = "+"
source.config.model_name_separator = name_version_sep
expected_model_name = (
@@ -88,7 +92,8 @@ if sys.version_info >= (3, 8):

assert urn == expected_urn

def test_model_without_run(source, registered_model, model_version):

def test_model_without_run(source, registered_model, model_version):
run = source._get_mlflow_run(model_version)
wu = source._get_ml_model_properties_workunit(
registered_model=registered_model,
@@ -100,30 +105,32 @@ if sys.version_info >= (3, 8):
assert aspect.hyperParams is None
assert aspect.trainingMetrics is None

def test_traverse_mlflow_search_func(source):

def test_traverse_mlflow_search_func(source):
expected_items = ["a", "b", "c", "d", "e"]

items = list(source._traverse_mlflow_search_func(dummy_search_func))

assert items == expected_items

def test_traverse_mlflow_search_func_with_kwargs(source):

def test_traverse_mlflow_search_func_with_kwargs(source):
expected_items = ["A", "B", "C", "D", "E"]

-items = list(
-    source._traverse_mlflow_search_func(dummy_search_func, case="upper")
-)
+items = list(source._traverse_mlflow_search_func(dummy_search_func, case="upper"))

assert items == expected_items

def test_make_external_link_local(source, model_version):

def test_make_external_link_local(source, model_version):
expected_url = None

url = source._make_external_url(model_version)

assert url == expected_url

def test_make_external_link_remote(source, model_version):

def test_make_external_link_remote(source, model_version):
tracking_uri_remote = "https://dummy-mlflow-tracking-server.org"
source.client = MlflowClient(tracking_uri=tracking_uri_remote)
expected_url = f"{tracking_uri_remote}/#/models/{model_version.name}/versions/{model_version.version}"