chore: update lint dependencies (#13316)

Co-authored-by: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Aseem Bansal authored on 2025-04-25 20:29:11 +05:30 (committed by GitHub)
parent 0f473232a3
commit 49ee849382
19 changed files with 63 additions and 33 deletions


@@ -36,7 +36,7 @@ lint_requirements = {
     # This is pinned only to avoid spurious errors in CI.
     # We should make an effort to keep it up to date.
     "ruff==0.11.6",
-    "mypy==1.10.1",
+    "mypy==1.12.1",
 }
 
 base_requirements = {


@@ -74,6 +74,7 @@ dev_requirements = {
     *mypy_stubs,
     "coverage>=5.1",
     "ruff==0.11.6",
+    # Updating mypy caused a conflict with pydantic, so it cannot be upgraded here.
     "mypy==1.10.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.


@@ -55,7 +55,7 @@ base_dev_requirements = {
     "dagster-snowflake-pandas >= 0.11.0",
     "coverage>=5.1",
     "ruff==0.11.6",
-    "mypy==1.10.1",
+    "mypy==1.12.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
     "pydantic>=1.10.0,!=1.10.3",


@@ -60,7 +60,7 @@ base_dev_requirements = {
     *mypy_stubs,
     "coverage>=5.1",
     "ruff==0.11.6",
-    "mypy==1.10.1",
+    "mypy==1.12.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
     "pydantic>=1.10.0,!=1.10.3",


@@ -57,7 +57,7 @@ dev_requirements = {
     *mypy_stubs,
     "coverage>=5.1",
     "ruff==0.11.6",
-    "mypy==1.10.1",
+    "mypy==1.12.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
     "pydantic>=1.10",


@@ -610,7 +610,7 @@ lint_requirements = {
     # This is pinned only to avoid spurious errors in CI.
     # We should make an effort to keep it up to date.
     "ruff==0.11.6",
-    "mypy==1.10.1",
+    "mypy==1.12.1",
 }
 
 base_dev_requirements = {


@@ -5,10 +5,17 @@ import requests
 from requests.auth import HTTPBasicAuth
 
 
+def _decode_bytes(value: Union[str, bytes]) -> str:
+    """Decode bytes to string, if necessary."""
+    if isinstance(value, bytes):
+        return value.decode()
+    return value
+
+
 def _format_header(name: str, value: Union[str, bytes]) -> str:
     if name == "Authorization":
         return f"{name!s}: <redacted>"
-    return f"{name!s}: {value!s}"
+    return f"{name!s}: {_decode_bytes(value)}"
 
 
 def make_curl_command(
@@ -21,7 +28,9 @@ def make_curl_command(
     if session.auth:
         if isinstance(session.auth, HTTPBasicAuth):
-            fragments.extend(["-u", f"{session.auth.username}:<redacted>"])
+            fragments.extend(
+                ["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
+            )
         else:
             # For other auth types, they should be handled via headers
             fragments.extend(["-H", "<unknown auth type>"])
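
Context for the helper added above: requests allows header values and
basic-auth fields to be bytes, and formatting bytes with !s yields the
repr-like b'...' form instead of the decoded text, which would leak into the
generated curl command. A minimal sketch of the failure mode (the header
name and token value here are made up):

    token = b"s3cr3t"
    print(f"X-Api-Key: {token!s}")         # X-Api-Key: b's3cr3t'
    print(f"X-Api-Key: {token.decode()}")  # X-Api-Key: s3cr3t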


@@ -319,8 +319,10 @@ def classification_workunit_processor(
             partial(
                 data_reader.get_sample_data_for_table,
                 table_id,
-                classification_handler.config.classification.sample_size
-                * SAMPLE_SIZE_MULTIPLIER,
+                int(
+                    classification_handler.config.classification.sample_size
+                    * SAMPLE_SIZE_MULTIPLIER
+                ),
                 **(data_reader_kwargs or {}),
             )
             if data_reader
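
The int(...) wrapper above suggests SAMPLE_SIZE_MULTIPLIER is a float: the
product of an int and a float is a float, and the stricter mypy run flags
passing it where the data reader expects an integer sample size. A minimal
sketch (the multiplier value is illustrative, not the real constant):

    SAMPLE_SIZE_MULTIPLIER = 1.2  # illustrative value
    sample_size = 1000
    product = sample_size * SAMPLE_SIZE_MULTIPLIER
    print(type(product))  # <class 'float'>
    print(int(product))   # 1200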


@@ -40,11 +40,11 @@ class TimeoutHTTPAdapter(HTTPAdapter):
             del kwargs["timeout"]
         super().__init__(*args, **kwargs)
 
-    def send(self, request, **kwargs):
+    def send(self, request, *args, **kwargs):
         timeout = kwargs.get("timeout")
         if timeout is None and hasattr(self, "timeout"):
             kwargs["timeout"] = self.timeout
-        return super().send(request, **kwargs)
+        return super().send(request, *args, **kwargs)
 
 
 class IcebergProfilingConfig(ConfigModel):
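
HTTPAdapter.send declares optional parameters after request (stream, timeout,
verify, cert, proxies), so an override that forwards only **kwargs has an
incompatible signature under mypy's override checking; adding *args restores
compatibility. A minimal sketch of how such an adapter is used, assuming the
class roughly as shown in the diff:

    import requests
    from requests.adapters import HTTPAdapter

    class TimeoutHTTPAdapter(HTTPAdapter):
        # sketch of the pattern, not the exact source
        def __init__(self, *args, timeout=None, **kwargs):
            self.timeout = timeout  # default timeout in seconds
            super().__init__(*args, **kwargs)

        def send(self, request, *args, **kwargs):
            # fall back to the adapter-level default when no timeout was given
            if kwargs.get("timeout") is None and self.timeout is not None:
                kwargs["timeout"] = self.timeout
            return super().send(request, *args, **kwargs)

    session = requests.Session()
    # every request through this session now defaults to a 10-second timeout
    session.mount("https://", TimeoutHTTPAdapter(timeout=10))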


@@ -515,5 +515,5 @@ def parse_ldap_dn(input_clean: bytes) -> str:
 def get_attr_or_none(
     attrs: Dict[str, Any], key: str, default: Optional[str] = None
-) -> str:
+) -> Optional[str]:
     return attrs[key][0].decode() if attrs.get(key) else default
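
This annotation change is exactly the kind of error a newer mypy surfaces:
the function can return default, which may be None, so declaring -> str was
unsound. A minimal self-contained sketch using the function as shown:

    from typing import Any, Dict, Optional

    def get_attr_or_none(
        attrs: Dict[str, Any], key: str, default: Optional[str] = None
    ) -> Optional[str]:  # with "-> str", mypy reports an incompatible return value
        return attrs[key][0].decode() if attrs.get(key) else default

    assert get_attr_or_none({"cn": [b"alice"]}, "cn") == "alice"
    assert get_attr_or_none({}, "cn") is None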


@@ -63,10 +63,10 @@ class SessionWithTimeout(requests.Session):
         super().__init__(*args, **kwargs)
         self.timeout = timeout
 
-    def request(self, method, url, **kwargs):
+    def request(self, method, url, *args, **kwargs):
         # Set the default timeout if none is provided
         kwargs.setdefault("timeout", self.timeout)
-        return super().request(method, url, **kwargs)
+        return super().request(method, url, *args, **kwargs)
 
 
 class DataResolverBase(ABC):
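
Same fix as the adapter above, one level up: requests.Session.request accepts
many optional parameters positionally after method and url, so the override
now forwards *args as well. A short usage sketch, assuming an __init__ that
takes a timeout argument, as the assignment above suggests:

    import requests

    class SessionWithTimeout(requests.Session):
        def __init__(self, timeout, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.timeout = timeout

        def request(self, method, url, *args, **kwargs):
            # Set the default timeout if none is provided
            kwargs.setdefault("timeout", self.timeout)
            return super().request(method, url, *args, **kwargs)

    session = SessionWithTimeout(timeout=5)
    # resp = session.get("https://example.com")  # inherits the 5-second default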


@@ -145,7 +145,11 @@ class ClickHouseConfig(
     )
     include_materialized_views: Optional[bool] = Field(default=True, description="")
 
-    def get_sql_alchemy_url(self, current_db=None):
+    def get_sql_alchemy_url(
+        self,
+        uri_opts: Optional[Dict[str, Any]] = None,
+        current_db: Optional[str] = None,
+    ) -> str:
         url = make_url(
             super().get_sql_alchemy_url(uri_opts=self.uri_opts, current_db=current_db)
         )
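
This and the following get_sql_alchemy_url changes all fix the same class of
error: mypy checks that an override can accept everything the supertype
method accepts (the Liskov substitution rule), and the 1.12 run presumably
flags the old zero-argument overrides. A minimal sketch of the error class,
with hypothetical names:

    from typing import Any, Dict, Optional

    class Base:
        def get_sql_alchemy_url(
            self,
            uri_opts: Optional[Dict[str, Any]] = None,
            database: Optional[str] = None,
        ) -> str:
            return "base://"

    class Narrow(Base):
        # mypy: Signature of "get_sql_alchemy_url" incompatible with supertype "Base"
        def get_sql_alchemy_url(self):
            return super().get_sql_alchemy_url()

    class Compatible(Base):
        # accepts the same parameters and forwards them, so the override checks out
        def get_sql_alchemy_url(
            self,
            uri_opts: Optional[Dict[str, Any]] = None,
            database: Optional[str] = None,
        ) -> str:
            return super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)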


@@ -1,4 +1,6 @@
 # This import verifies that the dependencies are available.
+from typing import Any, Dict, Optional
+
 import pydruid  # noqa: F401
 from pydantic.fields import Field
 from pydruid.db.sqlalchemy import DruidDialect
@@ -38,8 +40,11 @@ class DruidConfig(BasicSQLAlchemyConfig):
         description="regex patterns for schemas to filter in ingestion.",
     )
 
-    def get_sql_alchemy_url(self):
-        return f"{super().get_sql_alchemy_url()}/druid/v2/sql/"
+    def get_sql_alchemy_url(
+        self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None
+    ) -> str:
+        base_url = super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)
+        return f"{base_url}/druid/v2/sql/"
 
     """
     The pydruid library already formats the table name correctly, so we do not


@@ -127,11 +127,15 @@ class OracleConfig(BasicSQLAlchemyConfig):
             )
         return v
 
-    def get_sql_alchemy_url(self):
-        url = super().get_sql_alchemy_url()
+    def get_sql_alchemy_url(
+        self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None
+    ) -> str:
+        url = super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)
         if self.service_name:
             assert not self.database
             url = f"{url}/?service_name={self.service_name}"
         return url
 
     def get_identifier(self, schema: str, table: str) -> str:


@@ -2,7 +2,7 @@ import collections
 import dataclasses
 import logging
 from datetime import datetime
-from typing import Dict, Iterable, List
+from typing import Any, Dict, Iterable, List, Optional
 
 from dateutil import parser
 from pydantic.fields import Field
@@ -74,8 +74,12 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
     options: dict = Field(default={}, description="")
     query_log_table: str = Field(default="system.query_log", exclude=True)
 
-    def get_sql_alchemy_url(self):
-        return super().get_sql_alchemy_url()
+    def get_sql_alchemy_url(
+        self,
+        uri_opts: Optional[Dict[str, Any]] = None,
+        current_db: Optional[str] = None,
+    ) -> str:
+        return super().get_sql_alchemy_url(uri_opts=uri_opts, current_db=current_db)
 
 
 @platform_name("ClickHouse")
@platform_name("ClickHouse")


@@ -4,7 +4,7 @@ import json
 import logging
 from datetime import datetime
 from email.utils import parseaddr
-from typing import Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional
 
 from dateutil import parser
 from pydantic.fields import Field
@@ -98,8 +98,10 @@ class TrinoUsageConfig(TrinoConfig, BaseUsageConfig, EnvBasedSourceBaseConfig):
     options: dict = Field(default={}, description="")
     database: str = Field(description="The name of the catalog from getting the usage")
 
-    def get_sql_alchemy_url(self):
-        return super().get_sql_alchemy_url()
+    def get_sql_alchemy_url(
+        self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None
+    ) -> str:
+        return super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)
 
 
 @dataclasses.dataclass
@dataclasses.dataclass


@@ -3,6 +3,6 @@ pytest-dependency>=0.5.1
 pyspark==3.5.3
 -e ../../metadata-ingestion[iceberg-catalog]
 # libraries for linting below this
-mypy==1.5.1
-ruff==0.9.7
+mypy==1.12.1
+ruff==0.11.6


@@ -213,7 +213,7 @@ def test_iceberg_quick(spark_session, warehouse):
     _test_rename_ops(spark_session)
 
     result = spark_session.sql("show namespaces")
-    assert (result[result["namespace"] == "default"].count() == 1)
+    assert result[result["namespace"] == "default"].count() == 1
 
 
 def _create_table(spark_session, ns, table_name):
@@ -225,11 +225,10 @@ def _create_table(spark_session, ns, table_name):
     spark_session.sql(f"insert into {ns}.{table_name} values (1, 'foo' ) ")
 
     result = spark_session.sql("show namespaces")
-    assert (result[result["namespace"] == "default"].count() == 1)
+    assert result[result["namespace"] == "default"].count() == 1
     result = spark_session.sql("show namespaces in default")
-    assert (result[result["namespace"] == f"{ns}"].count() == 1)
+    assert result[result["namespace"] == f"{ns}"].count() == 1
 
 
 def test_load_tables(spark_session, warehouse):
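
Dropping the parentheses is not purely cosmetic: parenthesized asserts invite
the classic two-element-tuple mistake, where the assert tests a non-empty
tuple and always passes; pyflakes/ruff flag that case as F631. A minimal
sketch with a made-up condition:

    x = 1
    # assert (x == 2, "x must be 2")  # BUG: asserts a 2-tuple, always true (F631)
    assert x == 1, "x must be 1"      # the unparenthesized form checks the condition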


@@ -9,8 +9,8 @@ joblib
 pytest-xdist
 networkx
 # libraries for linting below this
-mypy==1.5.1
-ruff==0.9.7
+mypy==1.12.1
+ruff==0.11.6
 # stub versions are copied from metadata-ingestion/setup.py and that should be the source of truth
 types-requests>=2.28.11.6,<=2.31.0.3
 types-PyYAML