diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
index d1e1d0ad47..6480f7b628 100644
--- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
@@ -44,6 +44,7 @@ extend-select = [
     "G010",  # logging.warn -> logging.warning
     "I",  # isort
     "TID",  # flake8-tidy-imports
+    "RUF100",  # unused-noqa
 ]
 ignore = [
     "E501",  # Line length violations (handled by formatter)
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
index b2ca61e3de..c8c0932be2 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
@@ -60,7 +60,7 @@ if TYPE_CHECKING:
     # To placate mypy on Airflow versions that don't have the listener API,
     # we define a dummy hookimpl that's an identity function.
-    def hookimpl(f: _F) -> _F:  # type: ignore[misc] # noqa: F811
+    def hookimpl(f: _F) -> _F:  # type: ignore[misc]
         return f
 else:
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py
index 031449b443..2aecd0af8a 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py
+++ b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py
@@ -1,7 +1,7 @@
 import pathlib
 import site
-from datahub.testing.pytest_hooks import (  # noqa: F401,E402
+from datahub.testing.pytest_hooks import (  # noqa: F401
     load_golden_flags,
     pytest_addoption,
 )
diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml
index d1e1d0ad47..6480f7b628 100644
--- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml
@@ -44,6 +44,7 @@ extend-select = [
     "G010",  # logging.warn -> logging.warning
     "I",  # isort
     "TID",  # flake8-tidy-imports
+    "RUF100",  # unused-noqa
 ]
 ignore = [
     "E501",  # Line length violations (handled by formatter)
diff --git a/metadata-ingestion-modules/dagster-plugin/tests/conftest.py b/metadata-ingestion-modules/dagster-plugin/tests/conftest.py
index 46ea2a7aa4..9bf52684e1 100644
--- a/metadata-ingestion-modules/dagster-plugin/tests/conftest.py
+++ b/metadata-ingestion-modules/dagster-plugin/tests/conftest.py
@@ -1,4 +1,4 @@
-from datahub.testing.pytest_hooks import (  # noqa: F401,E402
+from datahub.testing.pytest_hooks import (  # noqa: F401
     load_golden_flags,
     pytest_addoption,
 )
diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml
index d1e1d0ad47..6480f7b628 100644
--- a/metadata-ingestion-modules/gx-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml
@@ -44,6 +44,7 @@ extend-select = [
     "G010",  # logging.warn -> logging.warning
     "I",  # isort
     "TID",  # flake8-tidy-imports
+    "RUF100",  # unused-noqa
 ]
 ignore = [
     "E501",  # Line length violations (handled by formatter)
diff --git a/metadata-ingestion-modules/gx-plugin/tests/conftest.py b/metadata-ingestion-modules/gx-plugin/tests/conftest.py
index 6e66bf61d8..d44fe80415 100644
--- a/metadata-ingestion-modules/gx-plugin/tests/conftest.py
+++ b/metadata-ingestion-modules/gx-plugin/tests/conftest.py
@@ -1,5 +1,5 @@
 from datahub.testing.docker_utils import docker_compose_runner  # noqa: F401
-from datahub.testing.pytest_hooks import (  # noqa: F401,E402
+from datahub.testing.pytest_hooks import (  # noqa: F401
     load_golden_flags,
     pytest_addoption,
 )
diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml
index d1e1d0ad47..6480f7b628 100644
--- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml
@@ -44,6 +44,7 @@ extend-select = [
     "G010",  # logging.warn -> logging.warning
     "I",  # isort
     "TID",  # flake8-tidy-imports
+    "RUF100",  # unused-noqa
 ]
 ignore = [
     "E501",  # Line length violations (handled by formatter)
diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml
index 1d434eb8c3..c3ec6cb830 100644
--- a/metadata-ingestion/pyproject.toml
+++ b/metadata-ingestion/pyproject.toml
@@ -29,6 +29,7 @@ extend-select = [
     "G010",  # logging.warn -> logging.warning
     "I",  # isort
     "TID",  # flake8-tidy-imports
+    "RUF100",  # unused-noqa
 ]
 extend-ignore = [
     "E501",  # Handled by formatter
@@ -38,6 +39,8 @@ extend-ignore = [
     "E203",  # Ignore whitespace before ':' (matches Black)
     "B019",  # Allow usages of functools.lru_cache
     "B008",  # Allow function call in argument defaults
+    "RUF012",  # mutable-class-default; incompatible with pydantic
+    "RUF015",  # unnecessary-iterable-allocation-for-first-element
     # TODO: Enable these later
     "B006",  # Mutable args
     "B017",  # Do not assert blind exception
diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py
index b744ac573a..2eb0e27072 100644
--- a/metadata-ingestion/src/datahub/cli/docker_cli.py
+++ b/metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -430,7 +430,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     return quickstart_arch
-@docker.command()  # noqa: C901
+@docker.command()
 @click.option(
     "--version",
     type=str,
@@ -592,7 +592,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
         "arch",
     ]
 )
-def quickstart(  # noqa: C901
+def quickstart(
     version: Optional[str],
     build_locally: bool,
     pull_images: bool,
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index 8052de1b06..58c5124da5 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -20,7 +20,7 @@ from pydantic import BaseModel, Extra, ValidationError
 from pydantic.fields import Field
 from typing_extensions import Protocol, Self
-from datahub.configuration._config_enum import ConfigEnum as ConfigEnum  # noqa: I250
+from datahub.configuration._config_enum import ConfigEnum as ConfigEnum
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.utilities.dedup_list import deduplicate_list
diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
index b521dc5e9e..a94cbbde15 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
@@ -3,7 +3,7 @@ from enum import Enum, auto
 from typing import Callable, Dict, Optional, Type
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.api.source import (  # noqa: I250
+from datahub.ingestion.api.source import (
     Source,
     SourceCapability as SourceCapability,
 )
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index dfb484743f..ea207b6ef7 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -33,7 +33,7 @@ from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.emitter.serialization_helper import post_json_transform
-from datahub.ingestion.graph.config import (  # noqa: I250; TODO: Remove this alias
+from datahub.ingestion.graph.config import (
     DatahubClientConfig as DatahubClientConfig,
 )
 from datahub.ingestion.graph.connections import (
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index 120cf6a79b..8eaecf3582 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -439,7 +439,7 @@ class Pipeline:
                 return True
         return False
-    def run(self) -> None:  # noqa: C901
+    def run(self) -> None:
         with contextlib.ExitStack() as stack:
             if self.config.flags.generate_memory_profiles:
                 import memray
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index 950c1ccf5b..17602efbe9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -923,7 +923,7 @@ class LookerExplore:
             tags=cast(List, dict.get("tags")) if dict.get("tags") is not None else [],
         )
-    @classmethod  # noqa: C901
+    @classmethod
     def from_api(  # noqa: C901
         cls,
         model: str,
@@ -931,7 +931,7 @@ class LookerExplore:
         client: LookerAPI,
         reporter: SourceReport,
         source_config: LookerDashboardSourceConfig,
-    ) -> Optional["LookerExplore"]:  # noqa: C901
+    ) -> Optional["LookerExplore"]:
         try:
             explore = client.lookml_model_explore(model, explore_name)
             views: Set[str] = set()
@@ -1183,7 +1183,7 @@ class LookerExplore:
             base_url = remove_port_from_url(base_url)
         return f"{base_url}/embed/explore/{self.model_name}/{self.name}"
-    def _to_metadata_events(  # noqa: C901
+    def _to_metadata_events(
         self,
         config: LookerCommonConfig,
         reporter: SourceReport,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index 2f1fcd378d..ee781c5db5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -383,7 +383,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         self.reachable_explores[(model, explore)].append(via)
-    def _get_looker_dashboard_element(  # noqa: C901
+    def _get_looker_dashboard_element(
         self, element: DashboardElement
     ) -> Optional[LookerDashboardElement]:
         # Dashboard elements can use raw usage_queries against explores
diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
index d365d72782..975beb36f1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
@@ -488,7 +488,7 @@ class NifiSource(Source):
     def get_report(self) -> SourceReport:
         return self.report
-    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:  # noqa: C901
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
         """
         Update self.nifi_flow with contents of the input process group `pg_flow_dto`
         """
@@ -894,7 +894,7 @@ class NifiSource(Source):
             if not delete_response.ok:
                 logger.error("failed to delete provenance ", provenance_uri)
-    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
+    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
         rootpg = self.nifi_flow.root_process_group
         flow_name = rootpg.name  # self.config.site_name
         flow_urn = self.make_flow_urn()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py
index 2075e999ea..b770113af5 100755
--- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py
@@ -270,7 +270,7 @@ class APISource(Source, ABC):
         mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
         return ApiWorkUnit(id=dataset_name, mce=mce)
-    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:  # noqa: C901
+    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
         config = self.config
         sw_dict = self.config.get_swagger()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
index 5bacafaa3f..20b8272429 100755
--- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
@@ -111,7 +111,7 @@ def check_sw_version(sw_dict: dict) -> None:
         )
-def get_endpoints(sw_dict: dict) -> dict:  # noqa: C901
+def get_endpoints(sw_dict: dict) -> dict:
     """
     Get all the URLs, together with their description and the tags
     """
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py
index b65ae5cd29..b30f7317ca 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py
@@ -33,7 +33,7 @@ class CatalogItem(BaseModel):
     )
     @validator("display_name", always=True)
-    def validate_diplay_name(cls, value, values):  # noqa: N805
+    def validate_diplay_name(cls, value, values):
         if values["created_by"]:
             return values["created_by"].split("\\")[-1]
         return ""
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
index cfc43454b5..65adf76b45 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
@@ -55,7 +55,7 @@ try:
 except ImportError:
     _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
-    def override(f: _F, /) -> _F:  # noqa: F811
+    def override(f: _F, /) -> _F:
         return f
@@ -104,7 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
         return "\n".join([r for r in res])
     @typing.no_type_check
-    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:  # noqa: C901
+    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:
         """Derives the data type of the Athena column.
         This method is overwritten to extend the behavior of PyAthena.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py
index 82b22d8966..76adffc088 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py
@@ -67,7 +67,7 @@ TableKey = namedtuple("TableKey", ["schema", "table"])
 class HiveMetastoreConfigMode(StrEnum):
-    hive: str = "hive"  # noqa: F811
+    hive: str = "hive"
     presto: str = "presto"
     presto_on_hive: str = "presto-on-hive"
     trino: str = "trino"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
index 6c1f15a400..766f94a43e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
@@ -401,7 +401,7 @@ class SQLServerSource(SQLAlchemySource):
                 data_job.add_property(name=data_name, value=str(data_value))
         yield from self.construct_job_workunits(data_job)
-    def loop_stored_procedures(  # noqa: C901
+    def loop_stored_procedures(
         self,
         inspector: Inspector,
         schema: str,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 5b1b9b1c29..922dc4b28a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -635,7 +635,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         return None
-    def loop_tables(  # noqa: C901
+    def loop_tables(
         self,
         inspector: Inspector,
         schema: str,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
index c52eceb726..442d026b0a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -649,7 +649,7 @@ ORDER by DataBaseName, TableName;
         )
         # Disabling the below because the cached view definition is not the view definition the column in tablesv actually holds the last statement executed against the object... not necessarily the view definition
-        # setattr(  # noqa: B010
+        # setattr(
         #     TeradataDialect,
         #     "get_view_definition",
         #     lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(
@@ -746,7 +746,7 @@ ORDER by DataBaseName, TableName;
         else:
             raise Exception("Unable to get database name from Sqlalchemy inspector")
-    def cached_loop_tables(  # noqa: C901
+    def cached_loop_tables(
         self,
         inspector: Inspector,
         schema: str,
@@ -782,7 +782,7 @@ ORDER by DataBaseName, TableName;
                 break
         return description, properties, location
-    def cached_loop_views(  # noqa: C901
+    def cached_loop_views(
         self,
         inspector: Inspector,
         schema: str,
diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
index 55b026a144..7f3ead3630 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
 from datahub.metadata.urns import DataPlatformUrn
-from datahub.sql_parsing._models import _TableName as _TableName  # noqa: I250
+from datahub.sql_parsing._models import _TableName as _TableName
 from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
 from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
 from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
index c825deeccd..f224125a63 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
@@ -473,7 +473,7 @@ def _create_table_ddl_cll(
     return column_lineage
-def _select_statement_cll(  # noqa: C901
+def _select_statement_cll(
     statement: _SupportedColumnLineageTypes,
     dialect: sqlglot.Dialect,
     root_scope: sqlglot.optimizer.Scope,
diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py
index 85767c619a..ef72148226 100644
--- a/metadata-ingestion/src/datahub/upgrade/upgrade.py
+++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py
@@ -293,9 +293,9 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> i
         return server.version.micro - client.version.micro
-def _maybe_print_upgrade_message(  # noqa: C901
+def _maybe_print_upgrade_message(
     version_stats: Optional[DataHubVersionStats],
-) -> None:  # noqa: C901
+) -> None:
     days_before_cli_stale = 7
     days_before_quickstart_stale = 7
diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py
index 96870fc6fc..4772730c90 100644
--- a/metadata-ingestion/src/datahub/utilities/mapping.py
+++ b/metadata-ingestion/src/datahub/utilities/mapping.py
@@ -171,7 +171,7 @@ class OperationProcessor:
         self.owner_source_type = owner_source_type
         self.match_nested_props = match_nested_props
-    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:  # noqa: C901
+    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
         # Defining the following local variables -
         # operations_map - the final resulting map when operations are processed.
         # Against each operation the values to be applied are stored.
diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py
index f1d0c6706e..4cc25bcf38 100644
--- a/metadata-ingestion/tests/conftest.py
+++ b/metadata-ingestion/tests/conftest.py
@@ -41,7 +41,7 @@ try:
 except ImportError:
     pass
-import freezegun  # noqa: F401,E402
+import freezegun  # noqa: E402
 # The freezegun library has incomplete type annotations.
 # See https://github.com/spulec/freezegun/issues/469
diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py
index d0e943bbe6..89f48912cd 100644
--- a/metadata-ingestion/tests/test_helpers/docker_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py
@@ -4,7 +4,7 @@ import subprocess
 import pytest
-from datahub.testing.docker_utils import (  # noqa: F401,I250
+from datahub.testing.docker_utils import (
     docker_compose_runner as docker_compose_runner,
     is_responsive as is_responsive,
     wait_for_port as wait_for_port,
diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py
index 3247a64631..e6109e9292 100644
--- a/metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py
+++ b/metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py
@@ -114,7 +114,7 @@ AND
     )
     OR
     protoPayload.metadata.tableDataRead.reason = "JOB"
-)"""  # noqa: W293
+)"""
     corrected_start_time = config.start_time - config.max_query_duration
     corrected_end_time = config.end_time + config.max_query_duration
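For context, the ruff rules touched in the pyproject.toml hunks above behave roughly as follows. This is an illustrative sketch only; none of the code below is from the DataHub repository.

from typing import List

from pydantic import BaseModel


class ExampleModel(BaseModel):
    # RUF012 (mutable-class-default, added to extend-ignore): ruff wants mutable
    # class attributes annotated with typing.ClassVar, but on a pydantic model
    # this is a field default, hence "incompatible with pydantic".
    tags: List[str] = []


def first_tag(model: ExampleModel) -> str:
    # RUF015 (unnecessary-iterable-allocation-for-first-element, also ignored):
    # ruff would suggest next(iter(model.tags)) instead of building a full list.
    return list(model.tags)[0]


# RUF100 (unused-noqa, newly enabled): F811 is never raised on the line below,
# so the suppression is dead and ruff flags the noqa comment itself, which is
# how the stale noqa comments removed in this diff were found.
THRESHOLD = 10  # noqa: F811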