dev(ingest): move modules from isort,flake8 to ruff (#12373)

Aseem Bansal 2025-01-17 21:38:29 +05:30 committed by GitHub
parent 4de7f61d09
commit 76e46b89db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 166 additions and 144 deletions

@@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) {
     "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
     "source ${venv_name}/bin/activate && set -x && " +
     "black --check --diff src/ tests/ && " +
-    "isort --check --diff src/ tests/ && " +
-    "flake8 --count --statistics src/ tests/ && " +
+    "ruff check src/ tests/ && " +
     "mypy --show-traceback --show-error-codes src/ tests/"
 }
 
 task lintFix(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-c',
     "source ${venv_name}/bin/activate && set -x && " +
     "black src/ tests/ && " +
-    "isort src/ tests/ && " +
-    "flake8 src/ tests/ && " +
+    "ruff check --fix src/ tests/"
     "mypy src/ tests/ "
 }

@@ -10,11 +10,50 @@ extend-exclude = '''
 '''
 include = '\.pyi?$'
 
-[tool.isort]
-indent = ' '
-known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat']
-profile = 'black'
-sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = ["datahub"]
+extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
+section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
+force-sort-within-sections = false
+force-wrap-aliases = false
+split-on-trailing-comma = false
+order-by-type = true
+relative-imports-order = "closest-to-furthest"
+force-single-line = false
+single-line-exclusions = ["typing"]
+length-sort = false
+from-first = false
+required-imports = []
+classes = ["typing"]
 
-[tool.pyright]
-extraPaths = ['tests']
+[tool.ruff.lint]
+select = [
+    "B",
+    "C90",
+    "E",
+    "F",
+    "I", # For isort
+    "TID",
+]
+ignore = [
+    # Ignore line length violations (handled by Black)
+    "E501",
+    # Ignore whitespace before ':' (matches Black)
+    "E203",
+    "E203",
+    # Allow usages of functools.lru_cache
+    "B019",
+    # Allow function call in argument defaults
+    "B008",
+]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Disallow all relative imports.
+ban-relative-imports = "all"
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]

@@ -1,24 +1,3 @@
-[flake8]
-max-complexity = 15
-ignore =
-    # Ignore: line length issues, since black's formatter will take care of them.
-    E501,
-    # Ignore: 1 blank line required before class docstring.
-    D203,
-    # See https://stackoverflow.com/a/57074416.
-    W503,
-    # See https://github.com/psf/black/issues/315.
-    E203
-exclude =
-    .git,
-    venv,
-    .tox,
-    __pycache__
-per-file-ignores =
-    # imported but unused
-    __init__.py: F401
-ban-relative-imports = true
-
 [mypy]
 plugins =
     sqlmypy,

@@ -73,9 +73,7 @@ dev_requirements = {
     *mypy_stubs,
     "black==22.12.0",
     "coverage>=5.1",
-    "flake8>=3.8.3",
-    "flake8-tidy-imports>=4.3.0",
-    "isort>=5.7.0",
+    "ruff==0.9.1",
     "mypy==1.10.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.

@@ -1,11 +1,12 @@
 from enum import Enum
 from typing import TYPE_CHECKING, Optional
 
-import datahub.emitter.mce_builder as builder
 from airflow.configuration import conf
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from pydantic.fields import Field
 
+import datahub.emitter.mce_builder as builder
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
+
 if TYPE_CHECKING:
     from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook

@@ -1,8 +1,9 @@
 import logging
 
-import datahub.emitter.mce_builder as builder
 from openlineage.client.run import Dataset as OpenLineageDataset
 
+import datahub.emitter.mce_builder as builder
+
 logger = logging.getLogger(__name__)

@@ -3,17 +3,11 @@ import logging
 import unittest.mock
 from typing import TYPE_CHECKING, Optional
 
-import datahub.emitter.mce_builder as builder
-from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
-    get_platform_from_sqlalchemy_uri,
-)
-from datahub.sql_parsing.sqlglot_lineage import (
-    SqlParsingResult,
-    create_lineage_sql_parsed_result,
-)
-from openlineage.airflow.extractors import BaseExtractor
-from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager
-from openlineage.airflow.extractors import TaskMetadata
+from openlineage.airflow.extractors import (
+    BaseExtractor,
+    ExtractorManager as OLExtractorManager,
+    TaskMetadata,
+)
 from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor
 from openlineage.airflow.extractors.sql_extractor import SqlExtractor
 from openlineage.airflow.utils import get_operator_class, try_import_from_string
@@ -23,11 +17,20 @@ from openlineage.client.facet import (
     SqlJobFacet,
 )
 
+import datahub.emitter.mce_builder as builder
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
+from datahub.sql_parsing.sqlglot_lineage import (
+    SqlParsingResult,
+    create_lineage_sql_parsed_result,
+)
+
 from datahub_airflow_plugin._airflow_shims import Operator
 from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS
 
 if TYPE_CHECKING:
     from airflow.models import DagRun, TaskInstance
     from datahub.ingestion.graph.client import DataHubGraph
 
 logger = logging.getLogger(__name__)

@@ -2,6 +2,7 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast
 
 from airflow.configuration import conf
+
 from datahub.api.entities.datajob import DataFlow, DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import (
     DataProcessInstance,
@@ -11,7 +12,6 @@ from datahub.emitter.generic_emitter import Emitter
 from datahub.metadata.schema_classes import DataProcessTypeClass
 from datahub.utilities.urns.data_flow_urn import DataFlowUrn
 from datahub.utilities.urns.data_job_urn import DataJobUrn
-
 from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED
 from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl

@@ -8,9 +8,13 @@ import time
 from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast
 
 import airflow
-import datahub.emitter.mce_builder as builder
 from airflow.models import Variable
 from airflow.models.serialized_dag import SerializedDagModel
+from openlineage.airflow.listener import TaskHolder
+from openlineage.airflow.utils import redact_with_exclusions
+from openlineage.client.serde import Serde
+
+import datahub.emitter.mce_builder as builder
 from datahub.api.entities.datajob import DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -30,10 +34,6 @@ from datahub.metadata.schema_classes import (
 )
 from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
 from datahub.telemetry import telemetry
-from openlineage.airflow.listener import TaskHolder
-from openlineage.airflow.utils import redact_with_exclusions
-from openlineage.client.serde import Serde
-
 from datahub_airflow_plugin._airflow_shims import (
     HAS_AIRFLOW_DAG_LISTENER_API,
     HAS_AIRFLOW_DATASET_LISTENER_API,

@@ -15,9 +15,9 @@ assert AIRFLOW_PATCHED
 logger = logging.getLogger(__name__)
 
-_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv(
+_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv(
     "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false"
-).lower() in ("true", "1")
+).lower() not in ("true", "1")
 
 if _USE_AIRFLOW_LISTENER_INTERFACE:
     try:
@@ -32,7 +32,7 @@ if _USE_AIRFLOW_LISTENER_INTERFACE:
     with contextlib.suppress(Exception):
-        if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in (
+        if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in (
             "true",
             "1",
         ):
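The two rewrites above are the membership-test cleanup that pycodestyle's E713 rule asks for, now enforced through ruff's E selection: a negated "in" test becomes a single "not in" expression with the same result. A small self-contained sketch of the pattern (SOME_FLAG is a made-up variable, not part of the plugin):

    import os

    # Flagged (E713): negation wrapped around an "in" test.
    skip = not os.getenv("SOME_FLAG", "false").lower() in ("true", "1")

    # Equivalent and preferred: one membership test.
    skip = os.getenv("SOME_FLAG", "false").lower() not in ("true", "1")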

@@ -7,9 +7,9 @@ import airflow
 from airflow.lineage import PIPELINE_OUTLETS
 from airflow.models.baseoperator import BaseOperator
 from airflow.utils.module_loading import import_string
+
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
 from datahub.telemetry import telemetry
-
 from datahub_airflow_plugin._airflow_shims import (
     MappedOperator,
     get_task_inlets,

@@ -2,6 +2,7 @@ from abc import abstractmethod
 from typing import List, Optional
 
 import attr
+
 import datahub.emitter.mce_builder as builder
 from datahub.utilities.urns.data_job_urn import DataJobUrn
 from datahub.utilities.urns.dataset_urn import DatasetUrn

@@ -9,6 +9,7 @@ from datetime import timedelta
 from airflow import DAG
 from airflow.operators.python import PythonOperator
 from airflow.utils.dates import days_ago
+
 from datahub.configuration.config_loader import load_config_file
 from datahub.ingestion.run.pipeline import Pipeline

@@ -4,8 +4,8 @@ from datetime import timedelta
 import pendulum
 from airflow.decorators import dag, task
-from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter
 
+from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -4,11 +4,11 @@ This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
 """
 from datetime import timedelta
 
-import datahub.emitter.mce_builder as builder
 from airflow import DAG
 from airflow.operators.bash import BashOperator
 from airflow.utils.dates import days_ago
 
+import datahub.emitter.mce_builder as builder
 from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator
 
 default_args = {

@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union
 from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
+
 from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
@@ -11,6 +12,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
 if TYPE_CHECKING:
     from airflow.models.connection import Connection
+
     from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
     from datahub.emitter.rest_emitter import DataHubRestEmitter
     from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter

@@ -2,7 +2,6 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List
 
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
-
 from datahub_airflow_plugin._config import DatahubLineageConfig
 from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
 from datahub_airflow_plugin.entities import (

@@ -3,9 +3,9 @@ from typing import List, Union
 from airflow.models import BaseOperator
 from airflow.utils.decorators import apply_defaults
 from avrogen.dict_wrapper import DictWrapper
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
-
 from datahub_airflow_plugin.hooks.datahub import (
     DatahubGenericHook,
     DatahubKafkaHook,

@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union
 
 from airflow.models import BaseOperator
+
 from datahub.api.circuit_breaker import (
     AssertionCircuitBreaker,
     AssertionCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union
 
 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     AssertionCircuitBreaker,
     AssertionCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union
 
 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     OperationCircuitBreaker,
     OperationCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union
 
 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     OperationCircuitBreaker,
     OperationCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -1,6 +1,7 @@
 from datetime import datetime, timedelta
 
 from airflow import DAG
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
@@ -9,7 +10,6 @@ from datahub.metadata.schema_classes import (
     DatasetPropertiesClass,
     DatasetSnapshotClass,
 )
-
 from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator
 
 default_args = {

@@ -17,9 +17,9 @@ import pytest
 import requests
 import tenacity
 from airflow.models.connection import Connection
+
 from datahub.ingestion.sink.file import write_metadata_file
 from datahub.testing.compare_metadata_json import assert_metadata_files_equal
-
 from datahub_airflow_plugin._airflow_shims import (
     AIRFLOW_VERSION,
     HAS_AIRFLOW_DAG_LISTENER_API,

@@ -8,12 +8,12 @@ from unittest.mock import Mock
 import airflow.configuration
 import airflow.version
-import datahub.emitter.mce_builder as builder
 import packaging.version
 import pytest
 from airflow.lineage import apply_lineage, prepare_lineage
 from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance
 
+import datahub.emitter.mce_builder as builder
 from datahub_airflow_plugin import get_provider_info
 from datahub_airflow_plugin._airflow_shims import (
     AIRFLOW_PATCHED,

@@ -1,6 +1,6 @@
 import setuptools
-from datahub.testing.check_imports import ensure_no_indirect_model_imports
 
+from datahub.testing.check_imports import ensure_no_indirect_model_imports
 from tests.utils import PytestConfig

@@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-c',
     "source ${venv_name}/bin/activate && set -x && " +
     "black --check --diff src/ tests/ examples/ && " +
-    "isort --check --diff src/ tests/ examples/ && " +
-    "flake8 --count --statistics src/ tests/ examples/ && " +
+    "ruff check src/ tests/ && " +
     "mypy --show-traceback --show-error-codes src/ tests/ examples/"
 }
 
 task lintFix(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-x', '-c',
     "source ${venv_name}/bin/activate && " +
     "black src/ tests/ examples/ && " +
-    "isort src/ tests/ examples/ && " +
-    "flake8 src/ tests/ examples/ && " +
+    "ruff check --fix src/ tests/"
     "mypy src/ tests/ examples/"
 }

@@ -10,10 +10,50 @@ extend-exclude = '''
 '''
 include = '\.pyi?$'
 
-[tool.isort]
-indent = ' '
-profile = 'black'
-sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = ["datahub"]
+extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
+section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
+force-sort-within-sections = false
+force-wrap-aliases = false
+split-on-trailing-comma = false
+order-by-type = true
+relative-imports-order = "closest-to-furthest"
+force-single-line = false
+single-line-exclusions = ["typing"]
+length-sort = false
+from-first = false
+required-imports = []
+classes = ["typing"]
 
-[tool.pyright]
-extraPaths = ['tests']
+[tool.ruff.lint]
+select = [
+    "B",
+    "C90",
+    "E",
+    "F",
+    "I", # For isort
+    "TID",
+]
+ignore = [
+    # Ignore line length violations (handled by Black)
+    "E501",
+    # Ignore whitespace before ':' (matches Black)
+    "E203",
+    "E203",
+    # Allow usages of functools.lru_cache
+    "B019",
+    # Allow function call in argument defaults
+    "B008",
+]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Disallow all relative imports.
+ban-relative-imports = "all"
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]

@@ -1,24 +1,3 @@
-[flake8]
-max-complexity = 15
-ignore =
-    # Ignore: line length issues, since black's formatter will take care of them.
-    E501,
-    # Ignore: 1 blank line required before class docstring.
-    D203,
-    # See https://stackoverflow.com/a/57074416.
-    W503,
-    # See https://github.com/psf/black/issues/315.
-    E203
-exclude =
-    .git,
-    venv,
-    .tox,
-    __pycache__
-per-file-ignores =
-    # imported but unused
-    __init__.py: F401
-ban-relative-imports = true
-
 [mypy]
 plugins =
     pydantic.mypy

@@ -53,10 +53,7 @@ base_dev_requirements = {
     "dagster-snowflake-pandas >= 0.11.0",
     "black==22.12.0",
     "coverage>=5.1",
-    "flake8>=6.0.0",
-    "flake8-tidy-imports>=4.3.0",
-    "flake8-bugbear==23.3.12",
-    "isort>=5.7.0",
+    "ruff==0.9.1",
     "mypy>=1.4.0",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.

@@ -13,6 +13,7 @@ from dagster import (
     TableSchemaMetadataValue,
 )
 from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus
+
 from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
 
 try:
@@ -23,6 +24,7 @@ except ImportError:
 from dagster._core.snap.node import OpDefSnap
 from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot
+
 from datahub.api.entities.datajob import DataFlow, DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import (
     DataProcessInstance,

@@ -35,7 +35,9 @@ from dagster._core.definitions.sensor_definition import DefaultSensorStatus
 try:
     from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion
 except ImportError:
-    from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion  # type: ignore
+    from dagster._core.definitions.sensor_definition import (  # type: ignore
+        RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion,
+    )
 
 from dagster._core.definitions.target import ExecutableDefinition
 from dagster._core.definitions.unresolved_asset_job_definition import (
@@ -43,6 +45,7 @@ from dagster._core.definitions.unresolved_asset_job_definition import (
 )
 from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData
 from dagster._core.execution.stats import RunStepKeyStatsSnapshot
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
 from datahub.metadata.schema_classes import SubTypesClass
@@ -52,7 +55,6 @@ from datahub.sql_parsing.sqlglot_lineage import (
 )
 from datahub.utilities.urns.dataset_urn import DatasetUrn
 from datahub.utilities.urns.error import InvalidUrnError
-
 from datahub_dagster_plugin.client.dagster_generator import (
     DATAHUB_ASSET_GROUP_NAME_CACHE,
     Constant,

@@ -22,11 +22,11 @@ from dagster._core.definitions.repository_definition import (
     RepositoryDefinition,
 )
 from dagster._core.definitions.resource_definition import ResourceDefinition
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.graph.client import DatahubClientConfig
 from freezegun import freeze_time
 from utils.utils import PytestConfig, check_golden_file
 
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.graph.client import DatahubClientConfig
 from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
 from datahub_dagster_plugin.sensors.datahub_sensors import (
     DatahubSensors,

@@ -14,6 +14,3 @@ include = '\.pyi?$'
 indent = ' '
 profile = 'black'
 sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
-
-[tool.pyright]
-extraPaths = ['tests']

@@ -14,6 +14,3 @@ include = '\.pyi?$'
 indent = ' '
 profile = 'black'
 sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
-
-[tool.pyright]
-extraPaths = ['tests']

@@ -11,6 +11,7 @@ extend-exclude = '''
 include = '\.pyi?$'
 target-version = ['py38', 'py39', 'py310', 'py311']
+
 [tool.ruff.lint.isort]
 combine-as-imports = true
 known-first-party = ["datahub"]
@@ -28,16 +29,6 @@ from-first = false
 required-imports = []
 classes = ["typing"]
 
-[tool.pyright]
-extraPaths = ['tests']
-
-[tool.vulture]
-exclude = ["src/datahub/metadata/"]
-ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"]
-ignore_names = ["*Source", "*Sink", "*Report"]
-paths = ["src"]
-sort_by_size = true
-
 [tool.ruff]
 # Same as Black.
 line-length = 88
@@ -70,7 +61,6 @@ ignore = [
     "B008",
     # TODO: Enable these later
     "B006", # Mutable args
-    "B007", # Unused loop control variable
     "B017", # Do not assert blind exception
     "B904", # Checks for raise statements in exception handlers that lack a from clause
 ]

@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
         for replace_index, replace_value in patches_replace.items():
             list_to_manipulate[replace_index] = replace_value
 
-        for drop_index, drop_value in patches_drop.items():
+        for drop_value in patches_drop.values():
             list_to_manipulate.remove(drop_value)
 
         for add_value in patches_add:
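This loop rewrite, and the similar ones in the files below, go with dropping "B007" (unused loop control variable) from the ignore list in the pyproject.toml hunk above: when only the values of a mapping are needed, iterating .values() or naming the unused key _ states that directly. A self-contained sketch with made-up data:

    word_counts = {"a": 1, "b": 2}
    total = 0

    # Flagged by B007: "name" is never used in the loop body.
    for name, count in word_counts.items():
        total += count

    # Preferred: iterate only over the values...
    for count in word_counts.values():
        total += count

    # ...or mark the unused variable explicitly.
    for _, count in word_counts.items():
        total += count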

@@ -613,7 +613,7 @@ class ABSSource(StatefulIngestionSourceBase):
                     table_data.table_path
                 ].timestamp = table_data.timestamp
 
-        for guid, table_data in table_dict.items():
+        for _, table_data in table_dict.items():
             yield from self.ingest_table(table_data, path_spec)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -181,7 +181,7 @@ class DremioAPIOperations:
             return
 
         # On-prem Dremio authentication (PAT or Basic Auth)
-        for retry in range(1, self._retry_count + 1):
+        for _ in range(1, self._retry_count + 1):
             try:
                 if connection_args.authentication_method == "PAT":
                     self.session.headers.update(

@@ -286,7 +286,7 @@ class Neo4jSource(Source):
         df = self.get_neo4j_metadata(
             "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
         )
-        for index, row in df.iterrows():
+        for _, row in df.iterrows():
             try:
                 yield MetadataWorkUnit(
                     id=row["key"],

@@ -1124,7 +1124,7 @@ class S3Source(StatefulIngestionSourceBase):
                     table_data.table_path
                 ].timestamp = table_data.timestamp
 
-        for guid, table_data in table_dict.items():
+        for _, table_data in table_dict.items():
             yield from self.ingest_table(table_data, path_spec)
 
         if not self.source_config.is_profiling_enabled():

@@ -354,7 +354,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
             browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"
 
         if os.path.isdir(self.config.path):
-            for root, dirs, files in os.walk(self.config.path, topdown=False):
+            for root, _, files in os.walk(self.config.path, topdown=False):
                 for file_name in [f for f in files if f.endswith(".json")]:
                     try:
                         yield from self._load_one_file(

@@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
     info_cache = kw.get("info_cache")
     all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
     relation_names = []
-    for key, relation in all_relations.items():
+    for _, relation in all_relations.items():
         if relation.database == schema and relation.relkind == relkind:
             relation_names.append(relation.relname)
     return relation_names

@@ -3605,7 +3605,7 @@ class TableauSiteSource:
                 parent_container_key=parent_project_key,
             )
 
-        for id_, project in self.tableau_project_registry.items():
+        for project in self.tableau_project_registry.values():
             logger.debug(
                 f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
             )

@@ -246,7 +246,7 @@ class MCPDiff:
         for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed:
             aspect_map = self.aspect_changes[urn]
             s.append(f"Urn changed, {urn}:")
-            for aspect_name, aspect_diffs in aspect_map.items():
+            for aspect_diffs in aspect_map.values():
                 for i, ga in aspect_diffs.aspects_added.items():
                     s.append(self.report_aspect(ga, i, "added"))
                 if verbose:

@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType
 # Patch based on sqllineage v1.3.3
 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None:  # type: ignore
-    for i, tbl in enumerate(self.tables):
+    for tbl in self.tables:
         holder.add_read(tbl)
 
     self.union_barriers.append((len(self.columns), len(self.tables)))

@@ -1070,7 +1070,7 @@ def test_unsupported_data_platform():
     )  # type :ignore
 
     is_entry_present: bool = False
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
         if entry.title == "Non-Data Platform Expression":
             is_entry_present = True
             break
@@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser):
     )  # type :ignore
 
     is_entry_present: bool = False
-    for key, entry in warn_entries.items():
+    for entry in warn_entries.values():
         if entry.title == "M-Query Parsing Timeout":
             is_entry_present = True
             break

@@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message(
     is_entry_present: bool = False
 
     # Printing INFO entries
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
         if entry.title == "Missing Lineage For Tile":
             is_entry_present = True
             break
@@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message(
     is_entry_present: bool = False
 
     # Printing INFO entries
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
         if entry.title == "App Ingestion Is Disabled":
             is_entry_present = True
             break

@@ -198,7 +198,7 @@ def generate_queries(
     all_tables = seed_metadata.tables + seed_metadata.views
     users = [f"user_{i}@xyz.com" for i in range(num_users)]
 
-    for i in range(num_selects):  # Pure SELECT statements
+    for _ in range(num_selects):  # Pure SELECT statements
         tables = _sample_list(all_tables, tables_per_select)
         all_columns = [
             FieldAccess(column, table) for table in tables for column in table.columns
@@ -213,7 +213,7 @@ def generate_queries(
             fields_accessed=_sample_list(all_columns, columns_per_select),
         )
 
-    for i in range(num_operations):
+    for _ in range(num_operations):
         modified_table = random.choice(seed_metadata.tables)
         n_col = len(modified_table.columns)
         num_columns_modified = NormalDistribution(n_col / 2, n_col / 2)

@@ -42,5 +42,3 @@ warn_unused_configs = true
 disallow_incomplete_defs = false
 disallow_untyped_defs = false
-
-[tool.pyright]
-extraPaths = ['tests']