dev(ingest): move modules from isort,flake8 to ruff (#12373)
This commit is contained in:
parent 4de7f61d09
commit 76e46b89db
@@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) {
"find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ && " +
"isort --check --diff src/ tests/ && " +
"flake8 --count --statistics src/ tests/ && " +
"ruff check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/"
}

task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black src/ tests/ && " +
"isort src/ tests/ && " +
"flake8 src/ tests/ && " +
"ruff check --fix src/ tests/"
"mypy src/ tests/ "
}

@@ -10,11 +10,50 @@ extend-exclude = '''
'''
include = '\.pyi?$'

[tool.isort]
indent = ' '
known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat']
profile = 'black'
sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'

[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["datahub"]
extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
force-sort-within-sections = false
force-wrap-aliases = false
split-on-trailing-comma = false
order-by-type = true
relative-imports-order = "closest-to-furthest"
force-single-line = false
single-line-exclusions = ["typing"]
length-sort = false
from-first = false
required-imports = []
classes = ["typing"]

[tool.pyright]
extraPaths = ['tests']

[tool.ruff.lint]
select = [
"B",
"C90",
"E",
"F",
"I", # For isort
"TID",
]
ignore = [
# Ignore line length violations (handled by Black)
"E501",
# Ignore whitespace before ':' (matches Black)
"E203",
"E203",
# Allow usages of functools.lru_cache
"B019",
# Allow function call in argument defaults
"B008",
]

[tool.ruff.lint.mccabe]
max-complexity = 15

[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
@@ -1,24 +1,3 @@
[flake8]
max-complexity = 15
ignore =
# Ignore: line length issues, since black's formatter will take care of them.
E501,
# Ignore: 1 blank line required before class docstring.
D203,
# See https://stackoverflow.com/a/57074416.
W503,
# See https://github.com/psf/black/issues/315.
E203
exclude =
.git,
venv,
.tox,
__pycache__
per-file-ignores =
# imported but unused
__init__.py: F401
ban-relative-imports = true

[mypy]
plugins =
sqlmypy,

@@ -73,9 +73,7 @@ dev_requirements = {
*mypy_stubs,
"black==22.12.0",
"coverage>=5.1",
"flake8>=3.8.3",
"flake8-tidy-imports>=4.3.0",
"isort>=5.7.0",
"ruff==0.9.1",
"mypy==1.10.1",
# pydantic 1.8.2 is incompatible with mypy 0.910.
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.

@@ -1,11 +1,12 @@
from enum import Enum
from typing import TYPE_CHECKING, Optional

import datahub.emitter.mce_builder as builder
from airflow.configuration import conf
from datahub.configuration.common import AllowDenyPattern, ConfigModel
from pydantic.fields import Field

import datahub.emitter.mce_builder as builder
from datahub.configuration.common import AllowDenyPattern, ConfigModel

if TYPE_CHECKING:
from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook

@@ -1,8 +1,9 @@
import logging

import datahub.emitter.mce_builder as builder
from openlineage.client.run import Dataset as OpenLineageDataset

import datahub.emitter.mce_builder as builder

logger = logging.getLogger(__name__)

@@ -3,17 +3,11 @@ import logging
import unittest.mock
from typing import TYPE_CHECKING, Optional

import datahub.emitter.mce_builder as builder
from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
get_platform_from_sqlalchemy_uri,
from openlineage.airflow.extractors import (
BaseExtractor,
ExtractorManager as OLExtractorManager,
TaskMetadata,
)
from datahub.sql_parsing.sqlglot_lineage import (
SqlParsingResult,
create_lineage_sql_parsed_result,
)
from openlineage.airflow.extractors import BaseExtractor
from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager
from openlineage.airflow.extractors import TaskMetadata
from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor
from openlineage.airflow.extractors.sql_extractor import SqlExtractor
from openlineage.airflow.utils import get_operator_class, try_import_from_string

@@ -23,11 +17,20 @@ from openlineage.client.facet import (
SqlJobFacet,
)

import datahub.emitter.mce_builder as builder
from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
get_platform_from_sqlalchemy_uri,
)
from datahub.sql_parsing.sqlglot_lineage import (
SqlParsingResult,
create_lineage_sql_parsed_result,
)
from datahub_airflow_plugin._airflow_shims import Operator
from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS

if TYPE_CHECKING:
from airflow.models import DagRun, TaskInstance

from datahub.ingestion.graph.client import DataHubGraph

logger = logging.getLogger(__name__)

@@ -2,6 +2,7 @@ from datetime import datetime
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast

from airflow.configuration import conf

from datahub.api.entities.datajob import DataFlow, DataJob
from datahub.api.entities.dataprocess.dataprocess_instance import (
DataProcessInstance,

@@ -11,7 +12,6 @@ from datahub.emitter.generic_emitter import Emitter
from datahub.metadata.schema_classes import DataProcessTypeClass
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.data_job_urn import DataJobUrn

from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED
from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl

@@ -8,9 +8,13 @@ import time
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast

import airflow
import datahub.emitter.mce_builder as builder
from airflow.models import Variable
from airflow.models.serialized_dag import SerializedDagModel
from openlineage.airflow.listener import TaskHolder
from openlineage.airflow.utils import redact_with_exclusions
from openlineage.client.serde import Serde

import datahub.emitter.mce_builder as builder
from datahub.api.entities.datajob import DataJob
from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
from datahub.emitter.mcp import MetadataChangeProposalWrapper

@@ -30,10 +34,6 @@ from datahub.metadata.schema_classes import (
)
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
from datahub.telemetry import telemetry
from openlineage.airflow.listener import TaskHolder
from openlineage.airflow.utils import redact_with_exclusions
from openlineage.client.serde import Serde

from datahub_airflow_plugin._airflow_shims import (
HAS_AIRFLOW_DAG_LISTENER_API,
HAS_AIRFLOW_DATASET_LISTENER_API,

@@ -15,9 +15,9 @@ assert AIRFLOW_PATCHED
logger = logging.getLogger(__name__)

_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv(
_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv(
"DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false"
).lower() in ("true", "1")
).lower() not in ("true", "1")

if _USE_AIRFLOW_LISTENER_INTERFACE:
try:

@@ -32,7 +32,7 @@ if _USE_AIRFLOW_LISTENER_INTERFACE:

with contextlib.suppress(Exception):
if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in (
if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in (
"true",
"1",
):
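The two hunks above also rewrite `not x in y` as `x not in y`, which pycodestyle's E713 check (pulled in through the `E` selector) reports once ruff runs over these files. A small, self-contained sketch of the same rewrite; the `SOME_FLAG` environment variable is illustrative only and the behaviour of the two forms is identical:

```python
import os

# Before: `not x in y` parses as `not (x in y)`, but E713 flags the spelling.
use_v2 = not os.getenv("SOME_FLAG", "false").lower() in ("true", "1")

# After: the membership test ruff/pycodestyle prefer; same result.
use_v2 = os.getenv("SOME_FLAG", "false").lower() not in ("true", "1")
```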
@@ -7,9 +7,9 @@ import airflow
from airflow.lineage import PIPELINE_OUTLETS
from airflow.models.baseoperator import BaseOperator
from airflow.utils.module_loading import import_string

from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
from datahub.telemetry import telemetry

from datahub_airflow_plugin._airflow_shims import (
MappedOperator,
get_task_inlets,

@@ -2,6 +2,7 @@ from abc import abstractmethod
from typing import List, Optional

import attr

import datahub.emitter.mce_builder as builder
from datahub.utilities.urns.data_job_urn import DataJobUrn
from datahub.utilities.urns.dataset_urn import DatasetUrn

@@ -9,6 +9,7 @@ from datetime import timedelta
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

from datahub.configuration.config_loader import load_config_file
from datahub.ingestion.run.pipeline import Pipeline

@@ -4,8 +4,8 @@ from datetime import timedelta

import pendulum
from airflow.decorators import dag, task
from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter

from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter
from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -4,11 +4,11 @@ This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
"""
from datetime import timedelta

import datahub.emitter.mce_builder as builder
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.utils.dates import days_ago

import datahub.emitter.mce_builder as builder
from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator

default_args = {

@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union

from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook

from datahub.emitter.generic_emitter import Emitter
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (

@@ -11,6 +12,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (

if TYPE_CHECKING:
from airflow.models.connection import Connection

from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
from datahub.emitter.rest_emitter import DataHubRestEmitter
from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter

@@ -2,7 +2,6 @@ from datetime import datetime
from typing import TYPE_CHECKING, Dict, List

from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult

from datahub_airflow_plugin._config import DatahubLineageConfig
from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
from datahub_airflow_plugin.entities import (

@@ -3,9 +3,9 @@ from typing import List, Union
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from avrogen.dict_wrapper import DictWrapper

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent

from datahub_airflow_plugin.hooks.datahub import (
DatahubGenericHook,
DatahubKafkaHook,

@@ -2,11 +2,11 @@ import datetime
from typing import Any, List, Optional, Sequence, Union

from airflow.models import BaseOperator

from datahub.api.circuit_breaker import (
AssertionCircuitBreaker,
AssertionCircuitBreakerConfig,
)

from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
from typing import Any, List, Optional, Sequence, Union

from airflow.sensors.base import BaseSensorOperator

from datahub.api.circuit_breaker import (
AssertionCircuitBreaker,
AssertionCircuitBreakerConfig,
)

from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
from typing import Any, List, Optional, Sequence, Union

from airflow.sensors.base import BaseSensorOperator

from datahub.api.circuit_breaker import (
OperationCircuitBreaker,
OperationCircuitBreakerConfig,
)

from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -2,11 +2,11 @@ import datetime
from typing import Any, List, Optional, Sequence, Union

from airflow.sensors.base import BaseSensorOperator

from datahub.api.circuit_breaker import (
OperationCircuitBreaker,
OperationCircuitBreakerConfig,
)

from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -1,6 +1,7 @@
from datetime import datetime, timedelta

from airflow import DAG

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import (

@@ -9,7 +10,6 @@ from datahub.metadata.schema_classes import (
DatasetPropertiesClass,
DatasetSnapshotClass,
)

from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator

default_args = {

@@ -17,9 +17,9 @@ import pytest
import requests
import tenacity
from airflow.models.connection import Connection

from datahub.ingestion.sink.file import write_metadata_file
from datahub.testing.compare_metadata_json import assert_metadata_files_equal

from datahub_airflow_plugin._airflow_shims import (
AIRFLOW_VERSION,
HAS_AIRFLOW_DAG_LISTENER_API,

@@ -8,12 +8,12 @@ from unittest.mock import Mock

import airflow.configuration
import airflow.version
import datahub.emitter.mce_builder as builder
import packaging.version
import pytest
from airflow.lineage import apply_lineage, prepare_lineage
from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance

import datahub.emitter.mce_builder as builder
from datahub_airflow_plugin import get_provider_info
from datahub_airflow_plugin._airflow_shims import (
AIRFLOW_PATCHED,

@@ -1,6 +1,6 @@
import setuptools
from datahub.testing.check_imports import ensure_no_indirect_model_imports

from datahub.testing.check_imports import ensure_no_indirect_model_imports
from tests.utils import PytestConfig

@@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
"isort --check --diff src/ tests/ examples/ && " +
"flake8 --count --statistics src/ tests/ examples/ && " +
"ruff check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
"black src/ tests/ examples/ && " +
"isort src/ tests/ examples/ && " +
"flake8 src/ tests/ examples/ && " +
"ruff check --fix src/ tests/"
"mypy src/ tests/ examples/"
}

@@ -10,10 +10,50 @@ extend-exclude = '''
'''
include = '\.pyi?$'

[tool.isort]
indent = ' '
profile = 'black'
sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'

[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["datahub"]
extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
force-sort-within-sections = false
force-wrap-aliases = false
split-on-trailing-comma = false
order-by-type = true
relative-imports-order = "closest-to-furthest"
force-single-line = false
single-line-exclusions = ["typing"]
length-sort = false
from-first = false
required-imports = []
classes = ["typing"]

[tool.pyright]
extraPaths = ['tests']

[tool.ruff.lint]
select = [
"B",
"C90",
"E",
"F",
"I", # For isort
"TID",
]
ignore = [
# Ignore line length violations (handled by Black)
"E501",
# Ignore whitespace before ':' (matches Black)
"E203",
"E203",
# Allow usages of functools.lru_cache
"B019",
# Allow function call in argument defaults
"B008",
]

[tool.ruff.lint.mccabe]
max-complexity = 15

[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
@@ -1,24 +1,3 @@
[flake8]
max-complexity = 15
ignore =
# Ignore: line length issues, since black's formatter will take care of them.
E501,
# Ignore: 1 blank line required before class docstring.
D203,
# See https://stackoverflow.com/a/57074416.
W503,
# See https://github.com/psf/black/issues/315.
E203
exclude =
.git,
venv,
.tox,
__pycache__
per-file-ignores =
# imported but unused
__init__.py: F401
ban-relative-imports = true

[mypy]
plugins =
pydantic.mypy

@@ -53,10 +53,7 @@ base_dev_requirements = {
"dagster-snowflake-pandas >= 0.11.0",
"black==22.12.0",
"coverage>=5.1",
"flake8>=6.0.0",
"flake8-tidy-imports>=4.3.0",
"flake8-bugbear==23.3.12",
"isort>=5.7.0",
"ruff==0.9.1",
"mypy>=1.4.0",
# pydantic 1.8.2 is incompatible with mypy 0.910.
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.

@@ -13,6 +13,7 @@ from dagster import (
TableSchemaMetadataValue,
)
from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus

from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint

try:

@@ -23,6 +24,7 @@ except ImportError:

from dagster._core.snap.node import OpDefSnap
from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot

from datahub.api.entities.datajob import DataFlow, DataJob
from datahub.api.entities.dataprocess.dataprocess_instance import (
DataProcessInstance,

@@ -35,7 +35,9 @@ from dagster._core.definitions.sensor_definition import DefaultSensorStatus
try:
from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion
except ImportError:
from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion # type: ignore
from dagster._core.definitions.sensor_definition import ( # type: ignore
RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion,
)

from dagster._core.definitions.target import ExecutableDefinition
from dagster._core.definitions.unresolved_asset_job_definition import (

@@ -43,6 +45,7 @@ from dagster._core.definitions.unresolved_asset_job_definition import (
)
from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData
from dagster._core.execution.stats import RunStepKeyStatsSnapshot

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.metadata.schema_classes import SubTypesClass

@@ -52,7 +55,6 @@ from datahub.sql_parsing.sqlglot_lineage import (
)
from datahub.utilities.urns.dataset_urn import DatasetUrn
from datahub.utilities.urns.error import InvalidUrnError

from datahub_dagster_plugin.client.dagster_generator import (
DATAHUB_ASSET_GROUP_NAME_CACHE,
Constant,

@@ -22,11 +22,11 @@ from dagster._core.definitions.repository_definition import (
RepositoryDefinition,
)
from dagster._core.definitions.resource_definition import ResourceDefinition
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig
from freezegun import freeze_time
from utils.utils import PytestConfig, check_golden_file

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig
from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubSensors,

@@ -14,6 +14,3 @@ include = '\.pyi?$'
indent = ' '
profile = 'black'
sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'

[tool.pyright]
extraPaths = ['tests']

@@ -14,6 +14,3 @@ include = '\.pyi?$'
indent = ' '
profile = 'black'
sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'

[tool.pyright]
extraPaths = ['tests']

@@ -11,6 +11,7 @@ extend-exclude = '''
include = '\.pyi?$'
target-version = ['py38', 'py39', 'py310', 'py311']

[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["datahub"]

@@ -28,16 +29,6 @@ from-first = false
required-imports = []
classes = ["typing"]

[tool.pyright]
extraPaths = ['tests']

[tool.vulture]
exclude = ["src/datahub/metadata/"]
ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"]
ignore_names = ["*Source", "*Sink", "*Report"]
paths = ["src"]
sort_by_size = true

[tool.ruff]
# Same as Black.
line-length = 88

@@ -70,7 +61,6 @@ ignore = [
"B008",
# TODO: Enable these later
"B006", # Mutable args
"B007", # Unused loop control variable
"B017", # Do not assert blind exception
"B904", # Checks for raise statements in exception handlers that lack a from clause
]
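This hunk appears to drop "B007" (unused loop control variable) from the "enable these later" ignore list, so flake8-bugbear's check now applies, and the mechanical loop rewrites in the hunks below are the resulting fixes. A standalone sketch of the pattern; the dictionary here is illustrative only:

```python
table_dict = {"guid-1": "orders", "guid-2": "users"}

# B007: `guid` is bound but never used inside the loop body.
for guid, table_data in table_dict.items():
    print(table_data)

# Fix 1: rename the unused variable to `_`, as several hunks below do.
for _, table_data in table_dict.items():
    print(table_data)

# Fix 2: iterate over .values() and drop the key entirely.
for table_data in table_dict.values():
    print(table_data)
```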
@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
for replace_index, replace_value in patches_replace.items():
list_to_manipulate[replace_index] = replace_value

for drop_index, drop_value in patches_drop.items():
for drop_value in patches_drop.values():
list_to_manipulate.remove(drop_value)

for add_value in patches_add:

@@ -613,7 +613,7 @@ class ABSSource(StatefulIngestionSourceBase):
table_data.table_path
].timestamp = table_data.timestamp

for guid, table_data in table_dict.items():
for _, table_data in table_dict.items():
yield from self.ingest_table(table_data, path_spec)

def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -181,7 +181,7 @@ class DremioAPIOperations:
return

# On-prem Dremio authentication (PAT or Basic Auth)
for retry in range(1, self._retry_count + 1):
for _ in range(1, self._retry_count + 1):
try:
if connection_args.authentication_method == "PAT":
self.session.headers.update(

@@ -286,7 +286,7 @@ class Neo4jSource(Source):
df = self.get_neo4j_metadata(
"CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
)
for index, row in df.iterrows():
for _, row in df.iterrows():
try:
yield MetadataWorkUnit(
id=row["key"],

@@ -1124,7 +1124,7 @@ class S3Source(StatefulIngestionSourceBase):
table_data.table_path
].timestamp = table_data.timestamp

for guid, table_data in table_dict.items():
for _, table_data in table_dict.items():
yield from self.ingest_table(table_data, path_spec)

if not self.source_config.is_profiling_enabled():

@@ -354,7 +354,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"

if os.path.isdir(self.config.path):
for root, dirs, files in os.walk(self.config.path, topdown=False):
for root, _, files in os.walk(self.config.path, topdown=False):
for file_name in [f for f in files if f.endswith(".json")]:
try:
yield from self._load_one_file(

@@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
info_cache = kw.get("info_cache")
all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
relation_names = []
for key, relation in all_relations.items():
for _, relation in all_relations.items():
if relation.database == schema and relation.relkind == relkind:
relation_names.append(relation.relname)
return relation_names

@@ -3605,7 +3605,7 @@ class TableauSiteSource:
parent_container_key=parent_project_key,
)

for id_, project in self.tableau_project_registry.items():
for project in self.tableau_project_registry.values():
logger.debug(
f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
)

@@ -246,7 +246,7 @@ class MCPDiff:
for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed:
aspect_map = self.aspect_changes[urn]
s.append(f"Urn changed, {urn}:")
for aspect_name, aspect_diffs in aspect_map.items():
for aspect_diffs in aspect_map.values():
for i, ga in aspect_diffs.aspects_added.items():
s.append(self.report_aspect(ga, i, "added"))
if verbose:

@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType

# Patch based on sqllineage v1.3.3
def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None: # type: ignore
for i, tbl in enumerate(self.tables):
for tbl in self.tables:
holder.add_read(tbl)
self.union_barriers.append((len(self.columns), len(self.tables)))

@@ -1070,7 +1070,7 @@ def test_unsupported_data_platform():
) # type :ignore

is_entry_present: bool = False
for key, entry in info_entries.items():
for entry in info_entries.values():
if entry.title == "Non-Data Platform Expression":
is_entry_present = True
break

@@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser):
) # type :ignore

is_entry_present: bool = False
for key, entry in warn_entries.items():
for entry in warn_entries.values():
if entry.title == "M-Query Parsing Timeout":
is_entry_present = True
break

@@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message(

is_entry_present: bool = False
# Printing INFO entries
for key, entry in info_entries.items():
for entry in info_entries.values():
if entry.title == "Missing Lineage For Tile":
is_entry_present = True
break

@@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message(

is_entry_present: bool = False
# Printing INFO entries
for key, entry in info_entries.items():
for entry in info_entries.values():
if entry.title == "App Ingestion Is Disabled":
is_entry_present = True
break

@@ -198,7 +198,7 @@ def generate_queries(

all_tables = seed_metadata.tables + seed_metadata.views
users = [f"user_{i}@xyz.com" for i in range(num_users)]
for i in range(num_selects): # Pure SELECT statements
for _ in range(num_selects): # Pure SELECT statements
tables = _sample_list(all_tables, tables_per_select)
all_columns = [
FieldAccess(column, table) for table in tables for column in table.columns

@@ -213,7 +213,7 @@ def generate_queries(
fields_accessed=_sample_list(all_columns, columns_per_select),
)

for i in range(num_operations):
for _ in range(num_operations):
modified_table = random.choice(seed_metadata.tables)
n_col = len(modified_table.columns)
num_columns_modified = NormalDistribution(n_col / 2, n_col / 2)

@@ -42,5 +42,3 @@ warn_unused_configs = true
disallow_incomplete_defs = false
disallow_untyped_defs = false

[tool.pyright]
extraPaths = ['tests']