dev(ingest): move modules from isort,flake8 to ruff (#12373)

parent 4de7f61d09
commit 76e46b89db
@@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) {
     "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
     "source ${venv_name}/bin/activate && set -x && " +
     "black --check --diff src/ tests/ && " +
-    "isort --check --diff src/ tests/ && " +
-    "flake8 --count --statistics src/ tests/ && " +
+    "ruff check src/ tests/ && " +
     "mypy --show-traceback --show-error-codes src/ tests/"
 }
 task lintFix(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-c',
     "source ${venv_name}/bin/activate && set -x && " +
     "black src/ tests/ && " +
-    "isort src/ tests/ && " +
-    "flake8 src/ tests/ && " +
+    "ruff check --fix src/ tests/"
     "mypy src/ tests/ "
 }

@@ -10,11 +10,50 @@ extend-exclude = '''
 '''
 include = '\.pyi?$'

-[tool.isort]
-indent = ' '
-known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat']
-profile = 'black'
-sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = ["datahub"]
+extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
+section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
+force-sort-within-sections = false
+force-wrap-aliases = false
+split-on-trailing-comma = false
+order-by-type = true
+relative-imports-order = "closest-to-furthest"
+force-single-line = false
+single-line-exclusions = ["typing"]
+length-sort = false
+from-first = false
+required-imports = []
+classes = ["typing"]

-[tool.pyright]
-extraPaths = ['tests']
+[tool.ruff.lint]
+select = [
+    "B",
+    "C90",
+    "E",
+    "F",
+    "I", # For isort
+    "TID",
+]
+ignore = [
+    # Ignore line length violations (handled by Black)
+    "E501",
+    # Ignore whitespace before ':' (matches Black)
+    "E203",
+    "E203",
+    # Allow usages of functools.lru_cache
+    "B019",
+    # Allow function call in argument defaults
+    "B008",
+]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Disallow all relative imports.
+ban-relative-imports = "all"
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]
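Note on the isort migration above: `known-first-party = ["datahub"]` makes ruff treat `datahub.*` imports as first-party, while `extra-standard-library` keeps the compat shims sorted with the standard library, so the old isort section order is preserved. This is what drives the import reshuffling in the Python hunks below: `datahub` imports move out of the third-party block into their own block after packages such as `airflow`, `openlineage`, and `pydantic`. A minimal sketch of the resulting grouping (this module and its imports are illustrative only, not part of the change):

    from enum import Enum  # standard library

    from airflow.models import BaseOperator  # third-party
    from pydantic import BaseModel  # third-party

    import datahub.emitter.mce_builder as builder  # first-party via known-first-party = ["datahub"]
    from datahub.configuration.common import ConfigModel  # first-party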
@@ -1,24 +1,3 @@
-[flake8]
-max-complexity = 15
-ignore =
-    # Ignore: line length issues, since black's formatter will take care of them.
-    E501,
-    # Ignore: 1 blank line required before class docstring.
-    D203,
-    # See https://stackoverflow.com/a/57074416.
-    W503,
-    # See https://github.com/psf/black/issues/315.
-    E203
-exclude =
-    .git,
-    venv,
-    .tox,
-    __pycache__
-per-file-ignores =
-    # imported but unused
-    __init__.py: F401
-ban-relative-imports = true
-
 [mypy]
 plugins =
     sqlmypy,
@@ -73,9 +73,7 @@ dev_requirements = {
     *mypy_stubs,
     "black==22.12.0",
     "coverage>=5.1",
-    "flake8>=3.8.3",
-    "flake8-tidy-imports>=4.3.0",
-    "isort>=5.7.0",
+    "ruff==0.9.1",
     "mypy==1.10.1",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
@@ -1,11 +1,12 @@
 from enum import Enum
 from typing import TYPE_CHECKING, Optional

-import datahub.emitter.mce_builder as builder
 from airflow.configuration import conf
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from pydantic.fields import Field

+import datahub.emitter.mce_builder as builder
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
+
 if TYPE_CHECKING:
     from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook

@@ -1,8 +1,9 @@
 import logging

-import datahub.emitter.mce_builder as builder
 from openlineage.client.run import Dataset as OpenLineageDataset

+import datahub.emitter.mce_builder as builder
+
 logger = logging.getLogger(__name__)


@@ -3,17 +3,11 @@ import logging
 import unittest.mock
 from typing import TYPE_CHECKING, Optional

-import datahub.emitter.mce_builder as builder
-from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
-    get_platform_from_sqlalchemy_uri,
-)
-from datahub.sql_parsing.sqlglot_lineage import (
-    SqlParsingResult,
-    create_lineage_sql_parsed_result,
-)
-from openlineage.airflow.extractors import BaseExtractor
-from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager
-from openlineage.airflow.extractors import TaskMetadata
+from openlineage.airflow.extractors import (
+    BaseExtractor,
+    ExtractorManager as OLExtractorManager,
+    TaskMetadata,
+)
 from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor
 from openlineage.airflow.extractors.sql_extractor import SqlExtractor
 from openlineage.airflow.utils import get_operator_class, try_import_from_string

@@ -23,11 +17,20 @@ from openlineage.client.facet import (
     SqlJobFacet,
 )

+import datahub.emitter.mce_builder as builder
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
+from datahub.sql_parsing.sqlglot_lineage import (
+    SqlParsingResult,
+    create_lineage_sql_parsed_result,
+)
 from datahub_airflow_plugin._airflow_shims import Operator
 from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS

 if TYPE_CHECKING:
     from airflow.models import DagRun, TaskInstance

     from datahub.ingestion.graph.client import DataHubGraph

 logger = logging.getLogger(__name__)
@@ -2,6 +2,7 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast

 from airflow.configuration import conf
+
 from datahub.api.entities.datajob import DataFlow, DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import (
     DataProcessInstance,

@@ -11,7 +12,6 @@ from datahub.emitter.generic_emitter import Emitter
 from datahub.metadata.schema_classes import DataProcessTypeClass
 from datahub.utilities.urns.data_flow_urn import DataFlowUrn
 from datahub.utilities.urns.data_job_urn import DataJobUrn
-
 from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED
 from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl
@@ -8,9 +8,13 @@ import time
 from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast

 import airflow
-import datahub.emitter.mce_builder as builder
 from airflow.models import Variable
 from airflow.models.serialized_dag import SerializedDagModel
+from openlineage.airflow.listener import TaskHolder
+from openlineage.airflow.utils import redact_with_exclusions
+from openlineage.client.serde import Serde
+
+import datahub.emitter.mce_builder as builder
 from datahub.api.entities.datajob import DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
 from datahub.emitter.mcp import MetadataChangeProposalWrapper

@@ -30,10 +34,6 @@ from datahub.metadata.schema_classes import (
 )
 from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
 from datahub.telemetry import telemetry
-from openlineage.airflow.listener import TaskHolder
-from openlineage.airflow.utils import redact_with_exclusions
-from openlineage.client.serde import Serde
-
 from datahub_airflow_plugin._airflow_shims import (
     HAS_AIRFLOW_DAG_LISTENER_API,
     HAS_AIRFLOW_DATASET_LISTENER_API,
@@ -15,9 +15,9 @@ assert AIRFLOW_PATCHED
 logger = logging.getLogger(__name__)


-_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv(
+_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv(
     "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false"
-).lower() in ("true", "1")
+).lower() not in ("true", "1")

 if _USE_AIRFLOW_LISTENER_INTERFACE:
     try:

@@ -32,7 +32,7 @@ if _USE_AIRFLOW_LISTENER_INTERFACE:


     with contextlib.suppress(Exception):
-        if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in (
+        if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in (
             "true",
             "1",
         ):
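The two rewrites above are plain membership-test fixes: `not os.getenv(...) in (...)` becomes `os.getenv(...) not in (...)`. The logic is unchanged; the `not in` form is the idiomatic spelling and is what pycodestyle's E713 check (part of the newly selected `E` rule group) expects. A tiny sketch of the pattern, with a hypothetical flag value:

    flag = "false"

    # Old form: negates the whole membership test (what E713 flags).
    if not flag.lower() in ("true", "1"):
        print("flag is not set")

    # New form: same truth table, idiomatic `not in` operator.
    if flag.lower() not in ("true", "1"):
        print("flag is not set")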
@@ -7,9 +7,9 @@ import airflow
 from airflow.lineage import PIPELINE_OUTLETS
 from airflow.models.baseoperator import BaseOperator
 from airflow.utils.module_loading import import_string
+
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
 from datahub.telemetry import telemetry
-
 from datahub_airflow_plugin._airflow_shims import (
     MappedOperator,
     get_task_inlets,
@@ -2,6 +2,7 @@ from abc import abstractmethod
 from typing import List, Optional

 import attr
+
 import datahub.emitter.mce_builder as builder
 from datahub.utilities.urns.data_job_urn import DataJobUrn
 from datahub.utilities.urns.dataset_urn import DatasetUrn
@@ -9,6 +9,7 @@ from datetime import timedelta
 from airflow import DAG
 from airflow.operators.python import PythonOperator
 from airflow.utils.dates import days_ago
+
 from datahub.configuration.config_loader import load_config_file
 from datahub.ingestion.run.pipeline import Pipeline

@@ -4,8 +4,8 @@ from datetime import timedelta

 import pendulum
 from airflow.decorators import dag, task
-from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter

+from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook

@@ -4,11 +4,11 @@ This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
 """
 from datetime import timedelta

-import datahub.emitter.mce_builder as builder
 from airflow import DAG
 from airflow.operators.bash import BashOperator
 from airflow.utils.dates import days_ago

+import datahub.emitter.mce_builder as builder
 from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator

 default_args = {
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union

 from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
+
 from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (

@@ -11,6 +12,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (

 if TYPE_CHECKING:
     from airflow.models.connection import Connection
+
     from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
     from datahub.emitter.rest_emitter import DataHubRestEmitter
     from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter
@@ -2,7 +2,6 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List

 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
-
 from datahub_airflow_plugin._config import DatahubLineageConfig
 from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
 from datahub_airflow_plugin.entities import (
@@ -3,9 +3,9 @@ from typing import List, Union
 from airflow.models import BaseOperator
 from airflow.utils.decorators import apply_defaults
 from avrogen.dict_wrapper import DictWrapper
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
-
 from datahub_airflow_plugin.hooks.datahub import (
     DatahubGenericHook,
     DatahubKafkaHook,
@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union

 from airflow.models import BaseOperator
+
 from datahub.api.circuit_breaker import (
     AssertionCircuitBreaker,
     AssertionCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook


@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union

 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     AssertionCircuitBreaker,
     AssertionCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook


@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union

 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     OperationCircuitBreaker,
     OperationCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook


@@ -2,11 +2,11 @@ import datetime
 from typing import Any, List, Optional, Sequence, Union

 from airflow.sensors.base import BaseSensorOperator
+
 from datahub.api.circuit_breaker import (
     OperationCircuitBreaker,
     OperationCircuitBreakerConfig,
 )
-
 from datahub_airflow_plugin.hooks.datahub import DatahubRestHook


@@ -1,6 +1,7 @@
 from datetime import datetime, timedelta

 from airflow import DAG
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (

@@ -9,7 +10,6 @@ from datahub.metadata.schema_classes import (
     DatasetPropertiesClass,
     DatasetSnapshotClass,
 )
-
 from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator

 default_args = {
@@ -17,9 +17,9 @@ import pytest
 import requests
 import tenacity
 from airflow.models.connection import Connection
+
 from datahub.ingestion.sink.file import write_metadata_file
 from datahub.testing.compare_metadata_json import assert_metadata_files_equal
-
 from datahub_airflow_plugin._airflow_shims import (
     AIRFLOW_VERSION,
     HAS_AIRFLOW_DAG_LISTENER_API,
@@ -8,12 +8,12 @@ from unittest.mock import Mock

 import airflow.configuration
 import airflow.version
-import datahub.emitter.mce_builder as builder
 import packaging.version
 import pytest
 from airflow.lineage import apply_lineage, prepare_lineage
 from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance

+import datahub.emitter.mce_builder as builder
 from datahub_airflow_plugin import get_provider_info
 from datahub_airflow_plugin._airflow_shims import (
     AIRFLOW_PATCHED,
@@ -1,6 +1,6 @@
 import setuptools
-from datahub.testing.check_imports import ensure_no_indirect_model_imports

+from datahub.testing.check_imports import ensure_no_indirect_model_imports
 from tests.utils import PytestConfig


@@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-c',
     "source ${venv_name}/bin/activate && set -x && " +
     "black --check --diff src/ tests/ examples/ && " +
-    "isort --check --diff src/ tests/ examples/ && " +
-    "flake8 --count --statistics src/ tests/ examples/ && " +
+    "ruff check src/ tests/ && " +
     "mypy --show-traceback --show-error-codes src/ tests/ examples/"
 }
 task lintFix(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-x', '-c',
     "source ${venv_name}/bin/activate && " +
     "black src/ tests/ examples/ && " +
-    "isort src/ tests/ examples/ && " +
-    "flake8 src/ tests/ examples/ && " +
+    "ruff check --fix src/ tests/"
     "mypy src/ tests/ examples/"
 }

@@ -10,10 +10,50 @@ extend-exclude = '''
 '''
 include = '\.pyi?$'

-[tool.isort]
-indent = ' '
-profile = 'black'
-sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = ["datahub"]
+extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
+section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
+force-sort-within-sections = false
+force-wrap-aliases = false
+split-on-trailing-comma = false
+order-by-type = true
+relative-imports-order = "closest-to-furthest"
+force-single-line = false
+single-line-exclusions = ["typing"]
+length-sort = false
+from-first = false
+required-imports = []
+classes = ["typing"]

-[tool.pyright]
-extraPaths = ['tests']
+[tool.ruff.lint]
+select = [
+    "B",
+    "C90",
+    "E",
+    "F",
+    "I", # For isort
+    "TID",
+]
+ignore = [
+    # Ignore line length violations (handled by Black)
+    "E501",
+    # Ignore whitespace before ':' (matches Black)
+    "E203",
+    "E203",
+    # Allow usages of functools.lru_cache
+    "B019",
+    # Allow function call in argument defaults
+    "B008",
+]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Disallow all relative imports.
+ban-relative-imports = "all"
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]
@@ -1,24 +1,3 @@
-[flake8]
-max-complexity = 15
-ignore =
-    # Ignore: line length issues, since black's formatter will take care of them.
-    E501,
-    # Ignore: 1 blank line required before class docstring.
-    D203,
-    # See https://stackoverflow.com/a/57074416.
-    W503,
-    # See https://github.com/psf/black/issues/315.
-    E203
-exclude =
-    .git,
-    venv,
-    .tox,
-    __pycache__
-per-file-ignores =
-    # imported but unused
-    __init__.py: F401
-ban-relative-imports = true
-
 [mypy]
 plugins =
     pydantic.mypy
@@ -53,10 +53,7 @@ base_dev_requirements = {
     "dagster-snowflake-pandas >= 0.11.0",
     "black==22.12.0",
     "coverage>=5.1",
-    "flake8>=6.0.0",
-    "flake8-tidy-imports>=4.3.0",
-    "flake8-bugbear==23.3.12",
-    "isort>=5.7.0",
+    "ruff==0.9.1",
     "mypy>=1.4.0",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
@@ -13,6 +13,7 @@ from dagster import (
     TableSchemaMetadataValue,
 )
 from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus
+
 from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint

 try:

@@ -23,6 +24,7 @@ except ImportError:

 from dagster._core.snap.node import OpDefSnap
 from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot
+
 from datahub.api.entities.datajob import DataFlow, DataJob
 from datahub.api.entities.dataprocess.dataprocess_instance import (
     DataProcessInstance,
@@ -35,7 +35,9 @@ from dagster._core.definitions.sensor_definition import DefaultSensorStatus
 try:
     from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion
 except ImportError:
-    from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion  # type: ignore
+    from dagster._core.definitions.sensor_definition import (  # type: ignore
+        RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion,
+    )

 from dagster._core.definitions.target import ExecutableDefinition
 from dagster._core.definitions.unresolved_asset_job_definition import (

@@ -43,6 +45,7 @@ from dagster._core.definitions.unresolved_asset_job_definition import (
 )
 from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData
 from dagster._core.execution.stats import RunStepKeyStatsSnapshot
+
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
 from datahub.metadata.schema_classes import SubTypesClass

@@ -52,7 +55,6 @@ from datahub.sql_parsing.sqlglot_lineage import (
 )
 from datahub.utilities.urns.dataset_urn import DatasetUrn
 from datahub.utilities.urns.error import InvalidUrnError
-
 from datahub_dagster_plugin.client.dagster_generator import (
     DATAHUB_ASSET_GROUP_NAME_CACHE,
     Constant,
@@ -22,11 +22,11 @@ from dagster._core.definitions.repository_definition import (
     RepositoryDefinition,
 )
 from dagster._core.definitions.resource_definition import ResourceDefinition
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.graph.client import DatahubClientConfig
 from freezegun import freeze_time
 from utils.utils import PytestConfig, check_golden_file

+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.graph.client import DatahubClientConfig
 from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig
 from datahub_dagster_plugin.sensors.datahub_sensors import (
     DatahubSensors,
@@ -14,6 +14,3 @@ include = '\.pyi?$'
 indent = ' '
 profile = 'black'
 sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
-
-[tool.pyright]
-extraPaths = ['tests']
@@ -14,6 +14,3 @@ include = '\.pyi?$'
 indent = ' '
 profile = 'black'
 sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
-
-[tool.pyright]
-extraPaths = ['tests']
@@ -11,6 +11,7 @@ extend-exclude = '''
 include = '\.pyi?$'
 target-version = ['py38', 'py39', 'py310', 'py311']

+
 [tool.ruff.lint.isort]
 combine-as-imports = true
 known-first-party = ["datahub"]

@@ -28,16 +29,6 @@ from-first = false
 required-imports = []
 classes = ["typing"]

-[tool.pyright]
-extraPaths = ['tests']
-
-[tool.vulture]
-exclude = ["src/datahub/metadata/"]
-ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"]
-ignore_names = ["*Source", "*Sink", "*Report"]
-paths = ["src"]
-sort_by_size = true
-
 [tool.ruff]
 # Same as Black.
 line-length = 88
@@ -70,7 +61,6 @@ ignore = [
     "B008",
     # TODO: Enable these later
     "B006", # Mutable args
-    "B007", # Unused loop control variable
     "B017", # Do not assert blind exception
     "B904", # Checks for raise statements in exception handlers that lack a from clause
 ]
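Removing `"B007"` from the ignore list turns on flake8-bugbear's unused-loop-control-variable check, which is what the mechanical loop rewrites in the hunks below address: an unused loop variable is renamed to `_`, or the loop switches from `.items()` to `.values()`. A small sketch of the pattern (the dictionary here is illustrative, not from the codebase):

    report = {"table_a": 3, "table_b": 5}

    # Flagged by B007: `name` is never used in the loop body.
    for name, count in report.items():
        print(count)

    # Fix 1: rename the unused variable to `_`.
    for _, count in report.items():
        print(count)

    # Fix 2: iterate over the values directly.
    for count in report.values():
        print(count)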
@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
         for replace_index, replace_value in patches_replace.items():
             list_to_manipulate[replace_index] = replace_value

-        for drop_index, drop_value in patches_drop.items():
+        for drop_value in patches_drop.values():
             list_to_manipulate.remove(drop_value)

         for add_value in patches_add:
@@ -613,7 +613,7 @@ class ABSSource(StatefulIngestionSourceBase):
                     table_data.table_path
                 ].timestamp = table_data.timestamp

-        for guid, table_data in table_dict.items():
+        for _, table_data in table_dict.items():
             yield from self.ingest_table(table_data, path_spec)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -181,7 +181,7 @@ class DremioAPIOperations:
            return

        # On-prem Dremio authentication (PAT or Basic Auth)
-        for retry in range(1, self._retry_count + 1):
+        for _ in range(1, self._retry_count + 1):
            try:
                if connection_args.authentication_method == "PAT":
                    self.session.headers.update(
@@ -286,7 +286,7 @@ class Neo4jSource(Source):
        df = self.get_neo4j_metadata(
            "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
        )
-        for index, row in df.iterrows():
+        for _, row in df.iterrows():
            try:
                yield MetadataWorkUnit(
                    id=row["key"],
@@ -1124,7 +1124,7 @@ class S3Source(StatefulIngestionSourceBase):
                    table_data.table_path
                ].timestamp = table_data.timestamp

-        for guid, table_data in table_dict.items():
+        for _, table_data in table_dict.items():
            yield from self.ingest_table(table_data, path_spec)

            if not self.source_config.is_profiling_enabled():
@@ -354,7 +354,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
            browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"

        if os.path.isdir(self.config.path):
-            for root, dirs, files in os.walk(self.config.path, topdown=False):
+            for root, _, files in os.walk(self.config.path, topdown=False):
                for file_name in [f for f in files if f.endswith(".json")]:
                    try:
                        yield from self._load_one_file(
@@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
    info_cache = kw.get("info_cache")
    all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
    relation_names = []
-    for key, relation in all_relations.items():
+    for _, relation in all_relations.items():
        if relation.database == schema and relation.relkind == relkind:
            relation_names.append(relation.relname)
    return relation_names
@@ -3605,7 +3605,7 @@ class TableauSiteSource:
                parent_container_key=parent_project_key,
            )

-        for id_, project in self.tableau_project_registry.items():
+        for project in self.tableau_project_registry.values():
            logger.debug(
                f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
            )
@@ -246,7 +246,7 @@ class MCPDiff:
        for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed:
            aspect_map = self.aspect_changes[urn]
            s.append(f"Urn changed, {urn}:")
-            for aspect_name, aspect_diffs in aspect_map.items():
+            for aspect_diffs in aspect_map.values():
                for i, ga in aspect_diffs.aspects_added.items():
                    s.append(self.report_aspect(ga, i, "added"))
                if verbose:
@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType

 # Patch based on sqllineage v1.3.3
 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None:  # type: ignore
-    for i, tbl in enumerate(self.tables):
+    for tbl in self.tables:
        holder.add_read(tbl)
    self.union_barriers.append((len(self.columns), len(self.tables)))

@@ -1070,7 +1070,7 @@ def test_unsupported_data_platform():
    )  # type :ignore

    is_entry_present: bool = False
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
        if entry.title == "Non-Data Platform Expression":
            is_entry_present = True
            break

@@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser):
    )  # type :ignore

    is_entry_present: bool = False
-    for key, entry in warn_entries.items():
+    for entry in warn_entries.values():
        if entry.title == "M-Query Parsing Timeout":
            is_entry_present = True
            break
@@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message(

    is_entry_present: bool = False
    # Printing INFO entries
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
        if entry.title == "Missing Lineage For Tile":
            is_entry_present = True
            break

@@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message(

    is_entry_present: bool = False
    # Printing INFO entries
-    for key, entry in info_entries.items():
+    for entry in info_entries.values():
        if entry.title == "App Ingestion Is Disabled":
            is_entry_present = True
            break
@@ -198,7 +198,7 @@ def generate_queries(

    all_tables = seed_metadata.tables + seed_metadata.views
    users = [f"user_{i}@xyz.com" for i in range(num_users)]
-    for i in range(num_selects):  # Pure SELECT statements
+    for _ in range(num_selects):  # Pure SELECT statements
        tables = _sample_list(all_tables, tables_per_select)
        all_columns = [
            FieldAccess(column, table) for table in tables for column in table.columns

@@ -213,7 +213,7 @@ def generate_queries(
            fields_accessed=_sample_list(all_columns, columns_per_select),
        )

-    for i in range(num_operations):
+    for _ in range(num_operations):
        modified_table = random.choice(seed_metadata.tables)
        n_col = len(modified_table.columns)
        num_columns_modified = NormalDistribution(n_col / 2, n_col / 2)
@@ -42,5 +42,3 @@ warn_unused_configs = true
 disallow_incomplete_defs = false
 disallow_untyped_defs = false
-
-[tool.pyright]
-extraPaths = ['tests']