mirror of https://github.com/datahub-project/datahub.git

commit f06ad1a1d5 (parent 94e966506d)
fix(ingest): simplify + fix ruff config (#12382)
@@ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
   inputs.file file('setup.py')
   outputs.file(sentinel_file)
   commandLine 'bash', '-c',
-      "${python_executable} -m venv ${venv_name} && set -x && " +
-      "${venv_name}/bin/python -m pip install --upgrade uv && " +
+      "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " +
+      "set -x && " +
+      // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
+      "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " +
       "touch ${sentinel_file}"
 }
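The rewritten command makes environment setup idempotent: the venv is created only when it is missing or broken, and once uv exists it upgrades itself via `uv pip` instead of going through pip again. For readers who don't parse Gradle-quoted bash easily, here is the same decision logic as a minimal Python sketch; the `venv` path and `python3` executable are illustrative stand-ins for the script's `venv_name` and `python_executable`.

    import os
    import subprocess

    venv = "venv"  # illustrative; the build script derives this from venv_name
    python = os.path.join(venv, "bin", "python")
    uv = os.path.join(venv, "bin", "uv")

    # Create the venv only if it is missing or half-created.
    if not os.path.isdir(venv) or not os.path.isfile(python):
        subprocess.run(["python3", "-m", "venv", venv], check=True)

    # Bootstrap uv with pip the first time; afterwards let uv upgrade itself.
    if not os.path.isfile(uv):
        subprocess.run([python, "-m", "pip", "install", "--upgrade", "uv"], check=True)
    else:
        subprocess.run([python, "-m", "uv", "pip", "install", "--upgrade", "uv"], check=True)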
@@ -11,25 +11,13 @@ extend-exclude = '''
 include = '\.pyi?$'
 target-version = ['py38', 'py39', 'py310', 'py311']
 
-
 [tool.ruff.lint.isort]
+section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
+sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
 combine-as-imports = true
-known-first-party = ["datahub"]
-extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
-section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
-force-sort-within-sections = false
-force-wrap-aliases = false
-split-on-trailing-comma = false
-order-by-type = true
-relative-imports-order = "closest-to-furthest"
-force-single-line = false
-single-line-exclusions = ["typing"]
-length-sort = false
-from-first = false
-required-imports = []
-classes = ["typing"]
 
 [tool.ruff]
+target-version = "py38"
 # Same as Black.
 line-length = 88
 # Exclude directories matching these patterns.
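The old configuration smuggled the two monkeypatch modules into `extra-standard-library` so isort would sort them ahead of third-party imports; the new configuration states the intent directly with a user-defined `patch` section ordered right after `future`. The dozen other isort options it deletes were all set to their defaults anyway. A sketch of the import order this enforces in a module (every import except the patch line is illustrative):

    from __future__ import annotations

    # user-defined "patch" section: always sorted directly after __future__
    from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

    import logging  # standard library

    import sqlglot  # third party

    assert SQLGLOT_PATCHED  # give the patch import a visible use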
@@ -42,15 +30,16 @@ exclude = [
 ]
 
 [tool.ruff.lint]
-select = [
-    "B",
+extend-select = [
+    "B",  # Bugbear
     "C90",
     "E",
     "F",
-    "I",  # For isort
-    "TID",
+    "G010",  # logging.warn -> logging.warning
+    "I",  # Import sorting
+    "TID",  # Tidy imports
 ]
-ignore = [
+extend-ignore = [
     # Ignore line length violations (handled by Black)
     "E501",
     # Ignore whitespace before ':' (matches Black)
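Switching `select`/`ignore` to `extend-select`/`extend-ignore` layers these rules on top of ruff's default rule set instead of replacing it. The newly enabled `G010` flags calls to the deprecated `logging.warn` alias, which is what drives the code changes further down in this commit. A minimal sketch of what the rule catches (logger name illustrative):

    import logging

    logger = logging.getLogger(__name__)

    logger.warn("about to fail")     # G010: .warn is a deprecated alias
    logger.warning("about to fail")  # the spelling ruff's autofix produces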
@@ -69,9 +58,7 @@ ignore = [
 max-complexity = 20
 
 [tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
 ban-relative-imports = "all"
 
-
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]
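The deleted comment merely restated what `ban-relative-imports = "all"` (rule TID252) already says. For illustration, the first import below would be flagged while the second passes; separately, `__init__.py` files stay exempt from F401 because re-exports there look unused to the linter. The module path is illustrative:

    # flagged by TID252 when ban-relative-imports = "all":
    # from .corpgroup import CorpGroup

    # accepted absolute form (path illustrative):
    from datahub.api.entities.corpgroup.corpgroup import CorpGroup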
@@ -114,7 +114,7 @@ class CorpGroup(BaseModel):
                 )
                 urns_created.add(m.urn)
             else:
-                logger.warn(
+                logger.warning(
                     f"Suppressing emission of member {m.urn} before we already emitted metadata for it"
                 )
 
@@ -40,7 +40,7 @@ def get_s3_tags(
             ]
         )
     except s3.meta.client.exceptions.ClientError:
-        logger.warn(f"No tags found for bucket={bucket_name}")
+        logger.warning(f"No tags found for bucket={bucket_name}")
 
     if use_s3_object_tags and key_name is not None:
         s3_client = aws_config.get_s3_client()
@@ -53,7 +53,7 @@ def get_s3_tags(
         else:
             # Unlike bucket tags, if an object does not have tags, it will just return an empty array
             # as opposed to an exception.
-            logger.warn(f"No tags found for bucket={bucket_name} key={key_name}")
+            logger.warning(f"No tags found for bucket={bucket_name} key={key_name}")
     if len(tags_to_add) == 0:
         return None
     if ctx.graph is not None:
@@ -65,7 +65,7 @@ def get_s3_tags(
         if current_tags:
             tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags])
         else:
-            logger.warn("Could not connect to DatahubApi. No current tags to maintain")
+            logger.warning("Could not connect to DatahubApi. No current tags to maintain")
         # Remove duplicate tags
         tags_to_add = sorted(list(set(tags_to_add)))
         new_tags = GlobalTagsClass(
@@ -1,3 +1,5 @@
+from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
+
 import collections
 import concurrent.futures
 import contextlib
@@ -10,7 +12,6 @@ import threading
 import traceback
 import unittest.mock
 import uuid
-from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
 from functools import lru_cache
 from typing import (
     TYPE_CHECKING,
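With the `patch` section in place, the compat import can sit on line 1 and isort will keep it there. The compat module itself is not part of this diff; as a hedged sketch (assumed contents, not the actual file), the general shape of such a module is to apply the monkeypatch at import time and export a flag so the import binds a usable name:

    # hypothetical _markupsafe_compat-style module (assumed, not the real file)
    import markupsafe

    # markupsafe 2.1 removed soft_unicode; restore it as an alias if absent
    # so older callers keep working.
    if not hasattr(markupsafe, "soft_unicode"):
        markupsafe.soft_unicode = markupsafe.soft_str  # type: ignore[attr-defined]

    MARKUPSAFE_PATCHED = True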
@@ -89,7 +89,7 @@ def make_usage_workunit(
     top_sql_queries: Optional[List[str]] = None
     if query_freq is not None:
         if top_n_queries < len(query_freq):
-            logger.warn(
+            logger.warning(
                 f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}."
             )
             query_freq = query_freq[0:top_n_queries]
@@ -1,9 +1,10 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import dataclasses
 import functools
 import logging
 import traceback
 from collections import defaultdict
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
 
 import pydantic.dataclasses
@@ -1,8 +1,9 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import functools
 import hashlib
 import logging
 import re
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Dict, Iterable, Optional, Tuple, Union
 
 import sqlglot
@@ -1,6 +1,7 @@
-import time
 from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 
+import time
+
 import pytest
 import sqlglot
 import sqlglot.errors
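The same reordering applies in this test module: `import time` moves below the patch import and into its own block. A hypothetical guard test for this pattern (not part of the diff) would pin the invariant down:

    from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

    def test_sqlglot_patch_applied() -> None:
        # If the patch import is ever reordered away or fails to apply,
        # fail loudly rather than letting parsing behave subtly differently.
        assert SQLGLOT_PATCHED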