fix(ingest): simplify + fix ruff config (#12382)

Harshal Sheth 2025-01-17 16:28:12 -08:00 committed by GitHub
parent 94e966506d
commit f06ad1a1d5
9 changed files with 26 additions and 33 deletions

View File

@@ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
   inputs.file file('setup.py')
   outputs.file(sentinel_file)
   commandLine 'bash', '-c',
-    "${python_executable} -m venv ${venv_name} && set -x && " +
-    "${venv_name}/bin/python -m pip install --upgrade uv && " +
+    "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " +
+    "set -x && " +
+    // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
+    "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " +
     "touch ${sentinel_file}"
 }
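In effect, environmentSetup is now idempotent: the venv is only (re)created when it is missing or broken, and uv is bootstrapped with pip on the first run but upgrades itself on subsequent runs. A rough Python rendering of the same bootstrap logic (venv_name and python_executable stand in for the Gradle properties; this sketch is not part of the commit):

import os
import subprocess

venv_name = "venv"
python_executable = "python3"

venv_python = os.path.join(venv_name, "bin", "python")
venv_uv = os.path.join(venv_name, "bin", "uv")

# Reuse the venv across builds; only recreate it if missing or broken.
if not os.path.isdir(venv_name) or not os.path.isfile(venv_python):
    subprocess.run([python_executable, "-m", "venv", venv_name], check=True)

if not os.path.isfile(venv_uv):
    # First run: bootstrap uv with pip.
    subprocess.run([venv_python, "-m", "pip", "install", "--upgrade", "uv"], check=True)
else:
    # Later runs: let the existing uv upgrade itself (much faster than pip).
    subprocess.run([venv_python, "-m", "uv", "pip", "install", "--upgrade", "uv"], check=True)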

View File

@@ -11,25 +11,13 @@ extend-exclude = '''
 include = '\.pyi?$'
 target-version = ['py38', 'py39', 'py310', 'py311']
 [tool.ruff.lint.isort]
+section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
+sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
 combine-as-imports = true
-known-first-party = ["datahub"]
-extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
-section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
-force-sort-within-sections = false
-force-wrap-aliases = false
-split-on-trailing-comma = false
-order-by-type = true
-relative-imports-order = "closest-to-furthest"
-force-single-line = false
-single-line-exclusions = ["typing"]
-length-sort = false
-from-first = false
-required-imports = []
-classes = ["typing"]
 [tool.ruff]
+target-version = "py38"
 # Same as Black.
 line-length = 88
 # Exclude directories matching these patterns.
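The isort changes replace the old workaround of listing the patch modules under extra-standard-library with a dedicated "patch" section that sorts immediately after __future__ imports, guaranteeing the monkeypatch modules load before anything they patch; the options that merely restated isort defaults are dropped. Under the new section order, an import block sorts like this (a sketch; it assumes the datahub package and sqlglot are installed):

from __future__ import annotations

from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import logging
from typing import Optional

import sqlglot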
@@ -42,15 +30,16 @@ exclude = [
 ]
 [tool.ruff.lint]
-select = [
-    "B",
+extend-select = [
+    "B",      # Bugbear
     "C90",
     "E",
     "F",
-    "I",  # For isort
-    "TID",
+    "G010",   # logging.warn -> logging.warning
+    "I",      # Import sorting
+    "TID",    # Tidy imports
 ]
-ignore = [
+extend-ignore = [
     # Ignore line length violations (handled by Black)
     "E501",
     # Ignore whitespace before ':' (matches Black)
@@ -69,9 +58,7 @@ ignore = [
 max-complexity = 20
 [tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
 ban-relative-imports = "all"
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]

View File

@@ -114,7 +114,7 @@ class CorpGroup(BaseModel):
                 )
                 urns_created.add(m.urn)
             else:
-                logger.warn(
+                logger.warning(
                     f"Suppressing emission of member {m.urn} before we already emitted metadata for it"
                 )

View File

@@ -40,7 +40,7 @@ def get_s3_tags(
             ]
         )
     except s3.meta.client.exceptions.ClientError:
-        logger.warn(f"No tags found for bucket={bucket_name}")
+        logger.warning(f"No tags found for bucket={bucket_name}")

     if use_s3_object_tags and key_name is not None:
         s3_client = aws_config.get_s3_client()
@@ -53,7 +53,7 @@ def get_s3_tags(
         else:
             # Unlike bucket tags, if an object does not have tags, it will just return an empty array
             # as opposed to an exception.
-            logger.warn(f"No tags found for bucket={bucket_name} key={key_name}")
+            logger.warning(f"No tags found for bucket={bucket_name} key={key_name}")
     if len(tags_to_add) == 0:
         return None
     if ctx.graph is not None:
@@ -65,7 +65,7 @@ def get_s3_tags(
         if current_tags:
             tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags])
         else:
-            logger.warn("Could not connect to DatahubApi. No current tags to maintain")
+            logger.warning("Could not connect to DatahubApi. No current tags to maintain")
     # Remove duplicate tags
     tags_to_add = sorted(list(set(tags_to_add)))
     new_tags = GlobalTagsClass(
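Aside from the rename, the surrounding logic is untouched: tags discovered on the bucket/object are unioned with the tags already attached in DataHub, then deduplicated. A minimal sketch of that merge step (merge_tag_urns is a hypothetical name, not a function in this commit):

from typing import List

def merge_tag_urns(discovered: List[str], existing: List[str]) -> List[str]:
    """Union two tag URN lists, deduplicated and in stable sorted order."""
    return sorted(set(discovered) | set(existing))

assert merge_tag_urns(["urn:li:tag:b", "urn:li:tag:a"], ["urn:li:tag:a"]) == [
    "urn:li:tag:a",
    "urn:li:tag:b",
]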

View File

@@ -1,3 +1,5 @@
+from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
+
 import collections
 import concurrent.futures
 import contextlib
@@ -10,7 +12,6 @@ import threading
 import traceback
 import unittest.mock
 import uuid
-from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
 from functools import lru_cache
 from typing import (
     TYPE_CHECKING,
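Moving this import to the very top is exactly what the new "patch" isort section enforces: the module applies its compatibility shim as an import side effect and exports the MARKUPSAFE_PATCHED flag so the import has a visible use. A minimal sketch of the pattern (the real shim in datahub.utilities._markupsafe_compat may differ):

import markupsafe

# Assumption: restore an API removed in markupsafe 2.1 for older consumers.
if not hasattr(markupsafe, "soft_unicode"):
    markupsafe.soft_unicode = markupsafe.soft_str  # type: ignore[attr-defined]

# Callers import this flag, giving the side-effectful import a visible use,
# so linters never flag or auto-remove the import as unused.
MARKUPSAFE_PATCHED = True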

View File

@@ -89,7 +89,7 @@ def make_usage_workunit(
     top_sql_queries: Optional[List[str]] = None
     if query_freq is not None:
         if top_n_queries < len(query_freq):
-            logger.warn(
+            logger.warning(
                 f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}."
             )
             query_freq = query_freq[0:top_n_queries]

View File

@@ -1,9 +1,10 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import dataclasses
 import functools
 import logging
 import traceback
 from collections import defaultdict
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union

 import pydantic.dataclasses

View File

@@ -1,8 +1,9 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import functools
 import hashlib
 import logging
 import re
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Dict, Iterable, Optional, Tuple, Union

 import sqlglot

View File

@@ -1,6 +1,7 @@
-import time
 from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
+import time

 import pytest
 import sqlglot
 import sqlglot.errors
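With the "patch" section in place, the import moves in these last four files are exactly what ruff's import sorter now produces on ruff check --fix: each patch import floats to the top of the module, separated by a blank line from the standard-library block.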