mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-26 00:14:53 +00:00 
			
		
		
		
	fix(ingest): simplify + fix ruff config (#12382)
This commit is contained in:
		
							parent
							
								
									94e966506d
								
							
						
					
					
						commit
						f06ad1a1d5
					
				| @ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { | ||||
|   inputs.file file('setup.py') | ||||
|   outputs.file(sentinel_file) | ||||
|   commandLine 'bash', '-c', | ||||
|     "${python_executable} -m venv ${venv_name} && set -x && " + | ||||
|     "${venv_name}/bin/python -m pip install --upgrade uv && " + | ||||
|     "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " + | ||||
|     "set -x && " + | ||||
|     // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip. | ||||
|     "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " + | ||||
|     "touch ${sentinel_file}" | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -11,25 +11,13 @@ extend-exclude = ''' | ||||
| include = '\.pyi?$' | ||||
| target-version = ['py38', 'py39', 'py310', 'py311'] | ||||
| 
 | ||||
| 
 | ||||
| [tool.ruff.lint.isort] | ||||
| section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"] | ||||
| sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] } | ||||
| combine-as-imports = true | ||||
| known-first-party = ["datahub"] | ||||
| extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] | ||||
| section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] | ||||
| force-sort-within-sections = false | ||||
| force-wrap-aliases = false | ||||
| split-on-trailing-comma = false | ||||
| order-by-type = true | ||||
| relative-imports-order = "closest-to-furthest" | ||||
| force-single-line = false | ||||
| single-line-exclusions = ["typing"] | ||||
| length-sort = false | ||||
| from-first = false | ||||
| required-imports = [] | ||||
| classes = ["typing"] | ||||
| 
 | ||||
| [tool.ruff] | ||||
| target-version = "py38" | ||||
| # Same as Black. | ||||
| line-length = 88 | ||||
| # Exclude directories matching these patterns. | ||||
| @ -42,15 +30,16 @@ exclude = [ | ||||
| ] | ||||
| 
 | ||||
| [tool.ruff.lint] | ||||
| select = [ | ||||
|     "B", | ||||
| extend-select = [ | ||||
|     "B",  # Bugbear | ||||
|     "C90", | ||||
|     "E", | ||||
|     "F", | ||||
|     "I", # For isort | ||||
|     "TID", | ||||
|     "G010",  # logging.warn -> logging.warning | ||||
|     "I",  # Import sorting | ||||
|     "TID",  # Tidy imports | ||||
| ] | ||||
| ignore = [ | ||||
| extend-ignore = [ | ||||
|     # Ignore line length violations (handled by Black) | ||||
|     "E501", | ||||
|     # Ignore whitespace before ':' (matches Black) | ||||
| @ -69,9 +58,7 @@ ignore = [ | ||||
| max-complexity = 20 | ||||
| 
 | ||||
| [tool.ruff.lint.flake8-tidy-imports] | ||||
| # Disallow all relative imports. | ||||
| ban-relative-imports = "all" | ||||
| 
 | ||||
| 
 | ||||
| [tool.ruff.lint.per-file-ignores] | ||||
| "__init__.py" = ["F401"] | ||||
| @ -114,7 +114,7 @@ class CorpGroup(BaseModel): | ||||
|                 ) | ||||
|                 urns_created.add(m.urn) | ||||
|             else: | ||||
|                 logger.warn( | ||||
|                 logger.warning( | ||||
|                     f"Suppressing emission of member {m.urn} before we already emitted metadata for it" | ||||
|                 ) | ||||
| 
 | ||||
|  | ||||
| @ -40,7 +40,7 @@ def get_s3_tags( | ||||
|                 ] | ||||
|             ) | ||||
|         except s3.meta.client.exceptions.ClientError: | ||||
|             logger.warn(f"No tags found for bucket={bucket_name}") | ||||
|             logger.warning(f"No tags found for bucket={bucket_name}") | ||||
| 
 | ||||
|     if use_s3_object_tags and key_name is not None: | ||||
|         s3_client = aws_config.get_s3_client() | ||||
| @ -53,7 +53,7 @@ def get_s3_tags( | ||||
|         else: | ||||
|             # Unlike bucket tags, if an object does not have tags, it will just return an empty array | ||||
|             # as opposed to an exception. | ||||
|             logger.warn(f"No tags found for bucket={bucket_name} key={key_name}") | ||||
|             logger.warning(f"No tags found for bucket={bucket_name} key={key_name}") | ||||
|     if len(tags_to_add) == 0: | ||||
|         return None | ||||
|     if ctx.graph is not None: | ||||
| @ -65,7 +65,7 @@ def get_s3_tags( | ||||
|         if current_tags: | ||||
|             tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags]) | ||||
|     else: | ||||
|         logger.warn("Could not connect to DatahubApi. No current tags to maintain") | ||||
|         logger.warning("Could not connect to DatahubApi. No current tags to maintain") | ||||
|     # Remove duplicate tags | ||||
|     tags_to_add = sorted(list(set(tags_to_add))) | ||||
|     new_tags = GlobalTagsClass( | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED | ||||
| 
 | ||||
| import collections | ||||
| import concurrent.futures | ||||
| import contextlib | ||||
| @ -10,7 +12,6 @@ import threading | ||||
| import traceback | ||||
| import unittest.mock | ||||
| import uuid | ||||
| from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED | ||||
| from functools import lru_cache | ||||
| from typing import ( | ||||
|     TYPE_CHECKING, | ||||
|  | ||||
| @ -89,7 +89,7 @@ def make_usage_workunit( | ||||
|     top_sql_queries: Optional[List[str]] = None | ||||
|     if query_freq is not None: | ||||
|         if top_n_queries < len(query_freq): | ||||
|             logger.warn( | ||||
|             logger.warning( | ||||
|                 f"Top N query limit exceeded on {str(resource)}.  Max number of queries {top_n_queries} <  {len(query_freq)}. Truncating top queries to {top_n_queries}." | ||||
|             ) | ||||
|             query_freq = query_freq[0:top_n_queries] | ||||
|  | ||||
| @ -1,9 +1,10 @@ | ||||
| from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED | ||||
| 
 | ||||
| import dataclasses | ||||
| import functools | ||||
| import logging | ||||
| import traceback | ||||
| from collections import defaultdict | ||||
| from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED | ||||
| from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union | ||||
| 
 | ||||
| import pydantic.dataclasses | ||||
|  | ||||
| @ -1,8 +1,9 @@ | ||||
| from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED | ||||
| 
 | ||||
| import functools | ||||
| import hashlib | ||||
| import logging | ||||
| import re | ||||
| from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED | ||||
| from typing import Dict, Iterable, Optional, Tuple, Union | ||||
| 
 | ||||
| import sqlglot | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| import time | ||||
| from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED | ||||
| 
 | ||||
| import time | ||||
| 
 | ||||
| import pytest | ||||
| import sqlglot | ||||
| import sqlglot.errors | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Harshal Sheth
						Harshal Sheth