Mirror of https://github.com/datahub-project/datahub.git (synced 2025-10-31 10:49:00 +00:00)

	fix(ingest): simplify + fix ruff config (#12382)
Commit f06ad1a1d5 (parent 94e966506d), by Harshal Sheth
@@ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
   inputs.file file('setup.py')
   outputs.file(sentinel_file)
   commandLine 'bash', '-c',
-    "${python_executable} -m venv ${venv_name} && set -x && " +
-    "${venv_name}/bin/python -m pip install --upgrade uv && " +
+    "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " +
+    "set -x && " +
+    // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
+    "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " +
     "touch ${sentinel_file}"
 }
 
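The task previously recreated the venv and invoked pip on every build; the new command creates the venv only when it is missing or broken, and once uv is available it upgrades uv with uv itself. Unrolled from the quoted bash, the logic is roughly the following Python sketch (function and variable names are ours, and the sentinel-file touch is omitted):

import subprocess
import sys
from pathlib import Path

def environment_setup(venv: Path, python_executable: str = sys.executable) -> None:
    """Rough equivalent of the bash command the Gradle task now runs."""
    venv_python = venv / "bin" / "python"
    if not venv.is_dir() or not venv_python.is_file():
        # Create the venv only if it is missing or half-created.
        subprocess.run([python_executable, "-m", "venv", str(venv)], check=True)
    if not (venv / "bin" / "uv").is_file():
        # First run: bootstrap uv with pip.
        subprocess.run(
            [str(venv_python), "-m", "pip", "install", "--upgrade", "uv"], check=True
        )
    else:
        # Subsequent runs: the much faster uv upgrades itself.
        subprocess.run(
            [str(venv_python), "-m", "uv", "pip", "install", "--upgrade", "uv"], check=True
        )
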
@@ -11,25 +11,13 @@ extend-exclude = '''
 include = '\.pyi?$'
 target-version = ['py38', 'py39', 'py310', 'py311']
 
-
 [tool.ruff.lint.isort]
+section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
+sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
 combine-as-imports = true
-known-first-party = ["datahub"]
-extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
-section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
-force-sort-within-sections = false
-force-wrap-aliases = false
-split-on-trailing-comma = false
-order-by-type = true
-relative-imports-order = "closest-to-furthest"
-force-single-line = false
-single-line-exclusions = ["typing"]
-length-sort = false
-from-first = false
-required-imports = []
-classes = ["typing"]
 
 [tool.ruff]
+target-version = "py38"
 # Same as Black.
 line-length = 88
 # Exclude directories matching these patterns.
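Two things happen in this hunk. The long tail of explicit isort knobs is dropped, and target-version = "py38" is now stated directly under [tool.ruff] (the py38-py311 list above it belongs to the Black config). More importantly, the patch modules move out of extra-standard-library into a dedicated "patch" section that sorts between "future" and "standard-library", so ruff pins them to the very top of a module without mislabeling them as stdlib. A correctly sorted file header then looks like this (module names come from the config above; the surrounding imports are illustrative):

from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED  # "patch" section

import logging  # standard library
from typing import Optional

import sqlglot  # third party
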
@@ -42,15 +30,16 @@ exclude = [
 ]
 
 [tool.ruff.lint]
-select = [
-    "B",
+extend-select = [
+    "B",  # Bugbear
     "C90",
     "E",
     "F",
-    "I", # For isort
-    "TID",
+    "G010",  # logging.warn -> logging.warning
+    "I",  # Import sorting
+    "TID",  # Tidy imports
 ]
-ignore = [
+extend-ignore = [
     # Ignore line length violations (handled by Black)
     "E501",
     # Ignore whitespace before ':' (matches Black)
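Two changes ride along here. Moving from select/ignore to extend-select/extend-ignore layers this list on top of ruff's default rule set instead of replacing it. And the newly enabled G010 (from the flake8-logging-format rules) is what drives every logger change in the rest of this commit: Logger.warn is a deprecated alias of Logger.warning, and G010 flags it. A minimal reproduction:

import logging

logger = logging.getLogger(__name__)

# G010: `warn` is a deprecated alias kept only for backwards compatibility.
logger.warn("ruff flags this call")
# The replacement applied throughout this commit:
logger.warning("this spelling is the supported one")
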
@@ -69,9 +58,7 @@ ignore = [
 max-complexity = 20
 
 [tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
 ban-relative-imports = "all"
 
-
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]
@@ -114,7 +114,7 @@ class CorpGroup(BaseModel):
                 )
                 urns_created.add(m.urn)
             else:
-                logger.warn(
+                logger.warning(
                     f"Suppressing emission of member {m.urn} before we already emitted metadata for it"
                 )
 
@@ -40,7 +40,7 @@ def get_s3_tags(
                 ]
             )
         except s3.meta.client.exceptions.ClientError:
-            logger.warn(f"No tags found for bucket={bucket_name}")
+            logger.warning(f"No tags found for bucket={bucket_name}")
 
     if use_s3_object_tags and key_name is not None:
         s3_client = aws_config.get_s3_client()
@@ -53,7 +53,7 @@ def get_s3_tags(
         else:
             # Unlike bucket tags, if an object does not have tags, it will just return an empty array
             # as opposed to an exception.
-            logger.warn(f"No tags found for bucket={bucket_name} key={key_name}")
+            logger.warning(f"No tags found for bucket={bucket_name} key={key_name}")
     if len(tags_to_add) == 0:
         return None
     if ctx.graph is not None:
@@ -65,7 +65,7 @@ def get_s3_tags(
         if current_tags:
             tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags])
     else:
-        logger.warn("Could not connect to DatahubApi. No current tags to maintain")
+        logger.warning("Could not connect to DatahubApi. No current tags to maintain")
     # Remove duplicate tags
     tags_to_add = sorted(list(set(tags_to_add)))
     new_tags = GlobalTagsClass(
 
@@ -1,3 +1,5 @@
+from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
+
 import collections
 import concurrent.futures
 import contextlib
@@ -10,7 +12,6 @@ import threading
 import traceback
 import unittest.mock
 import uuid
-from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
 from functools import lru_cache
 from typing import (
     TYPE_CHECKING,
 
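This import moves from the (previously faked) stdlib group up to its own "patch" section at the top of the file, and the same move repeats for the _sqlglot_patch import in the files below. The ordering matters: these modules apply their patches as a side effect of being imported, so they must run before anything that imports the patched library, and the sentinel constant (MARKUPSAFE_PATCHED, SQLGLOT_PATCHED) gives the import a binding so linters don't see it as unused. Schematically, a module of this kind follows the pattern below (a hypothetical sketch, not datahub's actual source):

# _example_patch.py -- import-time patch module, mirroring the shape of
# datahub's _markupsafe_compat / _sqlglot_patch (contents invented).
import textwrap

_original_dedent = textwrap.dedent

def _patched_dedent(text: str) -> str:
    # Behavior tweak layered over the library function (illustrative only).
    return _original_dedent(text).strip("\n")

# The patch takes effect the moment this module is first imported.
textwrap.dedent = _patched_dedent

# Importers write `from _example_patch import EXAMPLE_PATCHED` so the
# side-effect import has a named use, like the sentinels above.
EXAMPLE_PATCHED = True
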
@@ -89,7 +89,7 @@ def make_usage_workunit(
     top_sql_queries: Optional[List[str]] = None
     if query_freq is not None:
         if top_n_queries < len(query_freq):
-            logger.warn(
+            logger.warning(
                 f"Top N query limit exceeded on {str(resource)}.  Max number of queries {top_n_queries} <  {len(query_freq)}. Truncating top queries to {top_n_queries}."
             )
             query_freq = query_freq[0:top_n_queries]
 
@@ -1,9 +1,10 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import dataclasses
 import functools
 import logging
 import traceback
 from collections import defaultdict
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
 
 import pydantic.dataclasses
 
@@ -1,8 +1,9 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
+
 import functools
 import hashlib
 import logging
 import re
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Dict, Iterable, Optional, Tuple, Union
 
 import sqlglot
 
@@ -1,6 +1,7 @@
-import time
 from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 
+import time
+
 import pytest
 import sqlglot
 import sqlglot.errors