fix(ingest): fix athena and GE lint errors (#6482)

Harshal Sheth 2022-11-17 19:07:05 -05:00 committed by GitHub
parent added63437
commit b7c03731c4
3 changed files with 15 additions and 9 deletions

View File

@@ -43,8 +43,11 @@ task installDev(type: Exec, dependsOn: [install]) {
   inputs.file file('setup.py')
   outputs.dir("${venv_name}")
   outputs.file("${venv_name}/.build_install_dev_sentinel")
-  commandLine 'bash', '-x', '-c',
-    "${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && touch ${venv_name}/.build_install_dev_sentinel"
+  commandLine 'bash', '-c',
+    "source ${venv_name}/bin/activate && set -x && " +
+    "${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " +
+    "./scripts/install-sqlalchemy-stubs.sh && " +
+    "touch ${venv_name}/.build_install_dev_sentinel"
 }
@@ -72,7 +75,6 @@ task lint(type: Exec, dependsOn: installDev) {
   commandLine 'bash', '-c',
     "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
     "source ${venv_name}/bin/activate && set -x && " +
-    "./scripts/install-sqlalchemy-stubs.sh && " +
     "black --check --diff src/ tests/ examples/ && " +
     "isort --check --diff src/ tests/ examples/ && " +
     "flake8 --count --statistics src/ tests/ examples/ && " +

View File

@@ -1,7 +1,7 @@
 import json
 import logging
 import typing
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple, cast

 import pydantic
 from pyathena.common import BaseCursor
@@ -109,9 +109,9 @@ class AthenaSource(SQLAlchemySource):
         self, inspector: Inspector, schema: str, table: str
     ) -> Tuple[Optional[str], Dict[str, str], Optional[str]]:
         if not self.cursor:
-            self.cursor = inspector.engine.raw_connection().cursor()
-            assert self.cursor
+            self.cursor = cast(BaseCursor, inspector.engine.raw_connection().cursor())
+        assert self.cursor

         # Unfortunately properties can be only get through private methods as those are not exposed
         # https://github.com/laughingman7743/PyAthena/blob/9e42752b0cc7145a87c3a743bb2634fe125adfa7/pyathena/model.py#L201
         metadata: AthenaTableMetadata = self.cursor._get_table_metadata(
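
For context on the Athena change above: raw_connection().cursor() is loosely typed, so assigning it to an attribute declared as Optional[BaseCursor] fails mypy, and the assert has to sit outside the if-branch so the Optional type is narrowed on every call path, not just the first one. Below is a minimal, self-contained sketch of that pattern; StubCursor and StubSource are hypothetical stand-ins, not DataHub's AthenaSource or pyathena's BaseCursor.

# Sketch of the typing fix above; stand-in classes, not the real DataHub code.
from typing import Optional, cast


class StubCursor:
    """Stand-in for pyathena.common.BaseCursor."""

    def execute(self, query: str) -> None:
        print(f"would run: {query}")


class StubSource:
    def __init__(self) -> None:
        self.cursor: Optional[StubCursor] = None

    def _raw_cursor(self) -> object:
        # Loosely typed, like inspector.engine.raw_connection().cursor().
        return StubCursor()

    def get_table_metadata(self) -> None:
        if not self.cursor:
            # cast() tells mypy what we know at runtime: the loosely typed
            # object really is the cursor class we expect.
            self.cursor = cast(StubCursor, self._raw_cursor())
        # Asserting outside the if-branch narrows Optional[StubCursor] to
        # StubCursor on every path, so the attribute access below type-checks.
        assert self.cursor
        self.cursor.execute("SELECT 1")


StubSource().get_table_metadata()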

View File

@@ -8,7 +8,7 @@ import time
 from dataclasses import dataclass
 from datetime import timezone
 from decimal import Decimal
-from typing import Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

 from great_expectations.checkpoint.actions import ValidationAction
 from great_expectations.core.batch import Batch
@ -23,7 +23,6 @@ from great_expectations.data_asset.data_asset import DataAsset
from great_expectations.data_context.data_context import DataContext from great_expectations.data_context.data_context import DataContext
from great_expectations.data_context.types.resource_identifiers import ( from great_expectations.data_context.types.resource_identifiers import (
ExpectationSuiteIdentifier, ExpectationSuiteIdentifier,
GeCloudIdentifier,
ValidationResultIdentifier, ValidationResultIdentifier,
) )
from great_expectations.execution_engine.sqlalchemy_execution_engine import ( from great_expectations.execution_engine.sqlalchemy_execution_engine import (
@@ -58,6 +57,11 @@ from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeTyp
 from datahub.metadata.schema_classes import PartitionSpecClass, PartitionTypeClass
 from datahub.utilities.sql_parser import DefaultSQLParser

+if TYPE_CHECKING:
+    from great_expectations.data_context.types.resource_identifiers import (
+        GXCloudIdentifier,
+    )
+
 assert MARKUPSAFE_PATCHED
 logger = logging.getLogger(__name__)
 if os.getenv("DATAHUB_DEBUG", False):
@@ -103,7 +107,7 @@ class DataHubValidationAction(ValidationAction):
         self,
         validation_result_suite: ExpectationSuiteValidationResult,
         validation_result_suite_identifier: Union[
-            ValidationResultIdentifier, GeCloudIdentifier
+            ValidationResultIdentifier, "GXCloudIdentifier"
         ],
         data_asset: Union[Validator, DataAsset, Batch],
         payload: Any = None,
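
The great_expectations changes above use the standard typing.TYPE_CHECKING idiom: GXCloudIdentifier is imported only while type checking, and the parameter annotation refers to it as a quoted forward reference, so the module still imports at runtime against GE releases where that class is unavailable. A minimal sketch of the idiom follows; somelib and its identifier classes are hypothetical stand-ins, not the real GE or DataHub types.

# Sketch of the TYPE_CHECKING pattern; "somelib" is a hypothetical dependency.
from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    # Imported only for the type checker; never executed at runtime, so a
    # missing or older version of the dependency cannot break imports.
    from somelib.identifiers import CloudIdentifier, LocalIdentifier


def handle(identifier: Union["LocalIdentifier", "CloudIdentifier"]) -> str:
    # The quoted annotations are forward references: mypy resolves them via
    # the guarded import, while at runtime they remain plain strings.
    return type(identifier).__name__

Because the quoted names are never evaluated at runtime, the guarded import costs nothing in production; only the type checker needs the dependency installed to resolve them.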