chore(ingest): bump and pin mypy (#6584)

Harshal Sheth 2022-12-02 13:53:28 -05:00 committed by GitHub
parent 1689212434
commit 44cfd21a65
26 changed files with 64 additions and 46 deletions

View File

@@ -27,6 +27,7 @@ plugins =
 exclude = ^(venv|build|dist)/
 ignore_missing_imports = yes
 namespace_packages = no
+implicit_optional = no
 strict_optional = yes
 check_untyped_defs = yes
 disallow_incomplete_defs = yes
@@ -38,8 +39,16 @@ disallow_untyped_defs = no
 # try to be a bit more strict in certain areas of the codebase
 [mypy-datahub.*]
 ignore_missing_imports = no
+[mypy-datahub_provider.*]
+ignore_missing_imports = no
 [mypy-tests.*]
 ignore_missing_imports = no
+[mypy-google.protobuf.*]
+# mypy sometimes ignores the above ignore_missing_imports = yes
+# See https://github.com/python/mypy/issues/10632 and
+# https://github.com/python/mypy/issues/10619#issuecomment-1174208395
+# for a discussion of why this happens.
+ignore_missing_imports = yes
 [mypy-datahub.configuration.*]
 disallow_untyped_defs = yes
 [mypy-datahub.emitter.*]
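
For context on the new implicit_optional = no line: starting with mypy 0.990, a None default no longer implies an Optional type, so annotations must spell it out. A minimal sketch (hypothetical function, not from this repo) of the before/after that most of the diffs below follow:

from typing import Optional


def lookup_old(key: str, default: str = None) -> str:
    # mypy 0.991 flags this signature: the default None does not match "str".
    return default if default is not None else key


def lookup_new(key: str, default: Optional[str] = None) -> str:
    # Same behaviour, but the Optional is now explicit.
    return default if default is not None else key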

View File

@@ -385,8 +385,7 @@ mypy_stubs = {
     "types-ujson>=5.2.0",
     "types-termcolor>=1.0.0",
     "types-Deprecated",
-    # Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor"
-    "types-protobuf<4.21.0.0",
+    "types-protobuf>=4.21.0.1",
 }

 base_dev_requirements = {
@@ -399,10 +398,7 @@ base_dev_requirements = {
     "flake8>=3.8.3",
     "flake8-tidy-imports>=4.3.0",
     "isort>=5.7.0",
-    # mypy 0.990 enables namespace packages by default and sets
-    # no implicit optional to True.
-    # FIXME: Enable mypy 0.990 when our codebase is fixed.
-    "mypy>=0.981,<0.990",
+    "mypy==0.991",
     # pydantic 1.8.2 is incompatible with mypy 0.910.
     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
     # Restricting top version to <1.10 until we can fix our types.
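
Pinning mypy==0.991 exactly keeps local runs aligned with CI. As a side note, mypy can also be invoked programmatically through its public mypy.api module; a minimal sketch (the source path is a placeholder) that mirrors running mypy --config-file setup.cfg src/ on the command line:

# Assumes the dev extra is installed, e.g. pip install -e '.[dev]'
from mypy import api

# api.run takes the same arguments as the mypy CLI and returns
# (normal_report, error_report, exit_status).
stdout, stderr, exit_status = api.run(["--config-file", "setup.cfg", "src/"])
print(stdout)
if exit_status != 0:
    raise SystemExit(f"mypy reported errors (exit status {exit_status})")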

View File

@@ -122,8 +122,6 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         "operationType": operation_type,
         "partition": partition,
     }
-    if filter
-    else None
 ),
 },
 )

View File

@@ -80,7 +80,7 @@ class OperationalError(PipelineExecutionError):
     message: str
     info: dict

-    def __init__(self, message: str, info: dict = None):
+    def __init__(self, message: str, info: Optional[dict] = None):
         self.message = message
         self.info = info or {}
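
This hunk shows the repair pattern used throughout the PR: the parameter gains an explicit Optional, while the attribute stays non-optional because the constructor normalizes None away. A minimal sketch of the same idea (a hypothetical class, not the real OperationalError):

from typing import Optional


class ReportableError(Exception):
    message: str
    info: dict  # the attribute itself is never None

    def __init__(self, message: str, info: Optional[dict] = None):
        self.message = message
        # Normalize the optional argument so downstream code never sees None.
        self.info = info or {}


err = ReportableError("query failed")
assert err.info == {}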

View File

@@ -120,7 +120,12 @@ class Source(Closeable, metaclass=ABCMeta):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-        pass
+        # Technically, this method should be abstract. However, the @config_class
+        # decorator automatically generates a create method at runtime if one is
+        # not defined. Python still treats the class as abstract because it thinks
+        # the create method is missing. To avoid the class becoming abstract, we
+        # can't make this method abstract.
+        raise NotImplementedError('sources must implement "create"')

     @abstractmethod
     def get_workunits(self) -> Iterable[WorkUnit]:
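
The new comment explains why create stays a concrete method. A minimal sketch of the mechanism (the decorator below is a hypothetical stand-in, not DataHub's real @config_class): ABCMeta fixes the set of abstract methods when the class object is created, so a method attached afterwards by a decorator cannot clear the abstract flag.

from abc import ABC, abstractmethod


def config_class_sketch(cls):
    # Stand-in decorator: attach a create() after the class has been created.
    if "create" not in cls.__dict__:
        cls.create = classmethod(lambda klass, config_dict, ctx: klass())
    return cls


class AbstractBase(ABC):
    @abstractmethod
    def create(cls, config_dict, ctx):
        ...


@config_class_sketch
class DecoratedSource(AbstractBase):
    pass


# __abstractmethods__ was computed before the decorator ran, so the class is
# still abstract and DecoratedSource() raises TypeError despite having create().
print(DecoratedSource.__abstractmethods__)  # frozenset({'create'})

Hence the base class keeps a non-abstract create() that raises NotImplementedError, which preserves the runtime contract without tripping over ABCMeta.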

View File

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Iterable, Union, overload
+from typing import Iterable, Optional, Union, overload

 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.source import WorkUnit
@@ -42,9 +42,9 @@ class MetadataWorkUnit(WorkUnit):
     def __init__(
         self,
         id: str,
-        mce: MetadataChangeEvent = None,
-        mcp: MetadataChangeProposalWrapper = None,
-        mcp_raw: MetadataChangeProposal = None,
+        mce: Optional[MetadataChangeEvent] = None,
+        mcp: Optional[MetadataChangeProposalWrapper] = None,
+        mcp_raw: Optional[MetadataChangeProposal] = None,
         treat_errors_as_warnings: bool = False,
     ):
         super().__init__(id)

View File

@@ -2,7 +2,7 @@ import logging
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, cast

 from google.cloud import bigquery
 from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning
@@ -280,6 +280,8 @@ class BigQueryDataDictionary:
     def get_datasets_for_project_id(
         conn: bigquery.Client, project_id: str, maxResults: Optional[int] = None
     ) -> List[BigqueryDataset]:
+        # FIXME: Due to a bug in BigQuery's type annotations, we need to cast here.
+        maxResults = cast(int, maxResults)
         datasets = conn.list_datasets(project_id, max_results=maxResults)
         return [BigqueryDataset(name=d.dataset_id) for d in datasets]
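
The cast here only silences the type checker; it performs no conversion at runtime. A minimal sketch of the idiom (a stand-in function, not the real google-cloud-bigquery client), assuming a library call whose annotation is stricter than its actual behaviour:

from typing import List, Optional, cast


def list_items(max_results: int) -> List[int]:
    # Stand-in for a call annotated as requiring an int but that in practice
    # also accepts None, meaning "no limit".
    return list(range(max_results)) if max_results is not None else []


def get_items(max_results: Optional[int] = None) -> List[int]:
    # cast() makes mypy treat the value as int; at runtime None passes through
    # to the library unchanged.
    return list_items(cast(int, max_results))


assert get_items() == []
assert get_items(3) == [0, 1, 2]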

View File

@@ -833,8 +833,8 @@ class DatahubGEProfiler:
         self,
         query_combiner: SQLAlchemyQueryCombiner,
         pretty_name: str,
-        schema: str = None,
-        table: str = None,
+        schema: Optional[str] = None,
+        table: Optional[str] = None,
         partition: Optional[str] = None,
         custom_sql: Optional[str] = None,
         platform: Optional[str] = None,

View File

@@ -123,7 +123,9 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text

-def unquote(string: str, leading_quote: str = '"', trailing_quote: str = None) -> str:
+def unquote(
+    string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
+) -> str:
     """
     If string starts and ends with a quote, unquote it
     """

View File

@@ -2,7 +2,7 @@ import json
 import re
 import time
 import warnings
-from typing import Any, Dict, Generator, List, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple

 import requests
 import yaml
@@ -47,7 +47,10 @@ def flatten2list(d: dict) -> list:
 def request_call(
-    url: str, token: str = None, username: str = None, password: str = None
+    url: str,
+    token: Optional[str] = None,
+    username: Optional[str] = None,
+    password: Optional[str] = None,
 ) -> requests.Response:
     headers = {"accept": "application/json"}
@@ -66,9 +69,9 @@ def request_call(
 def get_swag_json(
     url: str,
-    token: str = None,
-    username: str = None,
-    password: str = None,
+    token: Optional[str] = None,
+    username: Optional[str] = None,
+    password: Optional[str] = None,
     swagger_file: str = "",
 ) -> Dict:
     tot_url = url + swagger_file

View File

@@ -402,7 +402,7 @@ class RedashSource(Source):
         return sql_table_names

-    def _get_chart_data_source(self, data_source_id: int = None) -> Dict:
+    def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
         url = f"/api/data_sources/{data_source_id}"
         resp = self.client._get(url).json()
         logger.debug(resp)

View File

@@ -735,7 +735,7 @@ class SalesforceSource(Source):
         return self.report

-def get_tags(params: List[str] = None) -> GlobalTagsClass:
+def get_tags(params: Optional[List[str]] = None) -> GlobalTagsClass:
     if params is None:
         params = []
     tags = [TagAssociationClass(tag=builder.make_tag_urn(tag)) for tag in params if tag]
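
As elsewhere in the PR, the None default is made explicit and then replaced inside the function body, which also avoids the mutable-default pitfall of writing params: List[str] = [] in the signature. A minimal sketch of the pattern (a hypothetical helper, simplified from get_tags; urn:li:tag: is DataHub's tag URN prefix):

from typing import List, Optional


def make_tags(params: Optional[List[str]] = None) -> List[str]:
    # A literal [] default would be shared across calls; None plus this check
    # gives every call its own fresh list.
    if params is None:
        params = []
    return [f"urn:li:tag:{tag}" for tag in params if tag]


assert make_tags() == []
assert make_tags(["pii", ""]) == ["urn:li:tag:pii"]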

View File

@@ -100,7 +100,7 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):
     def get_sql_alchemy_url(
         self,
-        database: str = None,
+        database: Optional[str] = None,
         username: Optional[str] = None,
         password: Optional[SecretStr] = None,
         role: Optional[str] = None,

View File

@@ -103,7 +103,9 @@ class OracleInspectorObjectWrapper:
             for row in cursor
         ]

-    def get_table_names(self, schema: str = None, order_by: str = None) -> List[str]:
+    def get_table_names(
+        self, schema: Optional[str] = None, order_by: Optional[str] = None
+    ) -> List[str]:
         """
         skip order_by, we are not using order_by
         """

View File

@@ -1,4 +1,5 @@
 from textwrap import dedent
+from typing import Optional

 from pydantic.fields import Field
 from pyhive.sqlalchemy_presto import PrestoDialect
@@ -60,7 +61,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):
 def _get_full_table(  # type: ignore
-    self, table_name: str, schema: str = None, quote: bool = True
+    self, table_name: str, schema: Optional[str] = None, quote: bool = True
 ) -> str:
     table_part = (
         self.identifier_preparer.quote_identifier(table_name) if quote else table_name

View File

@@ -691,7 +691,7 @@ class RedshiftSource(SQLAlchemySource):
         return sources

-    def get_db_name(self, inspector: Inspector = None) -> str:
+    def get_db_name(self, inspector: Optional[Inspector] = None) -> str:
         db_name = getattr(self.config, "database")
         db_alias = getattr(self.config, "database_alias")
         if db_alias:

View File

@@ -419,8 +419,8 @@ def get_schema_metadata(
     dataset_name: str,
     platform: str,
     columns: List[dict],
-    pk_constraints: dict = None,
-    foreign_keys: List[ForeignKeyConstraint] = None,
+    pk_constraints: Optional[dict] = None,
+    foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
     canonical_schema: List[SchemaField] = [],
 ) -> SchemaMetadata:
     schema_metadata = SchemaMetadata(
@@ -985,7 +985,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
         self,
         dataset_name: str,
         columns: List[dict],
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[Dict[str, List[str]]] = None,
     ) -> List[SchemaField]:
         canonical_schema = []
@@ -1003,7 +1003,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
         self,
         dataset_name: str,
         column: dict,
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[List[str]] = None,
     ) -> List[SchemaField]:
         gtc: Optional[GlobalTagsClass] = None

View File

@@ -194,7 +194,7 @@ class TrinoSource(SQLAlchemySource):
         self,
         dataset_name: str,
         column: dict,
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[List[str]] = None,
     ) -> List[SchemaField]:

View File

@@ -992,7 +992,10 @@ class TableauSource(StatefulIngestionSourceBase):
         return mcp_workunit

     def emit_datasource(
-        self, datasource: dict, workbook: dict = None, is_embedded_ds: bool = False
+        self,
+        datasource: dict,
+        workbook: Optional[dict] = None,
+        is_embedded_ds: bool = False,
     ) -> Iterable[MetadataWorkUnit]:
         datasource_info = workbook
         if not is_embedded_ds:

View File

@@ -173,7 +173,7 @@ READ_STATEMENT_TYPES: List[str] = ["SELECT"]
 def bigquery_audit_metadata_query_template(
     dataset: str,
     use_date_sharded_tables: bool,
-    table_allow_filter: str = None,
+    table_allow_filter: Optional[str] = None,
 ) -> str:
     """
     Receives a dataset (with project specified) and returns a query template that is used to query exported

View File

@@ -306,7 +306,7 @@ class SnowflakeConfig(BaseSnowflakeConfig, SQLAlchemyConfig):
     def get_sql_alchemy_url(
         self,
-        database: str = None,
+        database: Optional[str] = None,
         username: Optional[str] = None,
         password: Optional[pydantic.SecretStr] = None,
         role: Optional[str] = None,

View File

@@ -110,9 +110,9 @@ class DataHubValidationAction(ValidationAction):
             ValidationResultIdentifier, "GXCloudIdentifier"
         ],
         data_asset: Union[Validator, DataAsset, Batch],
-        payload: Any = None,
+        payload: Optional[Any] = None,
         expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None,
-        checkpoint_identifier: Any = None,
+        checkpoint_identifier: Optional[Any] = None,
     ) -> Dict:
         datasets = []
         try:

View File

@@ -67,7 +67,7 @@ class OperationProcessor:
         self,
         operation_defs: Dict[str, Dict],
         tag_prefix: str = "",
-        owner_source_type: str = None,
+        owner_source_type: Optional[str] = None,
         strip_owner_email_id: bool = False,
     ):
         self.operation_defs = operation_defs

View File

@@ -70,7 +70,7 @@ class DatahubLineageBackend(LineageBackend):
         operator: "BaseOperator",
         inlets: Optional[List] = None,  # unused
         outlets: Optional[List] = None,  # unused
-        context: Dict = None,
+        context: Optional[Dict] = None,
     ) -> None:
         config = get_lineage_config()
         if not config.enabled:

View File

@@ -24,7 +24,7 @@ def wait_for_port(
     docker_services: pytest_docker.plugin.Services,
     container_name: str,
     container_port: int,
-    hostname: str = None,
+    hostname: Optional[str] = None,
     timeout: float = 30.0,
     pause: float = 0.5,
     checker: Optional[Callable[[], bool]] = None,

View File

@@ -4,7 +4,6 @@ from unittest.mock import Mock
 import pytest
 from sqlalchemy.engine.reflection import Inspector

-from datahub.ingestion.api.source import Source
 from datahub.ingestion.source.sql.sql_common import (
     PipelineContext,
     SQLAlchemyConfig,
@@ -19,9 +18,7 @@ class _TestSQLAlchemyConfig(SQLAlchemyConfig):
 class _TestSQLAlchemySource(SQLAlchemySource):
-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        pass
+    pass

 def test_generate_foreign_key():