chore(ingest): bump and pin mypy (#6584)

Harshal Sheth authored on 2022-12-02 13:53:28 -05:00; committed by GitHub
parent 1689212434
commit 44cfd21a65
26 changed files with 64 additions and 46 deletions

View File

@ -27,6 +27,7 @@ plugins =
exclude = ^(venv|build|dist)/
ignore_missing_imports = yes
namespace_packages = no
implicit_optional = no
strict_optional = yes
check_untyped_defs = yes
disallow_incomplete_defs = yes
@ -38,8 +39,16 @@ disallow_untyped_defs = no
# try to be a bit more strict in certain areas of the codebase
[mypy-datahub.*]
ignore_missing_imports = no
[mypy-datahub_provider.*]
ignore_missing_imports = no
[mypy-tests.*]
ignore_missing_imports = no
+ [mypy-google.protobuf.*]
+ # mypy sometimes ignores the above ignore_missing_imports = yes
+ # See https://github.com/python/mypy/issues/10632 and
+ # https://github.com/python/mypy/issues/10619#issuecomment-1174208395
+ # for a discussion of why this happens.
+ ignore_missing_imports = yes
[mypy-datahub.configuration.*]
disallow_untyped_defs = yes
[mypy-datahub.emitter.*]

View File

@ -385,8 +385,7 @@ mypy_stubs = {
"types-ujson>=5.2.0",
"types-termcolor>=1.0.0",
"types-Deprecated",
- # Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor"
- "types-protobuf<4.21.0.0",
+ "types-protobuf>=4.21.0.1",
}
base_dev_requirements = {
@ -399,10 +398,7 @@ base_dev_requirements = {
"flake8>=3.8.3",
"flake8-tidy-imports>=4.3.0",
"isort>=5.7.0",
- # mypy 0.990 enables namespace packages by default and sets
- # no implicit optional to True.
- # FIXME: Enable mypy 0.990 when our codebase is fixed.
- "mypy>=0.981,<0.990",
+ "mypy==0.991",
# pydantic 1.8.2 is incompatible with mypy 0.910.
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
# Restricting top version to <1.10 until we can fix our types.
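The motivation for most of the per-file changes that follow: mypy 0.990+ turns off implicit Optional by default (mirrored by implicit_optional = no in setup.cfg above), so a parameter annotated with a plain type but defaulting to None is now an error. A minimal, hypothetical sketch of the before/after pattern (illustrative only, not part of the diff):

    from typing import Optional

    # Flagged by mypy 0.991: a None default no longer implies Optional.
    #     def describe(count: int, label: str = None) -> str: ...
    # Accepted: the Optional is spelled out explicitly.
    def describe(count: int, label: Optional[str] = None) -> str:
        return f"{count} {label or 'items'}"

    print(describe(3))           # "3 items"
    print(describe(1, "table"))  # "1 table"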

View File

@ -122,8 +122,6 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
"operationType": operation_type,
"partition": partition,
}
- if filter
- else None
),
},
)

View File

@ -80,7 +80,7 @@ class OperationalError(PipelineExecutionError):
message: str
info: dict
- def __init__(self, message: str, info: dict = None):
+ def __init__(self, message: str, info: Optional[dict] = None):
self.message = message
self.info = info or {}

View File

@ -120,7 +120,12 @@ class Source(Closeable, metaclass=ABCMeta):
@classmethod
def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
- pass
+ # Technically, this method should be abstract. However, the @config_class
+ # decorator automatically generates a create method at runtime if one is
+ # not defined, and Python fixes a class's abstract methods when the class is
+ # created, so the injected create would not make the class concrete. To keep
+ # those classes instantiable, we can't mark this method @abstractmethod.
+ raise NotImplementedError('sources must implement "create"')
@abstractmethod
def get_workunits(self) -> Iterable[WorkUnit]:
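The comment above describes a pattern worth spelling out: the base method raises NotImplementedError instead of being @abstractmethod, because Python fixes a class's abstract methods at class-creation time, so a create() injected later by a decorator would not un-abstract the class. A self-contained sketch with hypothetical names (the real @config_class decorator in datahub may work differently):

    from abc import ABCMeta
    from typing import Any, Dict

    class BaseSource(metaclass=ABCMeta):
        @classmethod
        def create(cls, config_dict: Dict[str, Any]) -> "BaseSource":
            # Deliberately not @abstractmethod: a decorator may attach create()
            # after the class body runs, and ABCMeta would still consider the
            # class abstract if create were marked abstract at definition time.
            raise NotImplementedError('sources must implement "create"')

    def autocreate(klass):  # hypothetical stand-in for the @config_class decorator
        if "create" not in klass.__dict__:
            klass.create = classmethod(lambda cls, config_dict: cls())
        return klass

    @autocreate
    class CsvSource(BaseSource):
        pass

    print(CsvSource.create({}))  # works: create() was injected at runtime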

View File

@ -1,5 +1,5 @@
from dataclasses import dataclass
- from typing import Iterable, Union, overload
+ from typing import Iterable, Optional, Union, overload
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source import WorkUnit
@ -42,9 +42,9 @@ class MetadataWorkUnit(WorkUnit):
def __init__(
self,
id: str,
- mce: MetadataChangeEvent = None,
- mcp: MetadataChangeProposalWrapper = None,
- mcp_raw: MetadataChangeProposal = None,
+ mce: Optional[MetadataChangeEvent] = None,
+ mcp: Optional[MetadataChangeProposalWrapper] = None,
+ mcp_raw: Optional[MetadataChangeProposal] = None,
treat_errors_as_warnings: bool = False,
):
super().__init__(id)

View File

@ -2,7 +2,7 @@ import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
- from typing import Dict, List, Optional
+ from typing import Dict, List, Optional, cast
from google.cloud import bigquery
from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning
@ -280,6 +280,8 @@ class BigQueryDataDictionary:
def get_datasets_for_project_id(
conn: bigquery.Client, project_id: str, maxResults: Optional[int] = None
) -> List[BigqueryDataset]:
+ # FIXME: Due to a bug in BigQuery's type annotations, we need to cast here.
+ maxResults = cast(int, maxResults)
datasets = conn.list_datasets(project_id, max_results=maxResults)
return [BigqueryDataset(name=d.dataset_id) for d in datasets]
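For readers unfamiliar with the cast above: typing.cast does nothing at runtime; it only tells the type checker to treat the value as the given type, which is a common way to work around an overly strict third-party annotation. A small, hypothetical illustration:

    from typing import Optional, cast

    # Hypothetical third-party function whose annotation demands an int even
    # though it tolerates None at runtime.
    def list_rows(max_results: int) -> list:
        return list(range(max_results if max_results is not None else 3))

    limit: Optional[int] = None
    # cast() is a no-op at runtime; it only changes what the type checker sees.
    print(list_rows(cast(int, limit)))  # [0, 1, 2]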

View File

@ -833,8 +833,8 @@ class DatahubGEProfiler:
self,
query_combiner: SQLAlchemyQueryCombiner,
pretty_name: str,
- schema: str = None,
- table: str = None,
+ schema: Optional[str] = None,
+ table: Optional[str] = None,
partition: Optional[str] = None,
custom_sql: Optional[str] = None,
platform: Optional[str] = None,

View File

@ -123,7 +123,9 @@ def remove_prefix(text: str, prefix: str) -> str:
return text
- def unquote(string: str, leading_quote: str = '"', trailing_quote: str = None) -> str:
+ def unquote(
+ string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
+ ) -> str:
"""
If string starts and ends with a quote, unquote it
"""

View File

@ -2,7 +2,7 @@ import json
import re
import time
import warnings
- from typing import Any, Dict, Generator, List, Tuple
+ from typing import Any, Dict, Generator, List, Optional, Tuple
import requests
import yaml
@ -47,7 +47,10 @@ def flatten2list(d: dict) -> list:
def request_call(
- url: str, token: str = None, username: str = None, password: str = None
+ url: str,
+ token: Optional[str] = None,
+ username: Optional[str] = None,
+ password: Optional[str] = None,
) -> requests.Response:
headers = {"accept": "application/json"}
@ -66,9 +69,9 @@ def request_call(
def get_swag_json(
url: str,
- token: str = None,
- username: str = None,
- password: str = None,
+ token: Optional[str] = None,
+ username: Optional[str] = None,
+ password: Optional[str] = None,
swagger_file: str = "",
) -> Dict:
tot_url = url + swagger_file

View File

@ -402,7 +402,7 @@ class RedashSource(Source):
return sql_table_names
- def _get_chart_data_source(self, data_source_id: int = None) -> Dict:
+ def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
url = f"/api/data_sources/{data_source_id}"
resp = self.client._get(url).json()
logger.debug(resp)

View File

@ -735,7 +735,7 @@ class SalesforceSource(Source):
return self.report
- def get_tags(params: List[str] = None) -> GlobalTagsClass:
+ def get_tags(params: Optional[List[str]] = None) -> GlobalTagsClass:
if params is None:
params = []
tags = [TagAssociationClass(tag=builder.make_tag_urn(tag)) for tag in params if tag]

View File

@ -100,7 +100,7 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):
def get_sql_alchemy_url(
self,
- database: str = None,
+ database: Optional[str] = None,
username: Optional[str] = None,
password: Optional[SecretStr] = None,
role: Optional[str] = None,

View File

@ -103,7 +103,9 @@ class OracleInspectorObjectWrapper:
for row in cursor
]
- def get_table_names(self, schema: str = None, order_by: str = None) -> List[str]:
+ def get_table_names(
+ self, schema: Optional[str] = None, order_by: Optional[str] = None
+ ) -> List[str]:
"""
skip order_by, we are not using order_by
"""

View File

@ -1,4 +1,5 @@
from textwrap import dedent
+ from typing import Optional
from pydantic.fields import Field
from pyhive.sqlalchemy_presto import PrestoDialect
@ -60,7 +61,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):
def _get_full_table( # type: ignore
- self, table_name: str, schema: str = None, quote: bool = True
+ self, table_name: str, schema: Optional[str] = None, quote: bool = True
) -> str:
table_part = (
self.identifier_preparer.quote_identifier(table_name) if quote else table_name

View File

@ -691,7 +691,7 @@ class RedshiftSource(SQLAlchemySource):
return sources
- def get_db_name(self, inspector: Inspector = None) -> str:
+ def get_db_name(self, inspector: Optional[Inspector] = None) -> str:
db_name = getattr(self.config, "database")
db_alias = getattr(self.config, "database_alias")
if db_alias:

View File

@ -419,8 +419,8 @@ def get_schema_metadata(
dataset_name: str,
platform: str,
columns: List[dict],
- pk_constraints: dict = None,
- foreign_keys: List[ForeignKeyConstraint] = None,
+ pk_constraints: Optional[dict] = None,
+ foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
canonical_schema: List[SchemaField] = [],
) -> SchemaMetadata:
schema_metadata = SchemaMetadata(
@ -985,7 +985,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
self,
dataset_name: str,
columns: List[dict],
- pk_constraints: dict = None,
+ pk_constraints: Optional[dict] = None,
tags: Optional[Dict[str, List[str]]] = None,
) -> List[SchemaField]:
canonical_schema = []
@ -1003,7 +1003,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
self,
dataset_name: str,
column: dict,
- pk_constraints: dict = None,
+ pk_constraints: Optional[dict] = None,
tags: Optional[List[str]] = None,
) -> List[SchemaField]:
gtc: Optional[GlobalTagsClass] = None

View File

@ -194,7 +194,7 @@ class TrinoSource(SQLAlchemySource):
self,
dataset_name: str,
column: dict,
- pk_constraints: dict = None,
+ pk_constraints: Optional[dict] = None,
tags: Optional[List[str]] = None,
) -> List[SchemaField]:

View File

@ -992,7 +992,10 @@ class TableauSource(StatefulIngestionSourceBase):
return mcp_workunit
def emit_datasource(
- self, datasource: dict, workbook: dict = None, is_embedded_ds: bool = False
+ self,
+ datasource: dict,
+ workbook: Optional[dict] = None,
+ is_embedded_ds: bool = False,
) -> Iterable[MetadataWorkUnit]:
datasource_info = workbook
if not is_embedded_ds:

View File

@ -173,7 +173,7 @@ READ_STATEMENT_TYPES: List[str] = ["SELECT"]
def bigquery_audit_metadata_query_template(
dataset: str,
use_date_sharded_tables: bool,
- table_allow_filter: str = None,
+ table_allow_filter: Optional[str] = None,
) -> str:
"""
Receives a dataset (with project specified) and returns a query template that is used to query exported

View File

@ -306,7 +306,7 @@ class SnowflakeConfig(BaseSnowflakeConfig, SQLAlchemyConfig):
def get_sql_alchemy_url(
self,
- database: str = None,
+ database: Optional[str] = None,
username: Optional[str] = None,
password: Optional[pydantic.SecretStr] = None,
role: Optional[str] = None,

View File

@ -110,9 +110,9 @@ class DataHubValidationAction(ValidationAction):
ValidationResultIdentifier, "GXCloudIdentifier"
],
data_asset: Union[Validator, DataAsset, Batch],
- payload: Any = None,
+ payload: Optional[Any] = None,
expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None,
- checkpoint_identifier: Any = None,
+ checkpoint_identifier: Optional[Any] = None,
) -> Dict:
datasets = []
try:

View File

@ -67,7 +67,7 @@ class OperationProcessor:
self,
operation_defs: Dict[str, Dict],
tag_prefix: str = "",
- owner_source_type: str = None,
+ owner_source_type: Optional[str] = None,
strip_owner_email_id: bool = False,
):
self.operation_defs = operation_defs

View File

@ -70,7 +70,7 @@ class DatahubLineageBackend(LineageBackend):
operator: "BaseOperator",
inlets: Optional[List] = None, # unused
outlets: Optional[List] = None, # unused
- context: Dict = None,
+ context: Optional[Dict] = None,
) -> None:
config = get_lineage_config()
if not config.enabled:

View File

@ -24,7 +24,7 @@ def wait_for_port(
docker_services: pytest_docker.plugin.Services,
container_name: str,
container_port: int,
- hostname: str = None,
+ hostname: Optional[str] = None,
timeout: float = 30.0,
pause: float = 0.5,
checker: Optional[Callable[[], bool]] = None,

View File

@ -4,7 +4,6 @@ from unittest.mock import Mock
import pytest
from sqlalchemy.engine.reflection import Inspector
- from datahub.ingestion.api.source import Source
from datahub.ingestion.source.sql.sql_common import (
PipelineContext,
SQLAlchemyConfig,
@ -19,8 +18,6 @@ class _TestSQLAlchemyConfig(SQLAlchemyConfig):
class _TestSQLAlchemySource(SQLAlchemySource):
@classmethod
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
pass