mirror of https://github.com/datahub-project/datahub.git
synced 2025-10-06 06:26:25 +00:00

chore(ingest): bump and pin mypy (#6584)

This commit is contained in:
parent 1689212434
commit 44cfd21a65
@@ -27,6 +27,7 @@ plugins =
exclude = ^(venv|build|dist)/
ignore_missing_imports = yes
namespace_packages = no
implicit_optional = no
strict_optional = yes
check_untyped_defs = yes
disallow_incomplete_defs = yes

@@ -38,8 +39,16 @@ disallow_untyped_defs = no
# try to be a bit more strict in certain areas of the codebase
[mypy-datahub.*]
ignore_missing_imports = no
[mypy-datahub_provider.*]
ignore_missing_imports = no
[mypy-tests.*]
ignore_missing_imports = no
[mypy-google.protobuf.*]
# mypy sometimes ignores the above ignore_missing_imports = yes
# See https://github.com/python/mypy/issues/10632 and
# https://github.com/python/mypy/issues/10619#issuecomment-1174208395
# for a discussion of why this happens.
ignore_missing_imports = yes
[mypy-datahub.configuration.*]
disallow_untyped_defs = yes
[mypy-datahub.emitter.*]
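Most of the code changes in this commit follow from mypy's stricter optional handling: with implicit_optional = no (the default behavior starting with mypy 0.990), a parameter whose default is None must be annotated as Optional. A minimal sketch of the before/after pattern, using hypothetical function names rather than code from this commit:

from typing import Optional


# Rejected under implicit_optional = no:
#   error: Incompatible default for argument "schema" (default has type "None", argument has type "str")
def old_style(schema: str = None) -> str:
    return schema or "public"


# The fix applied throughout the hunks below: spell the Optional out.
def new_style(schema: Optional[str] = None) -> str:
    return schema or "public"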
@@ -385,8 +385,7 @@ mypy_stubs = {
    "types-ujson>=5.2.0",
    "types-termcolor>=1.0.0",
    "types-Deprecated",
    # Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor"
    "types-protobuf<4.21.0.0",
    "types-protobuf>=4.21.0.1",
}

base_dev_requirements = {

@@ -399,10 +398,7 @@ base_dev_requirements = {
    "flake8>=3.8.3",
    "flake8-tidy-imports>=4.3.0",
    "isort>=5.7.0",
    # mypy 0.990 enables namespace packages by default and sets
    # no implicit optional to True.
    # FIXME: Enable mypy 0.990 when our codebase is fixed.
    "mypy>=0.981,<0.990",
    "mypy==0.991",
    # pydantic 1.8.2 is incompatible with mypy 0.910.
    # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
    # Restricting top version to <1.10 until we can fix our types.
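For reference, the practical difference between the old range constraint and the new exact pin can be checked with the packaging library; this snippet is purely illustrative and not part of the commit:

from packaging.specifiers import SpecifierSet

# The old constraint accepted any 0.98x release but excluded 0.990+;
# the new constraint pins one exact version for reproducible type-check runs.
old = SpecifierSet(">=0.981,<0.990")
new = SpecifierSet("==0.991")

assert "0.982" in old and "0.991" not in old
assert "0.991" in new and "0.992" not in new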
@@ -122,8 +122,6 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
                    "operationType": operation_type,
                    "partition": partition,
                }
                if filter
                else None
            ),
        },
    )

@@ -80,7 +80,7 @@ class OperationalError(PipelineExecutionError):
    message: str
    info: dict

    def __init__(self, message: str, info: dict = None):
    def __init__(self, message: str, info: Optional[dict] = None):
        self.message = message
        self.info = info or {}
@@ -120,7 +120,12 @@ class Source(Closeable, metaclass=ABCMeta):

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
        pass
        # Technically, this method should be abstract. However, the @config_class
        # decorator automatically generates a create method at runtime if one is
        # not defined. Python still treats the class as abstract because it thinks
        # the create method is missing. To avoid the class becoming abstract, we
        # can't make this method abstract.
        raise NotImplementedError('sources must implement "create"')

    @abstractmethod
    def get_workunits(self) -> Iterable[WorkUnit]:
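The comment above hinges on how ABCMeta records abstractness: __abstractmethods__ is fixed when the class body is executed, so a create method injected afterwards by a decorator would not un-abstract the class, which is why Source.create stays a concrete method that raises. A self-contained sketch of that behavior (hypothetical names, not DataHub code):

from abc import ABCMeta, abstractmethod


class Base(metaclass=ABCMeta):
    @classmethod
    @abstractmethod
    def create(cls) -> "Base":
        raise NotImplementedError


def add_create(cls):
    # Inject a create() after the class is built, mimicking a decorator
    # that generates the method for you.
    cls.create = classmethod(lambda c: c.__new__(c))
    return cls


@add_create
class Child(Base):
    pass


# ABCMeta recorded the missing override when the class was created,
# so instantiation still fails even though create() now exists:
try:
    Child()
except TypeError as e:
    print(e)  # Can't instantiate abstract class Child with abstract method create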
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Iterable, Union, overload
from typing import Iterable, Optional, Union, overload

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source import WorkUnit

@@ -42,9 +42,9 @@ class MetadataWorkUnit(WorkUnit):
    def __init__(
        self,
        id: str,
        mce: MetadataChangeEvent = None,
        mcp: MetadataChangeProposalWrapper = None,
        mcp_raw: MetadataChangeProposal = None,
        mce: Optional[MetadataChangeEvent] = None,
        mcp: Optional[MetadataChangeProposalWrapper] = None,
        mcp_raw: Optional[MetadataChangeProposal] = None,
        treat_errors_as_warnings: bool = False,
    ):
        super().__init__(id)
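A note on the overload import that stays in place here: when a constructor accepts exactly one of several mutually exclusive payloads, the non-overloaded implementation signature ends up listing all of them with None defaults, which is why these parameters had to gain explicit Optional[...] under the new settings. A generic, illustrative sketch of that pattern (not the actual MetadataWorkUnit code):

from typing import Optional, Union, overload


class Event:
    pass


class Proposal:
    pass


class ExampleWorkUnit:
    metadata: Union[Event, Proposal]

    @overload
    def __init__(self, id: str, *, mce: Event) -> None: ...

    @overload
    def __init__(self, id: str, *, mcp: Proposal) -> None: ...

    def __init__(
        self,
        id: str,
        *,
        mce: Optional[Event] = None,
        mcp: Optional[Proposal] = None,
    ) -> None:
        self.id = id
        if mce is not None and mcp is None:
            self.metadata = mce
        elif mcp is not None and mce is None:
            self.metadata = mcp
        else:
            raise ValueError("provide exactly one of mce or mcp")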
@@ -2,7 +2,7 @@ import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional
from typing import Dict, List, Optional, cast

from google.cloud import bigquery
from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning

@@ -280,6 +280,8 @@ class BigQueryDataDictionary:
    def get_datasets_for_project_id(
        conn: bigquery.Client, project_id: str, maxResults: Optional[int] = None
    ) -> List[BigqueryDataset]:
        # FIXME: Due to a bug in BigQuery's type annotations, we need to cast here.
        maxResults = cast(int, maxResults)
        datasets = conn.list_datasets(project_id, max_results=maxResults)

        return [BigqueryDataset(name=d.dataset_id) for d in datasets]
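typing.cast, as used above, is only a hint to the type checker; it performs no runtime conversion or validation, which makes it a convenient escape hatch when a third-party annotation (here, presumably the BigQuery client declaring max_results as a plain int) is stricter than the value actually passed. A standalone illustration with a hypothetical function, not taken from this commit:

from typing import Optional, cast


def takes_int(max_results: int) -> int:
    # Stand-in for a third-party function whose annotation forgot Optional,
    # even though it handles None fine at runtime.
    return max_results if max_results is not None else -1


def caller(limit: Optional[int] = None) -> int:
    # Without the cast, mypy reports:
    #   Argument 1 to "takes_int" has incompatible type "Optional[int]"; expected "int"
    # cast() changes only what mypy believes; the value passes through unchanged.
    return takes_int(cast(int, limit))


print(caller())    # -1
print(caller(10))  # 10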
@@ -833,8 +833,8 @@ class DatahubGEProfiler:
        self,
        query_combiner: SQLAlchemyQueryCombiner,
        pretty_name: str,
        schema: str = None,
        table: str = None,
        schema: Optional[str] = None,
        table: Optional[str] = None,
        partition: Optional[str] = None,
        custom_sql: Optional[str] = None,
        platform: Optional[str] = None,

@@ -123,7 +123,9 @@ def remove_prefix(text: str, prefix: str) -> str:
    return text


def unquote(string: str, leading_quote: str = '"', trailing_quote: str = None) -> str:
def unquote(
    string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
) -> str:
    """
    If string starts and ends with a quote, unquote it
    """
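Judging from the docstring, the helper strips one matching pair of surrounding quotes. A hypothetical usage sketch, re-implemented here under the assumption that trailing_quote falls back to leading_quote when left as None:

from typing import Optional


def unquote_sketch(
    string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
) -> str:
    # Assumed behavior based on the docstring above, not the actual implementation.
    trailing_quote = trailing_quote or leading_quote
    if string.startswith(leading_quote) and string.endswith(trailing_quote):
        return string[len(leading_quote) : -len(trailing_quote)]
    return string


print(unquote_sketch('"my_table"'))            # my_table
print(unquote_sketch("[my_table]", "[", "]"))  # my_table
print(unquote_sketch("already_bare"))          # already_bare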
@@ -2,7 +2,7 @@ import json
import re
import time
import warnings
from typing import Any, Dict, Generator, List, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple

import requests
import yaml

@@ -47,7 +47,10 @@ def flatten2list(d: dict) -> list:


def request_call(
    url: str, token: str = None, username: str = None, password: str = None
    url: str,
    token: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> requests.Response:

    headers = {"accept": "application/json"}

@@ -66,9 +69,9 @@ def request_call(

def get_swag_json(
    url: str,
    token: str = None,
    username: str = None,
    password: str = None,
    token: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    swagger_file: str = "",
) -> Dict:
    tot_url = url + swagger_file

@@ -402,7 +402,7 @@ class RedashSource(Source):
        return sql_table_names

    def _get_chart_data_source(self, data_source_id: int = None) -> Dict:
    def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
        url = f"/api/data_sources/{data_source_id}"
        resp = self.client._get(url).json()
        logger.debug(resp)
@@ -735,7 +735,7 @@ class SalesforceSource(Source):
        return self.report


def get_tags(params: List[str] = None) -> GlobalTagsClass:
def get_tags(params: Optional[List[str]] = None) -> GlobalTagsClass:
    if params is None:
        params = []
    tags = [TagAssociationClass(tag=builder.make_tag_urn(tag)) for tag in params if tag]
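This hunk also shows the standard companion idiom to the Optional annotations: a list default is written as None plus an "if params is None: params = []" guard, because a literal [] default is created once and shared across calls. A small demonstration with hypothetical functions, not code from this commit:

from typing import List, Optional


def broken(tag: str, tags: List[str] = []) -> List[str]:
    # The default list is created once at definition time and mutated on
    # every call, so state leaks between calls.
    tags.append(tag)
    return tags


def fixed(tag: str, tags: Optional[List[str]] = None) -> List[str]:
    if tags is None:
        tags = []
    tags.append(tag)
    return tags


print(broken("a"), broken("b"))  # ['a', 'b'] ['a', 'b']  -- shared state
print(fixed("a"), fixed("b"))    # ['a'] ['b']            -- independent lists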
@@ -100,7 +100,7 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):

    def get_sql_alchemy_url(
        self,
        database: str = None,
        database: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[SecretStr] = None,
        role: Optional[str] = None,

@@ -103,7 +103,9 @@ class OracleInspectorObjectWrapper:
            for row in cursor
        ]

    def get_table_names(self, schema: str = None, order_by: str = None) -> List[str]:
    def get_table_names(
        self, schema: Optional[str] = None, order_by: Optional[str] = None
    ) -> List[str]:
        """
        skip order_by, we are not using order_by
        """

@@ -1,4 +1,5 @@
from textwrap import dedent
from typing import Optional

from pydantic.fields import Field
from pyhive.sqlalchemy_presto import PrestoDialect

@@ -60,7 +61,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):


def _get_full_table(  # type: ignore
    self, table_name: str, schema: str = None, quote: bool = True
    self, table_name: str, schema: Optional[str] = None, quote: bool = True
) -> str:
    table_part = (
        self.identifier_preparer.quote_identifier(table_name) if quote else table_name
@@ -691,7 +691,7 @@ class RedshiftSource(SQLAlchemySource):

        return sources

    def get_db_name(self, inspector: Inspector = None) -> str:
    def get_db_name(self, inspector: Optional[Inspector] = None) -> str:
        db_name = getattr(self.config, "database")
        db_alias = getattr(self.config, "database_alias")
        if db_alias:

@@ -419,8 +419,8 @@ def get_schema_metadata(
    dataset_name: str,
    platform: str,
    columns: List[dict],
    pk_constraints: dict = None,
    foreign_keys: List[ForeignKeyConstraint] = None,
    pk_constraints: Optional[dict] = None,
    foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
    canonical_schema: List[SchemaField] = [],
) -> SchemaMetadata:
    schema_metadata = SchemaMetadata(

@@ -985,7 +985,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
        self,
        dataset_name: str,
        columns: List[dict],
        pk_constraints: dict = None,
        pk_constraints: Optional[dict] = None,
        tags: Optional[Dict[str, List[str]]] = None,
    ) -> List[SchemaField]:
        canonical_schema = []

@@ -1003,7 +1003,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
        self,
        dataset_name: str,
        column: dict,
        pk_constraints: dict = None,
        pk_constraints: Optional[dict] = None,
        tags: Optional[List[str]] = None,
    ) -> List[SchemaField]:
        gtc: Optional[GlobalTagsClass] = None
@@ -194,7 +194,7 @@ class TrinoSource(SQLAlchemySource):
        self,
        dataset_name: str,
        column: dict,
        pk_constraints: dict = None,
        pk_constraints: Optional[dict] = None,
        tags: Optional[List[str]] = None,
    ) -> List[SchemaField]:

@@ -992,7 +992,10 @@ class TableauSource(StatefulIngestionSourceBase):
        return mcp_workunit

    def emit_datasource(
        self, datasource: dict, workbook: dict = None, is_embedded_ds: bool = False
        self,
        datasource: dict,
        workbook: Optional[dict] = None,
        is_embedded_ds: bool = False,
    ) -> Iterable[MetadataWorkUnit]:
        datasource_info = workbook
        if not is_embedded_ds:

@@ -173,7 +173,7 @@ READ_STATEMENT_TYPES: List[str] = ["SELECT"]
def bigquery_audit_metadata_query_template(
    dataset: str,
    use_date_sharded_tables: bool,
    table_allow_filter: str = None,
    table_allow_filter: Optional[str] = None,
) -> str:
    """
    Receives a dataset (with project specified) and returns a query template that is used to query exported
@@ -306,7 +306,7 @@ class SnowflakeConfig(BaseSnowflakeConfig, SQLAlchemyConfig):

    def get_sql_alchemy_url(
        self,
        database: str = None,
        database: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[pydantic.SecretStr] = None,
        role: Optional[str] = None,

@@ -110,9 +110,9 @@ class DataHubValidationAction(ValidationAction):
            ValidationResultIdentifier, "GXCloudIdentifier"
        ],
        data_asset: Union[Validator, DataAsset, Batch],
        payload: Any = None,
        payload: Optional[Any] = None,
        expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None,
        checkpoint_identifier: Any = None,
        checkpoint_identifier: Optional[Any] = None,
    ) -> Dict:
        datasets = []
        try:

@@ -67,7 +67,7 @@ class OperationProcessor:
        self,
        operation_defs: Dict[str, Dict],
        tag_prefix: str = "",
        owner_source_type: str = None,
        owner_source_type: Optional[str] = None,
        strip_owner_email_id: bool = False,
    ):
        self.operation_defs = operation_defs

@@ -70,7 +70,7 @@ class DatahubLineageBackend(LineageBackend):
        operator: "BaseOperator",
        inlets: Optional[List] = None,  # unused
        outlets: Optional[List] = None,  # unused
        context: Dict = None,
        context: Optional[Dict] = None,
    ) -> None:
        config = get_lineage_config()
        if not config.enabled:
@@ -24,7 +24,7 @@ def wait_for_port(
    docker_services: pytest_docker.plugin.Services,
    container_name: str,
    container_port: int,
    hostname: str = None,
    hostname: Optional[str] = None,
    timeout: float = 30.0,
    pause: float = 0.5,
    checker: Optional[Callable[[], bool]] = None,

@@ -4,7 +4,6 @@ from unittest.mock import Mock
import pytest
from sqlalchemy.engine.reflection import Inspector

from datahub.ingestion.api.source import Source
from datahub.ingestion.source.sql.sql_common import (
    PipelineContext,
    SQLAlchemyConfig,

@@ -19,9 +18,7 @@ class _TestSQLAlchemyConfig(SQLAlchemyConfig):


class _TestSQLAlchemySource(SQLAlchemySource):
    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
        pass
    pass


def test_generate_foreign_key():