Mirror of https://github.com/datahub-project/datahub.git (synced 2025-10-06 14:36:32 +00:00)

chore(ingest): bump and pin mypy (#6584)
parent 1689212434
commit 44cfd21a65
@@ -27,6 +27,7 @@ plugins =
 exclude = ^(venv|build|dist)/
 ignore_missing_imports = yes
 namespace_packages = no
+implicit_optional = no
 strict_optional = yes
 check_untyped_defs = yes
 disallow_incomplete_defs = yes
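The new implicit_optional = no line pins down the default that mypy changed in 0.990 (the removed comment in the later hunk that bumps the mypy pin refers to the same change), and it is what drives the Optional[...] annotation churn in the rest of this commit. A minimal sketch, not taken from the repo, of what the checker now rejects versus accepts:

    from typing import Optional

    def bad(limit: int = None) -> int:  # mypy error: implicit Optional defaults are no longer allowed
        return limit or 0

    def good(limit: Optional[int] = None) -> int:  # the explicit spelling this commit switches to
        return limit or 0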
@@ -38,8 +39,16 @@ disallow_untyped_defs = no
 # try to be a bit more strict in certain areas of the codebase
 [mypy-datahub.*]
 ignore_missing_imports = no
+[mypy-datahub_provider.*]
+ignore_missing_imports = no
 [mypy-tests.*]
 ignore_missing_imports = no
+[mypy-google.protobuf.*]
+# mypy sometimes ignores the above ignore_missing_imports = yes
+# See https://github.com/python/mypy/issues/10632 and
+# https://github.com/python/mypy/issues/10619#issuecomment-1174208395
+# for a discussion of why this happens.
+ignore_missing_imports = yes
 [mypy-datahub.configuration.*]
 disallow_untyped_defs = yes
 [mypy-datahub.emitter.*]
@@ -385,8 +385,7 @@ mypy_stubs = {
 "types-ujson>=5.2.0",
 "types-termcolor>=1.0.0",
 "types-Deprecated",
-# Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor"
-"types-protobuf<4.21.0.0",
+"types-protobuf>=4.21.0.1",
 }

 base_dev_requirements = {
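The two protobuf changes (the [mypy-google.protobuf.*] override above and the stubs bump in this hunk) address the error quoted in the removed comment: without a usable types-protobuf package, strictly-checked imports of google.protobuf fail with "Library stubs not installed". A small illustration, hypothetical code rather than anything from the repo:

    from google.protobuf import descriptor_pb2

    # With no (or a broken) types-protobuf install, strict mypy flags the import above;
    # with types-protobuf>=4.21.0.1 the attribute access below is checked against real stubs.
    def proto_name(msg: descriptor_pb2.DescriptorProto) -> str:
        return msg.name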
@@ -399,10 +398,7 @@ base_dev_requirements = {
 "flake8>=3.8.3",
 "flake8-tidy-imports>=4.3.0",
 "isort>=5.7.0",
-# mypy 0.990 enables namespace packages by default and sets
-# no implicit optional to True.
-# FIXME: Enable mypy 0.990 when our codebase is fixed.
-"mypy>=0.981,<0.990",
+"mypy==0.991",
 # pydantic 1.8.2 is incompatible with mypy 0.910.
 # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
 # Restricting top version to <1.10 until we can fix our types.
@@ -122,8 +122,6 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
 "operationType": operation_type,
 "partition": partition,
 }
-if filter
-else None
 ),
 },
 )
@@ -80,7 +80,7 @@ class OperationalError(PipelineExecutionError):
 message: str
 info: dict

-def __init__(self, message: str, info: dict = None):
+def __init__(self, message: str, info: Optional[dict] = None):
 self.message = message
 self.info = info or {}

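This hunk shows the pattern repeated through the rest of the diff: the default value stays None, the annotation becomes Optional[...], and the body coalesces with "or {}" so every instance gets its own dict. A short sketch (illustrative class names only) of why that idiom is preferred over a mutable default:

    from typing import Optional

    class BrokenError(Exception):
        def __init__(self, message: str, info: dict = {}):  # a single dict shared by all instances
            self.info = info

    class SafeError(Exception):
        # None default, spelled Optional[dict] so mypy 0.991 accepts it, plus a fresh dict per call
        def __init__(self, message: str, info: Optional[dict] = None):
            self.info = info or {}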
@@ -120,7 +120,12 @@ class Source(Closeable, metaclass=ABCMeta):

 @classmethod
 def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-pass
+# Technically, this method should be abstract. However, the @config_class
+# decorator automatically generates a create method at runtime if one is
+# not defined. Python still treats the class as abstract because it thinks
+# the create method is missing. To avoid the class becoming abstract, we
+# can't make this method abstract.
+raise NotImplementedError('sources must implement "create"')

 @abstractmethod
 def get_workunits(self) -> Iterable[WorkUnit]:
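The added comment is the interesting part: the set of abstract methods is fixed when a class object is created, so a create() injected later by a decorator does not clear the abstract flag, which appears to be why the base method raises NotImplementedError instead of being marked @abstractmethod. A hypothetical sketch of that decorator behaviour (the real @config_class in datahub may differ in its details):

    from typing import Any, Callable, Type, TypeVar

    TSource = TypeVar("TSource")

    def config_class(config_cls: type) -> Callable[[Type[TSource]], Type[TSource]]:
        def wrapper(cls: Type[TSource]) -> Type[TSource]:
            if "create" not in cls.__dict__:  # only synthesize create() when the class lacks its own
                def create(inner_cls: type, config_dict: dict, ctx: Any) -> Any:
                    config = config_cls(**config_dict)  # assumed: the config class parses the raw dict
                    return inner_cls(config, ctx)  # assumed: subclasses take (config, ctx)

                cls.create = classmethod(create)  # type: ignore[attr-defined]
            return cls

        return wrapper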
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Iterable, Union, overload
+from typing import Iterable, Optional, Union, overload

 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.source import WorkUnit
@@ -42,9 +42,9 @@ class MetadataWorkUnit(WorkUnit):
 def __init__(
 self,
 id: str,
-mce: MetadataChangeEvent = None,
-mcp: MetadataChangeProposalWrapper = None,
-mcp_raw: MetadataChangeProposal = None,
+mce: Optional[MetadataChangeEvent] = None,
+mcp: Optional[MetadataChangeProposalWrapper] = None,
+mcp_raw: Optional[MetadataChangeProposal] = None,
 treat_errors_as_warnings: bool = False,
 ):
 super().__init__(id)
@@ -2,7 +2,7 @@ import logging
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, cast

 from google.cloud import bigquery
 from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning
@@ -280,6 +280,8 @@ class BigQueryDataDictionary:
 def get_datasets_for_project_id(
 conn: bigquery.Client, project_id: str, maxResults: Optional[int] = None
 ) -> List[BigqueryDataset]:
+# FIXME: Due to a bug in BigQuery's type annotations, we need to cast here.
+maxResults = cast(int, maxResults)
 datasets = conn.list_datasets(project_id, max_results=maxResults)

 return [BigqueryDataset(name=d.dataset_id) for d in datasets]
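cast() here is purely a hint to the type checker: it does nothing at runtime, so a None maxResults still reaches list_datasets unchanged. A standalone illustration (not repo code) of the same workaround:

    from typing import Optional, cast

    def first_n(items: list, max_results: Optional[int] = None) -> list:
        # cast() has no runtime effect; it only makes the checker treat the value as int,
        # mirroring the FIXME above for the mis-annotated BigQuery parameter.
        n = cast(int, max_results)
        return items[:n]  # slicing with None is valid and returns the whole list

    assert first_n([1, 2, 3]) == [1, 2, 3]
    assert first_n([1, 2, 3], 2) == [1, 2]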
@@ -833,8 +833,8 @@ class DatahubGEProfiler:
 self,
 query_combiner: SQLAlchemyQueryCombiner,
 pretty_name: str,
-schema: str = None,
-table: str = None,
+schema: Optional[str] = None,
+table: Optional[str] = None,
 partition: Optional[str] = None,
 custom_sql: Optional[str] = None,
 platform: Optional[str] = None,
@@ -123,7 +123,9 @@ def remove_prefix(text: str, prefix: str) -> str:
 return text


-def unquote(string: str, leading_quote: str = '"', trailing_quote: str = None) -> str:
+def unquote(
+string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
+) -> str:
 """
 If string starts and ends with a quote, unquote it
 """
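Splitting the unquote signature also clarifies why trailing_quote needs Optional, presumably because None means "default to the leading quote". The function body is not part of this diff; a minimal re-implementation consistent with the signature and docstring, for illustration only:

    from typing import Optional

    def unquote_sketch(
        string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
    ) -> str:
        # Assumed behaviour: strip the quotes only when both ends are quoted.
        trailing_quote = trailing_quote or leading_quote
        if string.startswith(leading_quote) and string.endswith(trailing_quote):
            return string[len(leading_quote) : -len(trailing_quote)]
        return string

    assert unquote_sketch('"servicePath"') == "servicePath"
    assert unquote_sketch("plain") == "plain"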
@@ -2,7 +2,7 @@ import json
 import re
 import time
 import warnings
-from typing import Any, Dict, Generator, List, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple

 import requests
 import yaml
@@ -47,7 +47,10 @@ def flatten2list(d: dict) -> list:


 def request_call(
-url: str, token: str = None, username: str = None, password: str = None
+url: str,
+token: Optional[str] = None,
+username: Optional[str] = None,
+password: Optional[str] = None,
 ) -> requests.Response:

 headers = {"accept": "application/json"}
@@ -66,9 +69,9 @@ def request_call(

 def get_swag_json(
 url: str,
-token: str = None,
-username: str = None,
-password: str = None,
+token: Optional[str] = None,
+username: Optional[str] = None,
+password: Optional[str] = None,
 swagger_file: str = "",
 ) -> Dict:
 tot_url = url + swagger_file
@@ -402,7 +402,7 @@ class RedashSource(Source):

 return sql_table_names

-def _get_chart_data_source(self, data_source_id: int = None) -> Dict:
+def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
 url = f"/api/data_sources/{data_source_id}"
 resp = self.client._get(url).json()
 logger.debug(resp)
@@ -735,7 +735,7 @@ class SalesforceSource(Source):
 return self.report


-def get_tags(params: List[str] = None) -> GlobalTagsClass:
+def get_tags(params: Optional[List[str]] = None) -> GlobalTagsClass:
 if params is None:
 params = []
 tags = [TagAssociationClass(tag=builder.make_tag_urn(tag)) for tag in params if tag]
@@ -100,7 +100,7 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):

 def get_sql_alchemy_url(
 self,
-database: str = None,
+database: Optional[str] = None,
 username: Optional[str] = None,
 password: Optional[SecretStr] = None,
 role: Optional[str] = None,
@@ -103,7 +103,9 @@ class OracleInspectorObjectWrapper:
 for row in cursor
 ]

-def get_table_names(self, schema: str = None, order_by: str = None) -> List[str]:
+def get_table_names(
+self, schema: Optional[str] = None, order_by: Optional[str] = None
+) -> List[str]:
 """
 skip order_by, we are not using order_by
 """
@@ -1,4 +1,5 @@
 from textwrap import dedent
+from typing import Optional

 from pydantic.fields import Field
 from pyhive.sqlalchemy_presto import PrestoDialect
@@ -60,7 +61,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):


 def _get_full_table( # type: ignore
-self, table_name: str, schema: str = None, quote: bool = True
+self, table_name: str, schema: Optional[str] = None, quote: bool = True
 ) -> str:
 table_part = (
 self.identifier_preparer.quote_identifier(table_name) if quote else table_name
@@ -691,7 +691,7 @@ class RedshiftSource(SQLAlchemySource):

 return sources

-def get_db_name(self, inspector: Inspector = None) -> str:
+def get_db_name(self, inspector: Optional[Inspector] = None) -> str:
 db_name = getattr(self.config, "database")
 db_alias = getattr(self.config, "database_alias")
 if db_alias:
@@ -419,8 +419,8 @@ def get_schema_metadata(
 dataset_name: str,
 platform: str,
 columns: List[dict],
-pk_constraints: dict = None,
-foreign_keys: List[ForeignKeyConstraint] = None,
+pk_constraints: Optional[dict] = None,
+foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
 canonical_schema: List[SchemaField] = [],
 ) -> SchemaMetadata:
 schema_metadata = SchemaMetadata(
@@ -985,7 +985,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
 self,
 dataset_name: str,
 columns: List[dict],
-pk_constraints: dict = None,
+pk_constraints: Optional[dict] = None,
 tags: Optional[Dict[str, List[str]]] = None,
 ) -> List[SchemaField]:
 canonical_schema = []
@@ -1003,7 +1003,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
 self,
 dataset_name: str,
 column: dict,
-pk_constraints: dict = None,
+pk_constraints: Optional[dict] = None,
 tags: Optional[List[str]] = None,
 ) -> List[SchemaField]:
 gtc: Optional[GlobalTagsClass] = None
@@ -194,7 +194,7 @@ class TrinoSource(SQLAlchemySource):
 self,
 dataset_name: str,
 column: dict,
-pk_constraints: dict = None,
+pk_constraints: Optional[dict] = None,
 tags: Optional[List[str]] = None,
 ) -> List[SchemaField]:

@@ -992,7 +992,10 @@ class TableauSource(StatefulIngestionSourceBase):
 return mcp_workunit

 def emit_datasource(
-self, datasource: dict, workbook: dict = None, is_embedded_ds: bool = False
+self,
+datasource: dict,
+workbook: Optional[dict] = None,
+is_embedded_ds: bool = False,
 ) -> Iterable[MetadataWorkUnit]:
 datasource_info = workbook
 if not is_embedded_ds:
|
|||||||
def bigquery_audit_metadata_query_template(
|
def bigquery_audit_metadata_query_template(
|
||||||
dataset: str,
|
dataset: str,
|
||||||
use_date_sharded_tables: bool,
|
use_date_sharded_tables: bool,
|
||||||
table_allow_filter: str = None,
|
table_allow_filter: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Receives a dataset (with project specified) and returns a query template that is used to query exported
|
Receives a dataset (with project specified) and returns a query template that is used to query exported
|
||||||
|
@@ -306,7 +306,7 @@ class SnowflakeConfig(BaseSnowflakeConfig, SQLAlchemyConfig):

 def get_sql_alchemy_url(
 self,
-database: str = None,
+database: Optional[str] = None,
 username: Optional[str] = None,
 password: Optional[pydantic.SecretStr] = None,
 role: Optional[str] = None,
@@ -110,9 +110,9 @@ class DataHubValidationAction(ValidationAction):
 ValidationResultIdentifier, "GXCloudIdentifier"
 ],
 data_asset: Union[Validator, DataAsset, Batch],
-payload: Any = None,
+payload: Optional[Any] = None,
 expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None,
-checkpoint_identifier: Any = None,
+checkpoint_identifier: Optional[Any] = None,
 ) -> Dict:
 datasets = []
 try:
@@ -67,7 +67,7 @@ class OperationProcessor:
 self,
 operation_defs: Dict[str, Dict],
 tag_prefix: str = "",
-owner_source_type: str = None,
+owner_source_type: Optional[str] = None,
 strip_owner_email_id: bool = False,
 ):
 self.operation_defs = operation_defs
@@ -70,7 +70,7 @@ class DatahubLineageBackend(LineageBackend):
 operator: "BaseOperator",
 inlets: Optional[List] = None, # unused
 outlets: Optional[List] = None, # unused
-context: Dict = None,
+context: Optional[Dict] = None,
 ) -> None:
 config = get_lineage_config()
 if not config.enabled:
@@ -24,7 +24,7 @@ def wait_for_port(
 docker_services: pytest_docker.plugin.Services,
 container_name: str,
 container_port: int,
-hostname: str = None,
+hostname: Optional[str] = None,
 timeout: float = 30.0,
 pause: float = 0.5,
 checker: Optional[Callable[[], bool]] = None,
@@ -4,7 +4,6 @@ from unittest.mock import Mock
 import pytest
 from sqlalchemy.engine.reflection import Inspector

-from datahub.ingestion.api.source import Source
 from datahub.ingestion.source.sql.sql_common import (
 PipelineContext,
 SQLAlchemyConfig,
@@ -19,8 +18,6 @@ class _TestSQLAlchemyConfig(SQLAlchemyConfig):


 class _TestSQLAlchemySource(SQLAlchemySource):
-@classmethod
-def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
 pass
