chore(ingest): bump black (#11898)

Harshal Sheth 2024-11-20 13:33:54 -08:00 committed by GitHub
parent 7dbb3e60cb
commit 5519a330e2
78 changed files with 19 additions and 137 deletions
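
Most of the per-file hunks that follow delete a single blank line. Black 23's stable style removes empty lines that sit directly after a block opener (a def, class, if, or with line), which appears to account for nearly all of the 137 deletions in this commit. An illustrative before/after (a hypothetical snippet, not taken from the repo):

# Accepted by black 22:
def load_config(path):

    return open(path).read()

# Rewritten by black 23: the blank line after the block opener is removed.
def load_config(path):
    return open(path).read()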

View File

@ -28,7 +28,6 @@ class CorpGroupFile(BaseModel):
with open("user/user.dhub.yaml_schema.json", "w") as fp:
fp.write(json.dumps(CorpUserFile.schema(), indent=4))
with open("group/group.dhub.yaml_schema.json", "w") as fp:

View File

@ -591,22 +591,26 @@ debug_requirements = {
"memray",
}
base_dev_requirements = {
*base_requirements,
*framework_common,
*mypy_stubs,
*s3_base,
lint_requirements = {
# This is pinned only to avoid spurious errors in CI.
# We should make an effort to keep it up to date.
"black==22.12.0",
"coverage>=5.1",
"faker>=18.4.0",
"black==23.3.0",
"flake8>=6.0.0",
"flake8-tidy-imports>=4.3.0",
"flake8-bugbear==23.3.12",
"isort>=5.7.0",
"mypy==1.10.1",
}
base_dev_requirements = {
*base_requirements,
*framework_common,
*mypy_stubs,
*s3_base,
*lint_requirements,
*test_api_requirements,
"coverage>=5.1",
"faker>=18.4.0",
"pytest-asyncio>=0.16.0",
"pytest-cov>=2.8.1",
"pytest-random-order~=1.1.0",
@ -931,6 +935,7 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
),
"cloud": ["acryl-datahub-cloud"],
"dev": list(dev_requirements),
"lint": list(lint_requirements),
"testing-utils": list(test_api_requirements), # To import `datahub.testing`
"integration-tests": list(full_test_dev_requirements),
"debug": list(debug_requirements),

View File

@ -190,7 +190,6 @@ class StructuredProperties(ConfigModel):
@classmethod
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
with StructuredPropertiesConfig.use_graph(graph):
structured_property: Optional[
StructuredPropertyDefinitionClass

View File

@ -32,7 +32,6 @@ class S3Path:
class S3ListIterator(Iterator):
MAX_KEYS = 1000
def __init__(

View File

@ -33,7 +33,6 @@ logger: logging.Logger = logging.getLogger(__name__)
@dataclass
class ClassificationReportMixin:
num_tables_fetch_sample_values_failed: int = 0
num_tables_classification_attempted: int = 0
@ -112,7 +111,6 @@ class ClassificationHandler:
schema_metadata: SchemaMetadata,
sample_data: Union[Dict[str, list], Callable[[], Dict[str, list]]],
) -> None:
if not isinstance(sample_data, Dict):
try:
# TODO: In future, sample_data fetcher can be lazily called if classification

View File

@ -374,7 +374,6 @@ class BigQueryV2Config(
StatefulProfilingConfigMixin,
ClassificationSourceConfigMixin,
):
include_schema_metadata: bool = Field(
default=True,
description="Whether to ingest the BigQuery schema, i.e. projects, schemas, tables, and views.",

View File

@ -356,7 +356,6 @@ class BigQuerySchemaGenerator:
project_id
)
except Exception as e:
if self.config.project_ids and "not enabled BigQuery." in str(e):
action_mesage = (
"The project has not enabled BigQuery API. "
@ -417,7 +416,6 @@ class BigQuerySchemaGenerator:
bigquery_project: BigqueryProject,
db_tables: Dict[str, List[BigqueryTable]],
) -> Iterable[MetadataWorkUnit]:
db_views: Dict[str, List[BigqueryView]] = {}
db_snapshots: Dict[str, List[BigqueryTableSnapshot]] = {}
project_id = bigquery_project.id
@ -1141,7 +1139,6 @@ class BigQuerySchemaGenerator:
columns: List[BigqueryColumn],
dataset_name: BigqueryTableIdentifier,
) -> MetadataWorkUnit:
foreign_keys: List[ForeignKeyConstraint] = []
# Foreign keys only make sense for tables
if isinstance(table, BigqueryTable):
@ -1183,7 +1180,6 @@ class BigQuerySchemaGenerator:
) -> Iterable[BigqueryTable]:
# In bigquery there is no way to query all tables in a Project id
with PerfTimer() as timer:
# PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables
# based on Amazon S3 and Blob Storage data.
# https://cloud.google.com/bigquery/docs/omni-introduction#limitations

View File

@ -934,7 +934,6 @@ class BigqueryLineageExtractor:
ddl: Optional[str],
graph: Optional[DataHubGraph] = None,
) -> Iterable[MetadataWorkUnit]:
if not ddl:
return
@ -972,7 +971,6 @@ class BigqueryLineageExtractor:
source_uris: List[str],
graph: Optional[DataHubGraph] = None,
) -> Optional[UpstreamLineageClass]:
upstreams_list: List[UpstreamClass] = []
fine_grained_lineages: List[FineGrainedLineageClass] = []
gcs_urns: Set[str] = set()

View File

@ -304,7 +304,6 @@ class BigQueryQueriesExtractor(Closeable):
def deduplicate_queries(
self, queries: FileBackedList[ObservedQuery]
) -> FileBackedDict[Dict[int, ObservedQuery]]:
# This fingerprint based deduplication is done here to reduce performance hit due to
# repetitive sql parsing while adding observed query to aggregator that would otherwise
# parse same query multiple times. In future, aggregator may absorb this deduplication.
@ -342,7 +341,6 @@ class BigQueryQueriesExtractor(Closeable):
return queries_deduped
def fetch_query_log(self, project: BigqueryProject) -> Iterable[ObservedQuery]:
# Multi-regions from https://cloud.google.com/bigquery/docs/locations#supported_locations
regions = self.config.region_qualifiers
@ -355,7 +353,6 @@ class BigQueryQueriesExtractor(Closeable):
def fetch_region_query_log(
self, project: BigqueryProject, region: str
) -> Iterable[ObservedQuery]:
# Each region needs to be a different query
query_log_query = _build_enriched_query_log_query(
project_id=project.id,
@ -452,7 +449,6 @@ def _build_enriched_query_log_query(
start_time: datetime,
end_time: datetime,
) -> str:
audit_start_time = start_time.strftime(BQ_DATETIME_FORMAT)
audit_end_time = end_time.strftime(BQ_DATETIME_FORMAT)

View File

@ -332,7 +332,6 @@ class CassandraSource(StatefulIngestionSourceBase):
def _extract_views_from_keyspace(
self, keyspace_name: str
) -> Iterable[MetadataWorkUnit]:
views: List[CassandraView] = self.cassandra_api.get_views(keyspace_name)
for view in views:
view_name: str = view.view_name

View File

@ -371,7 +371,6 @@ class ConfluentSchemaRegistry(KafkaSchemaRegistryBase):
def _get_schema_metadata(
self, topic: str, platform_urn: str, is_subject: bool
) -> Optional[SchemaMetadata]:
# Process the value schema
schema, fields = self._get_schema_and_fields(
topic=topic,

View File

@ -7,7 +7,6 @@ from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
class PathSpecsConfigMixin(ConfigModel):
path_specs: List[PathSpec] = Field(
description="List of PathSpec. See [below](#path-spec) the details about PathSpec"
)

View File

@ -107,7 +107,6 @@ class DataHubSource(StatefulIngestionSourceBase):
logger.info(f"Fetching database aspects starting from {from_createdon}")
mcps = reader.get_aspects(from_createdon, self.report.stop_time)
for i, (mcp, createdon) in enumerate(mcps):
if not self.urn_pattern.allowed(str(mcp.entityUrn)):
continue

View File

@ -566,7 +566,6 @@ class DremioAPIOperations:
return tables
def validate_schema_format(self, schema):
if "." in schema:
schema_path = self.get(
url=f"/catalog/{self.get_dataset_id(schema=schema, dataset='')}"
@ -687,7 +686,6 @@ class DremioAPIOperations:
response.get("entityType")
== DremioEntityContainerType.FOLDER.value.lower()
):
containers.append(
{
"id": location_id,

View File

@ -121,7 +121,6 @@ class DremioSourceConfig(
EnvConfigMixin,
PlatformInstanceConfigMixin,
):
domain: Optional[str] = Field(
default=None,
description="Domain for all source objects.",

View File

@ -198,7 +198,6 @@ class DremioSource(StatefulIngestionSourceBase):
source_platform_name = source_name
for mapping in self.config.source_mappings or []:
if re.search(mapping.source_name, source_type, re.IGNORECASE):
source_platform_name = mapping.source_name.lower()

View File

@ -233,7 +233,6 @@ class DynamoDBSource(StatefulIngestionSourceBase):
table_name: str,
dataset_name: str,
) -> Iterable[MetadataWorkUnit]:
logger.debug(f"Processing table: {dataset_name}")
table_info = dynamodb_client.describe_table(TableName=table_name)["Table"]
account_id = table_info["TableArn"].split(":")[4]

View File

@ -69,7 +69,6 @@ class DatahubExecutionRequestCleanup:
report: DatahubExecutionRequestCleanupReport,
config: Optional[DatahubExecutionRequestCleanupConfig] = None,
) -> None:
self.graph = graph
self.report = report
self.instance_id = int(time.time())

View File

@ -95,7 +95,6 @@ class GEProfilingBaseConfig(ConfigModel):
class GEProfilingConfig(GEProfilingBaseConfig):
report_dropped_profiles: bool = Field(
default=False,
description="Whether to report datasets or dataset columns which were not profiled. Set to `True` for debugging purposes.",

View File

@ -307,7 +307,6 @@ class ViewField:
type_cls: ViewFieldType,
populate_sql_logic_in_descriptions: bool,
) -> "ViewField":
is_primary_key = field_dict.get("primary_key", "no") == "yes"
name = field_dict["name"]
@ -929,7 +928,6 @@ class LookerExplore:
reporter: SourceReport,
source_config: LookerDashboardSourceConfig,
) -> Optional["LookerExplore"]: # noqa: C901
try:
explore = client.lookml_model_explore(model, explore_name)
views: Set[str] = set()
@ -987,13 +985,11 @@ class LookerExplore:
field_name_vs_raw_explore_field: Dict = {}
if explore.fields is not None:
if explore.fields.dimensions is not None:
for dim_field in explore.fields.dimensions:
if dim_field.name is None:
continue
else:
field_name_vs_raw_explore_field[dim_field.name] = dim_field
view_fields.append(
@ -1034,7 +1030,6 @@ class LookerExplore:
if measure_field.name is None:
continue
else:
field_name_vs_raw_explore_field[
measure_field.name
] = measure_field

View File

@ -604,7 +604,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
def _create_platform_instance_aspect(
self,
) -> DataPlatformInstance:
assert (
self.source_config.platform_name
), "Platform name is not set in the configuration."
@ -999,7 +998,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
def _make_dashboard_and_chart_mces(
self, looker_dashboard: LookerDashboard
) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
# Step 1: Emit metadata for each Chart inside the Dashboard.
chart_events = []
for element in looker_dashboard.dashboard_elements:

View File

@ -55,7 +55,6 @@ class SpecialVariable:
current_dict: dict = new_dict
for key in keys[:-1]:
if key not in current_dict:
current_dict[key] = {}
@ -392,7 +391,6 @@ def process_lookml_template_language(
source_config: LookMLSourceConfig,
view_lkml_file_dict: dict,
) -> None:
if "views" not in view_lkml_file_dict:
return
@ -425,7 +423,6 @@ def load_and_preprocess_file(
path: Union[str, pathlib.Path],
source_config: LookMLSourceConfig,
) -> dict:
parsed = load_lkml(path)
process_lookml_template_language(

View File

@ -320,7 +320,6 @@ class LookerViewContext:
self,
field: str,
) -> Optional[Any]:
# According to Looker's inheritance rules, we need to merge the fields(i.e. dimensions, measures and
# dimension_groups) from both the child and parent.
if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]:
@ -345,7 +344,6 @@ class LookerViewContext:
return self.get_including_extends(field="sql_table_name")
def _is_dot_sql_table_name_present(self) -> bool:
sql_table_name: Optional[str] = self._get_sql_table_name_field()
if sql_table_name is None:

View File

@ -144,7 +144,6 @@ class LookerView:
extract_col_level_lineage: bool = False,
populate_sql_logic_in_descriptions: bool = False,
) -> Optional["LookerView"]:
view_name = view_context.name()
logger.debug(f"Handling view {view_name} in model {model_name}")
@ -418,7 +417,6 @@ class LookMLSource(StatefulIngestionSourceBase):
def _build_dataset_mcps(
self, looker_view: LookerView
) -> List[MetadataChangeProposalWrapper]:
view_urn = looker_view.id.get_urn(self.source_config)
subTypeEvent = MetadataChangeProposalWrapper(
@ -502,7 +500,6 @@ class LookMLSource(StatefulIngestionSourceBase):
def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
manifest_file = folder / "manifest.lkml"
if manifest_file.exists():
manifest_dict = load_and_preprocess_file(
path=manifest_file, source_config=self.source_config
)

View File

@ -72,7 +72,6 @@ def resolve_derived_view_urn_of_col_ref(
base_folder_path: str,
config: LookMLSourceConfig,
) -> List[ColumnRef]:
new_column_refs: List[ColumnRef] = []
for col_ref in column_refs:
if is_derived_view(col_ref.table.lower()):
@ -641,7 +640,6 @@ def create_view_upstream(
ctx: PipelineContext,
reporter: LookMLSourceReport,
) -> AbstractViewUpstream:
if view_context.is_regular_case():
return RegularViewUpstream(
view_context=view_context,
@ -666,7 +664,6 @@ def create_view_upstream(
view_context.is_sql_based_derived_view_without_fields_case(),
]
):
return DerivedQueryUpstreamSource(
view_context=view_context,
config=config,

View File

@ -210,7 +210,6 @@ def _get_lineage_mcp(
# extract the old lineage and save it for the new mcp
if preserve_upstream:
client = get_default_graph()
old_upstream_lineage = get_aspects_for_entity(

View File

@ -464,7 +464,6 @@ class NifiSourceReport(SourceReport):
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations")
class NifiSource(Source):
config: NifiSourceConfig
report: NifiSourceReport

View File

@ -101,7 +101,6 @@ def parse_custom_sql(
env: str,
platform_instance: Optional[str],
) -> Optional["SqlParsingResult"]:
logger.debug("Using sqlglot_lineage to parse custom sql")
logger.debug(f"Processing native query using DataHub Sql Parser = {query}")

View File

@ -66,7 +66,6 @@ def get_upstream_tables(
config: PowerBiDashboardSourceConfig,
parameters: Dict[str, str] = {},
) -> List[resolver.Lineage]:
if table.expression is None:
logger.debug(f"There is no M-Query expression in table {table.full_name}")
return []

View File

@ -65,7 +65,6 @@ def urn_creator(
server: str,
qualified_table_name: str,
) -> str:
platform_detail: PlatformDetail = platform_instance_resolver.get_platform_instance(
PowerBIPlatformDetail(
data_platform_pair=data_platform_pair,
@ -179,7 +178,6 @@ class AbstractDataPlatformTableCreator(ABC):
arg_list: Tree,
table_detail: Dict[str, str],
) -> Optional[ReferencedTable]:
arguments: List[str] = tree_function.strip_char_from_list(
values=tree_function.remove_whitespaces_from_list(
tree_function.token_values(arg_list)
@ -219,7 +217,6 @@ class AbstractDataPlatformTableCreator(ABC):
def parse_custom_sql(
self, query: str, server: str, database: Optional[str], schema: Optional[str]
) -> Lineage:
dataplatform_tables: List[DataPlatformTable] = []
platform_detail: PlatformDetail = (
@ -377,7 +374,6 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
return argument_list
def take_first_argument(self, expression: Tree) -> Optional[Tree]:
# function is not data-access function, lets process function argument
first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func(expression)
@ -785,7 +781,6 @@ class MSSqlDataPlatformTableCreator(DefaultTwoStepDataAccessSources):
def create_lineage(
self, data_access_func_detail: DataAccessFunctionDetail
) -> Lineage:
arguments: List[str] = tree_function.strip_char_from_list(
values=tree_function.remove_whitespaces_from_list(
tree_function.token_values(data_access_func_detail.arg_list)
@ -897,7 +892,6 @@ class DatabrickDataPlatformTableCreator(AbstractDataPlatformTableCreator):
table_reference: ReferencedTable,
data_platform_pair: DataPlatformPair,
) -> str:
platform_detail: PlatformDetail = (
self.platform_instance_resolver.get_platform_instance(
PowerBIPlatformDetail(

View File

@ -439,7 +439,6 @@ class DataResolverBase(ABC):
self,
app_id: str,
) -> Optional[App]:
raw_app: Optional[Dict] = self._get_app(
app_id=app_id,
)
@ -1062,7 +1061,6 @@ class AdminAPIResolver(DataResolverBase):
self,
app_id: str,
) -> Optional[Dict]:
app_endpoint = self.API_ENDPOINTS[Constant.GET_WORKSPACE_APP].format(
POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL,
APP_ID=app_id,

View File

@ -40,7 +40,6 @@ def form_full_table_name(
dataset_name: str,
table_name: str,
) -> str:
full_table_name: str = "{}.{}".format(
dataset_name.replace(" ", "_"), table_name.replace(" ", "_")
)
@ -596,7 +595,6 @@ class PowerBiAPI:
return workspaces
def _fill_independent_datasets(self, workspace: Workspace) -> None:
reachable_datasets: List[str] = []
# Find out reachable datasets
for dashboard in workspace.dashboards:

View File

@ -126,7 +126,6 @@ def log_http_error(e: BaseException, message: str) -> Any:
def get_response_dict(response: requests.Response, error_message: str) -> dict:
result_dict: dict = {}
try:
response.raise_for_status()

View File

@ -436,7 +436,6 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
def _extract_metadata(
self, connection: redshift_connector.Connection, database: str
) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
yield from self.gen_database_container(
database=database,
)

View File

@ -804,7 +804,6 @@ class S3Source(StatefulIngestionSourceBase):
protocol: str,
min: bool = False,
) -> List[str]:
# if len(path_spec.include.split("/")) == len(f"{protocol}{bucket_name}/{folder}".split("/")):
# return [f"{protocol}{bucket_name}/{folder}"]

View File

@ -401,7 +401,6 @@ class SACSource(StatefulIngestionSourceBase, TestableSource):
columns = self.get_import_data_model_columns(model_id=model.model_id)
for column in columns:
schema_field = SchemaFieldClass(
fieldPath=column.name,
type=self.get_schema_field_data_type(column),

View File

@ -236,7 +236,6 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
def get_known_query_lineage(
self, query: Query, dataset_name: str, db_row: UpstreamLineageEdge
) -> Optional[KnownQueryLineageInfo]:
if not db_row.UPSTREAM_TABLES:
return None

View File

@ -28,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
class CockroachDBSource(PostgresSource):
config: CockroachDBConfig
def __init__(self, config: CockroachDBConfig, ctx: PipelineContext):

View File

@ -178,7 +178,6 @@ class OracleInspectorObjectWrapper:
]
def get_view_names(self, schema: Optional[str] = None) -> List[str]:
schema = self._inspector_instance.dialect.denormalize_name(
schema or self.default_schema_name
)
@ -200,7 +199,6 @@ class OracleInspectorObjectWrapper:
def get_columns(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> List[dict]:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)
@ -344,7 +342,6 @@ class OracleInspectorObjectWrapper:
return columns
def get_table_comment(self, table_name: str, schema: Optional[str] = None) -> Dict:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)
@ -416,7 +413,6 @@ class OracleInspectorObjectWrapper:
def get_pk_constraint(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> Dict:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)
@ -458,7 +454,6 @@ class OracleInspectorObjectWrapper:
def get_foreign_keys(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> List:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)
@ -540,7 +535,6 @@ class OracleInspectorObjectWrapper:
def get_view_definition(
self, view_name: str, schema: Optional[str] = None
) -> Union[str, None]:
denormalized_view_name = self._inspector_instance.dialect.denormalize_name(
view_name
)

View File

@ -146,7 +146,11 @@ class GenericCheckpointState(CheckpointStateBase):
def compute_percent_entities_changed(
new_entities: List[str], old_entities: List[str]
) -> float:
(overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities(
(
overlap_count,
old_count,
_,
) = _get_entity_overlap_and_cardinalities(
new_entities=new_entities, old_entities=old_entities
)
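
This is one of the few hunks that rewrites code rather than dropping a blank line: the parenthesized unpacking target carries a trailing comma, and the newer black appears to honor that magic trailing comma and explode the target one name per line. Restating the hunk above outside the diff:

# before (black 22): single-line target despite the trailing comma
(overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities(
    new_entities=new_entities, old_entities=old_entities
)

# after (black 23): the trailing comma forces one element per line
(
    overlap_count,
    old_count,
    _,
) = _get_entity_overlap_and_cardinalities(
    new_entities=new_entities, old_entities=old_entities
)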

View File

@ -2117,7 +2117,6 @@ class TableauSiteSource:
def _enrich_database_tables_with_parsed_schemas(
self, parsing_result: SqlParsingResult
) -> None:
in_tables_schemas: Dict[
str, Set[str]
] = transform_parsing_result_to_in_tables_schemas(parsing_result)

View File

@ -105,7 +105,6 @@ class SimpleAddDatasetDataProduct(AddDatasetDataProduct):
"""Transformer that adds a specified dataproduct entity for provided dataset as its asset."""
def __init__(self, config: SimpleDatasetDataProductConfig, ctx: PipelineContext):
generic_config = AddDatasetDataProductConfig(
get_data_product_to_add=lambda dataset_urn: config.dataset_to_data_product_urns.get(
dataset_urn

View File

@ -67,7 +67,6 @@ class AddDatasetTags(DatasetTagsTransformer):
def handle_end_of_stream(
self,
) -> List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
mcps: List[
Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]
] = []

View File

@ -105,7 +105,6 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
def handle_end_of_stream(
self,
) -> Sequence[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
return self.owner_mcps
def transform_aspect(

View File

@ -103,7 +103,6 @@ class ReplaceExternalUrlContainer(ContainerPropertiesTransformer, ReplaceUrl):
def transform_aspect(
self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
) -> Optional[Aspect]:
in_container_properties_aspect: ContainerPropertiesClass = cast(
ContainerPropertiesClass, aspect
)

View File

@ -84,7 +84,6 @@ class TagsToTermMapper(TagsToTermTransformer):
def transform_aspect(
self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
) -> Optional[Aspect]:
in_glossary_terms: Optional[GlossaryTermsClass] = cast(
Optional[GlossaryTermsClass], aspect
)

View File

@ -72,7 +72,6 @@ class SnowflakeMetricSQLGenerator:
@metric_sql.register
def _(self, assertion: RowCountTotalVolumeAssertion) -> str:
# Can not use information schema here due to error -
# Data metric function body cannot refer to the non-deterministic function 'CURRENT_DATABASE_MAIN_METASTORE_ID'.

View File

@ -433,7 +433,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
def add_custom_properties(
self, custom_properties: Optional[Dict[str, str]] = None
) -> "DashboardPatchBuilder":
if custom_properties:
for key, value in custom_properties.items():
self.custom_properties_patch_helper.add_property(key, value)

View File

@ -831,7 +831,6 @@ class SqlParsingAggregator(Closeable):
session_has_temp_tables: bool = True,
_is_internal: bool = False,
) -> None:
# Adding tool specific metadata extraction here allows it
# to work for both ObservedQuery and PreparsedQuery as
# add_preparsed_query it used within add_observed_query.

View File

@ -880,7 +880,6 @@ def _sqlglot_lineage_inner(
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
) -> SqlParsingResult:
if not default_dialect:
dialect = get_dialect(schema_resolver.platform)
else:

View File

@ -79,7 +79,6 @@ class ToolMetaExtractor:
return True
def extract_bi_metadata(self, entry: QueryLog) -> bool:
for tool, meta_extractor in self.known_tool_extractors:
try:
if meta_extractor(entry):

View File

@ -206,7 +206,7 @@ class MCPDiff:
"""
aspect_diffs = [v for d in self.aspect_changes.values() for v in d.values()]
for aspect_diff in aspect_diffs:
for (_, old, new) in aspect_diff.aspects_changed.keys():
for _, old, new in aspect_diff.aspects_changed.keys():
golden[old.delta_info.idx] = new.delta_info.original
indices_to_remove = set()
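
Another of the handful of non-whitespace rewrites in this commit: redundant parentheses around a for-loop unpacking target are removed. Restating the hunk above:

# before
for (_, old, new) in aspect_diff.aspects_changed.keys():
    golden[old.delta_info.idx] = new.delta_info.original

# after: the newer black drops the parentheses around the loop target
for _, old, new in aspect_diff.aspects_changed.keys():
    golden[old.delta_info.idx] = new.delta_info.original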

View File

@ -43,7 +43,6 @@ def _make_owner_category_list(
owner_category_urn: Optional[str],
owner_ids: List[str],
) -> List[Dict]:
return [
{
"urn": mce_builder.make_owner_urn(owner_id, owner_type),
@ -285,7 +284,6 @@ class OperationProcessor:
aspect_map[Constants.ADD_TAG_OPERATION] = tag_aspect
if Constants.ADD_OWNER_OPERATION in operation_map:
owner_aspect = OwnershipClass(
owners=[
OwnerClass(

View File

@ -19,7 +19,6 @@ class ThreadedIteratorExecutor:
args_list: Iterable[Tuple[Any, ...]],
max_workers: int,
) -> Generator[T, None, None]:
out_q: queue.Queue[T] = queue.Queue()
def _worker_wrapper(

View File

@ -68,7 +68,6 @@ def run_ingest(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
) as mock_checkpoint:
mock_checkpoint.return_value = mock_datahub_graph
mocked_functions_reference(

View File

@ -47,7 +47,6 @@ def _generate_queries_cached_file(tmp_path: Path, queries_json_path: Path) -> No
@patch("google.cloud.bigquery.Client")
@patch("google.cloud.resourcemanager_v3.ProjectsClient")
def test_queries_ingestion(project_client, client, pytestconfig, monkeypatch, tmp_path):
test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2"
mcp_golden_path = f"{test_resources_dir}/bigquery_queries_mcps_golden.json"
mcp_output_path = tmp_path / "bigquery_queries_mcps.json"

View File

@ -192,7 +192,6 @@ def create_mysql_source(headers):
def upload_dataset(headers):
url = f"{DREMIO_HOST}/apiv2/source/s3/file_format/warehouse/sample.parquet"
payload = {"ignoreOtherFileFormats": False, "type": "Parquet"}

View File

@ -1047,7 +1047,6 @@ def test_independent_soft_deleted_looks(
mocked_client = mock.MagicMock()
with mock.patch("looker_sdk.init40") as mock_sdk:
mock_sdk.return_value = mocked_client
setup_mock_look(mocked_client)
setup_mock_soft_deleted_look(mocked_client)

View File

@ -831,7 +831,6 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None:
@freeze_time(FROZEN_TIME)
def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time):
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
mce_out_file = "duplicate_ingest_mces_output.json"

View File

@ -58,14 +58,12 @@ def run_ingest(
mocked_functions_reference,
recipe,
):
with patch(
"datahub.ingestion.source.identity.okta.OktaClient"
) as MockClient, patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
) as mock_checkpoint:
mock_checkpoint.return_value = mock_datahub_graph
mocked_functions_reference(MockClient=MockClient)
@ -277,7 +275,6 @@ def overwrite_group_in_mocked_data(test_resources_dir, MockClient):
def _init_mock_okta_client(
test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None
):
okta_users_json_file = (
test_resources_dir / "okta_users.json"
if mock_users_json is None

View File

@ -33,7 +33,6 @@ class MockViewDefinition:
@dataclass
class MockConstraints:
constraint_name: str = "mock constraint name"
constraint_type: str = "P"
local_column: str = "mock column name"

View File

@ -712,7 +712,6 @@ def test_redshift_regular_case():
def test_redshift_native_query():
table: powerbi_data_classes.Table = powerbi_data_classes.Table(
expression=M_QUERIES[22],
name="category",
@ -1101,7 +1100,6 @@ def test_double_quotes_in_alias():
@patch("datahub.ingestion.source.powerbi.m_query.parser.get_lark_parser")
def test_m_query_timeout(mock_get_lark_parser):
q = 'let\n Source = Value.NativeQuery(Snowflake.Databases("0DD93C6BD5A6.snowflakecomputing.com","sales_analytics_warehouse_prod",[Role="sales_analytics_member_ad"]){[Name="SL_OPERATIONS"]}[Data], "select SALE_NO AS ""\x1b[4mSaleNo\x1b[0m""#(lf) ,CODE AS ""Code""#(lf) ,ENDDATE AS ""end_date""#(lf) from SL_OPERATIONS.SALE.REPORTS#(lf) where ENDDATE > \'2024-02-03\'", null, [EnableFolding=true]),\n #"selected Row" = Table.SelectRows(Source)\nin\n #"selected Row"'
table: powerbi_data_classes.Table = powerbi_data_classes.Table(

View File

@ -96,7 +96,6 @@ def read_mock_data(path: Union[Path, str]) -> dict:
def register_mock_api(
pytestconfig: pytest.Config, request_mock: Any, override_data: Optional[dict] = None
) -> None:
default_mock_data_path = (
pytestconfig.rootpath
/ "tests/integration/powerbi/mock_data/default_mock_response.json"
@ -467,7 +466,6 @@ def test_scan_all_workspaces(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)
@ -517,7 +515,6 @@ def test_extract_reports(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
@ -1219,7 +1216,6 @@ def test_independent_datasets_extraction(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(
@ -1323,7 +1319,6 @@ def test_cll_extraction(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(
@ -1380,7 +1375,6 @@ def test_cll_extraction_flags(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
register_mock_api(
pytestconfig=pytestconfig,
request_mock=requests_mock,
@ -1392,7 +1386,6 @@ def test_cll_extraction_flags(
)
with pytest.raises(Exception, match=pattern):
Pipeline.create(
{
"run_id": "powerbi-test",
@ -1559,7 +1552,6 @@ def test_powerbi_app_ingest(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
common_app_ingest(
pytestconfig=pytestconfig,
requests_mock=requests_mock,
@ -1590,7 +1582,6 @@ def test_powerbi_app_ingest_info_message(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
pipeline = common_app_ingest(
pytestconfig=pytestconfig,
requests_mock=requests_mock,

View File

@ -1011,7 +1011,6 @@ def default_config():
def test_qlik_sense_ingest(
pytestconfig, tmp_path, requests_mock, mock_websocket_send_request
):
test_resources_dir = pytestconfig.rootpath / "tests/integration/qlik_sense"
register_mock_api(request_mock=requests_mock)
@ -1051,7 +1050,6 @@ def test_qlik_sense_ingest(
def test_platform_instance_ingest(
pytestconfig, tmp_path, requests_mock, mock_websocket_send_request
):
test_resources_dir = pytestconfig.rootpath / "tests/integration/qlik_sense"
register_mock_api(request_mock=requests_mock)

View File

@ -420,7 +420,6 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None:
@pytest.mark.integration
def test_sigma_ingest(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
register_mock_api(request_mock=requests_mock)
@ -464,7 +463,6 @@ def test_sigma_ingest(pytestconfig, tmp_path, requests_mock):
@pytest.mark.integration
def test_platform_instance_ingest(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
register_mock_api(request_mock=requests_mock)
@ -510,7 +508,6 @@ def test_platform_instance_ingest(pytestconfig, tmp_path, requests_mock):
@pytest.mark.integration
def test_sigma_ingest_shared_entities(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
override_data = {

View File

@ -441,7 +441,6 @@ def default_query_results( # noqa: C901
include_column_lineage=True,
),
):
return [
{
"DOWNSTREAM_TABLE_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}",

View File

@ -276,7 +276,6 @@ def mock_sdk_client(
datasources_side_effect: List[dict],
sign_out_side_effect: List[dict],
) -> mock.MagicMock:
mock_client = mock.Mock()
mocked_metadata = mock.Mock()
mocked_metadata.query.side_effect = side_effect_query_metadata_response
@ -1228,7 +1227,6 @@ def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph):
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,

View File

@ -282,7 +282,6 @@ ViewEntry = namedtuple(
def mock_hive_sql(query):
if query == "DESCRIBE EXTENDED `bronze_kambi`.`bet` betStatusId":
return [
("col_name", "betStatusId"),

View File

@ -16,7 +16,6 @@ from tests.performance.helpers import workunit_sink
def run_test():
with mock.patch("snowflake.connector.connect") as mock_connect:
sf_connection = mock.MagicMock()
sf_cursor = mock.MagicMock()

View File

@ -10,7 +10,6 @@ class MyTestModel(BaseModel):
def test_base_model():
test_base_model = MyTestModel(
test_string_field="test_string_field",
test_int_field=42,
@ -31,7 +30,6 @@ def test_base_model():
def test_dictwrapper():
from datahub.metadata.schema_classes import DatasetPropertiesClass
dataset_properties = DatasetPropertiesClass(
@ -58,7 +56,6 @@ def test_dictwrapper():
def test_raw_dictionary():
test_object = {
"test_string_field": "test_string_field",
"test_int_field": 42,

View File

@ -185,7 +185,6 @@ def test_platform_resource_base_model():
def test_platform_resource_filters():
query = (
ElasticPlatformResourceQuery.create_from()
.group(LogicalOperator.AND)

View File

@ -104,7 +104,6 @@ def test_incremental_table_lineage(tmp_path, pytestconfig):
def test_incremental_table_lineage_empty_upstreams(tmp_path, pytestconfig):
urn = make_dataset_urn(platform, "dataset1")
aspect = make_lineage_aspect(
"dataset1",

View File

@ -144,7 +144,6 @@ def test_column_level_lineage(lineage_entries: List[QueryEvent]) -> None:
def test_lineage_for_external_bq_table(mock_datahub_graph_instance):
pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
pipeline_context.graph = mock_datahub_graph_instance
@ -239,7 +238,6 @@ def test_lineage_for_external_bq_table(mock_datahub_graph_instance):
def test_lineage_for_external_bq_table_no_column_lineage(mock_datahub_graph_instance):
pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
pipeline_context.graph = mock_datahub_graph_instance

View File

@ -184,7 +184,6 @@ def test_bigquery_table_sanitasitation():
def test_unquote_and_decode_unicode_escape_seq():
# Test with a string that starts and ends with quotes and has Unicode escape sequences
input_string = '"Hello \\u003cWorld\\u003e"'
expected_output = "Hello <World>"

View File

@ -221,7 +221,6 @@ def mock_redshift_connection() -> MagicMock:
def mock_graph() -> DataHubGraph:
graph = MagicMock()
graph._make_schema_resolver.return_value = SchemaResolver(

View File

@ -958,7 +958,6 @@ def test_table_lineage_via_temp_table_disordered_add(
@freeze_time(FROZEN_TIME)
def test_basic_usage(pytestconfig: pytest.Config) -> None:
frozen_timestamp = parse_user_datetime(FROZEN_TIME)
aggregator = SqlParsingAggregator(
platform="redshift",

View File

@ -50,7 +50,6 @@ def test_change_percent(
def test_filter_ignored_entity_types():
assert filter_ignored_entity_types(
[
"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)",

View File

@ -56,7 +56,6 @@ schema_test_cases: Dict[str, Tuple[str, List[str]]] = {
def test_cassandra_schema_conversion(
schema: str, expected_field_paths: List[str]
) -> None:
schema_dict: Dict[str, List[Any]] = json.loads(schema)
column_infos: List = schema_dict["column_infos"]