Mirror of https://github.com/datahub-project/datahub.git (synced 2025-12-26 17:37:33 +00:00)
chore(ingest): bump black (#11898)
parent 7dbb3e60cb
commit 5519a330e2
@@ -28,7 +28,6 @@ class CorpGroupFile(BaseModel):
with open("user/user.dhub.yaml_schema.json", "w") as fp:
fp.write(json.dumps(CorpUserFile.schema(), indent=4))
with open("group/group.dhub.yaml_schema.json", "w") as fp:
@@ -591,22 +591,26 @@ debug_requirements = {
"memray",
}
base_dev_requirements = {
*base_requirements,
*framework_common,
*mypy_stubs,
*s3_base,
lint_requirements = {
# This is pinned only to avoid spurious errors in CI.
# We should make an effort to keep it up to date.
"black==22.12.0",
"coverage>=5.1",
"faker>=18.4.0",
"black==23.3.0",
"flake8>=6.0.0",
"flake8-tidy-imports>=4.3.0",
"flake8-bugbear==23.3.12",
"isort>=5.7.0",
"mypy==1.10.1",
}
base_dev_requirements = {
*base_requirements,
*framework_common,
*mypy_stubs,
*s3_base,
*lint_requirements,
*test_api_requirements,
"coverage>=5.1",
"faker>=18.4.0",
"pytest-asyncio>=0.16.0",
"pytest-cov>=2.8.1",
"pytest-random-order~=1.1.0",

@@ -931,6 +935,7 @@ See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
),
"cloud": ["acryl-datahub-cloud"],
"dev": list(dev_requirements),
"lint": list(lint_requirements),
"testing-utils": list(test_api_requirements), # To import `datahub.testing`
"integration-tests": list(full_test_dev_requirements),
"debug": list(debug_requirements),
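The two setup.py hunks above carry the substantive change: the black pin moves from 22.12.0 to 23.3.0, the lint toolchain is pulled out of `base_dev_requirements` into a dedicated `lint_requirements` set, and that set is exposed both to dev environments (via `*lint_requirements`) and as its own extra (`"lint": list(lint_requirements)`). A minimal sketch of the pattern, with names mirroring the diff but the surrounding `setup()` wiring simplified and assumed:

# Sketch only: mirrors the shared-requirement-set pattern from the hunks above.
# The real setup.py defines many more sets; the extras usage shown is an assumption.
lint_requirements = {
    # Pinned only to avoid spurious errors in CI; keep it reasonably fresh.
    "black==23.3.0",
    "flake8>=6.0.0",
    "flake8-tidy-imports>=4.3.0",
    "flake8-bugbear==23.3.12",
    "isort>=5.7.0",
    "mypy==1.10.1",
}

base_dev_requirements = {
    *lint_requirements,  # dev environments always pull in the lint toolchain
    "pytest-cov>=2.8.1",
}

extras_require = {
    "lint": list(lint_requirements),  # e.g. pip install 'acryl-datahub[lint]'
    "dev": list(base_dev_requirements),  # the real file derives dev_requirements from this
}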
@@ -190,7 +190,6 @@ class StructuredProperties(ConfigModel):
@classmethod
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
with StructuredPropertiesConfig.use_graph(graph):
structured_property: Optional[
StructuredPropertyDefinitionClass
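Almost all of the remaining hunks are mechanical fallout from the version bump: each one removes a single blank line that used to sit between a def/class signature (or its closing parenthesis) and the first statement of the body, which the newer black no longer keeps. A hypothetical, self-contained illustration of the pattern (the class loosely echoes the StructuredProperties hunk above and is not the real datahub class):

class StructuredPropertiesSketch:
    """Illustration only; not the real datahub class."""

    @classmethod
    def from_datahub(cls, graph: object, urn: str) -> "StructuredPropertiesSketch":
        # black 22.x tolerated a blank line right here, before this statement;
        # the bumped black removes it, producing the one-line deletions seen
        # in most hunks below.
        return cls()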
@@ -32,7 +32,6 @@ class S3Path:
class S3ListIterator(Iterator):
MAX_KEYS = 1000
def __init__(

@@ -33,7 +33,6 @@ logger: logging.Logger = logging.getLogger(__name__)
@dataclass
class ClassificationReportMixin:
num_tables_fetch_sample_values_failed: int = 0
num_tables_classification_attempted: int = 0

@@ -112,7 +111,6 @@ class ClassificationHandler:
schema_metadata: SchemaMetadata,
sample_data: Union[Dict[str, list], Callable[[], Dict[str, list]]],
) -> None:
if not isinstance(sample_data, Dict):
try:
# TODO: In future, sample_data fetcher can be lazily called if classification

@@ -374,7 +374,6 @@ class BigQueryV2Config(
StatefulProfilingConfigMixin,
ClassificationSourceConfigMixin,
):
include_schema_metadata: bool = Field(
default=True,
description="Whether to ingest the BigQuery schema, i.e. projects, schemas, tables, and views.",

@@ -356,7 +356,6 @@ class BigQuerySchemaGenerator:
project_id
)
except Exception as e:
if self.config.project_ids and "not enabled BigQuery." in str(e):
action_mesage = (
"The project has not enabled BigQuery API. "

@@ -417,7 +416,6 @@ class BigQuerySchemaGenerator:
bigquery_project: BigqueryProject,
db_tables: Dict[str, List[BigqueryTable]],
) -> Iterable[MetadataWorkUnit]:
db_views: Dict[str, List[BigqueryView]] = {}
db_snapshots: Dict[str, List[BigqueryTableSnapshot]] = {}
project_id = bigquery_project.id

@@ -1141,7 +1139,6 @@ class BigQuerySchemaGenerator:
columns: List[BigqueryColumn],
dataset_name: BigqueryTableIdentifier,
) -> MetadataWorkUnit:
foreign_keys: List[ForeignKeyConstraint] = []
# Foreign keys only make sense for tables
if isinstance(table, BigqueryTable):

@@ -1183,7 +1180,6 @@ class BigQuerySchemaGenerator:
) -> Iterable[BigqueryTable]:
# In bigquery there is no way to query all tables in a Project id
with PerfTimer() as timer:
# PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables
# based on Amazon S3 and Blob Storage data.
# https://cloud.google.com/bigquery/docs/omni-introduction#limitations

@@ -934,7 +934,6 @@ class BigqueryLineageExtractor:
ddl: Optional[str],
graph: Optional[DataHubGraph] = None,
) -> Iterable[MetadataWorkUnit]:
if not ddl:
return

@@ -972,7 +971,6 @@ class BigqueryLineageExtractor:
source_uris: List[str],
graph: Optional[DataHubGraph] = None,
) -> Optional[UpstreamLineageClass]:
upstreams_list: List[UpstreamClass] = []
fine_grained_lineages: List[FineGrainedLineageClass] = []
gcs_urns: Set[str] = set()

@@ -304,7 +304,6 @@ class BigQueryQueriesExtractor(Closeable):
def deduplicate_queries(
self, queries: FileBackedList[ObservedQuery]
) -> FileBackedDict[Dict[int, ObservedQuery]]:
# This fingerprint based deduplication is done here to reduce performance hit due to
# repetitive sql parsing while adding observed query to aggregator that would otherwise
# parse same query multiple times. In future, aggregator may absorb this deduplication.

@@ -342,7 +341,6 @@ class BigQueryQueriesExtractor(Closeable):
return queries_deduped
def fetch_query_log(self, project: BigqueryProject) -> Iterable[ObservedQuery]:
# Multi-regions from https://cloud.google.com/bigquery/docs/locations#supported_locations
regions = self.config.region_qualifiers

@@ -355,7 +353,6 @@ class BigQueryQueriesExtractor(Closeable):
def fetch_region_query_log(
self, project: BigqueryProject, region: str
) -> Iterable[ObservedQuery]:
# Each region needs to be a different query
query_log_query = _build_enriched_query_log_query(
project_id=project.id,

@@ -452,7 +449,6 @@ def _build_enriched_query_log_query(
start_time: datetime,
end_time: datetime,
) -> str:
audit_start_time = start_time.strftime(BQ_DATETIME_FORMAT)
audit_end_time = end_time.strftime(BQ_DATETIME_FORMAT)
@@ -332,7 +332,6 @@ class CassandraSource(StatefulIngestionSourceBase):
def _extract_views_from_keyspace(
self, keyspace_name: str
) -> Iterable[MetadataWorkUnit]:
views: List[CassandraView] = self.cassandra_api.get_views(keyspace_name)
for view in views:
view_name: str = view.view_name

@@ -371,7 +371,6 @@ class ConfluentSchemaRegistry(KafkaSchemaRegistryBase):
def _get_schema_metadata(
self, topic: str, platform_urn: str, is_subject: bool
) -> Optional[SchemaMetadata]:
# Process the value schema
schema, fields = self._get_schema_and_fields(
topic=topic,

@@ -7,7 +7,6 @@ from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
class PathSpecsConfigMixin(ConfigModel):
path_specs: List[PathSpec] = Field(
description="List of PathSpec. See [below](#path-spec) the details about PathSpec"
)

@@ -107,7 +107,6 @@ class DataHubSource(StatefulIngestionSourceBase):
logger.info(f"Fetching database aspects starting from {from_createdon}")
mcps = reader.get_aspects(from_createdon, self.report.stop_time)
for i, (mcp, createdon) in enumerate(mcps):
if not self.urn_pattern.allowed(str(mcp.entityUrn)):
continue

@@ -566,7 +566,6 @@ class DremioAPIOperations:
return tables
def validate_schema_format(self, schema):
if "." in schema:
schema_path = self.get(
url=f"/catalog/{self.get_dataset_id(schema=schema, dataset='')}"

@@ -687,7 +686,6 @@ class DremioAPIOperations:
response.get("entityType")
== DremioEntityContainerType.FOLDER.value.lower()
):
containers.append(
{
"id": location_id,

@@ -121,7 +121,6 @@ class DremioSourceConfig(
EnvConfigMixin,
PlatformInstanceConfigMixin,
):
domain: Optional[str] = Field(
default=None,
description="Domain for all source objects.",

@@ -198,7 +198,6 @@ class DremioSource(StatefulIngestionSourceBase):
source_platform_name = source_name
for mapping in self.config.source_mappings or []:
if re.search(mapping.source_name, source_type, re.IGNORECASE):
source_platform_name = mapping.source_name.lower()

@@ -233,7 +233,6 @@ class DynamoDBSource(StatefulIngestionSourceBase):
table_name: str,
dataset_name: str,
) -> Iterable[MetadataWorkUnit]:
logger.debug(f"Processing table: {dataset_name}")
table_info = dynamodb_client.describe_table(TableName=table_name)["Table"]
account_id = table_info["TableArn"].split(":")[4]

@@ -69,7 +69,6 @@ class DatahubExecutionRequestCleanup:
report: DatahubExecutionRequestCleanupReport,
config: Optional[DatahubExecutionRequestCleanupConfig] = None,
) -> None:
self.graph = graph
self.report = report
self.instance_id = int(time.time())

@@ -95,7 +95,6 @@ class GEProfilingBaseConfig(ConfigModel):
class GEProfilingConfig(GEProfilingBaseConfig):
report_dropped_profiles: bool = Field(
default=False,
description="Whether to report datasets or dataset columns which were not profiled. Set to `True` for debugging purposes.",

@@ -307,7 +307,6 @@ class ViewField:
type_cls: ViewFieldType,
populate_sql_logic_in_descriptions: bool,
) -> "ViewField":
is_primary_key = field_dict.get("primary_key", "no") == "yes"
name = field_dict["name"]
@@ -929,7 +928,6 @@ class LookerExplore:
reporter: SourceReport,
source_config: LookerDashboardSourceConfig,
) -> Optional["LookerExplore"]: # noqa: C901
try:
explore = client.lookml_model_explore(model, explore_name)
views: Set[str] = set()

@@ -987,13 +985,11 @@ class LookerExplore:
field_name_vs_raw_explore_field: Dict = {}
if explore.fields is not None:
if explore.fields.dimensions is not None:
for dim_field in explore.fields.dimensions:
if dim_field.name is None:
continue
else:
field_name_vs_raw_explore_field[dim_field.name] = dim_field
view_fields.append(

@@ -1034,7 +1030,6 @@ class LookerExplore:
if measure_field.name is None:
continue
else:
field_name_vs_raw_explore_field[
measure_field.name
] = measure_field

@@ -604,7 +604,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
def _create_platform_instance_aspect(
self,
) -> DataPlatformInstance:
assert (
self.source_config.platform_name
), "Platform name is not set in the configuration."

@@ -999,7 +998,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
def _make_dashboard_and_chart_mces(
self, looker_dashboard: LookerDashboard
) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
# Step 1: Emit metadata for each Chart inside the Dashboard.
chart_events = []
for element in looker_dashboard.dashboard_elements:

@@ -55,7 +55,6 @@ class SpecialVariable:
current_dict: dict = new_dict
for key in keys[:-1]:
if key not in current_dict:
current_dict[key] = {}

@@ -392,7 +391,6 @@ def process_lookml_template_language(
source_config: LookMLSourceConfig,
view_lkml_file_dict: dict,
) -> None:
if "views" not in view_lkml_file_dict:
return

@@ -425,7 +423,6 @@ def load_and_preprocess_file(
path: Union[str, pathlib.Path],
source_config: LookMLSourceConfig,
) -> dict:
parsed = load_lkml(path)
process_lookml_template_language(

@@ -320,7 +320,6 @@ class LookerViewContext:
self,
field: str,
) -> Optional[Any]:
# According to Looker's inheritance rules, we need to merge the fields(i.e. dimensions, measures and
# dimension_groups) from both the child and parent.
if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]:

@@ -345,7 +344,6 @@ class LookerViewContext:
return self.get_including_extends(field="sql_table_name")
def _is_dot_sql_table_name_present(self) -> bool:
sql_table_name: Optional[str] = self._get_sql_table_name_field()
if sql_table_name is None:

@@ -144,7 +144,6 @@ class LookerView:
extract_col_level_lineage: bool = False,
populate_sql_logic_in_descriptions: bool = False,
) -> Optional["LookerView"]:
view_name = view_context.name()
logger.debug(f"Handling view {view_name} in model {model_name}")

@@ -418,7 +417,6 @@ class LookMLSource(StatefulIngestionSourceBase):
def _build_dataset_mcps(
self, looker_view: LookerView
) -> List[MetadataChangeProposalWrapper]:
view_urn = looker_view.id.get_urn(self.source_config)
subTypeEvent = MetadataChangeProposalWrapper(

@@ -502,7 +500,6 @@ class LookMLSource(StatefulIngestionSourceBase):
def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
manifest_file = folder / "manifest.lkml"
if manifest_file.exists():
manifest_dict = load_and_preprocess_file(
path=manifest_file, source_config=self.source_config
)
@@ -72,7 +72,6 @@ def resolve_derived_view_urn_of_col_ref(
base_folder_path: str,
config: LookMLSourceConfig,
) -> List[ColumnRef]:
new_column_refs: List[ColumnRef] = []
for col_ref in column_refs:
if is_derived_view(col_ref.table.lower()):

@@ -641,7 +640,6 @@ def create_view_upstream(
ctx: PipelineContext,
reporter: LookMLSourceReport,
) -> AbstractViewUpstream:
if view_context.is_regular_case():
return RegularViewUpstream(
view_context=view_context,

@@ -666,7 +664,6 @@ def create_view_upstream(
view_context.is_sql_based_derived_view_without_fields_case(),
]
):
return DerivedQueryUpstreamSource(
view_context=view_context,
config=config,

@@ -210,7 +210,6 @@ def _get_lineage_mcp(
# extract the old lineage and save it for the new mcp
if preserve_upstream:
client = get_default_graph()
old_upstream_lineage = get_aspects_for_entity(

@@ -464,7 +464,6 @@ class NifiSourceReport(SourceReport):
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations")
class NifiSource(Source):
config: NifiSourceConfig
report: NifiSourceReport

@@ -101,7 +101,6 @@ def parse_custom_sql(
env: str,
platform_instance: Optional[str],
) -> Optional["SqlParsingResult"]:
logger.debug("Using sqlglot_lineage to parse custom sql")
logger.debug(f"Processing native query using DataHub Sql Parser = {query}")

@@ -66,7 +66,6 @@ def get_upstream_tables(
config: PowerBiDashboardSourceConfig,
parameters: Dict[str, str] = {},
) -> List[resolver.Lineage]:
if table.expression is None:
logger.debug(f"There is no M-Query expression in table {table.full_name}")
return []

@@ -65,7 +65,6 @@ def urn_creator(
server: str,
qualified_table_name: str,
) -> str:
platform_detail: PlatformDetail = platform_instance_resolver.get_platform_instance(
PowerBIPlatformDetail(
data_platform_pair=data_platform_pair,

@@ -179,7 +178,6 @@ class AbstractDataPlatformTableCreator(ABC):
arg_list: Tree,
table_detail: Dict[str, str],
) -> Optional[ReferencedTable]:
arguments: List[str] = tree_function.strip_char_from_list(
values=tree_function.remove_whitespaces_from_list(
tree_function.token_values(arg_list)

@@ -219,7 +217,6 @@ class AbstractDataPlatformTableCreator(ABC):
def parse_custom_sql(
self, query: str, server: str, database: Optional[str], schema: Optional[str]
) -> Lineage:
dataplatform_tables: List[DataPlatformTable] = []
platform_detail: PlatformDetail = (

@@ -377,7 +374,6 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
return argument_list
def take_first_argument(self, expression: Tree) -> Optional[Tree]:
# function is not data-access function, lets process function argument
first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func(expression)

@@ -785,7 +781,6 @@ class MSSqlDataPlatformTableCreator(DefaultTwoStepDataAccessSources):
def create_lineage(
self, data_access_func_detail: DataAccessFunctionDetail
) -> Lineage:
arguments: List[str] = tree_function.strip_char_from_list(
values=tree_function.remove_whitespaces_from_list(
tree_function.token_values(data_access_func_detail.arg_list)

@@ -897,7 +892,6 @@ class DatabrickDataPlatformTableCreator(AbstractDataPlatformTableCreator):
table_reference: ReferencedTable,
data_platform_pair: DataPlatformPair,
) -> str:
platform_detail: PlatformDetail = (
self.platform_instance_resolver.get_platform_instance(
PowerBIPlatformDetail(
@@ -439,7 +439,6 @@ class DataResolverBase(ABC):
self,
app_id: str,
) -> Optional[App]:
raw_app: Optional[Dict] = self._get_app(
app_id=app_id,
)

@@ -1062,7 +1061,6 @@ class AdminAPIResolver(DataResolverBase):
self,
app_id: str,
) -> Optional[Dict]:
app_endpoint = self.API_ENDPOINTS[Constant.GET_WORKSPACE_APP].format(
POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL,
APP_ID=app_id,

@@ -40,7 +40,6 @@ def form_full_table_name(
dataset_name: str,
table_name: str,
) -> str:
full_table_name: str = "{}.{}".format(
dataset_name.replace(" ", "_"), table_name.replace(" ", "_")
)

@@ -596,7 +595,6 @@ class PowerBiAPI:
return workspaces
def _fill_independent_datasets(self, workspace: Workspace) -> None:
reachable_datasets: List[str] = []
# Find out reachable datasets
for dashboard in workspace.dashboards:

@@ -126,7 +126,6 @@ def log_http_error(e: BaseException, message: str) -> Any:
def get_response_dict(response: requests.Response, error_message: str) -> dict:
result_dict: dict = {}
try:
response.raise_for_status()

@@ -436,7 +436,6 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
def _extract_metadata(
self, connection: redshift_connector.Connection, database: str
) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
yield from self.gen_database_container(
database=database,
)

@@ -804,7 +804,6 @@ class S3Source(StatefulIngestionSourceBase):
protocol: str,
min: bool = False,
) -> List[str]:
# if len(path_spec.include.split("/")) == len(f"{protocol}{bucket_name}/{folder}".split("/")):
# return [f"{protocol}{bucket_name}/{folder}"]

@@ -401,7 +401,6 @@ class SACSource(StatefulIngestionSourceBase, TestableSource):
columns = self.get_import_data_model_columns(model_id=model.model_id)
for column in columns:
schema_field = SchemaFieldClass(
fieldPath=column.name,
type=self.get_schema_field_data_type(column),

@@ -236,7 +236,6 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
def get_known_query_lineage(
self, query: Query, dataset_name: str, db_row: UpstreamLineageEdge
) -> Optional[KnownQueryLineageInfo]:
if not db_row.UPSTREAM_TABLES:
return None

@@ -28,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
class CockroachDBSource(PostgresSource):
config: CockroachDBConfig
def __init__(self, config: CockroachDBConfig, ctx: PipelineContext):

@@ -178,7 +178,6 @@ class OracleInspectorObjectWrapper:
]
def get_view_names(self, schema: Optional[str] = None) -> List[str]:
schema = self._inspector_instance.dialect.denormalize_name(
schema or self.default_schema_name
)

@@ -200,7 +199,6 @@ class OracleInspectorObjectWrapper:
def get_columns(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> List[dict]:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)

@@ -344,7 +342,6 @@ class OracleInspectorObjectWrapper:
return columns
def get_table_comment(self, table_name: str, schema: Optional[str] = None) -> Dict:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)

@@ -416,7 +413,6 @@ class OracleInspectorObjectWrapper:
def get_pk_constraint(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> Dict:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)

@@ -458,7 +454,6 @@ class OracleInspectorObjectWrapper:
def get_foreign_keys(
self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> List:
denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
table_name
)

@@ -540,7 +535,6 @@ class OracleInspectorObjectWrapper:
def get_view_definition(
self, view_name: str, schema: Optional[str] = None
) -> Union[str, None]:
denormalized_view_name = self._inspector_instance.dialect.denormalize_name(
view_name
)
@@ -146,7 +146,11 @@ class GenericCheckpointState(CheckpointStateBase):
def compute_percent_entities_changed(
new_entities: List[str], old_entities: List[str]
) -> float:
(overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities(
(
overlap_count,
old_count,
_,
) = _get_entity_overlap_and_cardinalities(
new_entities=new_entities, old_entities=old_entities
)
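This is one of the few hunks that grows instead of shrinks: the assignment target `(overlap_count, old_count, _,)` carries a trailing comma, so the newer black explodes the unpacking across one line per element rather than keeping it inline. A small standalone illustration of the same rewrite, with a hypothetical helper standing in for `_get_entity_overlap_and_cardinalities`:

def _cardinalities() -> tuple:
    # Hypothetical stand-in for _get_entity_overlap_and_cardinalities().
    return 3, 10, 12


# Old formatting kept the comma-trailing target on one line:
# (overlap, old, _,) = _cardinalities()
# The bumped black expands such a target one element per line:
(
    overlap,
    old,
    _,
) = _cardinalities()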
@@ -2117,7 +2117,6 @@ class TableauSiteSource:
def _enrich_database_tables_with_parsed_schemas(
self, parsing_result: SqlParsingResult
) -> None:
in_tables_schemas: Dict[
str, Set[str]
] = transform_parsing_result_to_in_tables_schemas(parsing_result)

@@ -105,7 +105,6 @@ class SimpleAddDatasetDataProduct(AddDatasetDataProduct):
"""Transformer that adds a specified dataproduct entity for provided dataset as its asset."""
def __init__(self, config: SimpleDatasetDataProductConfig, ctx: PipelineContext):
generic_config = AddDatasetDataProductConfig(
get_data_product_to_add=lambda dataset_urn: config.dataset_to_data_product_urns.get(
dataset_urn

@@ -67,7 +67,6 @@ class AddDatasetTags(DatasetTagsTransformer):
def handle_end_of_stream(
self,
) -> List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
mcps: List[
Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]
] = []

@@ -105,7 +105,6 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
def handle_end_of_stream(
self,
) -> Sequence[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
return self.owner_mcps
def transform_aspect(

@@ -103,7 +103,6 @@ class ReplaceExternalUrlContainer(ContainerPropertiesTransformer, ReplaceUrl):
def transform_aspect(
self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
) -> Optional[Aspect]:
in_container_properties_aspect: ContainerPropertiesClass = cast(
ContainerPropertiesClass, aspect
)

@@ -84,7 +84,6 @@ class TagsToTermMapper(TagsToTermTransformer):
def transform_aspect(
self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
) -> Optional[Aspect]:
in_glossary_terms: Optional[GlossaryTermsClass] = cast(
Optional[GlossaryTermsClass], aspect
)

@@ -72,7 +72,6 @@ class SnowflakeMetricSQLGenerator:
@metric_sql.register
def _(self, assertion: RowCountTotalVolumeAssertion) -> str:
# Can not use information schema here due to error -
# Data metric function body cannot refer to the non-deterministic function 'CURRENT_DATABASE_MAIN_METASTORE_ID'.

@@ -433,7 +433,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
def add_custom_properties(
self, custom_properties: Optional[Dict[str, str]] = None
) -> "DashboardPatchBuilder":
if custom_properties:
for key, value in custom_properties.items():
self.custom_properties_patch_helper.add_property(key, value)

@@ -831,7 +831,6 @@ class SqlParsingAggregator(Closeable):
session_has_temp_tables: bool = True,
_is_internal: bool = False,
) -> None:
# Adding tool specific metadata extraction here allows it
# to work for both ObservedQuery and PreparsedQuery as
# add_preparsed_query it used within add_observed_query.

@@ -880,7 +880,6 @@ def _sqlglot_lineage_inner(
default_schema: Optional[str] = None,
default_dialect: Optional[str] = None,
) -> SqlParsingResult:
if not default_dialect:
dialect = get_dialect(schema_resolver.platform)
else:

@@ -79,7 +79,6 @@ class ToolMetaExtractor:
return True
def extract_bi_metadata(self, entry: QueryLog) -> bool:
for tool, meta_extractor in self.known_tool_extractors:
try:
if meta_extractor(entry):
@@ -206,7 +206,7 @@ class MCPDiff:
"""
aspect_diffs = [v for d in self.aspect_changes.values() for v in d.values()]
for aspect_diff in aspect_diffs:
for (_, old, new) in aspect_diff.aspects_changed.keys():
for _, old, new in aspect_diff.aspects_changed.keys():
golden[old.delta_info.idx] = new.delta_info.original
indices_to_remove = set()
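Here the hunk is a pure one-line rewrite: the newer black drops the redundant parentheses around the for-loop target, turning `for (_, old, new) in ...` into `for _, old, new in ...`. A trivial, hypothetical example of the same rewrite:

changes = {("k", 1, 2): None, ("k", 3, 4): None}

# Before: for (_, old, new) in changes.keys():
# After the black bump, the parentheses around the loop target are gone:
for _, old, new in changes.keys():
    print(old, new)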
@@ -43,7 +43,6 @@ def _make_owner_category_list(
owner_category_urn: Optional[str],
owner_ids: List[str],
) -> List[Dict]:
return [
{
"urn": mce_builder.make_owner_urn(owner_id, owner_type),

@@ -285,7 +284,6 @@ class OperationProcessor:
aspect_map[Constants.ADD_TAG_OPERATION] = tag_aspect
if Constants.ADD_OWNER_OPERATION in operation_map:
owner_aspect = OwnershipClass(
owners=[
OwnerClass(

@@ -19,7 +19,6 @@ class ThreadedIteratorExecutor:
args_list: Iterable[Tuple[Any, ...]],
max_workers: int,
) -> Generator[T, None, None]:
out_q: queue.Queue[T] = queue.Queue()
def _worker_wrapper(

@@ -68,7 +68,6 @@ def run_ingest(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
) as mock_checkpoint:
mock_checkpoint.return_value = mock_datahub_graph
mocked_functions_reference(

@@ -47,7 +47,6 @@ def _generate_queries_cached_file(tmp_path: Path, queries_json_path: Path) -> No
@patch("google.cloud.bigquery.Client")
@patch("google.cloud.resourcemanager_v3.ProjectsClient")
def test_queries_ingestion(project_client, client, pytestconfig, monkeypatch, tmp_path):
test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2"
mcp_golden_path = f"{test_resources_dir}/bigquery_queries_mcps_golden.json"
mcp_output_path = tmp_path / "bigquery_queries_mcps.json"

@@ -192,7 +192,6 @@ def create_mysql_source(headers):
def upload_dataset(headers):
url = f"{DREMIO_HOST}/apiv2/source/s3/file_format/warehouse/sample.parquet"
payload = {"ignoreOtherFileFormats": False, "type": "Parquet"}

@@ -1047,7 +1047,6 @@ def test_independent_soft_deleted_looks(
mocked_client = mock.MagicMock()
with mock.patch("looker_sdk.init40") as mock_sdk:
mock_sdk.return_value = mocked_client
setup_mock_look(mocked_client)
setup_mock_soft_deleted_look(mocked_client)

@@ -831,7 +831,6 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None:
@freeze_time(FROZEN_TIME)
def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time):
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
mce_out_file = "duplicate_ingest_mces_output.json"

@@ -58,14 +58,12 @@ def run_ingest(
mocked_functions_reference,
recipe,
):
with patch(
"datahub.ingestion.source.identity.okta.OktaClient"
) as MockClient, patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
) as mock_checkpoint:
mock_checkpoint.return_value = mock_datahub_graph
mocked_functions_reference(MockClient=MockClient)

@@ -277,7 +275,6 @@ def overwrite_group_in_mocked_data(test_resources_dir, MockClient):
def _init_mock_okta_client(
test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None
):
okta_users_json_file = (
test_resources_dir / "okta_users.json"
if mock_users_json is None

@@ -33,7 +33,6 @@ class MockViewDefinition:
@dataclass
class MockConstraints:
constraint_name: str = "mock constraint name"
constraint_type: str = "P"
local_column: str = "mock column name"

@@ -712,7 +712,6 @@ def test_redshift_regular_case():
def test_redshift_native_query():
table: powerbi_data_classes.Table = powerbi_data_classes.Table(
expression=M_QUERIES[22],
name="category",

@@ -1101,7 +1100,6 @@ def test_double_quotes_in_alias():
@patch("datahub.ingestion.source.powerbi.m_query.parser.get_lark_parser")
def test_m_query_timeout(mock_get_lark_parser):
q = 'let\n Source = Value.NativeQuery(Snowflake.Databases("0DD93C6BD5A6.snowflakecomputing.com","sales_analytics_warehouse_prod",[Role="sales_analytics_member_ad"]){[Name="SL_OPERATIONS"]}[Data], "select SALE_NO AS ""\x1b[4mSaleNo\x1b[0m""#(lf) ,CODE AS ""Code""#(lf) ,ENDDATE AS ""end_date""#(lf) from SL_OPERATIONS.SALE.REPORTS#(lf) where ENDDATE > \'2024-02-03\'", null, [EnableFolding=true]),\n #"selected Row" = Table.SelectRows(Source)\nin\n #"selected Row"'
table: powerbi_data_classes.Table = powerbi_data_classes.Table(
@@ -96,7 +96,6 @@ def read_mock_data(path: Union[Path, str]) -> dict:
def register_mock_api(
pytestconfig: pytest.Config, request_mock: Any, override_data: Optional[dict] = None
) -> None:
default_mock_data_path = (
pytestconfig.rootpath
/ "tests/integration/powerbi/mock_data/default_mock_response.json"

@@ -467,7 +466,6 @@ def test_scan_all_workspaces(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

@@ -517,7 +515,6 @@ def test_extract_reports(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

@@ -1219,7 +1216,6 @@ def test_independent_datasets_extraction(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(

@@ -1323,7 +1319,6 @@ def test_cll_extraction(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(

@@ -1380,7 +1375,6 @@ def test_cll_extraction_flags(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
register_mock_api(
pytestconfig=pytestconfig,
request_mock=requests_mock,

@@ -1392,7 +1386,6 @@ def test_cll_extraction_flags(
)
with pytest.raises(Exception, match=pattern):
Pipeline.create(
{
"run_id": "powerbi-test",

@@ -1559,7 +1552,6 @@ def test_powerbi_app_ingest(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
common_app_ingest(
pytestconfig=pytestconfig,
requests_mock=requests_mock,

@@ -1590,7 +1582,6 @@ def test_powerbi_app_ingest_info_message(
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
pipeline = common_app_ingest(
pytestconfig=pytestconfig,
requests_mock=requests_mock,

@@ -1011,7 +1011,6 @@ def default_config():
def test_qlik_sense_ingest(
pytestconfig, tmp_path, requests_mock, mock_websocket_send_request
):
test_resources_dir = pytestconfig.rootpath / "tests/integration/qlik_sense"
register_mock_api(request_mock=requests_mock)

@@ -1051,7 +1050,6 @@ def test_qlik_sense_ingest(
def test_platform_instance_ingest(
pytestconfig, tmp_path, requests_mock, mock_websocket_send_request
):
test_resources_dir = pytestconfig.rootpath / "tests/integration/qlik_sense"
register_mock_api(request_mock=requests_mock)

@@ -420,7 +420,6 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None:
@pytest.mark.integration
def test_sigma_ingest(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
register_mock_api(request_mock=requests_mock)

@@ -464,7 +463,6 @@ def test_sigma_ingest(pytestconfig, tmp_path, requests_mock):
@pytest.mark.integration
def test_platform_instance_ingest(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
register_mock_api(request_mock=requests_mock)

@@ -510,7 +508,6 @@ def test_platform_instance_ingest(pytestconfig, tmp_path, requests_mock):
@pytest.mark.integration
def test_sigma_ingest_shared_entities(pytestconfig, tmp_path, requests_mock):
test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma"
override_data = {

@@ -441,7 +441,6 @@ def default_query_results( # noqa: C901
include_column_lineage=True,
),
):
return [
{
"DOWNSTREAM_TABLE_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}",
@@ -276,7 +276,6 @@ def mock_sdk_client(
datasources_side_effect: List[dict],
sign_out_side_effect: List[dict],
) -> mock.MagicMock:
mock_client = mock.Mock()
mocked_metadata = mock.Mock()
mocked_metadata.query.side_effect = side_effect_query_metadata_response

@@ -1228,7 +1227,6 @@ def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph):
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,

@@ -282,7 +282,6 @@ ViewEntry = namedtuple(
def mock_hive_sql(query):
if query == "DESCRIBE EXTENDED `bronze_kambi`.`bet` betStatusId":
return [
("col_name", "betStatusId"),

@@ -16,7 +16,6 @@ from tests.performance.helpers import workunit_sink
def run_test():
with mock.patch("snowflake.connector.connect") as mock_connect:
sf_connection = mock.MagicMock()
sf_cursor = mock.MagicMock()

@@ -10,7 +10,6 @@ class MyTestModel(BaseModel):
def test_base_model():
test_base_model = MyTestModel(
test_string_field="test_string_field",
test_int_field=42,

@@ -31,7 +30,6 @@ def test_base_model():
def test_dictwrapper():
from datahub.metadata.schema_classes import DatasetPropertiesClass
dataset_properties = DatasetPropertiesClass(

@@ -58,7 +56,6 @@ def test_dictwrapper():
def test_raw_dictionary():
test_object = {
"test_string_field": "test_string_field",
"test_int_field": 42,

@@ -185,7 +185,6 @@ def test_platform_resource_base_model():
def test_platform_resource_filters():
query = (
ElasticPlatformResourceQuery.create_from()
.group(LogicalOperator.AND)

@@ -104,7 +104,6 @@ def test_incremental_table_lineage(tmp_path, pytestconfig):
def test_incremental_table_lineage_empty_upstreams(tmp_path, pytestconfig):
urn = make_dataset_urn(platform, "dataset1")
aspect = make_lineage_aspect(
"dataset1",

@@ -144,7 +144,6 @@ def test_column_level_lineage(lineage_entries: List[QueryEvent]) -> None:
def test_lineage_for_external_bq_table(mock_datahub_graph_instance):
pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
pipeline_context.graph = mock_datahub_graph_instance

@@ -239,7 +238,6 @@ def test_lineage_for_external_bq_table(mock_datahub_graph_instance):
def test_lineage_for_external_bq_table_no_column_lineage(mock_datahub_graph_instance):
pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
pipeline_context.graph = mock_datahub_graph_instance

@@ -184,7 +184,6 @@ def test_bigquery_table_sanitasitation():
def test_unquote_and_decode_unicode_escape_seq():
# Test with a string that starts and ends with quotes and has Unicode escape sequences
input_string = '"Hello \\u003cWorld\\u003e"'
expected_output = "Hello <World>"

@@ -221,7 +221,6 @@ def mock_redshift_connection() -> MagicMock:
def mock_graph() -> DataHubGraph:
graph = MagicMock()
graph._make_schema_resolver.return_value = SchemaResolver(

@@ -958,7 +958,6 @@ def test_table_lineage_via_temp_table_disordered_add(
@freeze_time(FROZEN_TIME)
def test_basic_usage(pytestconfig: pytest.Config) -> None:
frozen_timestamp = parse_user_datetime(FROZEN_TIME)
aggregator = SqlParsingAggregator(
platform="redshift",

@@ -50,7 +50,6 @@ def test_change_percent(
def test_filter_ignored_entity_types():
assert filter_ignored_entity_types(
[
"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)",

@@ -56,7 +56,6 @@ schema_test_cases: Dict[str, Tuple[str, List[str]]] = {
def test_cassandra_schema_conversion(
schema: str, expected_field_paths: List[str]
) -> None:
schema_dict: Dict[str, List[Any]] = json.loads(schema)
column_infos: List = schema_dict["column_infos"]