feat(ingest): enable pipeline reporting by default (#8472)

Harshal Sheth 2023-07-25 01:46:27 -07:00 committed by GitHub
parent cc46729137
commit eac003ccf4
7 changed files with 15 additions and 12 deletions


@@ -15,6 +15,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
   certain column-level metrics. Instead, set `profile_table_level_only` to `false` and
   individually enable / disable desired field metrics.
 - #8451: The `bigquery-beta` and `snowflake-beta` source aliases have been dropped. Use `bigquery` and `snowflake` as the source type instead.
+- #8472: Ingestion runs created with `Pipeline.create` will show up in the DataHub ingestion tab as CLI-based runs. To revert to the previous behavior of not showing these runs in DataHub, pass `no_default_report=True`.
 ### Potential Downtime
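To make the new default concrete, here is a minimal sketch of the behavior described in the note above (the recipe contents are illustrative placeholders):

```python
from datahub.ingestion.run.pipeline import Pipeline

# Illustrative recipe; the source and sink details are placeholders.
recipe = {
    "source": {"type": "file", "config": {"filename": "./mces.json"}},
    "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
}

# As of #8472, this run is reported to DataHub by default and shows up
# in the ingestion tab as a CLI-based run.
pipeline = Pipeline.create(recipe)
pipeline.run()

# Passing no_default_report=True restores the previous behavior of not
# showing the run in DataHub.
quiet_pipeline = Pipeline.create(recipe, no_default_report=True)
quiet_pipeline.run()
```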


@@ -61,7 +61,8 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
                 "type": "file",
                 "config": {"filename": out_file.name},
             },
-        }
+        },
+        no_default_report=True,
     )
     pipeline.run()


@@ -985,7 +985,7 @@ def ingest_sample_data(path: Optional[str], token: Optional[str]) -> None:
     if token is not None:
         recipe["sink"]["config"]["token"] = token
-    pipeline = Pipeline.create(recipe)
+    pipeline = Pipeline.create(recipe, no_default_report=True)
     pipeline.run()
     ret = pipeline.pretty_print_summary()
     sys.exit(ret)


@@ -253,7 +253,7 @@ def mcps(path: str) -> None:
         },
     }
-    pipeline = Pipeline.create(recipe)
+    pipeline = Pipeline.create(recipe, no_default_report=True)
     pipeline.run()
     ret = pipeline.pretty_print_summary()
     sys.exit(ret)
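All three CLI helper paths updated above (`metadata_file`, `ingest_sample_data`, and `mcps`) opt out via `no_default_report=True`, presumably because these are internal utility runs that should not appear in the ingestion tab.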


@@ -57,12 +57,12 @@ class DatahubClientConfig(ConfigModel):
     """Configuration class for holding connectivity to datahub gms"""

     server: str = "http://localhost:8080"
-    token: Optional[str]
-    timeout_sec: Optional[int]
-    retry_status_codes: Optional[List[int]]
-    retry_max_times: Optional[int]
-    extra_headers: Optional[Dict[str, str]]
-    ca_certificate_path: Optional[str]
+    token: Optional[str] = None
+    timeout_sec: Optional[int] = None
+    retry_status_codes: Optional[List[int]] = None
+    retry_max_times: Optional[int] = None
+    extra_headers: Optional[Dict[str, str]] = None
+    ca_certificate_path: Optional[str] = None
     disable_ssl_verification: bool = False

     _max_threads_moved_to_sink = pydantic_removed_field(
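Under pydantic v1, fields annotated `Optional[...]` without a default are already treated as implicitly defaulting to `None`, so the change above mainly makes that explicit for readers and type checkers. A quick sketch of the resulting construction behavior (values are illustrative):

```python
from datahub.ingestion.graph.client import DatahubClientConfig

# Only overrides need to be passed; every other optional field is None.
config = DatahubClientConfig(server="http://localhost:8080", token="my-token")
assert config.timeout_sec is None
assert config.extra_headers is None
```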


@@ -328,7 +328,7 @@ class Pipeline:
         dry_run: bool = False,
         preview_mode: bool = False,
         preview_workunits: int = 10,
-        report_to: Optional[str] = None,
+        report_to: Optional[str] = "datahub",
         no_default_report: bool = False,
         raw_config: Optional[dict] = None,
     ) -> "Pipeline":


@@ -132,8 +132,9 @@ class PatternAddDatasetTerms(AddDatasetTerms):
     def __init__(self, config: PatternDatasetTermsConfig, ctx: PipelineContext):
         term_pattern = config.term_pattern
         generic_config = AddDatasetTermsConfig(
-            get_terms_to_add=lambda _: [
-                GlossaryTermAssociationClass(urn=urn) for urn in term_pattern.value(_)
+            get_terms_to_add=lambda entity_urn: [
+                GlossaryTermAssociationClass(urn=term_urn)
+                for term_urn in term_pattern.value(entity_urn)
             ],
             replace_existing=config.replace_existing,
             semantics=config.semantics,
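The rename above separates the two URN roles that the old code blurred: the lambda receives the entity (dataset) URN, and the pattern produces term URNs. A self-contained sketch of that mapping, using a stand-in for the real `term_pattern.value` (which is not shown here):

```python
from typing import List

from datahub.metadata.schema_classes import GlossaryTermAssociationClass

# Stand-in for term_pattern.value(): maps a dataset URN to glossary term URNs.
def pattern_value(entity_urn: str) -> List[str]:
    return ["urn:li:glossaryTerm:Sales"] if "orders" in entity_urn else []

get_terms_to_add = lambda entity_urn: [
    GlossaryTermAssociationClass(urn=term_urn)
    for term_urn in pattern_value(entity_urn)
]

terms = get_terms_to_add("urn:li:dataset:(urn:li:dataPlatform:hive,db.orders,PROD)")
assert terms[0].urn == "urn:li:glossaryTerm:Sales"
```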