feat(ingest): enable pipeline reporting by default (#8472)
commit eac003ccf4
parent cc46729137
@@ -15,6 +15,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
   certain column-level metrics. Instead, set `profile_table_level_only` to `false` and
   individually enable / disable desired field metrics.
 - #8451: The `bigquery-beta` and `snowflake-beta` source aliases have been dropped. Use `bigquery` and `snowflake` as the source type instead.
+- #8472: Ingestion runs created with Pipeline.create will show up in the DataHub ingestion tab as CLI-based runs. To revert to the previous behavior of not showing these runs in DataHub, pass `no_default_report=True`.
 
 ### Potential Downtime
 
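For illustration, a minimal sketch of the opt-out described in the #8472 note above; the recipe contents here are placeholders, not a recommended configuration:

```python
from datahub.ingestion.run.pipeline import Pipeline

# Placeholder recipe; source/sink details are illustrative only.
recipe = {
    "source": {"type": "file", "config": {"path": "./mces.json"}},
    "sink": {"type": "console"},
}

# As of #8472, this run is reported to DataHub by default and shows up
# in the ingestion tab as a CLI-based run.
pipeline = Pipeline.create(recipe)

# To keep the pre-#8472 behavior, opt out of the default reporter:
pipeline = Pipeline.create(recipe, no_default_report=True)
pipeline.run()
pipeline.pretty_print_summary()
```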
@@ -61,7 +61,8 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
                 "type": "file",
                 "config": {"filename": out_file.name},
             },
-        }
+        },
+        no_default_report=True,
     )
 
     pipeline.run()
@@ -985,7 +985,7 @@ def ingest_sample_data(path: Optional[str], token: Optional[str]) -> None:
     if token is not None:
         recipe["sink"]["config"]["token"] = token
 
-    pipeline = Pipeline.create(recipe)
+    pipeline = Pipeline.create(recipe, no_default_report=True)
     pipeline.run()
     ret = pipeline.pretty_print_summary()
     sys.exit(ret)
@@ -253,7 +253,7 @@ def mcps(path: str) -> None:
         },
     }
 
-    pipeline = Pipeline.create(recipe)
+    pipeline = Pipeline.create(recipe, no_default_report=True)
     pipeline.run()
     ret = pipeline.pretty_print_summary()
     sys.exit(ret)
@@ -57,12 +57,12 @@ class DatahubClientConfig(ConfigModel):
     """Configuration class for holding connectivity to datahub gms"""
 
     server: str = "http://localhost:8080"
-    token: Optional[str]
-    timeout_sec: Optional[int]
-    retry_status_codes: Optional[List[int]]
-    retry_max_times: Optional[int]
-    extra_headers: Optional[Dict[str, str]]
-    ca_certificate_path: Optional[str]
+    token: Optional[str] = None
+    timeout_sec: Optional[int] = None
+    retry_status_codes: Optional[List[int]] = None
+    retry_max_times: Optional[int] = None
+    extra_headers: Optional[Dict[str, str]] = None
+    ca_certificate_path: Optional[str] = None
     disable_ssl_verification: bool = False
 
     _max_threads_moved_to_sink = pydantic_removed_field(
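The explicit `= None` defaults matter because a bare `Optional[...]` annotation is only implicitly optional under pydantic v1; pydantic v2 treats it as a required field. A standalone sketch of the difference, using toy model names rather than DataHub code:

```python
from typing import Optional
from pydantic import BaseModel

class Implicit(BaseModel):
    token: Optional[str]  # implicitly defaults to None under pydantic v1,
                          # but is a *required* field under pydantic v2

class Explicit(BaseModel):
    token: Optional[str] = None  # unambiguous under both major versions

Explicit()    # always valid
# Implicit()  # raises ValidationError when run under pydantic v2
```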
@@ -328,7 +328,7 @@ class Pipeline:
         dry_run: bool = False,
         preview_mode: bool = False,
         preview_workunits: int = 10,
-        report_to: Optional[str] = None,
+        report_to: Optional[str] = "datahub",
         no_default_report: bool = False,
         raw_config: Optional[dict] = None,
     ) -> "Pipeline":
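How the two parameters combine is not shown in this hunk; the sketch below is an assumption about the resolution logic, not DataHub's actual implementation. The new default `report_to="datahub"` presumably attaches the default reporter unless the caller opts out with `no_default_report=True` or passes an explicit `report_to=None`:

```python
from typing import Optional

def resolve_report_to(report_to: Optional[str] = "datahub",
                      no_default_report: bool = False) -> Optional[str]:
    """Hypothetical helper: decide whether the default DataHub reporter runs."""
    if no_default_report and report_to == "datahub":
        return None  # caller kept the default but explicitly opted out
    return report_to  # explicit destinations (or None) pass through unchanged

assert resolve_report_to() == "datahub"                   # new default: report
assert resolve_report_to(no_default_report=True) is None  # pre-#8472 behavior
assert resolve_report_to(report_to=None) is None          # explicit disable
```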
@@ -132,8 +132,9 @@ class PatternAddDatasetTerms(AddDatasetTerms):
     def __init__(self, config: PatternDatasetTermsConfig, ctx: PipelineContext):
         term_pattern = config.term_pattern
         generic_config = AddDatasetTermsConfig(
-            get_terms_to_add=lambda _: [
-                GlossaryTermAssociationClass(urn=urn) for urn in term_pattern.value(_)
+            get_terms_to_add=lambda entity_urn: [
+                GlossaryTermAssociationClass(urn=term_urn)
+                for term_urn in term_pattern.value(entity_urn)
             ],
             replace_existing=config.replace_existing,
             semantics=config.semantics,
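The rewritten lambda only renames variables for readability; its behavior is unchanged: given a dataset URN, it looks up the matching glossary-term URNs and wraps each in a `GlossaryTermAssociationClass`. A self-contained approximation with stand-in types, where `pattern_value` plays the role of DataHub's `term_pattern.value()`:

```python
import re
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class GlossaryTermAssociation:  # stand-in for GlossaryTermAssociationClass
    urn: str

# Stand-in rules mapping dataset-URN regexes to glossary-term URNs.
rules: Dict[str, List[str]] = {
    r".*users.*": ["urn:li:glossaryTerm:PII"],
}

def pattern_value(entity_urn: str) -> List[str]:
    """Return the term URNs whose regex key matches the dataset URN."""
    return [
        term_urn
        for pattern, term_urns in rules.items()
        if re.match(pattern, entity_urn)
        for term_urn in term_urns
    ]

# The renamed lambda from the diff, spelled out as a named function:
def get_terms_to_add(entity_urn: str) -> List[GlossaryTermAssociation]:
    return [GlossaryTermAssociation(urn=term_urn)
            for term_urn in pattern_value(entity_urn)]

print(get_terms_to_add("urn:li:dataset:(urn:li:dataPlatform:hive,db.users,PROD)"))
# -> [GlossaryTermAssociation(urn='urn:li:glossaryTerm:PII')]
```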