mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-04 06:33:05 +00:00
feat(cli): Trim report of dataHubExecutionRequestResult to max GMS size (#11051)
This commit is contained in:
parent
edc8fd409d
commit
e14dc9159c
@ -81,7 +81,8 @@ profiling:
|
||||
- #10498 - Tableau ingestion can now be configured to ingest multiple sites at once and add the sites as containers. The feature is currently only available for Tableau Server.
|
||||
- #10466 - Extends configuration in `~/.datahubenv` to match `DatahubClientConfig` object definition. See full configuration in https://datahubproject.io/docs/python-sdk/clients/. The CLI should now respect the updated configurations specified in `~/.datahubenv` across its functions and utilities. This means that for systems where ssl certification is disabled, setting `disable_ssl_verification: true` in `~/.datahubenv` will apply to all CLI calls.
|
||||
- #11002 - We will not auto-generate a `~/.datahubenv` file. You must either run `datahub init` to create that file, or set environment variables so that the config is loaded.
|
||||
|
||||
- #11023 - Added a new parameter to datahub's `put` cli command: `--run-id`. This parameter is useful to associate a given write to an ingestion process. One use case is to mimic transformers when a transformer for the aspect being written does not exist.
|
||||
- #11051 - Ingestion reports will now trim the summary text to a maximum of 800k characters to avoid generating `dataHubExecutionRequestResult` that are too large for GMS to handle.
|
||||
## 0.13.3
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
@ -31,6 +31,7 @@ from datahub.metadata.schema_classes import (
|
||||
from datahub.utilities.logging_manager import get_log_buffer
|
||||
from datahub.utilities.urns.urn import Urn
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -43,6 +44,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
||||
_EXECUTOR_ID: str = "__datahub_cli_"
|
||||
_EXECUTION_REQUEST_SOURCE_TYPE: str = "CLI_INGESTION_SOURCE"
|
||||
_INGESTION_TASK_NAME: str = "CLI Ingestion"
|
||||
_MAX_SUMMARY_SIZE: int = 800000
|
||||
|
||||
@staticmethod
|
||||
def get_cur_time_in_ms() -> int:
|
||||
@ -209,7 +211,9 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
||||
status=status,
|
||||
startTimeMs=self.start_time_ms,
|
||||
durationMs=self.get_cur_time_in_ms() - self.start_time_ms,
|
||||
report=summary,
|
||||
# Truncate summary such that the generated MCP will not exceed GMS's payload limit.
|
||||
# Hardcoding the overall size of dataHubExecutionRequestResult to stay below 1MB by trimming summary to 800,000 chars
|
||||
report=summary[-self._MAX_SUMMARY_SIZE:],
|
||||
structuredReport=structured_report,
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user