mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-29 17:59:24 +00:00
fix(ingest): serialisation of structured report (#14973)
This commit is contained in:
parent
602f40d01d
commit
7b5680efbc
@ -4,7 +4,6 @@ from contextlib import AbstractContextManager
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Tuple
|
||||
|
||||
from datahub.utilities.perf_timer import PerfTimer
|
||||
from datahub.utilities.stats_collections import TopKDict
|
||||
@ -38,9 +37,7 @@ class IngestionStageReport:
|
||||
ingestion_high_stage_seconds: dict[IngestionHighStage, float] = field(
|
||||
default_factory=lambda: defaultdict(float)
|
||||
)
|
||||
ingestion_stage_durations: TopKDict[Tuple[IngestionHighStage, str], float] = field(
|
||||
default_factory=TopKDict
|
||||
)
|
||||
ingestion_stage_durations: TopKDict[str, float] = field(default_factory=TopKDict)
|
||||
|
||||
def new_stage(
|
||||
self, stage: str, high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED
|
||||
@ -81,9 +78,9 @@ class IngestionStageContext(AbstractContextManager):
|
||||
f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds",
|
||||
stacklevel=2,
|
||||
)
|
||||
self._report.ingestion_stage_durations[
|
||||
(self._high_stage, self._ingestion_stage)
|
||||
] = elapsed
|
||||
# Store tuple as string to avoid serialization errors
|
||||
key = f"({self._high_stage.value}, {self._ingestion_stage})"
|
||||
self._report.ingestion_stage_durations[key] = elapsed
|
||||
else:
|
||||
logger.info(
|
||||
f"Time spent in stage <{self._high_stage.value}>: {elapsed} seconds",
|
||||
|
||||
@ -14,8 +14,8 @@ def test_ingestion_stage_context_records_duration():
|
||||
pass
|
||||
assert len(report.ingestion_stage_durations) == 1
|
||||
key = next(iter(report.ingestion_stage_durations.keys()))
|
||||
assert key[0] == IngestionHighStage._UNDEFINED
|
||||
assert "Test Stage" in key[1]
|
||||
assert "Ingestion" in key
|
||||
assert "Test Stage" in key
|
||||
|
||||
|
||||
def test_ingestion_stage_context_handles_exceptions():
|
||||
@ -27,7 +27,8 @@ def test_ingestion_stage_context_handles_exceptions():
|
||||
pass
|
||||
assert len(report.ingestion_stage_durations) == 1
|
||||
key = next(iter(report.ingestion_stage_durations.keys()))
|
||||
assert "Test Stage" in key[1]
|
||||
assert "Ingestion" in key
|
||||
assert "Test Stage" in key
|
||||
|
||||
|
||||
def test_ingestion_stage_context_report_handles_multiple_stages():
|
||||
@ -45,9 +46,9 @@ def test_ingestion_stage_context_report_handles_multiple_stages():
|
||||
)
|
||||
|
||||
sorted_stages = list(sorted(report.ingestion_stage_durations.keys()))
|
||||
assert "Test Stage 1" in sorted_stages[0][1]
|
||||
assert "Test Stage 2" in sorted_stages[1][1]
|
||||
assert "Test Stage 3" in sorted_stages[2][1]
|
||||
assert "Test Stage 1" in sorted_stages[0]
|
||||
assert "Test Stage 2" in sorted_stages[1]
|
||||
assert "Test Stage 3" in sorted_stages[2]
|
||||
|
||||
|
||||
def test_ingestion_stage_context_report_handles_nested_stages():
|
||||
@ -64,14 +65,14 @@ def test_ingestion_stage_context_report_handles_nested_stages():
|
||||
for duration in report.ingestion_stage_durations.values()
|
||||
)
|
||||
sorted_stages = list(sorted(report.ingestion_stage_durations.keys()))
|
||||
assert "Inner1" in sorted_stages[0][1]
|
||||
assert "Inner2" in sorted_stages[1][1]
|
||||
assert "Outer" in sorted_stages[2][1]
|
||||
assert "Inner1" in sorted_stages[0]
|
||||
assert "Inner2" in sorted_stages[1]
|
||||
assert "Outer" in sorted_stages[2]
|
||||
|
||||
# Check that outer stage duration >= sum of inner stage durations
|
||||
outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k[1]][0]
|
||||
inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k[1]][0]
|
||||
inner2_key = [k for k in report.ingestion_stage_durations if "Inner2" in k[1]][0]
|
||||
outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k][0]
|
||||
inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k][0]
|
||||
inner2_key = [k for k in report.ingestion_stage_durations if "Inner2" in k][0]
|
||||
|
||||
outer_duration = report.ingestion_stage_durations[outer_key]
|
||||
inner1_duration = report.ingestion_stage_durations[inner1_key]
|
||||
@ -96,6 +97,6 @@ def test_ingestion_stage_with_high_stage():
|
||||
time.sleep(0.1)
|
||||
assert len(report.ingestion_stage_durations) == 1
|
||||
key = next(iter(report.ingestion_stage_durations.keys()))
|
||||
assert key[0] == IngestionHighStage.PROFILING
|
||||
assert "Test Stage" in key[1]
|
||||
assert "Profiling" in key
|
||||
assert "Test Stage" in key
|
||||
assert report.ingestion_high_stage_seconds[IngestionHighStage.PROFILING] > 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user