mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 02:37:05 +00:00 
			
		
		
		
	fix(ingest): serialisation of structured report (#14973)
This commit is contained in:
		
							parent
							
								
									602f40d01d
								
							
						
					
					
						commit
						7b5680efbc
					
				| @ -4,7 +4,6 @@ from contextlib import AbstractContextManager | |||||||
| from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||||
| from datetime import datetime, timezone | from datetime import datetime, timezone | ||||||
| from enum import Enum | from enum import Enum | ||||||
| from typing import Tuple |  | ||||||
| 
 | 
 | ||||||
| from datahub.utilities.perf_timer import PerfTimer | from datahub.utilities.perf_timer import PerfTimer | ||||||
| from datahub.utilities.stats_collections import TopKDict | from datahub.utilities.stats_collections import TopKDict | ||||||
| @ -38,9 +37,7 @@ class IngestionStageReport: | |||||||
|     ingestion_high_stage_seconds: dict[IngestionHighStage, float] = field( |     ingestion_high_stage_seconds: dict[IngestionHighStage, float] = field( | ||||||
|         default_factory=lambda: defaultdict(float) |         default_factory=lambda: defaultdict(float) | ||||||
|     ) |     ) | ||||||
|     ingestion_stage_durations: TopKDict[Tuple[IngestionHighStage, str], float] = field( |     ingestion_stage_durations: TopKDict[str, float] = field(default_factory=TopKDict) | ||||||
|         default_factory=TopKDict |  | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
|     def new_stage( |     def new_stage( | ||||||
|         self, stage: str, high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED |         self, stage: str, high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED | ||||||
| @ -81,9 +78,9 @@ class IngestionStageContext(AbstractContextManager): | |||||||
|                 f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds", |                 f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds", | ||||||
|                 stacklevel=2, |                 stacklevel=2, | ||||||
|             ) |             ) | ||||||
|             self._report.ingestion_stage_durations[ |             # Store tuple as string to avoid serialization errors | ||||||
|                 (self._high_stage, self._ingestion_stage) |             key = f"({self._high_stage.value}, {self._ingestion_stage})" | ||||||
|             ] = elapsed |             self._report.ingestion_stage_durations[key] = elapsed | ||||||
|         else: |         else: | ||||||
|             logger.info( |             logger.info( | ||||||
|                 f"Time spent in stage <{self._high_stage.value}>: {elapsed} seconds", |                 f"Time spent in stage <{self._high_stage.value}>: {elapsed} seconds", | ||||||
|  | |||||||
| @ -14,8 +14,8 @@ def test_ingestion_stage_context_records_duration(): | |||||||
|         pass |         pass | ||||||
|     assert len(report.ingestion_stage_durations) == 1 |     assert len(report.ingestion_stage_durations) == 1 | ||||||
|     key = next(iter(report.ingestion_stage_durations.keys())) |     key = next(iter(report.ingestion_stage_durations.keys())) | ||||||
|     assert key[0] == IngestionHighStage._UNDEFINED |     assert "Ingestion" in key | ||||||
|     assert "Test Stage" in key[1] |     assert "Test Stage" in key | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_ingestion_stage_context_handles_exceptions(): | def test_ingestion_stage_context_handles_exceptions(): | ||||||
| @ -27,7 +27,8 @@ def test_ingestion_stage_context_handles_exceptions(): | |||||||
|         pass |         pass | ||||||
|     assert len(report.ingestion_stage_durations) == 1 |     assert len(report.ingestion_stage_durations) == 1 | ||||||
|     key = next(iter(report.ingestion_stage_durations.keys())) |     key = next(iter(report.ingestion_stage_durations.keys())) | ||||||
|     assert "Test Stage" in key[1] |     assert "Ingestion" in key | ||||||
|  |     assert "Test Stage" in key | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_ingestion_stage_context_report_handles_multiple_stages(): | def test_ingestion_stage_context_report_handles_multiple_stages(): | ||||||
| @ -45,9 +46,9 @@ def test_ingestion_stage_context_report_handles_multiple_stages(): | |||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) |     sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) | ||||||
|     assert "Test Stage 1" in sorted_stages[0][1] |     assert "Test Stage 1" in sorted_stages[0] | ||||||
|     assert "Test Stage 2" in sorted_stages[1][1] |     assert "Test Stage 2" in sorted_stages[1] | ||||||
|     assert "Test Stage 3" in sorted_stages[2][1] |     assert "Test Stage 3" in sorted_stages[2] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_ingestion_stage_context_report_handles_nested_stages(): | def test_ingestion_stage_context_report_handles_nested_stages(): | ||||||
| @ -64,14 +65,14 @@ def test_ingestion_stage_context_report_handles_nested_stages(): | |||||||
|         for duration in report.ingestion_stage_durations.values() |         for duration in report.ingestion_stage_durations.values() | ||||||
|     ) |     ) | ||||||
|     sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) |     sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) | ||||||
|     assert "Inner1" in sorted_stages[0][1] |     assert "Inner1" in sorted_stages[0] | ||||||
|     assert "Inner2" in sorted_stages[1][1] |     assert "Inner2" in sorted_stages[1] | ||||||
|     assert "Outer" in sorted_stages[2][1] |     assert "Outer" in sorted_stages[2] | ||||||
| 
 | 
 | ||||||
|     # Check that outer stage duration >= sum of inner stage durations |     # Check that outer stage duration >= sum of inner stage durations | ||||||
|     outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k[1]][0] |     outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k][0] | ||||||
|     inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k[1]][0] |     inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k][0] | ||||||
|     inner2_key = [k for k in report.ingestion_stage_durations if "Inner2" in k[1]][0] |     inner2_key = [k for k in report.ingestion_stage_durations if "Inner2" in k][0] | ||||||
| 
 | 
 | ||||||
|     outer_duration = report.ingestion_stage_durations[outer_key] |     outer_duration = report.ingestion_stage_durations[outer_key] | ||||||
|     inner1_duration = report.ingestion_stage_durations[inner1_key] |     inner1_duration = report.ingestion_stage_durations[inner1_key] | ||||||
| @ -96,6 +97,6 @@ def test_ingestion_stage_with_high_stage(): | |||||||
|         time.sleep(0.1) |         time.sleep(0.1) | ||||||
|     assert len(report.ingestion_stage_durations) == 1 |     assert len(report.ingestion_stage_durations) == 1 | ||||||
|     key = next(iter(report.ingestion_stage_durations.keys())) |     key = next(iter(report.ingestion_stage_durations.keys())) | ||||||
|     assert key[0] == IngestionHighStage.PROFILING |     assert "Profiling" in key | ||||||
|     assert "Test Stage" in key[1] |     assert "Test Stage" in key | ||||||
|     assert report.ingestion_high_stage_seconds[IngestionHighStage.PROFILING] > 0 |     assert report.ingestion_high_stage_seconds[IngestionHighStage.PROFILING] > 0 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Aseem Bansal
						Aseem Bansal