mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-17 03:48:15 +00:00
feat(ingest/datahub): report progress on db ingestion (#12117)
This commit is contained in:
parent
8f9659fadf
commit
d2359e259a
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from functools import partial
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
@@ -26,6 +26,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
||||
StatefulIngestionSourceBase,
|
||||
)
|
||||
from datahub.metadata.schema_classes import ChangeTypeClass
|
||||
from datahub.utilities.progress_timer import ProgressTimer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -105,11 +106,17 @@ class DataHubSource(StatefulIngestionSourceBase):
|
||||
self, from_createdon: datetime, reader: DataHubDatabaseReader
|
||||
) -> Iterable[MetadataWorkUnit]:
|
||||
logger.info(f"Fetching database aspects starting from {from_createdon}")
|
||||
progress = ProgressTimer(report_every=timedelta(seconds=60))
|
||||
mcps = reader.get_aspects(from_createdon, self.report.stop_time)
|
||||
for i, (mcp, createdon) in enumerate(mcps):
|
||||
if not self.urn_pattern.allowed(str(mcp.entityUrn)):
|
||||
continue
|
||||
|
||||
if progress.should_report():
|
||||
logger.info(
|
||||
f"Ingested {i} database aspects so far, currently at {createdon}"
|
||||
)
|
||||
|
||||
yield mcp.as_workunit()
|
||||
self.report.num_database_aspects_ingested += 1
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user