feat(ingest): reporting logging tweaks (#9835)

This commit is contained in:
Harshal Sheth 2024-02-14 11:06:03 -08:00 committed by GitHub
parent 0328f854e8
commit 045c76a0e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 16 additions and 2 deletions

View File

@ -218,5 +218,7 @@ def sql_lineage(
)
logger.debug("Sql parsing debug info: %s", lineage.debug_info)
if lineage.debug_info.error:
logger.debug("Sql parsing error details", exc_info=lineage.debug_info.error)
click.echo(lineage.json(indent=4))

View File

@ -1,4 +1,5 @@
import datetime
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field
@ -40,6 +41,8 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.utilities.lossy_collections import LossyDict, LossyList
from datahub.utilities.type_annotations import get_class_from_annotation
logger = logging.getLogger(__name__)
class SourceCapability(Enum):
PLATFORM_INSTANCE = "Platform Instance"
@ -99,11 +102,19 @@ class SourceReport(Report):
warnings.append(reason)
self.warnings[key] = warnings
def warning(self, key: str, reason: str) -> None:
    """Record a warning in the report and also emit it to the log.

    Args:
        key: Identifier the warning is stored under.
        reason: Text describing the warning.
    """
    self.report_warning(key, reason)
    message = f"{key} => {reason}"
    # stacklevel=2 attributes the log record to whoever called warning(),
    # not to this helper.
    logger.warning(message, stacklevel=2)
def report_failure(self, key: str, reason: str) -> None:
    """Append ``reason`` to the failures stored under ``key``.

    If ``key`` has no entry yet, a new ``LossyList`` is created for it.

    Args:
        key: Identifier the failure is stored under.
        reason: Text describing the failure.
    """
    bucket = self.failures.get(key, LossyList())
    bucket.append(reason)
    # Write the list back so that a freshly created one is actually stored.
    self.failures[key] = bucket
def failure(self, key: str, reason: str) -> None:
    """Record a failure in the report and also emit it to the log.

    Args:
        key: Identifier the failure is stored under.
        reason: Text describing the failure.
    """
    self.report_failure(key, reason)
    message = f"{key} => {reason}"
    # stacklevel=2 attributes the log record to whoever called failure(),
    # not to this helper.
    logger.error(message, stacklevel=2)
def __post_init__(self) -> None:
self.start_time = datetime.datetime.now()
self.running_time: datetime.timedelta = datetime.timedelta(seconds=0)

View File

@ -30,7 +30,8 @@ class IngestionStageReport:
if self._timer:
elapsed = round(self._timer.elapsed_seconds(), 2)
logger.info(
f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds"
f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds",
stacklevel=2,
)
if self.ingestion_stage:
self.ingestion_stage_durations[self.ingestion_stage] = elapsed

View File

@ -23,7 +23,7 @@ import click
from datahub.utilities.tee_io import TeeIO
BASE_LOGGING_FORMAT = (
"[%(asctime)s] %(levelname)-8s {%(name)s:%(lineno)d} - %(message)s"
"[%(asctime)s] %(levelname)-8s {%(filename)s:%(lineno)d} - %(message)s"
)
DATAHUB_PACKAGES = [
"datahub",