From 045c76a0e4014ce159acdb0ddfb0f2e1220c7c40 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 14 Feb 2024 11:06:03 -0800
Subject: [PATCH] feat(ingest): reporting logging tweaks (#9835)

Summary of the changes in this patch:

* check_cli: when the SQL lineage result carries an error in its debug
  info, emit an extra debug log line with that error passed as exc_info.
* SourceReport: add warning() and failure() helpers that record the entry
  via report_warning()/report_failure() and also log it. They pass
  stacklevel=2 so the log record is attributed to the caller rather than
  to the helper itself.
* IngestionStageReport: pass stacklevel=2 to the "Time spent in stage"
  log call for the same reason.
* logging_manager: BASE_LOGGING_FORMAT now prints %(filename)s instead of
  %(name)s next to the line number.

---
 metadata-ingestion/src/datahub/cli/check_cli.py       |  2 ++
 .../src/datahub/ingestion/api/source.py               | 11 +++++++++++
 .../ingestion/source_report/ingestion_stage.py        |  3 ++-
 .../src/datahub/utilities/logging_manager.py          |  2 +-
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py
index 2732a72aea..0540120b9d 100644
--- a/metadata-ingestion/src/datahub/cli/check_cli.py
+++ b/metadata-ingestion/src/datahub/cli/check_cli.py
@@ -218,5 +218,7 @@ def sql_lineage(
     )
 
     logger.debug("Sql parsing debug info: %s", lineage.debug_info)
+    if lineage.debug_info.error:
+        logger.debug("Sql parsing error details", exc_info=lineage.debug_info.error)
 
     click.echo(lineage.json(indent=4))
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index a272b6e3cf..0679c884ba 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -1,4 +1,5 @@
 import datetime
+import logging
 from abc import ABCMeta, abstractmethod
 from collections import defaultdict
 from dataclasses import dataclass, field
@@ -40,6 +41,8 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.type_annotations import get_class_from_annotation
 
+logger = logging.getLogger(__name__)
+
 
 class SourceCapability(Enum):
     PLATFORM_INSTANCE = "Platform Instance"
@@ -99,11 +102,19 @@ class SourceReport(Report):
         warnings.append(reason)
         self.warnings[key] = warnings
 
+    def warning(self, key: str, reason: str) -> None:
+        self.report_warning(key, reason)
+        logger.warning(f"{key} => {reason}", stacklevel=2)
+
     def report_failure(self, key: str, reason: str) -> None:
         failures = self.failures.get(key, LossyList())
         failures.append(reason)
         self.failures[key] = failures
 
+    def failure(self, key: str, reason: str) -> None:
+        self.report_failure(key, reason)
+        logger.error(f"{key} => {reason}", stacklevel=2)
+
     def __post_init__(self) -> None:
         self.start_time = datetime.datetime.now()
         self.running_time: datetime.timedelta = datetime.timedelta(seconds=0)
diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py
index e7da7eb6e7..14dc428b65 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py
@@ -30,7 +30,8 @@ class IngestionStageReport:
         if self._timer:
             elapsed = round(self._timer.elapsed_seconds(), 2)
             logger.info(
-                f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds"
+                f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds",
+                stacklevel=2,
             )
             if self.ingestion_stage:
                 self.ingestion_stage_durations[self.ingestion_stage] = elapsed
diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py
index 62aa1ca7ab..dc96ef3842 100644
--- a/metadata-ingestion/src/datahub/utilities/logging_manager.py
+++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py
@@ -23,7 +23,7 @@ import click
 from datahub.utilities.tee_io import TeeIO
 
 BASE_LOGGING_FORMAT = (
-    "[%(asctime)s] %(levelname)-8s {%(name)s:%(lineno)d} - %(message)s"
+    "[%(asctime)s] %(levelname)-8s {%(filename)s:%(lineno)d} - %(message)s"
 )
 DATAHUB_PACKAGES = [
     "datahub",