Mirror of https://github.com/datahub-project/datahub.git (synced 2025-07-30 21:10:07 +00:00)
fix(ingestion): add logging, make job more resilient to errors (#4331)
This commit is contained in:
parent 2903646a15
commit beb51ebf59
.gitignore (vendored)
@@ -60,3 +60,4 @@ metadata-ingestion/generated/**
 
 # docs
 docs/generated/
+tmp*
@@ -17,7 +17,7 @@ create or replace role datahub_role;
 
 // Grant privileges to use and select from your target warehouses / dbs / schemas / tables
 grant operate, usage on warehouse <your-warehouse> to role datahub_role;
-grant usage on <your-database> to role datahub_role;
+grant usage on DATABASE <your-database> to role datahub_role;
 grant usage on all schemas in database <your-database> to role datahub_role;
 grant select on all tables in database <your-database> to role datahub_role;
 grant select on all external tables in database <your-database> to role datahub_role;
@@ -27,6 +27,9 @@ grant select on all views in database <your-database> to role datahub_role;
 grant usage on future schemas in database "<your-database>" to role datahub_role;
 grant select on future tables in database "<your-database>" to role datahub_role;
 
+// Grant privileges on snowflake default database - needed for lineage
+grant imported privileges on DATABASE snowflake to role datahub_role;
+
 // Create a new DataHub user and assign the DataHub role to it
 create user datahub_user display_name = 'DataHub' password='' default_role = datahub_role default_warehouse = '<your-warehouse>';
 
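Before running ingestion, it can help to confirm that the role actually holds these grants. A minimal sketch (not part of this commit) that checks them from Python with SQLAlchemy; it assumes the snowflake-sqlalchemy driver is installed, uses placeholder credentials, and mirrors the query the new inspect_role_grants() further down runs:

# Assumes the snowflake-sqlalchemy package; account and credentials are placeholders.
from sqlalchemy import create_engine, text

engine = create_engine("snowflake://datahub_user:<password>@<account-id>")
for row in engine.execute(text("show grants to role datahub_role")):
    # Same columns the connector reads below: privilege, granted_on, name
    print(row["privilege"], row["granted_on"], row["name"])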
@@ -136,12 +136,17 @@ class DatahubRestEmitter:
         self._session.mount("http://", adapter)
         self._session.mount("https://", adapter)
 
-    def test_connection(self) -> None:
+    def test_connection(self) -> str:
         response = self._session.get(f"{self._gms_server}/config")
         if response.status_code == 200:
             config: dict = response.json()
             if config.get("noCode") == "true":
-                return
+                return (
+                    config.get("versions", {})
+                    .get("linkedin/datahub", {})
+                    .get("version", "")
+                )
 
         else:
             # Looks like we either connected to an old GMS or to some other service. Let's see if we can determine which before raising an error
             # A common misconfiguration is connecting to datahub-frontend so we special-case this check
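With this change a successful connection check returns the server's version string instead of None, so callers can record which GMS they talked to. A hedged usage sketch (assumes a GMS reachable at the default local address):

from datahub.emitter.rest_emitter import DatahubRestEmitter

emitter = DatahubRestEmitter("http://localhost:8080")
# Still raises on misconfiguration (e.g. pointing at datahub-frontend);
# on success it now returns the GMS version, or "" if it cannot be determined.
gms_version = emitter.test_connection()
print(f"Connected to GMS {gms_version or '(unknown version)'}")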
@@ -14,6 +14,7 @@ class SourceReport(Report):
 
     warnings: Dict[str, List[str]] = field(default_factory=dict)
     failures: Dict[str, List[str]] = field(default_factory=dict)
+    cli_version: str = ""
 
     def report_workunit(self, wu: WorkUnit) -> None:
         self.workunits_produced += 1
@@ -8,6 +8,7 @@ from typing import Any, Dict, Iterable, List, Optional
 import click
 from pydantic import validator
 
+import datahub
 from datahub.configuration.common import (
     ConfigModel,
     DynamicTypedConfig,
@@ -178,6 +179,7 @@ class Pipeline:
 
         callback = LoggingCallback()
         extractor: Extractor = self.extractor_class()
+        self.source.get_report().cli_version = datahub.nice_version_name()
        for wu in itertools.islice(
            self.source.get_workunits(), 10 if self.preview_mode else None
        ):
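The pipeline now stamps the CLI version onto the source report before extraction starts, so every ingestion report records which client produced it. A small sketch of the same call in isolation (import paths follow the hunks above; the printed format is illustrative):

import datahub
from datahub.ingestion.api.source import SourceReport

report = SourceReport()
report.cli_version = datahub.nice_version_name()
print(report.cli_version)  # a human-readable package/version string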
@@ -24,17 +24,22 @@ class DatahubRestSinkConfig(DatahubClientConfig):
     pass
 
 
+@dataclass
+class DataHubRestSinkReport(SinkReport):
+    gms_version: str = ""
+
+
 @dataclass
 class DatahubRestSink(Sink):
     config: DatahubRestSinkConfig
     emitter: DatahubRestEmitter
-    report: SinkReport
+    report: DataHubRestSinkReport
     treat_errors_as_warnings: bool = False
 
     def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
         super().__init__(ctx)
         self.config = config
-        self.report = SinkReport()
+        self.report = DataHubRestSinkReport()
         self.emitter = DatahubRestEmitter(
             self.config.server,
             self.config.token,
@@ -45,7 +50,7 @@ class DatahubRestSink(Sink):
             extra_headers=self.config.extra_headers,
             ca_certificate_path=self.config.ca_certificate_path,
         )
-        self.emitter.test_connection()
+        self.report.gms_version = self.emitter.test_connection()
         self.executor = concurrent.futures.ThreadPoolExecutor(
             max_workers=self.config.max_threads
         )
@@ -364,9 +364,10 @@ class BigQuerySource(SQLAlchemySource):
             )
             self.lineage_metadata = self._create_lineage_map(parsed_entries)
         except Exception as e:
-            logger.error(
-                "Error computing lineage information using GCP logs.",
-                e,
+            self.error(
+                logger,
+                "lineage-gcp-logs",
+                f"Error was {e}",
             )
 
     def _compute_bigquery_lineage_via_exported_bigquery_audit_metadata(
@@ -385,9 +386,10 @@ class BigQuerySource(SQLAlchemySource):
             )
             self.lineage_metadata = self._create_lineage_map(parsed_entries)
         except Exception as e:
-            logger.error(
-                "Error computing lineage information using exported GCP audit logs.",
-                e,
+            self.error(
+                logger,
+                "lineage-exported-gcp-audit-logs",
+                f"Error: {e}",
             )
 
     def _make_bigquery_client(
@@ -66,6 +66,10 @@ class SnowflakeReport(SQLSourceReport):
     num_view_to_table_edges_scanned: int = 0
     num_external_table_edges_scanned: int = 0
     upstream_lineage: Dict[str, List[str]] = field(default_factory=dict)
+    # https://community.snowflake.com/s/topic/0TO0Z000000Unu5WAC/releases
+    saas_version: str = ""
+    role: str = ""
+    role_grants: List[str] = field(default_factory=list)
 
 
 class BaseSnowflakeConfig(BaseTimeWindowConfig):
@@ -203,27 +207,59 @@ class SnowflakeSource(SQLAlchemySource):
         config = SnowflakeConfig.parse_obj(config_dict)
         return cls(config, ctx)
 
-    def get_inspectors(self) -> Iterable[Inspector]:
-        url = self.config.get_sql_alchemy_url(database=None)
+    def get_metadata_engine(
+        self, database: Optional[str] = None
+    ) -> sqlalchemy.engine.Engine:
+        url = self.config.get_sql_alchemy_url(database=database)
         logger.debug(f"sql_alchemy_url={url}")
-        db_listing_engine = create_engine(
+        return create_engine(
             url,
             connect_args=self.config.get_sql_alchemy_connect_args(),
             **self.config.options,
         )
 
+    def inspect_version(self) -> Any:
+        db_engine = self.get_metadata_engine()
+        logger.info("Checking current version")
+        for db_row in db_engine.execute("select CURRENT_VERSION()"):
+            self.report.saas_version = db_row[0]
+
+    def inspect_role_grants(self) -> Any:
+        db_engine = self.get_metadata_engine()
+        cur_role = None
+        if self.config.role is None:
+            for db_row in db_engine.execute("select CURRENT_ROLE()"):
+                cur_role = db_row[0]
+        else:
+            cur_role = self.config.role
+
+        if cur_role is None:
+            return
+
+        self.report.role = cur_role
+        logger.info(f"Current role is {cur_role}")
+        if cur_role.lower() == "accountadmin":
+            return
+
+        logger.info(f"Checking grants for role {cur_role}")
+        for db_row in db_engine.execute(text(f"show grants to role {cur_role}")):
+            privilege = db_row["privilege"]
+            granted_on = db_row["granted_on"]
+            name = db_row["name"]
+            self.report.role_grants.append(
+                f"{privilege} granted on {granted_on} {name}"
+            )
+
+    def get_inspectors(self) -> Iterable[Inspector]:
+        db_listing_engine = self.get_metadata_engine(database=None)
+
         for db_row in db_listing_engine.execute(text("SHOW DATABASES")):
             db = db_row.name
             if self.config.database_pattern.allowed(db):
                 # We create a separate engine for each database in order to ensure that
                 # they are isolated from each other.
                 self.current_database = db
-                engine = create_engine(
-                    self.config.get_sql_alchemy_url(database=db),
-                    connect_args=self.config.get_sql_alchemy_connect_args(),
-                    **self.config.options,
-                )
+                engine = self.get_metadata_engine(database=db)
 
                 with engine.connect() as conn:
                     inspector = inspect(conn)
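The refactor above collapses several copies of create_engine(...) into one get_metadata_engine() factory, so the URL, connect_args, and options can no longer drift apart between the database-listing engine and the per-database engines. A self-contained sketch of the same pattern (hypothetical ToySource and URL template; SQLAlchemy 1.x execute style, as used by the connector):

from typing import List, Optional

import sqlalchemy
from sqlalchemy import create_engine


class ToySource:
    """Hypothetical source illustrating a single engine factory."""

    def __init__(self, url_template: str) -> None:
        # e.g. "snowflake://user:pw@account/{db}"
        self.url_template = url_template

    def get_metadata_engine(
        self, database: Optional[str] = None
    ) -> sqlalchemy.engine.Engine:
        # One place to build engines: the URL shape stays consistent everywhere.
        url = self.url_template.format(db=database or "")
        return create_engine(url)

    def list_databases(self) -> List[str]:
        engine = self.get_metadata_engine()  # account-level, no database bound
        return [row[0] for row in engine.execute("SHOW DATABASES")]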
@@ -273,9 +309,11 @@ WHERE
                     f"Upstream->View: Lineage[View(Down)={view_name}]:Upstream={view_upstream}"
                 )
         except Exception as e:
-            logger.warning(
-                f"Extracting the upstream view lineage from Snowflake failed."
-                f"Please check your permissions. Continuing...\nError was {e}."
+            self.warn(
+                logger,
+                "view_upstream_lineage",
+                "Extracting the upstream view lineage from Snowflake failed."
+                + f"Please check your permissions. Continuing...\nError was {e}.",
             )
         logger.info(f"A total of {num_edges} View upstream edges found.")
         self.report.num_table_to_view_edges_scanned = num_edges
@@ -387,9 +425,11 @@ WHERE
                     num_edges += 1
 
         except Exception as e:
-            logger.warning(
+            self.warn(
+                logger,
+                "view_downstream_lineage",
                 f"Extracting the view lineage from Snowflake failed."
-                f"Please check your permissions. Continuing...\nError was {e}."
+                f"Please check your permissions. Continuing...\nError was {e}.",
             )
         logger.info(
             f"Found {num_edges} View->Table edges. Removed {num_false_edges} false Table->Table edges."
@@ -399,16 +439,12 @@ WHERE
     def _populate_view_lineage(self) -> None:
         if not self.config.include_view_lineage:
             return
-        url = self.config.get_sql_alchemy_url()
-        logger.debug(f"sql_alchemy_url={url}")
-        engine = create_engine(url, **self.config.options)
+        engine = self.get_metadata_engine(database=None)
         self._populate_view_upstream_lineage(engine)
         self._populate_view_downstream_lineage(engine)
 
     def _populate_external_lineage(self) -> None:
-        url = self.config.get_sql_alchemy_url()
-        logger.debug(f"sql_alchemy_url={url}")
-        engine = create_engine(url, **self.config.options)
+        engine = self.get_metadata_engine(database=None)
         # Handles the case where a table is populated from an external location via copy.
         # Eg: copy into category_english from 's3://acryl-snow-demo-olist/olist_raw_data/category_english'credentials=(aws_key_id='...' aws_secret_key='...') pattern='.*.csv';
         query: str = """
@@ -464,21 +500,17 @@ WHERE
                 )
                 num_edges += 1
         except Exception as e:
-            logger.warning(
+            self.warn(
+                logger,
+                "external_lineage",
                 f"Populating external table lineage from Snowflake failed."
-                f"Please check your premissions. Continuing...\nError was {e}."
+                f"Please check your premissions. Continuing...\nError was {e}.",
             )
         logger.info(f"Found {num_edges} external lineage edges.")
         self.report.num_external_table_edges_scanned = num_edges
 
     def _populate_lineage(self) -> None:
-        url = self.config.get_sql_alchemy_url()
-        logger.debug(f"sql_alchemy_url={url}")
-        engine = create_engine(
-            url,
-            connect_args=self.config.get_sql_alchemy_connect_args(),
-            **self.config.options,
-        )
+        engine = self.get_metadata_engine(database=None)
         query: str = """
 WITH table_lineage_history AS (
     SELECT
@@ -521,9 +553,11 @@ QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name, upstream_table_na
                     f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}"
                 )
         except Exception as e:
-            logger.warning(
+            self.warn(
+                logger,
+                "lineage",
                 f"Extracting lineage from Snowflake failed."
-                f"Please check your premissions. Continuing...\nError was {e}."
+                f"Please check your premissions. Continuing...\nError was {e}.",
             )
         logger.info(
             f"A total of {num_edges} Table->Table edges found"
@@ -611,6 +645,13 @@ QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name, upstream_table_na
 
     # Override the base class method.
     def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
+        try:
+            self.inspect_version()
+        except Exception as e:
+            self.report.report_failure("version", f"Error: {e}")
+            return
+
+        self.inspect_role_grants()
         for wu in super().get_workunits():
             if (
                 self.config.include_table_lineage
@@ -441,6 +441,14 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
             },
         )
 
+    def warn(self, log: logging.Logger, key: str, reason: str) -> Any:
+        self.report.report_warning(key, reason)
+        log.warning(reason)
+
+    def error(self, log: logging.Logger, key: str, reason: str) -> Any:
+        self.report.report_failure(key, reason)
+        log.error(reason)
+
     def get_inspectors(self) -> Iterable[Inspector]:
         # This method can be overridden in the case that you want to dynamically
         # run on multiple databases.
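These two helpers turn "write it to the log and record it in the report" into a single call, which is the pattern the Snowflake and BigQuery hunks above switch to. A self-contained sketch of the idea with toy stand-ins (not the DataHub classes):

import logging
from collections import defaultdict
from typing import Any, DefaultDict, List

logger = logging.getLogger(__name__)


class ToyReport:
    """Stand-in for SourceReport: warnings/failures keyed by a short tag."""

    def __init__(self) -> None:
        self.warnings: DefaultDict[str, List[str]] = defaultdict(list)
        self.failures: DefaultDict[str, List[str]] = defaultdict(list)

    def report_warning(self, key: str, reason: str) -> None:
        self.warnings[key].append(reason)

    def report_failure(self, key: str, reason: str) -> None:
        self.failures[key].append(reason)


class ToySource:
    def __init__(self) -> None:
        self.report = ToyReport()

    # Same shape as the new SQLAlchemySource.warn/error helpers above:
    def warn(self, log: logging.Logger, key: str, reason: str) -> Any:
        self.report.report_warning(key, reason)
        log.warning(reason)

    def error(self, log: logging.Logger, key: str, reason: str) -> Any:
        self.report.report_failure(key, reason)
        log.error(reason)


source = ToySource()
source.warn(logger, "lineage", "Extracting lineage failed; continuing without it.")
assert source.report.warnings["lineage"]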
@@ -767,6 +775,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
         sql_config: SQLAlchemyConfig,
     ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]:
         tables_seen: Set[str] = set()
+        try:
             for table in inspector.get_table_names(schema):
                 schema, table = self.standardize_schema_table_names(
                     schema=schema, entity=table
@@ -797,7 +806,11 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
                 logger.warning(
                     f"Unable to ingest {schema}.{table} due to an exception.\n {traceback.format_exc()}"
                 )
-                self.report.report_warning(f"{schema}.{table}", f"Ingestion error: {e}")
+                self.report.report_warning(
+                    f"{schema}.{table}", f"Ingestion error: {e}"
+                )
+        except Exception as e:
+            self.report.report_failure(f"{schema}", f"Tables error: {e}")
 
     def _process_table(
         self,
@@ -979,6 +992,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
         schema: str,
         sql_config: SQLAlchemyConfig,
     ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]:
+        try:
             for view in inspector.get_view_names(schema):
                 schema, view = self.standardize_schema_table_names(
                     schema=schema, entity=view
@@ -1006,7 +1020,11 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
                 logger.warning(
                     f"Unable to ingest view {schema}.{view} due to an exception.\n {traceback.format_exc()}"
                 )
-                self.report.report_warning(f"{schema}.{view}", f"Ingestion error: {e}")
+                self.report.report_warning(
+                    f"{schema}.{view}", f"Ingestion error: {e}"
+                )
+        except Exception as e:
+            self.report.report_failure(f"{schema}", f"Views error: {e}")
 
     def _process_view(
         self,
@@ -235,11 +235,13 @@ class SnowflakeUsageSource(StatefulIngestionSourceBase):
             and int(self.config.start_time.timestamp() * 1000)
             <= last_successful_pipeline_run_end_time_millis
         ):
-            logger.info(
+            warn_msg = (
                 f"Skippig this run, since the last run's bucket duration end: "
                 f"{datetime.fromtimestamp(last_successful_pipeline_run_end_time_millis/1000, tz=timezone.utc)}"
                 f" is later than the current start_time: {self.config.start_time}"
             )
+            logger.warning(warn_msg)
+            self.report.report_warning("skip-run", warn_msg)
             return True
         return False
 
@@ -395,7 +397,10 @@ class SnowflakeUsageSource(StatefulIngestionSourceBase):
 
             if not event_dict["email"] and self.config.email_domain:
                 if not event_dict["user_name"]:
-                    logging.warning(
+                    self.report.report_warning(
+                        "user-name-miss", f"Missing in {event_dict}"
+                    )
+                    logger.warning(
                         f"The user_name is missing from {event_dict}. Skipping ...."
                     )
                     continue