Mirror of https://github.com/datahub-project/datahub.git (synced 2026-01-03 05:16:20 +00:00)
feat(ingest): great-expectations - add more logs (#4832)
This commit is contained in:
parent 3775e79d47
commit b2c82dcaa3
@@ -131,7 +131,7 @@ class DataHubValidationAction(ValidationAction):
             datasets = self.get_dataset_partitions(batch_identifier, data_asset)

             if len(datasets) == 0 or datasets[0]["dataset_urn"] is None:
-                logger.info("Metadata not sent to datahub. No datasets found.")
+                warn("Metadata not sent to datahub. No datasets found.")
                 return {"datahub_notification_result": "none required"}

             # Returns assertion info and assertion results
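Note: the hunk above swaps logger.info for warn in the "no datasets" branch. The practical difference is visibility: an INFO record is dropped unless the user has configured logging, while a warning is written to stderr by default. A minimal illustration, assuming warn refers to warnings.warn imported elsewhere in this module (the import is outside this diff):

    import logging
    import warnings

    logger = logging.getLogger(__name__)

    # With no logging configured, the default WARNING-level root logger drops
    # INFO records, so this message never reaches the user.
    logger.info("Metadata not sent to datahub. No datasets found.")

    # warnings.warn prints to stderr by default, so the message is still visible.
    warnings.warn("Metadata not sent to datahub. No datasets found.")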
@@ -143,7 +143,15 @@ class DataHubValidationAction(ValidationAction):
                 datasets,
             )

+            logger.info("Sending metadata to datahub ...")
+            logger.info("Dataset URN - {urn}".format(urn=datasets[0]["dataset_urn"]))
+
             for assertion in assertions:
+
+                logger.info(
+                    "Assertion URN - {urn}".format(urn=assertion["assertionUrn"])
+                )
+
                 # Construct a MetadataChangeProposalWrapper object.
                 assertion_info_mcp = MetadataChangeProposalWrapper(
                     entityType="assertion",
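The new progress messages above are emitted at INFO level through the module's logger, so they only show up when logging is configured to that level. An illustrative setup (not part of this change) that surfaces them while a checkpoint runs:

    import logging

    # Raise the root level to INFO so "Sending metadata to datahub ..." and the
    # URN messages are printed; handler and format here are only examples.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    )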
@@ -175,7 +183,7 @@ class DataHubValidationAction(ValidationAction):
                 # Emit Result! (timseries aspect)
                 emitter.emit_mcp(dataset_assertionResult_mcp)

+            logger.info("Metadata sent to datahub.")
             result = "DataHub notification succeeded"
         except Exception as e:
             result = "DataHub notification failed"
@@ -245,6 +253,11 @@ class DataHubValidationAction(ValidationAction):
                     }
                 )
             )
+            logger.debug(
+                "GE expectation_suite_name - {name}, expectation_type - {type}, Assertion URN - {urn}".format(
+                    name=expectation_suite_name, type=expectation_type, urn=assertionUrn
+                )
+            )
             assertionInfo: AssertionInfo = self.get_assertion_info(
                 expectation_type,
                 kwargs,
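This message is logged at DEBUG level, so it stays hidden at the usual INFO setting. One way to enable it selectively, assuming the action's logger is created with logging.getLogger(__name__) under the datahub package (an assumption, not shown in this diff):

    import logging

    logging.basicConfig(level=logging.INFO)
    # Only the datahub loggers become verbose; everything else stays at INFO.
    logging.getLogger("datahub").setLevel(logging.DEBUG)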
@@ -541,6 +554,8 @@ class DataHubValidationAction(ValidationAction):
     def get_dataset_partitions(self, batch_identifier, data_asset):
         dataset_partitions = []

+        logger.debug("Finding datasets being validated")
+
         # for now, we support only v3-api and sqlalchemy execution engine
         if isinstance(data_asset, Validator) and isinstance(
             data_asset.execution_engine, SqlAlchemyExecutionEngine
@@ -645,13 +660,16 @@ class DataHubValidationAction(ValidationAction):
                 )
             else:
                 warn(
-                    f"DataHubValidationAction does not recognize this GE batch spec type- {type(ge_batch_spec)}."
+                    "DataHubValidationAction does not recognize this GE batch spec type- {batch_spec_type}.".format(
+                        batch_spec_type=type(ge_batch_spec)
+                    )
                 )
         else:
             # TODO - v2-spec - SqlAlchemyDataset support
             warn(
-                f"DataHubValidationAction does not recognize this GE data asset type - {type(data_asset)}. \
-                    This is either using v2-api or execution engine other than sqlalchemy."
+                "DataHubValidationAction does not recognize this GE data asset type - {asset_type}. This is either using v2-api or execution engine other than sqlalchemy.".format(
+                    asset_type=type(data_asset)
+                )
             )

         return dataset_partitions
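The rewrites in this hunk and the two below drop f-strings that were split with a trailing backslash. Inside a string literal a trailing backslash continues the literal onto the next source line, so the continuation line's indentation ends up inside the warning text; a single logical string plus .format() avoids that. A standalone sketch of the effect (the value is a stand-in):

    ge_batch_spec = object()  # stand-in value, for illustration only

    # Old pattern: the backslash keeps the literal going, so the indentation of
    # the continuation line becomes part of the message.
    old_msg = f"does not recognize this GE batch spec type- {type(ge_batch_spec)}. \
        second half of the message."

    # New pattern: one logical line plus .format(), no stray run of spaces.
    new_msg = "does not recognize this GE batch spec type- {batch_spec_type}.".format(
        batch_spec_type=type(ge_batch_spec)
    )

    print(repr(old_msg))  # note the embedded spaces before "second half"
    print(repr(new_msg))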
@@ -695,8 +713,9 @@ def make_dataset_urn_from_sqlalchemy_uri(
     elif data_platform in ["trino", "snowflake"]:
         if schema_name is None or url_instance.database is None:
             warn(
-                f"DataHubValidationAction failed to locate schema name and/or database name \
-                    for {data_platform}."
+                "DataHubValidationAction failed to locate schema name and/or database name for {data_platform}.".format(
+                    data_platform=data_platform
+                )
             )
             return None
         # If data platform is snowflake, we artificially lowercase the Database name.
@@ -711,8 +730,9 @@ def make_dataset_urn_from_sqlalchemy_uri(
     elif data_platform == "bigquery":
         if url_instance.host is None or url_instance.database is None:
             warn(
-                f"DataHubValidationAction failed to locate host and/or database name for \
-                    {data_platform}. "
+                "DataHubValidationAction failed to locate host and/or database name for {data_platform}. ".format(
+                    data_platform=data_platform
+                )
             )
             return None
         schema_name = "{}.{}".format(url_instance.host, url_instance.database)
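For context on the BigQuery branch: url_instance is the parsed SQLAlchemy URL, where the host part typically carries the GCP project and the database the dataset, and the schema name is then assembled as in the last context line above. A quick sketch with made-up names (make_url lives in sqlalchemy.engine in recent SQLAlchemy versions):

    from sqlalchemy.engine import make_url

    # Illustrative URI; project and dataset names are invented.
    url_instance = make_url("bigquery://my-gcp-project/my_dataset")

    if url_instance.host is None or url_instance.database is None:
        raise ValueError("failed to locate host and/or database name")

    schema_name = "{}.{}".format(url_instance.host, url_instance.database)
    print(schema_name)  # my-gcp-project.my_dataset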