mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-07 15:04:01 +00:00
fix(ingest): catch errors when profiling for sample values (#6194)
This commit is contained in:
parent
43f7b568e3
commit
3fc6364979
@ -460,16 +460,28 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|||||||
def _get_dataset_column_sample_values(
    self, column_profile: DatasetFieldProfileClass, column: str
) -> None:
    """Populate ``column_profile.sampleValues`` for ``column`` on a best-effort basis.

    Does nothing when ``self.config.include_field_sample_values`` is falsy.
    Otherwise the sample values are taken from the ``partial_unexpected_list``
    of an ``expect_column_values_to_be_in_set`` call made with an empty value
    set, and stored as strings.

    Any exception raised while collecting the samples is swallowed so that
    profiling can continue: it is logged at debug level (with traceback) and
    recorded via ``self.report.report_warning``.

    Args:
        column_profile: Profile object whose ``sampleValues`` is assigned.
        column: Name of the column to sample.
    """
    if not self.config.include_field_sample_values:
        return

    try:
        # TODO do this without GE
        # NOTE(review): presumably required so the expectation below is
        # evaluated immediately and returns a result - confirm against GE docs.
        self.dataset.set_config_value("interactive_evaluation", True)

        # With an empty allowed set, the values reported back as "unexpected"
        # are reused here as the column's sample values.
        res = self.dataset.expect_column_values_to_be_in_set(
            column, [], result_format="SUMMARY"
        ).result

        column_profile.sampleValues = [
            str(v) for v in res["partial_unexpected_list"]
        ]
    except Exception as e:
        # Deliberately broad: a failure to sample one column must not abort
        # the rest of the profile. exc_info keeps the traceback available,
        # since the reported warning below carries no cause.
        logger.debug(
            f"Caught exception while attempting to get sample values for column {column}. {e}",
            exc_info=True,
        )
        self.report.report_warning(
            "Profiling - Unable to get column sample values",
            f"{self.dataset_name}.{column}",
        )
|
|
||||||
def generate_dataset_profile( # noqa: C901 (complexity)
|
def generate_dataset_profile( # noqa: C901 (complexity)
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user