mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 12:16:10 +00:00
fix(ingest/bigquery): use correct row count in null count profiling calculation (#9123)
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
Co-authored-by: Aseem Bansal <asmbansal2@gmail.com>
This commit is contained in:
parent
bab9d1c931
commit
12b41713b4
@ -659,7 +659,16 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|||||||
self.query_combiner.flush()
|
self.query_combiner.flush()
|
||||||
|
|
||||||
assert profile.rowCount is not None
|
assert profile.rowCount is not None
|
||||||
row_count: int = profile.rowCount
|
row_count: int # used for null counts calculation
|
||||||
|
if profile.partitionSpec and "SAMPLE" in profile.partitionSpec.partition:
|
||||||
|
# We can alternatively use `self._get_dataset_rows(profile)` to get
|
||||||
|
# exact count of rows in sample, as actual rows involved in sample
|
||||||
|
# may be slightly different (more or less) than configured `sample_size`.
|
||||||
|
# However not doing so to start with, as that adds another query overhead
|
||||||
|
# plus approximate metrics should work for sampling based profiling.
|
||||||
|
row_count = self.config.sample_size
|
||||||
|
else:
|
||||||
|
row_count = profile.rowCount
|
||||||
|
|
||||||
for column_spec in columns_profiling_queue:
|
for column_spec in columns_profiling_queue:
|
||||||
column = column_spec.column
|
column = column_spec.column
|
||||||
@ -811,7 +820,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|||||||
sample_pc = 100 * self.config.sample_size / profile.rowCount
|
sample_pc = 100 * self.config.sample_size / profile.rowCount
|
||||||
sql = (
|
sql = (
|
||||||
f"SELECT * FROM {str(self.dataset._table)} "
|
f"SELECT * FROM {str(self.dataset._table)} "
|
||||||
+ f"TABLESAMPLE SYSTEM ({sample_pc:.3f} percent)"
|
+ f"TABLESAMPLE SYSTEM ({sample_pc:.8f} percent)"
|
||||||
)
|
)
|
||||||
temp_table_name = create_bigquery_temp_table(
|
temp_table_name = create_bigquery_temp_table(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user