mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-03 22:23:37 +00:00
fix(ingest/snowflake): fix sample fraction for very large tables (#8988)
This commit is contained in:
parent
932fbcddbf
commit
1b06c6a30c
@ -86,7 +86,7 @@ class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin):
|
|||||||
# Fixed-size sampling can be slower than equivalent fraction-based sampling
|
# Fixed-size sampling can be slower than equivalent fraction-based sampling
|
||||||
# as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations
|
# as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations
|
||||||
sample_pc = 100 * self.config.profiling.sample_size / table.rows_count
|
sample_pc = 100 * self.config.profiling.sample_size / table.rows_count
|
||||||
custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})'
|
custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})'
|
||||||
return {
|
return {
|
||||||
**super().get_batch_kwargs(table, schema_name, db_name),
|
**super().get_batch_kwargs(table, schema_name, db_name),
|
||||||
# Lowercase/Mixedcase table names in Snowflake do not work by default.
|
# Lowercase/Mixedcase table names in Snowflake do not work by default.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user