From 1b06c6a30c8d6c0ee57f75f75ee6a436aa6c13a7 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 12 Oct 2023 00:31:42 +0530 Subject: [PATCH] fix(ingest/snowflake): fix sample fraction for very large tables (#8988) --- .../datahub/ingestion/source/snowflake/snowflake_profiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 24275dcdff..8e18d85d6f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -86,7 +86,7 @@ class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin): # Fixed-size sampling can be slower than equivalent fraction-based sampling # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations sample_pc = 100 * self.config.profiling.sample_size / table.rows_count - custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})' return { **super().get_batch_kwargs(table, schema_name, db_name), # Lowercase/Mixedcase table names in Snowflake do not work by default.