diff --git a/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py b/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py
index 00af8cafe17..9da1694ee4a 100644
--- a/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py
+++ b/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py
@@ -122,11 +122,21 @@ def _(elements, compiler, **kwargs):
 
 
 @compiles(DatetimeAddFn, Dialects.BigQuery)
-def _(elements, compiler, **kwargs):
+def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
     """generic date and datetime function"""
-    interval, interval_unit = [
-        compiler.process(element, **kwargs) for element in elements.clauses
-    ]
+    interval = elements.clauses.clauses[0].value
+    interval_unit = elements.clauses.clauses[1].text
+
+    # bigquery does not support month or year interval for timestamp
+    # so fail fast with a message pointing the user to a supported unit.
+    if interval_unit.lower() in {"month", "year"}:
+        raise ValueError(
+            "Bigquery does not support `month` or `year` interval for table partitioned on timestamp "
+            "field types. You can set the `interval_unit` to `day` directly from OpenMetadata UI. "
+            # pylint: disable=line-too-long
+            "Visit https://docs.open-metadata.org/connectors/ingestion/workflows/profiler#4-updating-profiler-setting-at-the-table-level for more details.",
+        )
+
     return f"CAST(CURRENT_TIMESTAMP - interval {interval} {interval_unit} AS TIMESTAMP)"
 
 
@@ -149,7 +159,6 @@ def _(elements, compiler, **kwargs):
     return f"(NOW() - interval {interval} {interval_unit})"
 
 
-@compiles(DatetimeAddFn, Dialects.Redshift)
 @compiles(DatetimeAddFn, Dialects.AzureSQL)
 @compiles(DatetimeAddFn, Dialects.MSSQL)
 @compiles(DatetimeAddFn, Dialects.Snowflake)
@@ -160,6 +169,17 @@ def _(elements, compiler, **kwargs):
     return f"DATEADD({interval_unit}, -{interval}, {func.current_timestamp()})"
 
 
+@compiles(DatetimeAddFn, Dialects.Redshift)
+def _(elements, compiler, **kwargs):
+    """Redshift variant: DATEADD applied to the current timestamp cast with `::timestamp`"""
+    interval, interval_unit = [
+        compiler.process(element, **kwargs) for element in elements.clauses
+    ]
+    return (
+        f"DATEADD({interval_unit}, -{interval}, {func.current_timestamp()}::timestamp)"
+    )
+
+
 @compiles(DatetimeAddFn, Dialects.SQLite)
 def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
     interval = elements.clauses.clauses[0].value
diff --git a/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md b/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md
index 5d10d1685b1..7a1fdb3b953 100644
--- a/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md
+++ b/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md
@@ -102,6 +102,8 @@ Use a query to sample data for the profiler. This will overwrite any profle samp
 **Enable Column Profile**
 This setting allows user to exclude or include specific columns and metrics from the profiler.
 
+*Note: for Google BigQuery tables partitioned on a timestamp/datetime column, `MONTH` and `YEAR` intervals are not supported. You will need to set the `Interval Unit` to `DAY` or `HOUR`.*
+
 **Enable Partition**
 If your table includes a timestamp, date or datetime column type you can enable partitionning. If enabled, the profiler will fetch the last `` `` of data to profile the table. Note that if "profile sample" is set, this configuration will be used against the partitioned data and not the whole table.
 - `Column Name`: this is the name of the column that will be used as the partition field