Fixes issue #10082 -- Unsupported field types for bigquery (#10091)

* fix(profile): Raise nicer error for BQ unsupported interval type for timestamp
* fix(profiler): run checkstyle
2025-12-25 06:28:22 +00:00 · 2023-02-02 19:00:56 +01:00 · 2023-02-02 19:00:56 +01:00 · 0a77f8a3f2
commit 0a77f8a3f2
parent 61ab6ec74c
2 changed files with 26 additions and 5 deletions
--- a/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py
+++ b/ingestion/src/metadata/orm_profiler/orm/functions/datetime.py
@ -122,11 +122,21 @@ def _(elements, compiler, **kwargs):


@compiles(DatetimeAddFn, Dialects.BigQuery)
-def _(elements, compiler, **kwargs):
+def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """generic date and datetime function"""
-    interval, interval_unit = [
-        compiler.process(element, **kwargs) for element in elements.clauses
-    ]
+    interval = elements.clauses.clauses[0].value
+    interval_unit = elements.clauses.clauses[1].text
+
+    # BigQuery does not support month or year intervals for timestamp columns,
+    # so raise an error telling the user to switch the interval unit to day.
+    if interval_unit.lower() in {"month", "year"}:
+        raise ValueError(
+            "Bigquery does not support `month` or `year` interval for table partitioned on timestamp "
+            "field types. You can set the `interval_unit` to `day` directly from OpenMetadata UI. "
+            # pylint: disable=line-too-long
+            "Visit https://docs.open-metadata.org/connectors/ingestion/workflows/profiler#4-updating-profiler-setting-at-the-table-level for more details.",
+        )
+
    return f"CAST(CURRENT_TIMESTAMP - interval {interval} {interval_unit} AS TIMESTAMP)"


@ -149,7 +159,6 @@ def _(elements, compiler, **kwargs):
    return f"(NOW() - interval {interval} {interval_unit})"


-@compiles(DatetimeAddFn, Dialects.Redshift)
@compiles(DatetimeAddFn, Dialects.AzureSQL)
@compiles(DatetimeAddFn, Dialects.MSSQL)
@compiles(DatetimeAddFn, Dialects.Snowflake)
@ -160,6 +169,16 @@ def _(elements, compiler, **kwargs):
    return f"DATEADD({interval_unit}, -{interval}, {func.current_timestamp()})"


+@compiles(DatetimeAddFn, Dialects.Redshift)
+def _(elements, compiler, **kwargs):
+    interval, interval_unit = [
+        compiler.process(element, **kwargs) for element in elements.clauses
+    ]
+    return (
+        f"DATEADD({interval_unit}, -{interval}, {func.current_timestamp()}::timestamp)"
+    )
+
+
@compiles(DatetimeAddFn, Dialects.SQLite)
 def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    interval = elements.clauses.clauses[0].value
--- a/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md
+++ b/openmetadata-docs/content/connectors/ingestion/workflows/profiler/index.md
@ -102,6 +102,8 @@ Use a query to sample data for the profiler. This will overwrite any profle samp
 **Enable Column Profile**
 This setting allows users to exclude or include specific columns and metrics from the profiler.

+*Note: for Google BigQuery tables partitioned on a timestamp/datetime column, the month and year interval units are not supported. You will need to set the `Interval Unit` to `DAY` or `HOUR`.*
+
 **Enable Partition**
 If your table includes a timestamp, date or datetime column, you can enable partitioning. If enabled, the profiler will fetch the last `<interval>` `<interval unit>` of data to profile the table. Note that if "profile sample" is set, this configuration will be used against the partitioned data and not the whole table.
 - `Column Name`: this is the name of the column that will be used as the partition field