Fixes issue #10082 -- Unsupported field types for bigquery (#10091)

* fix(profile): Raise nicer error for BQ unsupported interval type for timestamp

* fix(profiler): run checkstyle
This commit is contained in:
Teddy 2023-02-02 19:00:56 +01:00 committed by GitHub
parent 61ab6ec74c
commit 0a77f8a3f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 5 deletions

View File

@ -122,11 +122,21 @@ def _(elements, compiler, **kwargs):
@compiles(DatetimeAddFn, Dialects.BigQuery)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Compile ``DatetimeAddFn`` for the BigQuery dialect.

    The interval is read from the ``value`` of the first clause and the
    interval unit from the ``text`` of the second clause; ``compiler`` and
    ``kwargs`` are not used.

    Returns:
        A SQL string subtracting the interval from ``CURRENT_TIMESTAMP``,
        cast to ``TIMESTAMP``.

    Raises:
        ValueError: if the interval unit is ``month`` or ``year``
            (case-insensitive).
    """
    interval = elements.clauses.clauses[0].value
    interval_unit = elements.clauses.clauses[1].text
    # bigquery does not support month or year interval for timestamp, so we
    # fail early with an actionable message instead of emitting the query.
    if interval_unit.lower() in {"month", "year"}:
        # The message is a single string built from adjacent literals: no comma
        # between them, otherwise ValueError receives several args and renders
        # the message as a tuple.
        raise ValueError(
            "Bigquery does not support `month` or `year` interval for table partitioned on timestamp "
            "field types. You can set the `interval_unit` to `day` directly from OpenMetadata UI. "
            # pylint: disable=line-too-long
            "Visit https://docs.open-metadata.org/connectors/ingestion/workflows/profiler#4-updating-profiler-setting-at-the-table-level for more details."
        )
    return f"CAST(CURRENT_TIMESTAMP - interval {interval} {interval_unit} AS TIMESTAMP)"
@ -149,7 +159,6 @@ def _(elements, compiler, **kwargs):
return f"(NOW() - interval {interval} {interval_unit})"
@compiles(DatetimeAddFn, Dialects.Redshift)
@compiles(DatetimeAddFn, Dialects.AzureSQL)
@compiles(DatetimeAddFn, Dialects.MSSQL)
@compiles(DatetimeAddFn, Dialects.Snowflake)
@ -160,6 +169,16 @@ def _(elements, compiler, **kwargs):
return f"DATEADD({interval_unit}, -{interval}, {func.current_timestamp()})"
@compiles(DatetimeAddFn, Dialects.Redshift)
def _(elements, compiler, **kwargs):
    """Compile ``DatetimeAddFn`` for the Redshift dialect.

    Each clause of ``elements`` is rendered through ``compiler.process``; the
    two rendered values are used as the interval and the interval unit.

    Returns:
        A ``DATEADD`` SQL string that subtracts the interval from the current
        timestamp, with the current timestamp cast via ``::timestamp``.
    """
    rendered = [compiler.process(clause, **kwargs) for clause in elements.clauses]
    interval, interval_unit = rendered
    now = func.current_timestamp()
    return f"DATEADD({interval_unit}, -{interval}, {now}::timestamp)"
@compiles(DatetimeAddFn, Dialects.SQLite)
def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
interval = elements.clauses.clauses[0].value

View File

@ -102,6 +102,8 @@ Use a query to sample data for the profiler. This will overwrite any profile samp
**Enable Column Profile**
This setting allows users to exclude or include specific columns and metrics from the profiler.
*Note: for Google BigQuery tables partitioned on a timestamp/datetime column type, the `MONTH` and `YEAR` interval units are not supported. You will need to set the `Interval Unit` to `DAY` or `HOUR`.*
**Enable Partition**
If your table includes a timestamp, date or datetime column type, you can enable partitioning. If enabled, the profiler will fetch the last `<interval>` `<interval unit>` of data to profile the table. Note that if "profile sample" is set, this configuration will be used against the partitioned data and not the whole table.
- `Column Name`: this is the name of the column that will be used as the partition field