Overwrite default sum function to add support for BQ overflow (#6252)

This commit is contained in:
Teddy 2022-07-22 07:49:00 +02:00 committed by GitHub
parent 809143b08f
commit d34d347a1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 48 additions and 10 deletions

View File

@ -19,6 +19,7 @@ from typing import List
from sqlalchemy import case, column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
from metadata.utils.logger import profiler_logger
logger = profiler_logger()
@ -54,7 +55,7 @@ class CountInSet(StaticMetric):
try:
set_values = set(self.values)
return func.sum(case([(column(self.col.name).in_(set_values), 1)], else_=0))
return SumFn(case([(column(self.col.name).in_(set_values), 1)], else_=0))
except Exception as err: # pylint: disable=broad-except
logger.error(f"Error trying to run countInSet for {self.col.name} - {err}")

View File

@ -17,6 +17,7 @@ ILIKE Count Metric definition
from sqlalchemy import case, column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
class ILikeCount(StaticMetric):
@ -46,6 +47,4 @@ class ILikeCount(StaticMetric):
raise AttributeError(
"ILike Count requires an expression to be set: add_props(expression=...)(Metrics.ILIKE_COUNT)"
)
return func.sum(
case([(column(self.col.name).ilike(self.expression), 1)], else_=0)
)
return SumFn(case([(column(self.col.name).ilike(self.expression), 1)], else_=0))

View File

@ -17,6 +17,7 @@ Like Count Metric definition
from sqlalchemy import case, column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
class LikeCount(StaticMetric):
@ -46,6 +47,4 @@ class LikeCount(StaticMetric):
raise AttributeError(
"Like Count requires an expression to be set: add_props(expression=...)(Metrics.LIKE_COUNT)"
)
return func.sum(
case([(column(self.col.name).like(self.expression), 1)], else_=0)
)
return SumFn(case([(column(self.col.name).like(self.expression), 1)], else_=0))

View File

@ -17,6 +17,7 @@ Like Count Metric definition
from sqlalchemy import case, column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
class NotLikeCount(StaticMetric):
@ -46,6 +47,6 @@ class NotLikeCount(StaticMetric):
raise AttributeError(
"Not Like Count requires an expression to be set: add_props(expression=...)(Metrics.NOT_LIKE_COUNT)"
)
return func.sum(
return SumFn(
case([(column(self.col.name).not_like(self.expression), 0)], else_=1)
)

View File

@ -17,6 +17,7 @@ Null Count Metric definition
from sqlalchemy import case, column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
class NullCount(StaticMetric):
@ -44,4 +45,4 @@ class NullCount(StaticMetric):
@_label
def fn(self):
return func.sum(case([(column(self.col.name).is_(None), 1)], else_=0))
return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0))

View File

@ -17,6 +17,7 @@ SUM Metric definition
from sqlalchemy import column, func
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.sum import SumFn
from metadata.orm_profiler.orm.registry import is_quantifiable
@ -36,6 +37,6 @@ class Sum(StaticMetric):
@_label
def fn(self):
if is_quantifiable(self.col.type):
return func.sum(column(self.col.name))
return SumFn(column(self.col.name))
return None

View File

@ -0,0 +1,36 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Define a custom SUM function.
On BigQuery the summed expression is cast
to NUMERIC before aggregating, to avoid
overflow (see the BigQuery compilation rule below).
"""
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction, sum
from metadata.orm_profiler.metrics.core import CACHE
from metadata.orm_profiler.orm.registry import Dialects
class SumFn(GenericFunction):
    """SQL ``sum`` aggregate declared as a custom SQLAlchemy function.

    Being its own ``GenericFunction`` subclass allows dialect-specific
    rendering rules to be attached to it through ``@compiles``.
    """

    # Statement-caching flag, taken from the profiler-wide CACHE setting.
    inherit_cache = CACHE
    # Name of the SQL function this construct stands for.
    name = "sum"
@compiles(SumFn, Dialects.BigQuery)
def _(element, compiler, **kw):
    """Render ``SumFn`` for BigQuery as ``SUM(CAST(<args> AS NUMERIC))``.

    The summed expression is cast to NUMERIC before it is aggregated, so
    that the sum does not overflow on BigQuery.

    Args:
        element: the ``SumFn`` expression being compiled.
        compiler: the SQL compiler processing the statement.
        **kw: compiler keyword arguments, forwarded unchanged.

    Returns:
        The SQL string for the aggregate.
    """
    # Compile the function arguments first, then wrap them in the cast.
    summed = compiler.process(element.clauses, **kw)
    return f"SUM(CAST({summed} AS NUMERIC))"