From d34d347a1cc72d89fabfaadd78f501e7bece43e3 Mon Sep 17 00:00:00 2001 From: Teddy Date: Fri, 22 Jul 2022 07:49:00 +0200 Subject: [PATCH] overwrite default sum function to add super for BQ overflow (#6252) --- .../metrics/static/count_in_set.py | 3 +- .../metrics/static/ilike_count.py | 5 ++- .../orm_profiler/metrics/static/like_count.py | 5 ++- .../metrics/static/not_like_count.py | 3 +- .../orm_profiler/metrics/static/null_count.py | 3 +- .../orm_profiler/metrics/static/sum.py | 3 +- .../orm_profiler/orm/functions/sum.py | 36 +++++++++++++++++++ 7 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 ingestion/src/metadata/orm_profiler/orm/functions/sum.py diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/count_in_set.py b/ingestion/src/metadata/orm_profiler/metrics/static/count_in_set.py index 730f968dc87..e9f6167ae82 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/count_in_set.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/count_in_set.py @@ -19,6 +19,7 @@ from typing import List from sqlalchemy import case, column, func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn from metadata.utils.logger import profiler_logger logger = profiler_logger() @@ -54,7 +55,7 @@ class CountInSet(StaticMetric): try: set_values = set(self.values) - return func.sum(case([(column(self.col.name).in_(set_values), 1)], else_=0)) + return SumFn(case([(column(self.col.name).in_(set_values), 1)], else_=0)) except Exception as err: # pylint: disable=broad-except logger.error(f"Error trying to run countInSet for {self.col.name} - {err}") diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/ilike_count.py b/ingestion/src/metadata/orm_profiler/metrics/static/ilike_count.py index a970bc47092..ae48af523aa 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/ilike_count.py +++ 
b/ingestion/src/metadata/orm_profiler/metrics/static/ilike_count.py @@ -17,6 +17,7 @@ ILIKE Count Metric definition from sqlalchemy import case, column, func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn class ILikeCount(StaticMetric): @@ -46,6 +47,4 @@ class ILikeCount(StaticMetric): raise AttributeError( "ILike Count requires an expression to be set: add_props(expression=...)(Metrics.ILIKE_COUNT)" ) - return func.sum( - case([(column(self.col.name).ilike(self.expression), 1)], else_=0) - ) + return SumFn(case([(column(self.col.name).ilike(self.expression), 1)], else_=0)) diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/like_count.py b/ingestion/src/metadata/orm_profiler/metrics/static/like_count.py index 65766741ad4..c7795c824b9 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/like_count.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/like_count.py @@ -17,6 +17,7 @@ Like Count Metric definition from sqlalchemy import case, column, func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn class LikeCount(StaticMetric): @@ -46,6 +47,4 @@ class LikeCount(StaticMetric): raise AttributeError( "Like Count requires an expression to be set: add_props(expression=...)(Metrics.LIKE_COUNT)" ) - return func.sum( - case([(column(self.col.name).like(self.expression), 1)], else_=0) - ) + return SumFn(case([(column(self.col.name).like(self.expression), 1)], else_=0)) diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/not_like_count.py b/ingestion/src/metadata/orm_profiler/metrics/static/not_like_count.py index 001a0b47772..0c316687fc0 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/not_like_count.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/not_like_count.py @@ -17,6 +17,7 @@ Like Count Metric definition from sqlalchemy import case, column, 
func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn class NotLikeCount(StaticMetric): @@ -46,6 +47,6 @@ class NotLikeCount(StaticMetric): raise AttributeError( "Not Like Count requires an expression to be set: add_props(expression=...)(Metrics.NOT_LIKE_COUNT)" ) - return func.sum( + return SumFn( case([(column(self.col.name).not_like(self.expression), 0)], else_=1) ) diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/null_count.py b/ingestion/src/metadata/orm_profiler/metrics/static/null_count.py index 57d78a262ec..a145a94fc2f 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/null_count.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/null_count.py @@ -17,6 +17,7 @@ Null Count Metric definition from sqlalchemy import case, column, func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn class NullCount(StaticMetric): @@ -44,4 +45,4 @@ class NullCount(StaticMetric): @_label def fn(self): - return func.sum(case([(column(self.col.name).is_(None), 1)], else_=0)) + return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0)) diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/sum.py b/ingestion/src/metadata/orm_profiler/metrics/static/sum.py index 1dd4be1c65d..a439327e9fb 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/sum.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/sum.py @@ -17,6 +17,7 @@ SUM Metric definition from sqlalchemy import column, func from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.sum import SumFn from metadata.orm_profiler.orm.registry import is_quantifiable @@ -36,6 +37,6 @@ class Sum(StaticMetric): @_label def fn(self): if is_quantifiable(self.col.type): - return func.sum(column(self.col.name)) + return SumFn(column(self.col.name)) return None diff 
--git a/ingestion/src/metadata/orm_profiler/orm/functions/sum.py b/ingestion/src/metadata/orm_profiler/orm/functions/sum.py new file mode 100644 index 00000000000..b60cc4030a3 --- /dev/null +++ b/ingestion/src/metadata/orm_profiler/orm/functions/sum.py @@ -0,0 +1,36 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Define SUM function + +Custom SUM whose BigQuery compilation casts +the summed expression to NUMERIC to avoid +overflow. +""" + +from sqlalchemy.ext.compiler import compiles +from sqlalchemy.sql.functions import GenericFunction, sum + +from metadata.orm_profiler.metrics.core import CACHE +from metadata.orm_profiler.orm.registry import Dialects + + +class SumFn(GenericFunction): + name = "sum" + inherit_cache = CACHE + + +@compiles(SumFn, Dialects.BigQuery) +def _(element, compiler, **kw): + """Cast the summed expression to NUMERIC on BigQuery to avoid overflow""" + proc = compiler.process(element.clauses, **kw) + return "SUM(CAST(%s AS NUMERIC))" % proc