Fix #3356 - Vertica concat and len (#3358)

This commit is contained in:
Pere Miquel Brull 2022-03-10 17:01:47 +01:00 committed by GitHub
parent 3eda561ca9
commit f0dd85b9f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 55 additions and 69 deletions

View File

@ -7,5 +7,5 @@ Provides metadata version information.
from incremental import Version
__version__ = Version("metadata", 0, 9, 0, dev=25)
__version__ = Version("metadata", 0, 9, 0, dev=26)
__all__ = ["__version__"]

View File

@ -13,34 +13,15 @@
MAX_LENGTH Metric definition
"""
from sqlalchemy import func
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement
from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.length import LenFn
from metadata.orm_profiler.orm.registry import is_concatenable
from metadata.orm_profiler.utils import logger
logger = logger()
class MaxLengthFn(FunctionElement):
    """
    SQL function element for the maximum string length of a column.

    The SQL text is not defined here: it is produced by the functions
    registered for this class through ``@compiles``.
    """

    # Caching behaviour follows the shared CACHE flag from metrics.core.
    inherit_cache = CACHE
    # Inside a class body ``__qualname__`` is the class' own qualified name.
    name = __qualname__
@compiles(MaxLengthFn)
def _(element, compiler, **kw):
    """
    Default compilation of MaxLengthFn: wrap the compiled clauses
    in ``MAX(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "MAX(LEN(%s))" % compiled_clauses
@compiles(MaxLengthFn, DatabaseServiceType.SQLite.value.lower())
def _(element, compiler, **kw):
    """
    SQLite compilation of MaxLengthFn: wrap the compiled clauses
    in ``MAX(LENGTH(...))`` instead of the default ``MAX(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "MAX(LENGTH(%s))" % compiled_clauses
class MaxLength(StaticMetric):
"""
MAX_LENGTH Metric
@ -62,7 +43,7 @@ class MaxLength(StaticMetric):
def fn(self):
if is_concatenable(self.col.type):
return MaxLengthFn(self.col)
return func.max(LenFn(self.col))
logger.debug(
f"Don't know how to process type {self.col.type} when computing MAX_LENGTH"

View File

@ -13,34 +13,15 @@
AVG Metric definition
"""
from sqlalchemy import func
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement
from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.length import LenFn
from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable
from metadata.orm_profiler.utils import logger
logger = logger()
class ConcatAvgFn(FunctionElement):
    """
    SQL function element for the average string length of a column.

    The SQL text is not defined here: it is produced by the functions
    registered for this class through ``@compiles``.
    """

    # Caching behaviour follows the shared CACHE flag from metrics.core.
    inherit_cache = CACHE
    # Inside a class body ``__qualname__`` is the class' own qualified name.
    name = __qualname__
@compiles(ConcatAvgFn)
def _(element, compiler, **kw):
    """
    Default compilation of ConcatAvgFn: wrap the compiled clauses
    in ``AVG(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "AVG(LEN(%s))" % compiled_clauses
@compiles(ConcatAvgFn, DatabaseServiceType.SQLite.value.lower())
def _(element, compiler, **kw):
    """
    SQLite compilation of ConcatAvgFn: wrap the compiled clauses
    in ``AVG(LENGTH(...))`` instead of the default ``AVG(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "AVG(LENGTH(%s))" % compiled_clauses
class Mean(StaticMetric):
"""
AVG Metric
@ -65,7 +46,7 @@ class Mean(StaticMetric):
return func.avg(self.col)
if is_concatenable(self.col.type):
return ConcatAvgFn(self.col)
return func.avg(LenFn(self.col))
logger.debug(
f"Don't know how to process type {self.col.type} when computing MEAN"

View File

@ -13,34 +13,15 @@
MIN_LENGTH Metric definition
"""
from sqlalchemy import func
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement
from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable
from metadata.orm_profiler.metrics.core import StaticMetric, _label
from metadata.orm_profiler.orm.functions.length import LenFn
from metadata.orm_profiler.orm.registry import is_concatenable
from metadata.orm_profiler.utils import logger
logger = logger()
class MinLengthFn(FunctionElement):
    """
    SQL function element for the minimum string length of a column.

    The SQL text is not defined here: it is produced by the functions
    registered for this class through ``@compiles``.
    """

    # Caching behaviour follows the shared CACHE flag from metrics.core.
    inherit_cache = CACHE
    # Inside a class body ``__qualname__`` is the class' own qualified name.
    name = __qualname__
@compiles(MinLengthFn)
def _(element, compiler, **kw):
    """
    Default compilation of MinLengthFn: wrap the compiled clauses
    in ``MIN(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "MIN(LEN(%s))" % compiled_clauses
@compiles(MinLengthFn, DatabaseServiceType.SQLite.value.lower())
def _(element, compiler, **kw):
    """
    SQLite compilation of MinLengthFn: wrap the compiled clauses
    in ``MIN(LENGTH(...))`` instead of the default ``MIN(LEN(...))``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "MIN(LENGTH(%s))" % compiled_clauses
class MinLength(StaticMetric):
"""
MIN_LENGTH Metric
@ -62,7 +43,7 @@ class MinLength(StaticMetric):
def fn(self):
if is_concatenable(self.col.type):
return MinLengthFn(self.col)
return func.min(LenFn(self.col))
logger.debug(
f"Don't know how to process type {self.col.type} when computing MIN_LENGTH"

View File

@ -35,6 +35,7 @@ def _(element, compiler, **kw):
@compiles(ConcatFn, DatabaseServiceType.Redshift.value.lower())
@compiles(ConcatFn, DatabaseServiceType.SQLite.value.lower())
@compiles(ConcatFn, DatabaseServiceType.Vertica.value.lower())
def _(element, compiler, **kw):
"""
This actually returns the squared STD, but as

View File

@ -0,0 +1,42 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
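Define the LenFn SQL function element and its dialect-specific
compilation (LEN by default, LENGTH for the dialects that need it)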
"""
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement
from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.orm_profiler.metrics.core import CACHE
from metadata.orm_profiler.utils import logger
logger = logger()
class LenFn(FunctionElement):
    """
    SQL function element for the length of its argument.

    The function name emitted in SQL (``LEN`` or ``LENGTH``) is chosen
    per dialect by the functions registered for this class through
    ``@compiles``.
    """

    # Caching behaviour follows the shared CACHE flag from metrics.core.
    inherit_cache = CACHE
@compiles(LenFn)
def _(element, compiler, **kw):
    """
    Default compilation of LenFn: wrap the compiled clauses
    in ``LEN(...)``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "LEN(%s)" % compiled_clauses
@compiles(LenFn, DatabaseServiceType.SQLite.value.lower())
@compiles(LenFn, DatabaseServiceType.Vertica.value.lower())
@compiles(
    LenFn, DatabaseServiceType.Hive.value.lower().encode()
)  # The Hive dialect is registered under a bytes name, so encode the key to match
def _(element, compiler, **kw):
    """
    SQLite, Vertica and Hive compilation of LenFn: wrap the compiled
    clauses in ``LENGTH(...)`` instead of the default ``LEN(...)``.
    """
    compiled_clauses = compiler.process(element.clauses, **kw)
    return "LENGTH(%s)" % compiled_clauses