diff --git a/ingestion-core/src/metadata/_version.py b/ingestion-core/src/metadata/_version.py index 5d6bea44be0..10545954fdb 100644 --- a/ingestion-core/src/metadata/_version.py +++ b/ingestion-core/src/metadata/_version.py @@ -7,5 +7,5 @@ Provides metadata version information. from incremental import Version -__version__ = Version("metadata", 0, 9, 0, dev=25) +__version__ = Version("metadata", 0, 9, 0, dev=26) __all__ = ["__version__"] diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/max_length.py b/ingestion/src/metadata/orm_profiler/metrics/static/max_length.py index 781fdd68da6..3f974eaab13 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/max_length.py @@ -13,34 +13,15 @@ MAX_LENGTH Metric definition """ from sqlalchemy import func -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import FunctionElement -from metadata.generated.schema.entity.services.databaseService import ( - DatabaseServiceType, -) -from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label -from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable +from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.length import LenFn +from metadata.orm_profiler.orm.registry import is_concatenable from metadata.orm_profiler.utils import logger logger = logger() -class MaxLengthFn(FunctionElement): - name = __qualname__ - inherit_cache = CACHE - - -@compiles(MaxLengthFn) -def _(element, compiler, **kw): - return "MAX(LEN(%s))" % compiler.process(element.clauses, **kw) - - -@compiles(MaxLengthFn, DatabaseServiceType.SQLite.value.lower()) -def _(element, compiler, **kw): - return "MAX(LENGTH(%s))" % compiler.process(element.clauses, **kw) - - class MaxLength(StaticMetric): """ MAX_LENGTH Metric @@ -62,7 +43,7 @@ class MaxLength(StaticMetric): def fn(self): if is_concatenable(self.col.type): - return MaxLengthFn(self.col) + return func.max(LenFn(self.col)) logger.debug( f"Don't know how to process type {self.col.type} when computing MAX_LENGTH" diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/mean.py b/ingestion/src/metadata/orm_profiler/metrics/static/mean.py index 70063ca46c6..b90af75d543 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/mean.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/mean.py @@ -13,34 +13,15 @@ AVG Metric definition """ from sqlalchemy import func -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import FunctionElement -from metadata.generated.schema.entity.services.databaseService import ( - DatabaseServiceType, -) -from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label +from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.length import LenFn from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable from metadata.orm_profiler.utils import logger logger = logger() -class ConcatAvgFn(FunctionElement): - name = __qualname__ - inherit_cache = CACHE - - -@compiles(ConcatAvgFn) -def _(element, compiler, **kw): - return "AVG(LEN(%s))" % compiler.process(element.clauses, **kw) - - -@compiles(ConcatAvgFn, DatabaseServiceType.SQLite.value.lower()) -def _(element, compiler, **kw): - return "AVG(LENGTH(%s))" % compiler.process(element.clauses, **kw) - - class Mean(StaticMetric): """ AVG Metric @@ -65,7 +46,7 @@ class Mean(StaticMetric): return func.avg(self.col) if is_concatenable(self.col.type): - return ConcatAvgFn(self.col) + return func.avg(LenFn(self.col)) logger.debug( f"Don't know how to process type {self.col.type} when computing MEAN" diff --git a/ingestion/src/metadata/orm_profiler/metrics/static/min_length.py b/ingestion/src/metadata/orm_profiler/metrics/static/min_length.py index 81ba670ba46..f9744ced0ea 100644 --- a/ingestion/src/metadata/orm_profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/orm_profiler/metrics/static/min_length.py @@ -13,34 +13,15 @@ MIN_LENGTH Metric definition """ from sqlalchemy import func -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import FunctionElement -from metadata.generated.schema.entity.services.databaseService import ( - DatabaseServiceType, -) -from metadata.orm_profiler.metrics.core import CACHE, StaticMetric, _label -from metadata.orm_profiler.orm.registry import is_concatenable, is_quantifiable +from metadata.orm_profiler.metrics.core import StaticMetric, _label +from metadata.orm_profiler.orm.functions.length import LenFn +from metadata.orm_profiler.orm.registry import is_concatenable from metadata.orm_profiler.utils import logger logger = logger() -class MinLengthFn(FunctionElement): - name = __qualname__ - inherit_cache = CACHE - - -@compiles(MinLengthFn) -def _(element, compiler, **kw): - return "MIN(LEN(%s))" % compiler.process(element.clauses, **kw) - - -@compiles(MinLengthFn, DatabaseServiceType.SQLite.value.lower()) -def _(element, compiler, **kw): - return "MIN(LENGTH(%s))" % compiler.process(element.clauses, **kw) - - class MinLength(StaticMetric): """ MIN_LENGTH Metric @@ -62,7 +43,7 @@ class MinLength(StaticMetric): def fn(self): if is_concatenable(self.col.type): - return MinLengthFn(self.col) + return func.min(LenFn(self.col)) logger.debug( f"Don't know how to process type {self.col.type} when computing MIN_LENGTH" diff --git a/ingestion/src/metadata/orm_profiler/orm/functions/concat.py b/ingestion/src/metadata/orm_profiler/orm/functions/concat.py index c10c3783736..c542f96e187 100644 --- a/ingestion/src/metadata/orm_profiler/orm/functions/concat.py +++ b/ingestion/src/metadata/orm_profiler/orm/functions/concat.py @@ -35,6 +35,7 @@ def _(element, compiler, **kw): @compiles(ConcatFn, DatabaseServiceType.Redshift.value.lower()) @compiles(ConcatFn, DatabaseServiceType.SQLite.value.lower()) +@compiles(ConcatFn, DatabaseServiceType.Vertica.value.lower()) def _(element, compiler, **kw): """ This actually returns the squared STD, but as diff --git a/ingestion/src/metadata/orm_profiler/orm/functions/length.py b/ingestion/src/metadata/orm_profiler/orm/functions/length.py new file mode 100644 index 00000000000..d8c486f1f04 --- /dev/null +++ b/ingestion/src/metadata/orm_profiler/orm/functions/length.py @@ -0,0 +1,42 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Define Length function +""" +from sqlalchemy.ext.compiler import compiles +from sqlalchemy.sql.functions import FunctionElement + +from metadata.generated.schema.entity.services.databaseService import ( + DatabaseServiceType, +) +from metadata.orm_profiler.metrics.core import CACHE +from metadata.orm_profiler.utils import logger + +logger = logger() + + +class LenFn(FunctionElement): + inherit_cache = CACHE + + +@compiles(LenFn) +def _(element, compiler, **kw): + return "LEN(%s)" % compiler.process(element.clauses, **kw) + + +@compiles(LenFn, DatabaseServiceType.SQLite.value.lower()) +@compiles(LenFn, DatabaseServiceType.Vertica.value.lower()) +@compiles( + LenFn, DatabaseServiceType.Hive.value.lower().encode() +) # For some reason hive's dialect is in bytes... +def _(element, compiler, **kw): + return "LENGTH(%s)" % compiler.process(element.clauses, **kw)