2022-07-06 10:12:29 +02:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
Define Median function
|
|
|
|
"""
|
|
|
|
# Keep SQA docs style defining custom constructs
|
|
|
|
# pylint: disable=consider-using-f-string,duplicate-code
|
|
|
|
from sqlalchemy.ext.compiler import compiles
|
|
|
|
from sqlalchemy.sql.functions import FunctionElement
|
|
|
|
|
2023-03-01 08:20:38 +01:00
|
|
|
from metadata.profiler.metrics.core import CACHE
|
|
|
|
from metadata.profiler.orm.registry import Dialects
|
2022-07-06 10:12:29 +02:00
|
|
|
from metadata.utils.logger import profiler_logger
|
|
|
|
|
|
|
|
logger = profiler_logger()
|
|
|
|
|
|
|
|
|
|
|
|
class MedianFn(FunctionElement):
    """SQL function element representing the median of a column.

    The class carries no rendering logic of its own: the SQL text comes from
    the ``@compiles`` hooks registered against it in this module (one default
    hook plus per-dialect overrides).
    """

    # SQLAlchemy consults ``inherit_cache`` to decide whether this construct
    # may take part in its compiled-statement cache. The value is taken from
    # the shared CACHE constant imported from the profiler metrics core.
    inherit_cache = CACHE
|
|
|
|
|
|
|
|
|
|
|
|
@compiles(MedianFn)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Default median rendering, used when no dialect-specific hook matches.

    Only the first argument of the function is compiled; it is placed in the
    ORDER BY of an ordered-set ``percentile_cont(0.5)`` aggregate. ``kwargs``
    is accepted for the compiler-hook signature but is not forwarded.
    """
    first_argument = elements.clauses.clauses[0]
    column_sql = compiler.process(first_argument)
    return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC)" % column_sql
|
2022-07-06 10:12:29 +02:00
|
|
|
|
|
|
|
|
|
|
|
@compiles(MedianFn, Dialects.BigQuery)
def _(elements, compiler, **kwargs):
    """Median rendering for BigQuery.

    Emits ``percentile_cont`` as a window function over the whole result set.
    """
    # Every argument is compiled and exactly two are expected (the unpacking
    # below fails otherwise); only the first one ends up in the SQL text.
    compiled_args = [compiler.process(arg, **kwargs) for arg in elements.clauses]
    column_sql, _unused = compiled_args
    return "percentile_cont(%s , 0.5) OVER()" % column_sql
|
|
|
|
|
|
|
|
|
|
|
|
@compiles(MedianFn, Dialects.ClickHouse)
def _(elements, compiler, **kwargs):
    """Median rendering for ClickHouse, delegating to its ``median`` function."""
    # Compile all arguments up front; exactly two are expected and the second
    # one is compiled but not used in the generated SQL.
    rendered = []
    for clause in elements.clauses:
        rendered.append(compiler.process(clause, **kwargs))
    column_sql, _second = rendered
    return "median(%s)" % column_sql
|
|
|
|
|
|
|
|
|
2022-10-11 15:57:25 +02:00
|
|
|
# pylint: disable=unused-argument
|
2022-12-13 13:03:22 +01:00
|
|
|
@compiles(MedianFn, Dialects.Athena)
@compiles(MedianFn, Dialects.Trino)
@compiles(MedianFn, Dialects.Presto)
def _(elements, compiler, **kwargs):
    """Median rendering shared by Athena, Trino and Presto.

    Uses ``approx_percentile`` on the bare (unqualified) name of the first
    argument, wrapped in double quotes as a delimited identifier.

    Args:
        elements: the ``MedianFn`` element; its first clause must expose
            a ``name`` attribute.
        compiler: SQL compiler instance (not used by this hook).
        **kwargs: compiler keyword arguments (not used by this hook).

    Returns:
        The SQL fragment computing the approximate median.
    """
    col = str(elements.clauses.clauses[0].name)
    # The name is interpolated inside a double-quoted identifier: double any
    # embedded double quote so it cannot close the identifier early and
    # corrupt (or inject into) the generated SQL. Names without a double
    # quote are rendered exactly as before.
    escaped_col = col.replace('"', '""')
    return 'approx_percentile("%s", 0.5)' % escaped_col
|
2022-07-06 10:12:29 +02:00
|
|
|
|
|
|
|
|
2022-07-13 14:43:56 +02:00
|
|
|
@compiles(MedianFn, Dialects.MSSQL)
def _(elements, compiler, **kwargs):
    """Median computation for MSSQL.

    Renders ``percentile_cont(0.5)`` ordered by the first argument and applied
    as a window function (``OVER()``).
    """
    # The bare column name is used rather than the compiled expression.
    # NOTE(review): the name is interpolated without identifier quoting -
    # confirm that names needing quoting cannot reach this hook.
    column_name = elements.clauses.clauses[0].name
    template = "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()"
    return template % column_name
|
2022-07-13 14:43:56 +02:00
|
|
|
|
|
|
|
|
2022-09-24 00:48:09 +02:00
|
|
|
@compiles(MedianFn, Dialects.Hive)
def _(elements, compiler, **kwargs):
    """Median computation for Hive.

    The compiled column is cast to BIGINT and passed to ``percentile`` with
    the 0.5 quantile.
    """
    # All arguments are compiled before unpacking; exactly two are expected
    # and only the first is used in the SQL text.
    compiled_args = tuple(
        compiler.process(clause, **kwargs) for clause in elements.clauses
    )
    column_sql, _ignored = compiled_args
    return "percentile(cast(%s as BIGINT), 0.5)" % column_sql
|
|
|
|
|
|
|
|
|
2022-07-29 10:41:53 +02:00
|
|
|
@compiles(MedianFn, Dialects.MySQL)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Placeholder median for MySQL: the computation is not supported yet.

    No arguments are inspected; the hook always renders a SQL ``NULL``.
    Support is tracked in
    https://github.com/open-metadata/OpenMetadata/issues/6340
    """
    unsupported_median_sql = "NULL"
    return unsupported_median_sql
|
|
|
|
|
|
|
|
|
2022-07-06 10:12:29 +02:00
|
|
|
@compiles(MedianFn, Dialects.SQLite)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Median computation for SQLite, which has no built-in median.

    The generated subquery orders the column values, skips to the middle with
    OFFSET, keeps one row (odd count) or two rows (even count) with LIMIT, and
    averages what is left.
    """
    # Exactly two arguments are expected: the column expression and an
    # element whose ``value`` attribute holds the table name.
    column, table_param = elements.clauses
    median_template = """
    (SELECT
        AVG({col})
    FROM (
        SELECT {col}
        FROM {table}
        ORDER BY {col}
        LIMIT 2 - (SELECT COUNT(*) FROM {table}) % 2
        OFFSET (SELECT (COUNT(*) - 1) / 2
        FROM {table})))
    """
    # ``str.format`` is kept on purpose: the template contains a literal ``%``.
    return median_template.format(col=column, table=table_param.value)
|
2023-01-20 20:36:03 +01:00
|
|
|
|
|
|
|
|
|
|
|
@compiles(MedianFn, Dialects.Vertica)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Median computation for Vertica.

    Wraps the windowed ``MEDIAN(...) OVER()`` in a scalar subquery limited to
    a single row.
    """
    # Exactly two arguments are expected: the column expression and an
    # element whose ``value`` attribute holds the table name.
    column, table_param = elements.clauses
    median_template = "(SELECT MEDIAN({col}) OVER() FROM {table} LIMIT 1)"
    return median_template.format(col=column, table=table_param.value)
|