feat: add regex support for dbx (#21514)

This commit is contained in:
Teddy 2025-06-02 17:55:48 +02:00 committed by GitHub
parent bb54555daa
commit 3c5fbffeaa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 47 additions and 5 deletions

View File

@ -18,6 +18,7 @@ from sqlalchemy import case, column, not_
from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.generated.schema.configuration.profilerConfiguration import MetricType
from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.core import StaticMetric, _label
from metadata.profiler.orm.functions.regexp import RegexpMatchFn
from metadata.profiler.orm.functions.sum import SumFn from metadata.profiler.orm.functions.sum import SumFn
from metadata.profiler.orm.registry import is_concatenable from metadata.profiler.orm.registry import is_concatenable
@ -58,9 +59,9 @@ class NotRegexCount(StaticMetric):
[ [
( (
not_( not_(
column(self.col.name, self.col.type).regexp_match( RegexpMatchFn(
self.expression column(self.col.name, self.col.type), self.expression
) ),
), ),
0, 0,
) )

View File

@ -18,6 +18,7 @@ from sqlalchemy import case, column
from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.generated.schema.configuration.profilerConfiguration import MetricType
from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.core import StaticMetric, _label
from metadata.profiler.orm.functions.regexp import RegexpMatchFn
from metadata.profiler.orm.functions.sum import SumFn from metadata.profiler.orm.functions.sum import SumFn
from metadata.profiler.orm.registry import is_concatenable from metadata.profiler.orm.registry import is_concatenable
@ -57,8 +58,8 @@ class RegexCount(StaticMetric):
case( case(
[ [
( (
column(self.col.name, self.col.type).regexp_match( RegexpMatchFn(
self.expression column(self.col.name, self.col.type), self.expression
), ),
1, 1,
) )

View File

@ -0,0 +1,40 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Define a regexp match function."""
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction
from metadata.profiler.metrics.core import CACHE
from metadata.profiler.orm.registry import Dialects
class RegexpMatchFn(GenericFunction):
    """SQL function element representing a regular-expression match.

    Built from two clauses, the expression to test followed by the
    pattern. The SQL text it produces is decided by the ``@compiles``
    hooks registered for this class in this module.
    """

    # Caching behaviour follows the profiler-wide ``CACHE`` value.
    inherit_cache = CACHE
    # Function name this element is registered under.
    name = "regexp_match"
@compiles(RegexpMatchFn)
def _(element, compiler, **kw):
    """Default rendering of ``RegexpMatchFn``.

    Splits the element into its two clauses (target expression, pattern),
    builds ``target.regexp_match(pattern)`` and hands that expression to
    the active compiler, forwarding every compile keyword unchanged.
    """
    target, regex = element.clauses
    return compiler.process(target.regexp_match(regex), **kw)
@compiles(RegexpMatchFn, Dialects.Databricks)
def _(element, compiler, **kw):
    """Databricks rendering of ``RegexpMatchFn``.

    Compiles the target expression and the pattern separately (in that
    order) and wraps the two resulting SQL fragments in a
    ``REGEXP_LIKE(<target>, <pattern>)`` call.
    """
    target, regex = element.clauses
    target_sql = compiler.process(target, **kw)
    regex_sql = compiler.process(regex, **kw)
    return "REGEXP_LIKE({}, {})".format(target_sql, regex_sql)