FIX - profiler interface system metrics validation & e2e YAML includeDDL (#17562)

This commit is contained in:
Pere Miquel Brull 2024-08-23 09:00:18 +02:00 committed by GitHub
parent ef7042752a
commit 2180a6c7f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 35 additions and 10 deletions

View File

@ -21,7 +21,7 @@ import threading
import traceback
from collections import defaultdict
from datetime import datetime
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional
from sqlalchemy import Column, inspect, text
from sqlalchemy.exc import DBAPIError, ProgrammingError, ResourceClosedError
@ -451,14 +451,16 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
column=metric_func.column,
sample=sample,
)
if row:
for k, v in row.items():
# Replace NaN values with None
if isinstance(v, float) and math.isnan(v):
logger.warning(
"NaN data detected and will be cast to null in OpenMetadata to maintain database parity"
)
row[k] = None
if row and isinstance(row, dict):
row = self._validate_nulls(row)
# System metrics return a list of dictionaries, with UPDATE, INSERT or DELETE ops results
if row and metric_func.metric_type == MetricTypes.System:
row = [
self._validate_nulls(r) if isinstance(r, dict) else r
for r in row
]
except Exception as exc:
error = (
f"{metric_func.column if metric_func.column is not None else metric_func.table.__tablename__} "
@ -476,6 +478,17 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
return row, column, metric_func.metric_type.value
@staticmethod
def _validate_nulls(row: Dict[str, Any]) -> Dict[str, Any]:
    """Replace every NaN float value in ``row`` with ``None``.

    The dictionary is updated in place and the very same object is
    returned. One warning is logged for each NaN value that is replaced.
    """
    # Gather the affected keys up front, then overwrite their values.
    nan_keys = [
        key
        for key, value in row.items()
        if isinstance(value, float) and math.isnan(value)
    ]
    for key in nan_keys:
        logger.warning(
            "NaN data detected and will be cast to null in OpenMetadata to maintain database parity"
        )
        row[key] = None
    return row
# pylint: disable=use-dict-literal
def get_all_metrics(
self,

View File

@ -19,6 +19,7 @@ source:
sourceConfig:
config:
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -21,6 +21,7 @@ source:
sourceConfig:
config:
type: DatabaseMetadata
includeDDL: true
schemaFilterPattern:
includes:
- do_not_touch

View File

@ -11,6 +11,7 @@ source:
sourceConfig:
config:
type: DatabaseMetadata
includeDDL: true
type: datalake
sink:
config: {}

View File

@ -12,6 +12,7 @@ source:
includeTables: true
includeViews: true
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -15,6 +15,7 @@ source:
includeTables: true
includeViews: true
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -17,6 +17,7 @@ source:
includeTables: true
includeViews: true
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -15,6 +15,7 @@ source:
includeTables: true
includeViews: true
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -13,6 +13,7 @@ source:
sourceConfig:
config:
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}

View File

@ -14,6 +14,7 @@ source:
includeTables: true
includeViews: true
type: DatabaseMetadata
includeDDL: true
schemaFilterPattern:
excludes:
- information_schema

View File

@ -19,6 +19,7 @@ source:
includeViews: true
includeStoredProcedures: false
type: DatabaseMetadata
includeDDL: true
schemaFilterPattern:
excludes:
- information_schema.*

View File

@ -9,7 +9,9 @@ source:
hostPort: $E2E_VERTICA_HOST_PORT
database: VMart
sourceConfig:
config: {}
config:
type: DatabaseMetadata
includeDDL: true
sink:
type: metadata-rest
config: {}