FIX - profiler interface system metrics validation & e2e YAML includeDDL (#17562)

This commit is contained in:
Pere Miquel Brull 2024-08-23 09:00:18 +02:00 committed by GitHub
parent ef7042752a
commit 2180a6c7f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 35 additions and 10 deletions

View File

@ -21,7 +21,7 @@ import threading
import traceback import traceback
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from typing import Dict, List, Optional from typing import Any, Dict, List, Optional
from sqlalchemy import Column, inspect, text from sqlalchemy import Column, inspect, text
from sqlalchemy.exc import DBAPIError, ProgrammingError, ResourceClosedError from sqlalchemy.exc import DBAPIError, ProgrammingError, ResourceClosedError
@ -451,14 +451,16 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
column=metric_func.column, column=metric_func.column,
sample=sample, sample=sample,
) )
if row: if row and isinstance(row, dict):
for k, v in row.items(): row = self._validate_nulls(row)
# Replace NaN values with None
if isinstance(v, float) and math.isnan(v): # System metrics return a list of dictionaries, with UPDATE, INSERT or DELETE ops results
logger.warning( if row and metric_func.metric_type == MetricTypes.System:
"NaN data detected and will be cast to null in OpenMetadata to maintain database parity" row = [
) self._validate_nulls(r) if isinstance(r, dict) else r
row[k] = None for r in row
]
except Exception as exc: except Exception as exc:
error = ( error = (
f"{metric_func.column if metric_func.column is not None else metric_func.table.__tablename__} " f"{metric_func.column if metric_func.column is not None else metric_func.table.__tablename__} "
@ -476,6 +478,17 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
return row, column, metric_func.metric_type.value return row, column, metric_func.metric_type.value
@staticmethod
def _validate_nulls(row: Dict[str, Any]) -> Dict[str, Any]:
    """Swap NaN float values in ``row`` for ``None``.

    The dictionary is modified in place and the very same object is
    returned. One warning is logged for each value that gets replaced.

    Args:
        row: dictionary whose values are scanned for float NaNs.

    Returns:
        The ``row`` object that was passed in, with every NaN float
        value set to ``None``.
    """
    # Gather the affected keys up front, in the dictionary's own order,
    # then rewrite them; only existing keys are reassigned.
    nan_keys = [
        key
        for key, value in row.items()
        if isinstance(value, float) and math.isnan(value)
    ]
    for key in nan_keys:
        logger.warning(
            "NaN data detected and will be cast to null in OpenMetadata to maintain database parity"
        )
        row[key] = None
    return row
# pylint: disable=use-dict-literal # pylint: disable=use-dict-literal
def get_all_metrics( def get_all_metrics(
self, self,

View File

@ -19,6 +19,7 @@ source:
sourceConfig: sourceConfig:
config: config:
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -21,6 +21,7 @@ source:
sourceConfig: sourceConfig:
config: config:
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
schemaFilterPattern: schemaFilterPattern:
includes: includes:
- do_not_touch - do_not_touch

View File

@ -11,6 +11,7 @@ source:
sourceConfig: sourceConfig:
config: config:
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
type: datalake type: datalake
sink: sink:
config: {} config: {}

View File

@ -12,6 +12,7 @@ source:
includeTables: true includeTables: true
includeViews: true includeViews: true
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -15,6 +15,7 @@ source:
includeTables: true includeTables: true
includeViews: true includeViews: true
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -17,6 +17,7 @@ source:
includeTables: true includeTables: true
includeViews: true includeViews: true
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -15,6 +15,7 @@ source:
includeTables: true includeTables: true
includeViews: true includeViews: true
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -13,6 +13,7 @@ source:
sourceConfig: sourceConfig:
config: config:
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -14,6 +14,7 @@ source:
includeTables: true includeTables: true
includeViews: true includeViews: true
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
schemaFilterPattern: schemaFilterPattern:
excludes: excludes:
- information_schema - information_schema

View File

@ -19,6 +19,7 @@ source:
includeViews: true includeViews: true
includeStoredProcedures: false includeStoredProcedures: false
type: DatabaseMetadata type: DatabaseMetadata
includeDDL: true
schemaFilterPattern: schemaFilterPattern:
excludes: excludes:
- information_schema.* - information_schema.*

View File

@ -9,7 +9,9 @@ source:
hostPort: $E2E_VERTICA_HOST_PORT hostPort: $E2E_VERTICA_HOST_PORT
database: VMart database: VMart
sourceConfig: sourceConfig:
config: {} config:
type: DatabaseMetadata
includeDDL: true
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}