diff --git a/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py b/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py index 612e32cec47..005f7c95ea1 100644 --- a/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py +++ b/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py @@ -21,7 +21,7 @@ import threading import traceback from collections import defaultdict from datetime import datetime -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from sqlalchemy import Column, inspect, text from sqlalchemy.exc import DBAPIError, ProgrammingError, ResourceClosedError @@ -451,14 +451,16 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin): column=metric_func.column, sample=sample, ) - if row: - for k, v in row.items(): - # Replace NaN values with None - if isinstance(v, float) and math.isnan(v): - logger.warning( - "NaN data detected and will be cast to null in OpenMetadata to maintain database parity" - ) - row[k] = None + if row and isinstance(row, dict): + row = self._validate_nulls(row) + + # System metrics return a list of dictionaries, with UPDATE, INSERT or DELETE ops results + if row and metric_func.metric_type == MetricTypes.System: + row = [ + self._validate_nulls(r) if isinstance(r, dict) else r + for r in row + ] + except Exception as exc: error = ( f"{metric_func.column if metric_func.column is not None else metric_func.table.__tablename__} " @@ -476,6 +478,17 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin): return row, column, metric_func.metric_type.value + @staticmethod + def _validate_nulls(row: Dict[str, Any]) -> Dict[str, Any]: + """Detect if we are computing NaNs and replace them with None""" + for k, v in row.items(): + if isinstance(v, float) and math.isnan(v): + logger.warning( + "NaN data detected and will be cast to null in OpenMetadata to maintain database parity" + ) + row[k] = None + return row + # pylint: disable=use-dict-literal def get_all_metrics( self, diff --git a/ingestion/tests/cli_e2e/database/bigquery/bigquery.yaml b/ingestion/tests/cli_e2e/database/bigquery/bigquery.yaml index e52bf9fdf3f..88a8be126b4 100644 --- a/ingestion/tests/cli_e2e/database/bigquery/bigquery.yaml +++ b/ingestion/tests/cli_e2e/database/bigquery/bigquery.yaml @@ -19,6 +19,7 @@ source: sourceConfig: config: type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/bigquery_multiple_project/bigquery_multiple_project.yaml b/ingestion/tests/cli_e2e/database/bigquery_multiple_project/bigquery_multiple_project.yaml index 45e61f48e20..4f1755aac6b 100644 --- a/ingestion/tests/cli_e2e/database/bigquery_multiple_project/bigquery_multiple_project.yaml +++ b/ingestion/tests/cli_e2e/database/bigquery_multiple_project/bigquery_multiple_project.yaml @@ -21,6 +21,7 @@ source: sourceConfig: config: type: DatabaseMetadata + includeDDL: true schemaFilterPattern: includes: - do_not_touch diff --git a/ingestion/tests/cli_e2e/database/datalake_s3/datalake_s3.yaml b/ingestion/tests/cli_e2e/database/datalake_s3/datalake_s3.yaml index 7b45d84fbc9..259a024e3be 100644 --- a/ingestion/tests/cli_e2e/database/datalake_s3/datalake_s3.yaml +++ b/ingestion/tests/cli_e2e/database/datalake_s3/datalake_s3.yaml @@ -11,6 +11,7 @@ source: sourceConfig: config: type: DatabaseMetadata + includeDDL: true type: datalake sink: config: {} diff --git a/ingestion/tests/cli_e2e/database/hive/hive.yaml b/ingestion/tests/cli_e2e/database/hive/hive.yaml index 1c890ff0baa..23a239bb29a 100644 --- a/ingestion/tests/cli_e2e/database/hive/hive.yaml +++ b/ingestion/tests/cli_e2e/database/hive/hive.yaml @@ -12,6 +12,7 @@ source: includeTables: true includeViews: true type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/mssql/mssql.yaml b/ingestion/tests/cli_e2e/database/mssql/mssql.yaml index 3048b6af95d..97affc98f49 100644 --- a/ingestion/tests/cli_e2e/database/mssql/mssql.yaml +++ b/ingestion/tests/cli_e2e/database/mssql/mssql.yaml @@ -15,6 +15,7 @@ source: includeTables: true includeViews: true type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/mysql/mysql.yaml b/ingestion/tests/cli_e2e/database/mysql/mysql.yaml index 0122e72dbce..d0e67e623a5 100644 --- a/ingestion/tests/cli_e2e/database/mysql/mysql.yaml +++ b/ingestion/tests/cli_e2e/database/mysql/mysql.yaml @@ -17,6 +17,7 @@ source: includeTables: true includeViews: true type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/oracle/oracle.yaml b/ingestion/tests/cli_e2e/database/oracle/oracle.yaml index 85cb37ceb67..82a4525d18e 100644 --- a/ingestion/tests/cli_e2e/database/oracle/oracle.yaml +++ b/ingestion/tests/cli_e2e/database/oracle/oracle.yaml @@ -15,6 +15,7 @@ source: includeTables: true includeViews: true type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/postgres/postgres.yaml b/ingestion/tests/cli_e2e/database/postgres/postgres.yaml index 731d116c32d..16d67687e30 100644 --- a/ingestion/tests/cli_e2e/database/postgres/postgres.yaml +++ b/ingestion/tests/cli_e2e/database/postgres/postgres.yaml @@ -13,6 +13,7 @@ source: sourceConfig: config: type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {} diff --git a/ingestion/tests/cli_e2e/database/redshift/redshift.yaml b/ingestion/tests/cli_e2e/database/redshift/redshift.yaml index 04dfa7f2632..319c4670856 100644 --- a/ingestion/tests/cli_e2e/database/redshift/redshift.yaml +++ b/ingestion/tests/cli_e2e/database/redshift/redshift.yaml @@ -14,6 +14,7 @@ source: includeTables: true includeViews: true type: DatabaseMetadata + includeDDL: true schemaFilterPattern: excludes: - information_schema diff --git a/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml b/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml index 0a331d72371..03e7a384a67 100644 --- a/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml +++ b/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml @@ -19,6 +19,7 @@ source: includeViews: true includeStoredProcedures: false type: DatabaseMetadata + includeDDL: true schemaFilterPattern: excludes: - information_schema.* diff --git a/ingestion/tests/cli_e2e/database/vertica/vertica.yaml b/ingestion/tests/cli_e2e/database/vertica/vertica.yaml index 730fc486c3e..0388a30aeb1 100644 --- a/ingestion/tests/cli_e2e/database/vertica/vertica.yaml +++ b/ingestion/tests/cli_e2e/database/vertica/vertica.yaml @@ -9,7 +9,9 @@ source: hostPort: $E2E_VERTICA_HOST_PORT database: VMart sourceConfig: - config: {} + config: + type: DatabaseMetadata + includeDDL: true sink: type: metadata-rest config: {}