diff --git a/ingestion/src/metadata/profiler/metrics/composed/null_ratio.py b/ingestion/src/metadata/profiler/metrics/composed/null_ratio.py index 282f565fecc..cdc809eeb3f 100644 --- a/ingestion/src/metadata/profiler/metrics/composed/null_ratio.py +++ b/ingestion/src/metadata/profiler/metrics/composed/null_ratio.py @@ -49,12 +49,9 @@ class NullRatio(ComposedMetric): Safely compute null ratio based on the profiler results of other Metrics """ - import pandas as pd - res_count = res.get(Count.name()) - res_null = res.get(NullCount.name()) - - if not pd.isnull(res_count) and not pd.isnull(res_null): - result = res_null / (res_null + res_count) - return None if pd.isnull(result) else result - return None + count = res.get(Count.name()) + null_count = res.get(NullCount.name()) + if count + null_count == 0: + return None + return null_count / (null_count + count) diff --git a/ingestion/tests/integration/datalake/conftest.py b/ingestion/tests/integration/datalake/conftest.py index f9326b7e6db..1ed88fa8ffb 100644 --- a/ingestion/tests/integration/datalake/conftest.py +++ b/ingestion/tests/integration/datalake/conftest.py @@ -196,21 +196,23 @@ def run_test_suite_workflow(run_ingestion): ingestion_workflow.stop() -@pytest.fixture() -def run_profiler(run_ingestion): - """Test profiler ingestion""" - workflow_config = deepcopy(INGESTION_CONFIG) - workflow_config["source"]["sourceConfig"]["config"].update( +@pytest.fixture(scope="session") +def profiler_workflow_config(workflow_config): + config = deepcopy(INGESTION_CONFIG) + config["source"]["sourceConfig"]["config"].update( { "type": "Profiler", } ) - workflow_config["processor"] = { + config["processor"] = { "type": "orm-profiler", "config": {}, } + config["workflowConfig"] = workflow_config + return config - profiler_workflow = ProfilerWorkflow.create(workflow_config) - profiler_workflow.execute() - profiler_workflow.raise_from_status() - profiler_workflow.stop() + +@pytest.fixture() +def run_profiler(run_ingestion, run_workflow, profiler_workflow_config): + """Test profiler ingestion""" + run_workflow(ProfilerWorkflow, profiler_workflow_config) diff --git a/ingestion/tests/integration/datalake/resources/users.csv b/ingestion/tests/integration/datalake/resources/users.csv index 2611c2934b7..97e6ef88e07 100644 --- a/ingestion/tests/integration/datalake/resources/users.csv +++ b/ingestion/tests/integration/datalake/resources/users.csv @@ -1,4 +1,4 @@ -id,first_name,last_name,city,country,birthdate,age,json_data -1,John,Doe,Los Angeles,US,1980-01-01,40,{"foo": {"bar": "baz"}} -2,Jane,Doe,Los Angeles,US,2000-12-31,39,{"foo": {"bar": "baz"}} -3,Jane,Smith,Paris,FR,2001-11-11,28,{"foo": {"bar": "baz"}} +id,first_name,last_name,city,country,birthdate,age,json_data,int_with_na +1,John,Doe,Los Angeles,US,1980-01-01,40,{"foo": {"bar": "baz"}},10 +2,Jane,Doe,Los Angeles,US,2000-12-31,39,{"foo": {"bar": "baz"}},20 +3,Jane,Smith,Paris,FR,2001-11-11,28,{"foo": {"bar": "baz"}},NA