fix: removed pandas dependencies for non-pandas profiler/testSuite workflows (#10380)

This commit is contained in:
Teddy 2023-03-01 16:38:50 +01:00 committed by GitHub
parent 1c85f1b7fb
commit 6a4df5f460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 3 additions and 35 deletions

View File

@ -16,8 +16,6 @@ supporting sqlalchemy abstraction layer
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
from pandas import DataFrame
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import ( from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
DatalakeConnection, DatalakeConnection,
) )
@ -46,7 +44,7 @@ class DataLakeTestSuiteInterface(TestSuiteProtocol, PandasInterfaceMixin):
ometa_client: OpenMetadata = None, ometa_client: OpenMetadata = None,
service_connection_config: DatalakeConnection = None, service_connection_config: DatalakeConnection = None,
table_entity=None, table_entity=None,
df: DataFrame = None, df=None,
): ):
self.table_entity = table_entity self.table_entity = table_entity
self.df = df self.df = df

View File

@ -13,9 +13,6 @@
Interfaces with database for all database engine Interfaces with database for all database engine
supporting sqlalchemy abstraction layer supporting sqlalchemy abstraction layer
""" """
from pandas import DataFrame
from metadata.generated.schema.entity.data.table import PartitionIntervalType from metadata.generated.schema.entity.data.table import PartitionIntervalType
from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetween import ( from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetween import (
TableRowInsertedCountToBeBetweenValidator, TableRowInsertedCountToBeBetweenValidator,
@ -25,7 +22,7 @@ from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetwe
class PandasInterfaceMixin: class PandasInterfaceMixin:
"""Interface mixin grouping shared methods between test suite and profiler interfaces""" """Interface mixin grouping shared methods between test suite and profiler interfaces"""
def get_partitioned_df(self, df: DataFrame) -> DataFrame: def get_partitioned_df(self, df):
"""Get partitioned dataframe """Get partitioned dataframe
Returns: Returns:

View File

@ -43,18 +43,9 @@ def _label(_fn):
@wraps(_fn) @wraps(_fn)
def inner(self, *args, **kwargs): def inner(self, *args, **kwargs):
import pandas as pd # pylint: disable=import-outside-toplevel
res = _fn(self, *args, **kwargs) res = _fn(self, *args, **kwargs)
# If the metric computation returns some value # If the metric computation returns some value
if res is not None: if res is not None:
try:
if pd.isnull(res):
res = None
except ValueError:
pass
if not hasattr(res, "label"):
return res
return res.label(self.name()) return res.label(self.name())
return None return None

View File

@ -82,7 +82,6 @@ class ColumnCount(StaticMetric):
) )
return ColunCountFn(literal(len(inspect(self.table).c))) return ColunCountFn(literal(len(inspect(self.table).c)))
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""dataframe function""" """dataframe function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -84,7 +84,6 @@ class ColumnNames(StaticMetric):
col_names = ",".join(inspect(self.table).c.keys()) col_names = ",".join(inspect(self.table).c.keys())
return ColunNameFn(literal(col_names, type_=sqlalchemy.types.String)) return ColunNameFn(literal(col_names, type_=sqlalchemy.types.String))
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -44,7 +44,6 @@ class Count(StaticMetric):
"""sqlalchemy function""" """sqlalchemy function"""
return func.count(column(self.col.name)) return func.count(column(self.col.name))
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -63,7 +63,6 @@ class CountInSet(StaticMetric):
logger.warning(f"Error trying to run countInSet for {self.col.name}: {exc}") logger.warning(f"Error trying to run countInSet for {self.col.name}: {exc}")
return None return None
@_label
def df_fn(self, df): def df_fn(self, df):
"""pandas function""" """pandas function"""
if not hasattr(self, "values"): if not hasattr(self, "values"):

View File

@ -43,7 +43,6 @@ class DistinctCount(StaticMetric):
def fn(self): def fn(self):
return func.count(distinct(column(self.col.name))) return func.count(distinct(column(self.col.name)))
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -41,7 +41,6 @@ class Max(StaticMetric):
return func.max(column(self.col.name)) return func.max(column(self.col.name))
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame from pandas import DataFrame

View File

@ -58,7 +58,6 @@ class MaxLength(StaticMetric):
return None return None
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""dataframe function""" """dataframe function"""
import pandas as pd import pandas as pd

View File

@ -74,7 +74,6 @@ class Mean(StaticMetric):
return None return None
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""dataframe function""" """dataframe function"""
from numpy import vectorize from numpy import vectorize

View File

@ -41,7 +41,6 @@ class Min(StaticMetric):
return func.min(column(self.col.name)) return func.min(column(self.col.name))
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame from pandas import DataFrame

View File

@ -58,7 +58,6 @@ class MinLength(StaticMetric):
return None return None
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""dataframe function""" """dataframe function"""
from numpy import vectorize from numpy import vectorize

View File

@ -55,7 +55,6 @@ class NotRegexCount(StaticMetric):
) )
) )
@_label
def df_fn(self, df): def df_fn(self, df):
"""pandas function""" """pandas function"""
if not hasattr(self, "expression"): if not hasattr(self, "expression"):

View File

@ -50,7 +50,6 @@ class NullCount(StaticMetric):
"""sqlalchemy function""" """sqlalchemy function"""
return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0)) return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0))
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -52,7 +52,6 @@ class RegexCount(StaticMetric):
case([(column(self.col.name).regexp_match(self.expression), 1)], else_=0) case([(column(self.col.name).regexp_match(self.expression), 1)], else_=0)
) )
@_label
def df_fn(self, df): def df_fn(self, df):
"""pandas function""" """pandas function"""

View File

@ -48,7 +48,6 @@ class RowCount(StaticMetric):
"""sqlalchemy function""" """sqlalchemy function"""
return func.count() return func.count()
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -90,7 +90,6 @@ class StdDev(StaticMetric):
) )
return None return None
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""pandas function""" """pandas function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -42,7 +42,6 @@ class Sum(StaticMetric):
return None return None
@_label
def df_fn(self, df): def df_fn(self, df):
"""pandas function""" """pandas function"""
if is_quantifiable(self.col.type): if is_quantifiable(self.col.type):

View File

@ -58,7 +58,6 @@ class Median(StaticMetric):
) )
return None return None
@_label
def df_fn(self, df=None): def df_fn(self, df=None):
"""Dataframe function""" """Dataframe function"""
from pandas import DataFrame # pylint: disable=import-outside-toplevel from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@ -19,7 +19,6 @@ from collections import defaultdict
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Dict, List from typing import Dict, List
from pandas import DataFrame
from sqlalchemy import Column from sqlalchemy import Column
from metadata.generated.schema.entity.data.table import DataType, TableData from metadata.generated.schema.entity.data.table import DataType, TableData
@ -101,7 +100,7 @@ class PandasProfilerInterface(ProfilerProtocol, PandasInterfaceMixin):
self, self,
metric_type: str, metric_type: str,
metrics: List[Metrics], metrics: List[Metrics],
dfs: List[DataFrame], dfs: List,
*args, *args,
**kwargs, **kwargs,
): ):