fix: removed pandas dependencies for non-pandas profiler/testSuite workflows (#10380)

This commit is contained in:
Teddy 2023-03-01 16:38:50 +01:00 committed by GitHub
parent 1c85f1b7fb
commit 6a4df5f460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 3 additions and 35 deletions

View File

@@ -16,8 +16,6 @@ supporting sqlalchemy abstraction layer
 from datetime import datetime, timezone
 from typing import Optional
-from pandas import DataFrame
 from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
 DatalakeConnection,
 )
@@ -46,7 +44,7 @@ class DataLakeTestSuiteInterface(TestSuiteProtocol, PandasInterfaceMixin):
 ometa_client: OpenMetadata = None,
 service_connection_config: DatalakeConnection = None,
 table_entity=None,
-df: DataFrame = None,
+df=None,
 ):
 self.table_entity = table_entity
 self.df = df

View File

@@ -13,9 +13,6 @@
 Interfaces with database for all database engine
 supporting sqlalchemy abstraction layer
 """
-from pandas import DataFrame
 from metadata.generated.schema.entity.data.table import PartitionIntervalType
 from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetween import (
 TableRowInsertedCountToBeBetweenValidator,
@@ -25,7 +22,7 @@ from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetwe
 class PandasInterfaceMixin:
 """Interface mixin grouping shared methods between test suite and profiler interfaces"""
-def get_partitioned_df(self, df: DataFrame) -> DataFrame:
+def get_partitioned_df(self, df):
 """Get partitioned dataframe
 Returns:

View File

@@ -43,18 +43,9 @@ def _label(_fn):
 @wraps(_fn)
 def inner(self, *args, **kwargs):
-import pandas as pd # pylint: disable=import-outside-toplevel
 res = _fn(self, *args, **kwargs)
 # If the metric computation returns some value
 if res is not None:
-try:
-if pd.isnull(res):
-res = None
-except ValueError:
-pass
-if not hasattr(res, "label"):
-return res
 return res.label(self.name())
 return None

View File

@@ -82,7 +82,6 @@ class ColumnCount(StaticMetric):
 )
 return ColunCountFn(literal(len(inspect(self.table).c)))
-@_label
 def df_fn(self, df=None):
 """dataframe function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -84,7 +84,6 @@ class ColumnNames(StaticMetric):
 col_names = ",".join(inspect(self.table).c.keys())
 return ColunNameFn(literal(col_names, type_=sqlalchemy.types.String))
-@_label
 def df_fn(self, df=None):
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -44,7 +44,6 @@ class Count(StaticMetric):
 """sqlalchemy function"""
 return func.count(column(self.col.name))
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -63,7 +63,6 @@ class CountInSet(StaticMetric):
 logger.warning(f"Error trying to run countInSet for {self.col.name}: {exc}")
 return None
-@_label
 def df_fn(self, df):
 """pandas function"""
 if not hasattr(self, "values"):

View File

@@ -43,7 +43,6 @@ class DistinctCount(StaticMetric):
 def fn(self):
 return func.count(distinct(column(self.col.name)))
-@_label
 def df_fn(self, df=None):
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -41,7 +41,6 @@ class Max(StaticMetric):
 return func.max(column(self.col.name))
 # pylint: disable=import-outside-toplevel
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame

View File

@@ -58,7 +58,6 @@ class MaxLength(StaticMetric):
 return None
 # pylint: disable=import-outside-toplevel
-@_label
 def df_fn(self, df=None):
 """dataframe function"""
 import pandas as pd

View File

@@ -74,7 +74,6 @@ class Mean(StaticMetric):
 return None
 # pylint: disable=import-outside-toplevel
-@_label
 def df_fn(self, df=None):
 """dataframe function"""
 from numpy import vectorize

View File

@@ -41,7 +41,6 @@ class Min(StaticMetric):
 return func.min(column(self.col.name))
 # pylint: disable=import-outside-toplevel
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame

View File

@@ -58,7 +58,6 @@ class MinLength(StaticMetric):
 return None
 # pylint: disable=import-outside-toplevel
-@_label
 def df_fn(self, df=None):
 """dataframe function"""
 from numpy import vectorize

View File

@@ -55,7 +55,6 @@ class NotRegexCount(StaticMetric):
 )
 )
-@_label
 def df_fn(self, df):
 """pandas function"""
 if not hasattr(self, "expression"):

View File

@@ -50,7 +50,6 @@ class NullCount(StaticMetric):
 """sqlalchemy function"""
 return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0))
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -52,7 +52,6 @@ class RegexCount(StaticMetric):
 case([(column(self.col.name).regexp_match(self.expression), 1)], else_=0)
 )
-@_label
 def df_fn(self, df):
 """pandas function"""

View File

@@ -48,7 +48,6 @@ class RowCount(StaticMetric):
 """sqlalchemy function"""
 return func.count()
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -90,7 +90,6 @@ class StdDev(StaticMetric):
 )
 return None
-@_label
 def df_fn(self, df=None):
 """pandas function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -42,7 +42,6 @@ class Sum(StaticMetric):
 return None
-@_label
 def df_fn(self, df):
 """pandas function"""
 if is_quantifiable(self.col.type):

View File

@@ -58,7 +58,6 @@ class Median(StaticMetric):
 )
 return None
-@_label
 def df_fn(self, df=None):
 """Dataframe function"""
 from pandas import DataFrame # pylint: disable=import-outside-toplevel

View File

@@ -19,7 +19,6 @@ from collections import defaultdict
 from datetime import datetime, timezone
 from typing import Dict, List
-from pandas import DataFrame
 from sqlalchemy import Column
 from metadata.generated.schema.entity.data.table import DataType, TableData
@@ -101,7 +100,7 @@ class PandasProfilerInterface(ProfilerProtocol, PandasInterfaceMixin):
 self,
 metric_type: str,
 metrics: List[Metrics],
-dfs: List[DataFrame],
+dfs: List,
 *args,
 **kwargs,
 ):