mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-01 02:56:10 +00:00
fix: removed pandas dependencies for non pandas profiler/testSuite workflows (#10380)
This commit is contained in:
parent
1c85f1b7fb
commit
6a4df5f460
@ -16,8 +16,6 @@ supporting sqlalchemy abstraction layer
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
|
||||
DatalakeConnection,
|
||||
)
|
||||
@ -46,7 +44,7 @@ class DataLakeTestSuiteInterface(TestSuiteProtocol, PandasInterfaceMixin):
|
||||
ometa_client: OpenMetadata = None,
|
||||
service_connection_config: DatalakeConnection = None,
|
||||
table_entity=None,
|
||||
df: DataFrame = None,
|
||||
df=None,
|
||||
):
|
||||
self.table_entity = table_entity
|
||||
self.df = df
|
||||
|
||||
@ -13,9 +13,6 @@
|
||||
Interfaces with database for all database engine
|
||||
supporting sqlalchemy abstraction layer
|
||||
"""
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from metadata.generated.schema.entity.data.table import PartitionIntervalType
|
||||
from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetween import (
|
||||
TableRowInsertedCountToBeBetweenValidator,
|
||||
@ -25,7 +22,7 @@ from metadata.test_suite.validations.table.pandas.tableRowInsertedCountToBeBetwe
|
||||
class PandasInterfaceMixin:
|
||||
"""Interface mixin grouping shared methods between test suite and profiler interfaces"""
|
||||
|
||||
def get_partitioned_df(self, df: DataFrame) -> DataFrame:
|
||||
def get_partitioned_df(self, df):
|
||||
"""Get partitioned dataframe
|
||||
|
||||
Returns:
|
||||
|
||||
@ -43,18 +43,9 @@ def _label(_fn):
|
||||
|
||||
@wraps(_fn)
|
||||
def inner(self, *args, **kwargs):
|
||||
import pandas as pd # pylint: disable=import-outside-toplevel
|
||||
|
||||
res = _fn(self, *args, **kwargs)
|
||||
# If the metric computation returns some value
|
||||
if res is not None:
|
||||
try:
|
||||
if pd.isnull(res):
|
||||
res = None
|
||||
except ValueError:
|
||||
pass
|
||||
if not hasattr(res, "label"):
|
||||
return res
|
||||
return res.label(self.name())
|
||||
|
||||
return None
|
||||
|
||||
@ -82,7 +82,6 @@ class ColumnCount(StaticMetric):
|
||||
)
|
||||
return ColunCountFn(literal(len(inspect(self.table).c)))
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""dataframe function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -84,7 +84,6 @@ class ColumnNames(StaticMetric):
|
||||
col_names = ",".join(inspect(self.table).c.keys())
|
||||
return ColunNameFn(literal(col_names, type_=sqlalchemy.types.String))
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
|
||||
@ -44,7 +44,6 @@ class Count(StaticMetric):
|
||||
"""sqlalchemy function"""
|
||||
return func.count(column(self.col.name))
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -63,7 +63,6 @@ class CountInSet(StaticMetric):
|
||||
logger.warning(f"Error trying to run countInSet for {self.col.name}: {exc}")
|
||||
return None
|
||||
|
||||
@_label
|
||||
def df_fn(self, df):
|
||||
"""pandas function"""
|
||||
if not hasattr(self, "values"):
|
||||
|
||||
@ -43,7 +43,6 @@ class DistinctCount(StaticMetric):
|
||||
def fn(self):
|
||||
return func.count(distinct(column(self.col.name)))
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
|
||||
@ -41,7 +41,6 @@ class Max(StaticMetric):
|
||||
return func.max(column(self.col.name))
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame
|
||||
|
||||
@ -58,7 +58,6 @@ class MaxLength(StaticMetric):
|
||||
return None
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""dataframe function"""
|
||||
import pandas as pd
|
||||
|
||||
@ -74,7 +74,6 @@ class Mean(StaticMetric):
|
||||
return None
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""dataframe function"""
|
||||
from numpy import vectorize
|
||||
|
||||
@ -41,7 +41,6 @@ class Min(StaticMetric):
|
||||
return func.min(column(self.col.name))
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame
|
||||
|
||||
@ -58,7 +58,6 @@ class MinLength(StaticMetric):
|
||||
return None
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""dataframe function"""
|
||||
from numpy import vectorize
|
||||
|
||||
@ -55,7 +55,6 @@ class NotRegexCount(StaticMetric):
|
||||
)
|
||||
)
|
||||
|
||||
@_label
|
||||
def df_fn(self, df):
|
||||
"""pandas function"""
|
||||
if not hasattr(self, "expression"):
|
||||
|
||||
@ -50,7 +50,6 @@ class NullCount(StaticMetric):
|
||||
"""sqlalchemy function"""
|
||||
return SumFn(case([(column(self.col.name).is_(None), 1)], else_=0))
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -52,7 +52,6 @@ class RegexCount(StaticMetric):
|
||||
case([(column(self.col.name).regexp_match(self.expression), 1)], else_=0)
|
||||
)
|
||||
|
||||
@_label
|
||||
def df_fn(self, df):
|
||||
"""pandas function"""
|
||||
|
||||
|
||||
@ -48,7 +48,6 @@ class RowCount(StaticMetric):
|
||||
"""sqlalchemy function"""
|
||||
return func.count()
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -90,7 +90,6 @@ class StdDev(StaticMetric):
|
||||
)
|
||||
return None
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""pandas function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -42,7 +42,6 @@ class Sum(StaticMetric):
|
||||
|
||||
return None
|
||||
|
||||
@_label
|
||||
def df_fn(self, df):
|
||||
"""pandas function"""
|
||||
if is_quantifiable(self.col.type):
|
||||
|
||||
@ -58,7 +58,6 @@ class Median(StaticMetric):
|
||||
)
|
||||
return None
|
||||
|
||||
@_label
|
||||
def df_fn(self, df=None):
|
||||
"""Dataframe function"""
|
||||
from pandas import DataFrame # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -19,7 +19,6 @@ from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List
|
||||
|
||||
from pandas import DataFrame
|
||||
from sqlalchemy import Column
|
||||
|
||||
from metadata.generated.schema.entity.data.table import DataType, TableData
|
||||
@ -101,7 +100,7 @@ class PandasProfilerInterface(ProfilerProtocol, PandasInterfaceMixin):
|
||||
self,
|
||||
metric_type: str,
|
||||
metrics: List[Metrics],
|
||||
dfs: List[DataFrame],
|
||||
dfs: List,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user