Fixed linting for Profiler (#7922)

* - Fixed linting
- Added logic to skip partition check when engine is not BQ
- Added ingestion partition logic to testSuite

* Fixed python formatting

* Fixed test for BQ partition
Teddy 2022-10-11 09:36:36 +02:00 committed by GitHub
parent 22ac150d7b
commit 3b7f576d04
15 changed files with 57 additions and 53 deletions

View File

@@ -22,6 +22,7 @@ extension-pkg-allow-list=pydantic
 [MESSAGES CONTROL]
 disable=no-name-in-module
+enable=useless-suppression

 [FORMAT]
 # We all have big monitors now
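For context, `useless-suppression` (pylint message I0021) reports `# pylint: disable=...` pragmas that no longer suppress anything, which is what drives the pragma cleanups in the files below. A minimal, hypothetical illustration (not from this PR):

# pylint flags the pragma below as useless-suppression, because invalid-name
# is never emitted for this snake_case function, so the disable does nothing:
def add_numbers(first: int, second: int) -> int:  # pylint: disable=invalid-name
    return first + second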

View File

@@ -82,6 +82,7 @@ class ProfilerWorkflow:
     metadata: OpenMetadata

     def __init__(self, config: OpenMetadataWorkflowConfig):
+        self.profiler_obj = None  # defined in `create_profiler_obj()`
         self.config = config
         self.metadata_config: OpenMetadataConnection = (
             self.config.workflowConfig.openMetadataServerConfig
@@ -211,24 +212,29 @@ class ProfilerWorkflow:
         Args:
             entity: table entity
         """
+        if (
+            not hasattr(entity, "serviceType")
+            or entity.serviceType != DatabaseServiceType.BigQuery
+        ):
+            return None
+
         entity_config: TableConfig = self.get_config_for_entity(entity)
         if entity_config:
             return entity_config.partitionConfig

         if hasattr(entity, "tablePartition") and entity.tablePartition:
-            try:
-                if entity.tablePartition.intervalType == IntervalType.TIME_UNIT:
-                    return TablePartitionConfig(
-                        partitionField=entity.tablePartition.columns[0]
-                    )
-                elif entity.tablePartition.intervalType == IntervalType.INGESTION_TIME:
-                    if entity.tablePartition.interval == "DAY":
-                        return TablePartitionConfig(partitionField="_PARTITIONDATE")
-                    return TablePartitionConfig(partitionField="_PARTITIONTIME")
-            except Exception:
-                raise TypeError(
-                    f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
-                )
+            if entity.tablePartition.intervalType == IntervalType.TIME_UNIT:
+                return TablePartitionConfig(
+                    partitionField=entity.tablePartition.columns[0]
+                )
+            if entity.tablePartition.intervalType == IntervalType.INGESTION_TIME:
+                if entity.tablePartition.interval == "DAY":
+                    return TablePartitionConfig(partitionField="_PARTITIONDATE")
+                return TablePartitionConfig(partitionField="_PARTITIONTIME")
+            raise TypeError(
+                f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
+            )
+        return None

     def create_profiler_interface(
         self,
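For readers skimming the diff, here is a standalone sketch of the new control flow: non-BigQuery entities short-circuit to None, time-unit partitions use the declared column, and ingestion-time partitions fall back to BigQuery's pseudo columns. Names are simplified and serviceType is reduced to a plain string; this is an illustration, not the project's actual module:

from enum import Enum
from typing import Optional


class IntervalType(Enum):
    TIME_UNIT = "TIME-UNIT"
    INGESTION_TIME = "INGESTION-TIME"


def get_partition_field(entity) -> Optional[str]:
    """Return the partition column for a BigQuery table, else None."""
    if getattr(entity, "serviceType", None) != "BigQuery":
        return None  # partition checks only apply to the BigQuery engine
    partition = getattr(entity, "tablePartition", None)
    if not partition:
        return None
    if partition.intervalType == IntervalType.TIME_UNIT:
        return partition.columns[0]
    if partition.intervalType == IntervalType.INGESTION_TIME:
        # ingestion-time partitioned tables expose pseudo columns
        return "_PARTITIONDATE" if partition.interval == "DAY" else "_PARTITIONTIME"
    raise TypeError(f"Unsupported partition type {partition.intervalType}. Skipping table")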
@@ -288,8 +294,7 @@ class ProfilerWorkflow:
                 database.name.__root__, "Database pattern not allowed"
             )
             return None
-        else:
-            return database
+        return database

     def filter_entities(self, tables: List[Table]) -> Iterable[Table]:
         """
@@ -400,7 +405,8 @@ class ProfilerWorkflow:
         if not databases:
             raise ValueError(
                 "databaseFilterPattern returned 0 result. At least 1 database must be returned by the filter pattern."
-                f"\n\t- includes: {self.source_config.databaseFilterPattern.includes}\n\t- excludes: {self.source_config.databaseFilterPattern.excludes}"
+                f"\n\t- includes: {self.source_config.databaseFilterPattern.includes}\n\t"
+                f"- excludes: {self.source_config.databaseFilterPattern.excludes}"
             )

         for database in databases:
@@ -457,8 +463,7 @@ class ProfilerWorkflow:
             or (hasattr(self, "sink") and self.sink.get_status().failures)
         ):
             return 1
-        else:
-            return 0
+        return 0

     def raise_from_status(self, raise_warnings=False):
         """
@@ -522,4 +527,4 @@ class ProfilerWorkflow:

     @staticmethod
     def _is_sample_source(service_type):
-        return service_type == "sample-data" or service_type == "sample-usage"
+        return service_type in {"sample-data", "sample-usage"}

View File

@@ -13,6 +13,8 @@
 Metric Core definitions
 """
+# pylint: disable=invalid-name
+
 from abc import ABC, abstractmethod
 from enum import Enum
 from functools import wraps

View File

@@ -29,7 +29,7 @@ from metadata.utils.logger import profiler_logger

 logger = profiler_logger()

+# pylint: disable=invalid-name
 class avg(GenericFunction):
     name = "avg"
     inherit_cache = CACHE

@@ -39,7 +39,7 @@ class avg(GenericFunction):
 def _(element, compiler, **kw):
     """Handle case for empty table. If empty, clickhouse returns NaN"""
     proc = compiler.process(element.clauses, **kw)
-    return "if(isNaN(avg(%s)), null, avg(%s))" % ((proc,) * 2)
+    return f"if(isNaN(avg({proc})), null, avg({proc}))"


 class Mean(StaticMetric):
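The %-to-f-string change above sits inside SQLAlchemy's compiler-extension pattern: a GenericFunction subclass plus a dialect-specific @compiles hook. A self-contained sketch of that pattern, using a hypothetical my_avg construct rather than the project's class:

from sqlalchemy import column
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction


class my_avg(GenericFunction):  # lowercase name follows SQA conventions
    name = "my_avg"
    inherit_cache = True


@compiles(my_avg, "clickhouse")
def _compile_avg_clickhouse(element, compiler, **kw):
    # ClickHouse returns NaN for avg() over an empty table; map it to NULL
    proc = compiler.process(element.clauses, **kw)
    return f"if(isNaN(avg({proc})), null, avg({proc}))"


print(my_avg(column("score")))  # default dialect renders: my_avg(score)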

View File

@@ -82,6 +82,7 @@ def map_types(col: Column, table_service_type):
         table_service_type == databaseService.DatabaseServiceType.Snowflake
         and col.dataType == DataType.JSON
     ):
+        # pylint: disable=import-outside-toplevel
        from snowflake.sqlalchemy import VARIANT

         return VARIANT

View File

@@ -29,7 +29,7 @@ class MedianFn(FunctionElement):

 @compiles(MedianFn)
-def _(elements, compiler, **kwargs):
+def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
     col = elements.clauses.clauses[0].name
     return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC)" % col

@@ -69,7 +69,7 @@ def _(elements, compiler, **kwargs):

 @compiles(MedianFn, Dialects.MySQL)
-def _(elemenst, compiler, **kwargs):
+def _(elemenst, compiler, **kwargs):  # pylint: disable=unused-argument
     """Median computation for MySQL currently not supported
     Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
     """

@@ -77,8 +77,8 @@ def _(elemenst, compiler, **kwargs):

 @compiles(MedianFn, Dialects.SQLite)
-def _(elements, compiler, **kwargs):
-    col, table = [element for element in elements.clauses]
+def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
+    col, table = list(elements.clauses)
     return """
     (SELECT
         AVG({col})
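The `list(elements.clauses)` change replaces a comprehension that pylint flags as unnecessary-comprehension (R1721). A standalone sketch of the two-clause unpacking the SQLite hook relies on, with illustrative names and a deliberately simplified SQL body:

from sqlalchemy import column
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement


class median_fn(FunctionElement):  # illustrative stand-in for MedianFn
    inherit_cache = True


@compiles(median_fn)
def _render_median(element, compiler, **kwargs):  # pylint: disable=unused-argument
    # ClauseList is iterable, so list() unpacks the (column, table) pair
    col, table = list(element.clauses)
    return f"(SELECT AVG({col.name}) FROM {table.name})"  # simplified body


print(median_fn(column("age"), column("person")))  # (SELECT AVG(age) FROM person)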

View File

@@ -13,7 +13,7 @@
 Define Modulo function
 """
 # Keep SQA docs style defining custom constructs
-# pylint: disable=consider-using-f-string,duplicate-code
+# pylint: disable=duplicate-code
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.sql.functions import FunctionElement

View File

@@ -33,11 +33,11 @@ class SumFn(GenericFunction):
 def _(element, compiler, **kw):
     """Handle case for empty table. If empty, clickhouse returns NaN"""
     proc = compiler.process(element.clauses, **kw)
-    return "SUM(%s)" % proc
+    return f"SUM({proc})"


 @compiles(SumFn, Dialects.BigQuery)
 def _(element, compiler, **kw):
     """Handle case where column type is INTEGER but SUM returns a NUMBER"""
     proc = compiler.process(element.clauses, **kw)
-    return "SUM(CAST(%s AS NUMERIC))" % proc
+    return f"SUM(CAST({proc} AS NUMERIC))"

View File

@@ -12,7 +12,7 @@
 """
 Expand sqlalchemy types to map them to OpenMetadata DataType
 """
-# pylint: disable=duplicate-code
+# pylint: disable=duplicate-code,abstract-method

 from typing import Optional

View File

@@ -12,7 +12,7 @@
 """
 Expand sqlalchemy types to map them to OpenMetadata DataType
 """
-# pylint: disable=duplicate-code
+# pylint: disable=duplicate-code,abstract-method

 from typing import Optional

View File

@@ -12,7 +12,7 @@
 """
 Expand sqlalchemy types to map them to OpenMetadata DataType
 """
-# pylint: disable=duplicate-code
+# pylint: disable=duplicate-code,abstract-method

 import traceback
 from uuid import UUID

View File

@@ -38,7 +38,6 @@ from metadata.orm_profiler.api.models import ProfilerResponse
 from metadata.orm_profiler.metrics.core import (
     ComposedMetric,
     CustomMetric,
-    Metric,
     MetricTypes,
     QueryMetric,
     StaticMetric,

@@ -69,8 +68,6 @@ class Profiler(Generic[TMetric]):
         - A tuple of metrics, from which we will construct queries.
     """

-    # pylint: disable=too-many-instance-attributes,too-many-public-methods
-
     def __init__(
         self,
         *metrics: Type[TMetric],

View File

@@ -87,8 +87,7 @@ class QueryRunner:
         """dispatch query to sample or all table"""
         if isinstance(self._sample, AliasedClass):
             return self.select_first_from_sample(*entities, **kwargs)
-        else:
-            return self.select_first_from_table(*entities, **kwargs)
+        return self.select_first_from_table(*entities, **kwargs)

     @staticmethod
     def select_first_from_query(query: Query):

View File

@@ -216,29 +216,24 @@ class TestSuiteWorkflow:
         Args:
             entity: table entity
         """
-        # Should remove this with https://github.com/open-metadata/OpenMetadata/issues/5458
-        if entity.serviceType != DatabaseServiceType.BigQuery:
+        if (
+            not hasattr(entity, "serviceType")
+            or entity.serviceType != DatabaseServiceType.BigQuery
+        ):
             return None

-        if entity.tablePartition:
-            if entity.tablePartition.intervalType in {
-                IntervalType.TIME_UNIT,
-                IntervalType.INGESTION_TIME,
-            }:
-                try:
-                    partition_field = entity.tablePartition.columns[0]
-                except Exception:
-                    raise TypeError(
-                        "Unsupported ingestion based partition type. Skipping table"
-                    )
+        if hasattr(entity, "tablePartition") and entity.tablePartition:
+            if entity.tablePartition.intervalType == IntervalType.TIME_UNIT:
                 return TablePartitionConfig(
-                    partitionField=partition_field,
+                    partitionField=entity.tablePartition.columns[0]
                 )
+            if entity.tablePartition.intervalType == IntervalType.INGESTION_TIME:
+                if entity.tablePartition.interval == "DAY":
+                    return TablePartitionConfig(partitionField="_PARTITIONDATE")
+                return TablePartitionConfig(partitionField="_PARTITIONTIME")
             raise TypeError(
                 f"Unsupported partition type {entity.tablePartition.intervalType}. Skipping table"
             )
         return None

     def _create_sqa_tests_runner_interface(self, table_fqn: str):
@@ -436,8 +431,8 @@ class TestSuiteWorkflow:
             )
         except TypeError as exc:
             logger.debug(traceback.format_exc())
-            logger.warning(f"Could not run test case {test_case.name}: {exc}")
-            self.status.failure(test_case.fullyQualifiedName.__root__)
+            logger.warning(f"Could not run test case for table {table_fqn}: {exc}")
+            self.status.failure(table_fqn)

     def print_status(self) -> None:
         """

View File

@@ -18,6 +18,9 @@ from pydantic import BaseModel

 from metadata.generated.schema.entity.data.database import Database
 from metadata.generated.schema.entity.data.table import IntervalType, TablePartition
+from metadata.generated.schema.entity.services.databaseService import (
+    DatabaseServiceType,
+)
 from metadata.generated.schema.type.entityReference import EntityReference
 from metadata.orm_profiler.api.workflow import ProfilerWorkflow

@@ -77,6 +80,7 @@ MOCK_DATABASE = Database(

 class MockTable(BaseModel):
     tablePartition: Optional[TablePartition]
+    serviceType = DatabaseServiceType.BigQuery

     class Config:
         arbitrary_types_allowed = True
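A hedged sketch of how the extended mock might be exercised; the field values and assertion are illustrative, not lifted from this test module:

mock_table = MockTable(
    tablePartition=TablePartition(
        columns=["event_time"],
        intervalType=IntervalType.INGESTION_TIME,
        interval="DAY",
    )
)
# the class-level default added above makes every mock a BigQuery table,
# so it passes the new serviceType guard in both workflows
assert mock_table.serviceType == DatabaseServiceType.BigQuery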