mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-19 04:41:02 +00:00
MINOR: fix system profile return types (#18470)
* fix(redshift-system): redshift return type * fixed bigquery profiler * fixed snowflake profiler * job id action does not support matrix. using plain action summary. * reverted gha change
This commit is contained in:
parent
fc79d60d83
commit
a6d97b67a8
@ -115,9 +115,98 @@ ignore-paths = [
|
|||||||
"ingestion/src/metadata/great_expectations/action.py",
|
"ingestion/src/metadata/great_expectations/action.py",
|
||||||
"ingestion/src/metadata/profiler/interface/nosql/profiler_interface.py",
|
"ingestion/src/metadata/profiler/interface/nosql/profiler_interface.py",
|
||||||
"ingestion/src/metadata/profiler/processor/sampler/nosql/sampler.py",
|
"ingestion/src/metadata/profiler/processor/sampler/nosql/sampler.py",
|
||||||
"ingestion/src/metadata/ingestion/source",
|
".*/src/metadata/ingestion/source/.*/service_spec.py",
|
||||||
"ingestion/src/metadata/profiler/metrics",
|
"ingestion/src/metadata/profiler/metrics",
|
||||||
"ingestion/src/metadata/profiler/source/databricks",
|
"ingestion/src/metadata/profiler/source/databricks",
|
||||||
|
|
||||||
|
# metadata ingestion sources
|
||||||
|
"ingestion/src/metadata/ingestion/source/api/rest/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/api/rest/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/dashboard_service.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/metabase/models.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/mode/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/qliksense/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/qliksense/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/sigma/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/superset/api_source.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/superset/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/dashboard/tableau/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/athena/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/athena/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/azuresql/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/bigquery/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/bigquery/incremental_table_processor.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/bigquery/queries.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/common_db_source.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/couchbase/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/databricks/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/databricks/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/clients/azure_blob.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/clients/base.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/clients/gcs.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/clients/s3.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/datalake/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/deltalake/clients/base.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/deltalake/clients/pyspark.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/deltalake/clients/s3.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/deltalake/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/deltalake/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/doris/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/exasol/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/exasol/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/external_table_lineage_mixin.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/hive/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/lineage_source.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/mssql/lineage.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/mssql/usage.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/mssql/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/mysql/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/oracle/lineage.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/converter_orm.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/lineage.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/metrics.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/types/money.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/postgres/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/redshift/incremental_table_processor.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/redshift/lineage.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/redshift/models.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/sample_data.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/saphana/cdata_parser.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/saphana/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/sas/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/sas/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/snowflake/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/snowflake/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/sql_column_handler.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/teradata/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/trino/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/unitycatalog/client.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/database/unitycatalog/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/messaging/kafka/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/dagster/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/databrickspipeline/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/domopipeline/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/fivetran/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/fivetran/models.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/flink/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/openlineage/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/openlineage/utils.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/pipeline/spline/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/sqa_types.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/storage/gcs/connection.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/storage/gcs/metadata.py",
|
||||||
|
"ingestion/src/metadata/ingestion/source/storage/s3/metadata.py",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.pylint."MESSAGES CONTROL"]
|
[tool.pylint."MESSAGES CONTROL"]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
"""BigqQuery Profiler"""
|
||||||
|
|
||||||
from typing import List, Type
|
from typing import List, Type
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import SystemProfile
|
from metadata.generated.schema.entity.data.table import SystemProfile
|
||||||
@ -20,10 +22,9 @@ class BigQueryProfiler(BigQueryProfilerInterface):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> List[SystemProfile]:
|
) -> List[SystemProfile]:
|
||||||
return self.system_metrics_computer.get_system_metrics(
|
return self.system_metrics_computer.get_system_metrics(
|
||||||
runner.table, self.service_connection_config
|
table=runner.table,
|
||||||
|
usage_location=self.service_connection_config.usageLocation,
|
||||||
)
|
)
|
||||||
|
|
||||||
def initialize_system_metrics_computer(
|
def initialize_system_metrics_computer(self) -> BigQuerySystemMetricsComputer:
|
||||||
self, **kwargs
|
|
||||||
) -> BigQuerySystemMetricsComputer:
|
|
||||||
return BigQuerySystemMetricsComputer(session=self.session)
|
return BigQuerySystemMetricsComputer(session=self.session)
|
||||||
|
@ -1,12 +1,10 @@
|
|||||||
|
"""BigQuery system metric source"""
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from pydantic import TypeAdapter
|
from pydantic import TypeAdapter
|
||||||
from sqlalchemy.orm import DeclarativeMeta
|
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import DmlOperationType, SystemProfile
|
from metadata.generated.schema.entity.data.table import DmlOperationType, SystemProfile
|
||||||
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
|
|
||||||
BigQueryConnection,
|
|
||||||
)
|
|
||||||
from metadata.ingestion.source.database.bigquery.queries import BigQueryQueryResult
|
from metadata.ingestion.source.database.bigquery.queries import BigQueryQueryResult
|
||||||
from metadata.profiler.metrics.system.dml_operation import DatabaseDMLOperations
|
from metadata.profiler.metrics.system.dml_operation import DatabaseDMLOperations
|
||||||
from metadata.profiler.metrics.system.system import (
|
from metadata.profiler.metrics.system.system import (
|
||||||
@ -24,26 +22,24 @@ logger = profiler_logger()
|
|||||||
class BigQuerySystemMetricsSource(
|
class BigQuerySystemMetricsSource(
|
||||||
SQASessionProvider, EmptySystemMetricsSource, CacheProvider
|
SQASessionProvider, EmptySystemMetricsSource, CacheProvider
|
||||||
):
|
):
|
||||||
def __init__(self, *args, **kwargs):
|
"""BigQuery system metrics source class"""
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def get_kwargs(
|
def get_kwargs(self, **kwargs):
|
||||||
self,
|
table = kwargs.get("table")
|
||||||
table: DeclarativeMeta,
|
|
||||||
service_connection: BigQueryConnection,
|
|
||||||
*args,
|
|
||||||
**kwargs,
|
|
||||||
):
|
|
||||||
return {
|
return {
|
||||||
"table": table.__table__.name,
|
"table": table.__table__.name,
|
||||||
"dataset_id": table.__table_args__["schema"],
|
"dataset_id": table.__table_args__["schema"],
|
||||||
"project_id": super().get_session().get_bind().url.host,
|
"project_id": super().get_session().get_bind().url.host,
|
||||||
"usage_location": service_connection.usageLocation,
|
"usage_location": kwargs.get("usage_location"),
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_deletes(
|
def get_deletes(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, table: str, project_id: str, usage_location: str, dataset_id: str
|
table, project_id, usage_location, dataset_id = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("table"),
|
||||||
|
kwargs.get("project_id"),
|
||||||
|
kwargs.get("usage_location"),
|
||||||
|
kwargs.get("dataset_id"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
project_id,
|
project_id,
|
||||||
dataset_id,
|
dataset_id,
|
||||||
@ -62,9 +58,13 @@ class BigQuerySystemMetricsSource(
|
|||||||
DmlOperationType.DELETE,
|
DmlOperationType.DELETE,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_updates(
|
def get_updates(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, table: str, project_id: str, usage_location: str, dataset_id: str
|
table, project_id, usage_location, dataset_id = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("table"),
|
||||||
|
kwargs.get("project_id"),
|
||||||
|
kwargs.get("usage_location"),
|
||||||
|
kwargs.get("dataset_id"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
project_id,
|
project_id,
|
||||||
dataset_id,
|
dataset_id,
|
||||||
@ -82,9 +82,13 @@ class BigQuerySystemMetricsSource(
|
|||||||
DmlOperationType.UPDATE,
|
DmlOperationType.UPDATE,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_inserts(
|
def get_inserts(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, table: str, project_id: str, usage_location: str, dataset_id: str
|
table, project_id, usage_location, dataset_id = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("table"),
|
||||||
|
kwargs.get("project_id"),
|
||||||
|
kwargs.get("usage_location"),
|
||||||
|
kwargs.get("dataset_id"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
project_id,
|
project_id,
|
||||||
dataset_id,
|
dataset_id,
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
"""Redshift profiler"""
|
||||||
|
|
||||||
from metadata.ingestion.source.database.redshift.profiler.system import (
|
from metadata.ingestion.source.database.redshift.profiler.system import (
|
||||||
RedshiftSystemMetricsComputer,
|
RedshiftSystemMetricsComputer,
|
||||||
)
|
)
|
||||||
@ -8,5 +10,5 @@ from metadata.profiler.metrics.system.system import SystemMetricsComputer
|
|||||||
|
|
||||||
|
|
||||||
class RedshiftProfiler(SQAProfilerInterface):
|
class RedshiftProfiler(SQAProfilerInterface):
|
||||||
def initialize_system_metrics_computer(self, **kwargs) -> SystemMetricsComputer:
|
def initialize_system_metrics_computer(self) -> SystemMetricsComputer:
|
||||||
return RedshiftSystemMetricsComputer(session=self.session)
|
return RedshiftSystemMetricsComputer(session=self.session)
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
|
"""
|
||||||
|
Imeplemetation for the redshift system metrics source
|
||||||
|
"""
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from sqlalchemy.orm import DeclarativeMeta
|
from pydantic import TypeAdapter
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import SystemProfile
|
from metadata.generated.schema.entity.data.table import SystemProfile
|
||||||
from metadata.ingestion.source.database.redshift.queries import (
|
from metadata.ingestion.source.database.redshift.queries import (
|
||||||
@ -24,13 +28,14 @@ logger = profiler_logger()
|
|||||||
class RedshiftSystemMetricsSource(
|
class RedshiftSystemMetricsSource(
|
||||||
SQASessionProvider, EmptySystemMetricsSource, CacheProvider
|
SQASessionProvider, EmptySystemMetricsSource, CacheProvider
|
||||||
):
|
):
|
||||||
def __init__(self, *args, **kwargs):
|
"""Redshift system metrics source class"""
|
||||||
# collaborative constructor that initalizes the SQASessionProvider and CacheProvider
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def get_inserts(
|
def get_inserts(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database, schema, table = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("database"),
|
||||||
|
kwargs.get("schema"),
|
||||||
|
kwargs.get("table"),
|
||||||
|
)
|
||||||
queries = self.get_or_update_cache(
|
queries = self.get_or_update_cache(
|
||||||
f"{database}.{schema}",
|
f"{database}.{schema}",
|
||||||
self._get_insert_queries,
|
self._get_insert_queries,
|
||||||
@ -39,16 +44,20 @@ class RedshiftSystemMetricsSource(
|
|||||||
)
|
)
|
||||||
return get_metric_result(queries, table)
|
return get_metric_result(queries, table)
|
||||||
|
|
||||||
def get_kwargs(self, table: DeclarativeMeta, *args, **kwargs):
|
def get_kwargs(self, **kwargs):
|
||||||
|
table = kwargs.get("table")
|
||||||
return {
|
return {
|
||||||
"table": table.__table__.name,
|
"table": table.__table__.name,
|
||||||
"database": self.get_session().get_bind().url.database,
|
"database": self.get_session().get_bind().url.database,
|
||||||
"schema": table.__table__.schema,
|
"schema": table.__table__.schema,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_deletes(
|
def get_deletes(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database, schema, table = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("database"),
|
||||||
|
kwargs.get("schema"),
|
||||||
|
kwargs.get("table"),
|
||||||
|
)
|
||||||
queries = self.get_or_update_cache(
|
queries = self.get_or_update_cache(
|
||||||
f"{database}.{schema}",
|
f"{database}.{schema}",
|
||||||
self._get_delete_queries,
|
self._get_delete_queries,
|
||||||
@ -57,9 +66,10 @@ class RedshiftSystemMetricsSource(
|
|||||||
)
|
)
|
||||||
return get_metric_result(queries, table)
|
return get_metric_result(queries, table)
|
||||||
|
|
||||||
def get_updates(
|
def get_updates(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database = kwargs.get("database")
|
||||||
) -> List[SystemProfile]:
|
schema = kwargs.get("schema")
|
||||||
|
table = kwargs.get("table")
|
||||||
queries = self.get_or_update_cache(
|
queries = self.get_or_update_cache(
|
||||||
f"{database}.{schema}",
|
f"{database}.{schema}",
|
||||||
self._get_update_queries,
|
self._get_update_queries,
|
||||||
@ -111,7 +121,7 @@ class RedshiftSystemMetricsSource(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_metric_result(ddls: List[QueryResult], table_name: str) -> List:
|
def get_metric_result(ddls: List[QueryResult], table_name: str) -> List[SystemProfile]:
|
||||||
"""Given query results, retur the metric result
|
"""Given query results, retur the metric result
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -121,15 +131,17 @@ def get_metric_result(ddls: List[QueryResult], table_name: str) -> List:
|
|||||||
Returns:
|
Returns:
|
||||||
List:
|
List:
|
||||||
"""
|
"""
|
||||||
return [
|
return TypeAdapter(List[SystemProfile]).validate_python(
|
||||||
{
|
[
|
||||||
"timestamp": datetime_to_timestamp(ddl.start_time, milliseconds=True),
|
{
|
||||||
"operation": ddl.query_type,
|
"timestamp": datetime_to_timestamp(ddl.start_time, milliseconds=True),
|
||||||
"rowsAffected": ddl.rows,
|
"operation": ddl.query_type,
|
||||||
}
|
"rowsAffected": ddl.rows,
|
||||||
for ddl in ddls
|
}
|
||||||
if ddl.table_name == table_name
|
for ddl in ddls
|
||||||
]
|
if ddl.table_name == table_name
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RedshiftSystemMetricsComputer(SystemMetricsComputer, RedshiftSystemMetricsSource):
|
class RedshiftSystemMetricsComputer(SystemMetricsComputer, RedshiftSystemMetricsSource):
|
||||||
|
@ -22,5 +22,5 @@ from metadata.profiler.metrics.system.system import SystemMetricsComputer
|
|||||||
|
|
||||||
|
|
||||||
class SnowflakeProfiler(SnowflakeProfilerInterface):
|
class SnowflakeProfiler(SnowflakeProfilerInterface):
|
||||||
def initialize_system_metrics_computer(self, **kwargs) -> SystemMetricsComputer:
|
def initialize_system_metrics_computer(self) -> SystemMetricsComputer:
|
||||||
return SnowflakeSystemMetricsComputer(session=self.session)
|
return SnowflakeSystemMetricsComputer(session=self.session)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
"""Snowflake system metrics source"""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
@ -5,7 +7,6 @@ from typing import List, Optional, Tuple
|
|||||||
|
|
||||||
import sqlalchemy.orm
|
import sqlalchemy.orm
|
||||||
from pydantic import TypeAdapter
|
from pydantic import TypeAdapter
|
||||||
from sqlalchemy.orm import DeclarativeMeta
|
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import DmlOperationType, SystemProfile
|
from metadata.generated.schema.entity.data.table import DmlOperationType, SystemProfile
|
||||||
from metadata.ingestion.source.database.snowflake.models import (
|
from metadata.ingestion.source.database.snowflake.models import (
|
||||||
@ -69,6 +70,22 @@ def _parse_query(query: str) -> Optional[str]:
|
|||||||
|
|
||||||
|
|
||||||
class SnowflakeTableResovler:
|
class SnowflakeTableResovler:
|
||||||
|
"""A class the resolves snowflake tables by mimicking snowflake's default resolution logic:
|
||||||
|
https://docs.snowflake.com/en/sql-reference/name-resolution
|
||||||
|
|
||||||
|
This default specification searches in the following order:
|
||||||
|
- The explicitly provided schema
|
||||||
|
- The current schema
|
||||||
|
- The public schema
|
||||||
|
|
||||||
|
This can be altered by changing the SEARCH_PATH session parameter. If the users change
|
||||||
|
this paramter, this resolver will might return wrong values.
|
||||||
|
|
||||||
|
There is no way to extract the SEARCH_PATH from the query after it has been executed. Hence, we can
|
||||||
|
only rely on the default behavior and maybe allow the users to configure the search path
|
||||||
|
at the connection level (TODO).
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, session: sqlalchemy.orm.Session):
|
def __init__(self, session: sqlalchemy.orm.Session):
|
||||||
self._cache = LRUCache[bool](LRU_CACHE_SIZE)
|
self._cache = LRUCache[bool](LRU_CACHE_SIZE)
|
||||||
self.session = session
|
self.session = session
|
||||||
@ -116,7 +133,8 @@ class SnowflakeTableResovler:
|
|||||||
Returns:
|
Returns:
|
||||||
tuple: Tuple of database, schema and table names
|
tuple: Tuple of database, schema and table names
|
||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If the table is not found in the metadata or if there are duplicate results (there shouldn't be)
|
RuntimeError: If the table is not found in the metadata or if there are duplicate results
|
||||||
|
(there shouldn't be)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
search_paths = []
|
search_paths = []
|
||||||
@ -131,7 +149,7 @@ class SnowflakeTableResovler:
|
|||||||
search_paths += ".".join([context_database, PUBLIC_SCHEMA, table_name])
|
search_paths += ".".join([context_database, PUBLIC_SCHEMA, table_name])
|
||||||
return context_database, PUBLIC_SCHEMA, table_name
|
return context_database, PUBLIC_SCHEMA, table_name
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"Could not find the table {search_paths}.".format(
|
"Could not find the table {search_paths}.".format( # pylint: disable=consider-using-f-string
|
||||||
search_paths=" OR ".join(map(lambda x: f"[{x}]", search_paths))
|
search_paths=" OR ".join(map(lambda x: f"[{x}]", search_paths))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -154,7 +172,8 @@ class SnowflakeTableResovler:
|
|||||||
Args:
|
Args:
|
||||||
context_database (str): Database name from the query context
|
context_database (str): Database name from the query context
|
||||||
context_schema (Optional[str]): Schema name from the query context
|
context_schema (Optional[str]): Schema name from the query context
|
||||||
identifier (str): Identifier string extracted from a query (can be 'db.schema.table', 'schema.table' or just 'table')
|
identifier (str): Identifier string extracted from a query (can be
|
||||||
|
'db.schema.table', 'schema.table' or just 'table')
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[Optional[str], Optional[str], Optional[str]]: Tuple of database, schema and table names
|
Tuple[Optional[str], Optional[str], Optional[str]]: Tuple of database, schema and table names
|
||||||
Raises:
|
Raises:
|
||||||
@ -266,22 +285,28 @@ def get_snowflake_system_queries(
|
|||||||
class SnowflakeSystemMetricsSource(
|
class SnowflakeSystemMetricsSource(
|
||||||
SQASessionProvider, EmptySystemMetricsSource, CacheProvider[SnowflakeQueryLogEntry]
|
SQASessionProvider, EmptySystemMetricsSource, CacheProvider[SnowflakeQueryLogEntry]
|
||||||
):
|
):
|
||||||
|
"""Snowflake system metrics source"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.resolver = SnowflakeTableResovler(
|
self.resolver = SnowflakeTableResovler(
|
||||||
session=super().get_session(),
|
session=super().get_session(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_kwargs(self, table: DeclarativeMeta, *args, **kwargs):
|
def get_kwargs(self, **kwargs):
|
||||||
|
table = kwargs.get("table")
|
||||||
return {
|
return {
|
||||||
"table": table.__table__.name,
|
"table": table.__table__.name,
|
||||||
"database": self.get_session().get_bind().url.database,
|
"database": self.get_session().get_bind().url.database,
|
||||||
"schema": table.__table__.schema,
|
"schema": table.__table__.schema,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_inserts(
|
def get_inserts(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database, schema, table = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("database"),
|
||||||
|
kwargs.get("schema"),
|
||||||
|
kwargs.get("table"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
database,
|
database,
|
||||||
schema,
|
schema,
|
||||||
@ -299,9 +324,12 @@ class SnowflakeSystemMetricsSource(
|
|||||||
DmlOperationType.INSERT,
|
DmlOperationType.INSERT,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_updates(
|
def get_updates(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database, schema, table = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("database"),
|
||||||
|
kwargs.get("schema"),
|
||||||
|
kwargs.get("table"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
database,
|
database,
|
||||||
schema,
|
schema,
|
||||||
@ -319,9 +347,12 @@ class SnowflakeSystemMetricsSource(
|
|||||||
DmlOperationType.UPDATE,
|
DmlOperationType.UPDATE,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_deletes(
|
def get_deletes(self, **kwargs) -> List[SystemProfile]:
|
||||||
self, database: str, schema: str, table: str
|
database, schema, table = (
|
||||||
) -> List[SystemProfile]:
|
kwargs.get("database"),
|
||||||
|
kwargs.get("schema"),
|
||||||
|
kwargs.get("table"),
|
||||||
|
)
|
||||||
return self.get_system_profile(
|
return self.get_system_profile(
|
||||||
database,
|
database,
|
||||||
schema,
|
schema,
|
||||||
|
@ -391,7 +391,7 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
|
|||||||
dictionnary of results
|
dictionnary of results
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Computing system metrics for {runner.table.__tablename__}")
|
logger.debug(f"Computing system metrics for {runner.table.__tablename__}")
|
||||||
return self.system_metrics_computer.get_system_metrics(runner.table)
|
return self.system_metrics_computer.get_system_metrics(table=runner.table)
|
||||||
|
|
||||||
def _create_thread_safe_sampler(
|
def _create_thread_safe_sampler(
|
||||||
self,
|
self,
|
||||||
|
@ -42,6 +42,8 @@ T = TypeVar("T")
|
|||||||
|
|
||||||
|
|
||||||
class CacheProvider(ABC, Generic[T]):
|
class CacheProvider(ABC, Generic[T]):
|
||||||
|
"""Cache provider class to provide cache for system metrics"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.cache = LRUCache[T](LRU_CACHE_SIZE)
|
self.cache = LRUCache[T](LRU_CACHE_SIZE)
|
||||||
|
|
||||||
@ -63,19 +65,20 @@ class EmptySystemMetricsSource:
|
|||||||
"""Empty system metrics source that can be used as a default. Just returns an empty list of system metrics
|
"""Empty system metrics source that can be used as a default. Just returns an empty list of system metrics
|
||||||
for any resource."""
|
for any resource."""
|
||||||
|
|
||||||
def get_inserts(self, *args, **kwargs) -> List[SystemProfile]:
|
def get_inserts(self, **kwargs) -> List[SystemProfile]:
|
||||||
"""Get insert queries"""
|
"""Get insert queries"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_deletes(self, *args, **kwargs) -> List[SystemProfile]:
|
def get_deletes(self, **kwargs) -> List[SystemProfile]:
|
||||||
"""Get delete queries"""
|
"""Get delete queries"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_updates(self, *args, **kwargs) -> List[SystemProfile]:
|
def get_updates(self, **kwargs) -> List[SystemProfile]:
|
||||||
"""Get update queries"""
|
"""Get update queries"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_kwargs(self, *args, **kwargs):
|
def get_kwargs(self, **kwargs):
|
||||||
|
"""Get kwargs to be used in get_inserts, get_deletes, get_updates"""
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
@ -84,11 +87,11 @@ class SystemMetricsComputer(EmptySystemMetricsSource):
|
|||||||
# collaborative constructor that initalizes upstream classes
|
# collaborative constructor that initalizes upstream classes
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
def get_system_metrics(self, *args, **kwargs) -> List[SystemProfile]:
|
def get_system_metrics(self, **kwargs) -> List[SystemProfile]:
|
||||||
"""Return system metrics for a given table. Actual passed object can be a variety of types based
|
"""Return system metrics for a given table. Actual passed object can be a variety of types based
|
||||||
on the underlying infrastructure. For example, in the case of SQLalchemy, it can be a Table object
|
on the underlying infrastructure. For example, in the case of SQLalchemy, it can be a Table object
|
||||||
and in the case of Mongo, it can be a collection object."""
|
and in the case of Mongo, it can be a collection object."""
|
||||||
kwargs = super().get_kwargs(*args, **kwargs)
|
kwargs = super().get_kwargs(**kwargs)
|
||||||
return (
|
return (
|
||||||
super().get_inserts(**kwargs)
|
super().get_inserts(**kwargs)
|
||||||
+ super().get_deletes(**kwargs)
|
+ super().get_deletes(**kwargs)
|
||||||
@ -97,6 +100,8 @@ class SystemMetricsComputer(EmptySystemMetricsSource):
|
|||||||
|
|
||||||
|
|
||||||
class SQASessionProvider:
|
class SQASessionProvider:
|
||||||
|
"""SQASessionProvider class to provide session to the system metrics"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.session = kwargs.pop("session")
|
self.session = kwargs.pop("session")
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
@ -429,11 +429,12 @@ class CliDBBase(TestCase):
|
|||||||
profile_type=SystemProfile,
|
profile_type=SystemProfile,
|
||||||
).entities
|
).entities
|
||||||
actual_profiles = sorted(
|
actual_profiles = sorted(
|
||||||
actual_profiles, key=lambda x: (x.timestamp.root, x.operation.value)
|
actual_profiles,
|
||||||
|
key=lambda x: (-x.timestamp.root, x.operation.value),
|
||||||
)
|
)
|
||||||
expected_profile = sorted(
|
expected_profile = sorted(
|
||||||
expected_profile,
|
expected_profile,
|
||||||
key=lambda x: (x.timestamp.root, x.operation.value),
|
key=lambda x: (-x.timestamp.root, x.operation.value),
|
||||||
)
|
)
|
||||||
assert len(actual_profiles) >= len(expected_profile)
|
assert len(actual_profiles) >= len(expected_profile)
|
||||||
for expected, actual in zip(expected_profile, actual_profiles):
|
for expected, actual in zip(expected_profile, actual_profiles):
|
||||||
|
@ -268,17 +268,17 @@ class SnowflakeCliTest(CliCommonDB.TestSuite, SQACommonMethods):
|
|||||||
rowsAffected=1,
|
rowsAffected=1,
|
||||||
),
|
),
|
||||||
SystemProfile(
|
SystemProfile(
|
||||||
timestamp=Timestamp(root=0),
|
timestamp=Timestamp(root=1),
|
||||||
operation=DmlOperationType.INSERT,
|
operation=DmlOperationType.INSERT,
|
||||||
rowsAffected=1,
|
rowsAffected=1,
|
||||||
),
|
),
|
||||||
SystemProfile(
|
SystemProfile(
|
||||||
timestamp=Timestamp(root=0),
|
timestamp=Timestamp(root=2),
|
||||||
operation=DmlOperationType.UPDATE,
|
operation=DmlOperationType.UPDATE,
|
||||||
rowsAffected=1,
|
rowsAffected=1,
|
||||||
),
|
),
|
||||||
SystemProfile(
|
SystemProfile(
|
||||||
timestamp=Timestamp(root=0),
|
timestamp=Timestamp(root=3),
|
||||||
operation=DmlOperationType.DELETE,
|
operation=DmlOperationType.DELETE,
|
||||||
rowsAffected=1,
|
rowsAffected=1,
|
||||||
),
|
),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user