# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
|
|
Interfaces with database for all database engine
|
|
supporting sqlalchemy abstraction layer
|
|
"""

from copy import deepcopy
from typing import List, Type, cast

from sqlalchemy import Column, inspect

from metadata.generated.schema.entity.data.table import SystemProfile
from metadata.generated.schema.security.credentials.gcpValues import SingleProjectId
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
    SQAProfilerInterface,
)
from metadata.profiler.metrics.system.bigquery.system import (
    BigQuerySystemMetricsComputer,
)
from metadata.profiler.metrics.system.system import System
from metadata.profiler.processor.runner import QueryRunner
from metadata.utils.logger import profiler_interface_registry_logger
from metadata.utils.ssl_manager import get_ssl_connection

logger = profiler_interface_registry_logger()


class BigQueryProfilerInterface(SQAProfilerInterface):
    """BigQuery profiler interface"""
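
    # A table under profiling may live in a different GCP project (OpenMetadata
    # database) than the one configured on the service connection, so the profiler
    # builds a dedicated connection, and therefore a separate engine and session,
    # per project.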
    def create_session(self):
        connection_config = deepcopy(self.service_connection_config)
        # Create a modified connection for BigQuery with the correct project ID
        if (
            hasattr(connection_config.credentials.gcpConfig, "projectId")
            and self.table_entity.database
        ):
            connection_config.credentials.gcpConfig.projectId = SingleProjectId(
                root=self.table_entity.database.name
            )
        self.connection = get_ssl_connection(connection_config)
        return super().create_session()
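
    # System metrics are computed by the BigQuery-specific computer; the configured
    # usage location and the optional billing project are forwarded to it.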
    def _compute_system_metrics(
        self,
        metrics: Type[System],
        runner: QueryRunner,
        *args,
        **kwargs,
    ) -> List[SystemProfile]:
        logger.debug(f"Computing {metrics.name()} metric for {runner.table_name}")
        self.system_metrics_class = cast(
            Type[BigQuerySystemMetricsComputer], self.system_metrics_class
        )
        instance = self.system_metrics_class(
            session=self.session,
            runner=runner,
            usage_location=self.service_connection_config.usageLocation,
            billing_project_id=self.service_connection_config.billingProjectId,
        )
        return instance.get_system_metrics()
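
    # BigQuery STRUCT columns are flattened recursively into dotted child columns
    # (e.g. "address.city") so that each leaf field can be profiled individually.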
    def _get_struct_columns(self, columns: dict, parent: str):
        """Flatten the (name, type) pairs of a STRUCT into fully qualified columns"""
        # pylint: disable=import-outside-toplevel
        from sqlalchemy_bigquery import STRUCT

        columns_list = []
        for key, value in columns:
            if not isinstance(value, STRUCT):
                col = Column(f"{parent}.{key}", value)
                # pylint: disable=protected-access
                col._set_parent(self.table.__table__)
                # pylint: enable=protected-access
                columns_list.append(col)
            else:
                col = self._get_struct_columns(
                    value.__dict__.get("_STRUCT_fields"), f"{parent}.{key}"
                )
                columns_list.extend(col)
        return columns_list
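
    # Columns reported to the profiler: STRUCT columns are replaced by their
    # flattened leaf columns, every other column is returned as-is.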
    def get_columns(self) -> List[Column]:
        """Get columns from table"""
        # pylint: disable=import-outside-toplevel
        from sqlalchemy_bigquery import STRUCT

        columns = []
        for column in inspect(self.table).c:
            if isinstance(column.type, STRUCT):
                columns.extend(
                    self._get_struct_columns(
                        column.type.__dict__.get("_STRUCT_fields"), column.name
                    )
                )
            else:
                columns.append(column)
        return columns