mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-07 08:03:09 +00:00
Fixed MSSQL & Clickhouse Usage (#3976)
This commit is contained in:
parent
d77c60b816
commit
53e4403ccd
@ -1,15 +1,20 @@
|
||||
{
|
||||
"source": {
|
||||
"type": "clickhouse-usage",
|
||||
"type": "clickhouse_usage",
|
||||
"serviceName": "local_clickhouse",
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"type": "ClickHouse",
|
||||
"username":"default",
|
||||
"password":"",
|
||||
"database": "default",
|
||||
"service_name": "local_clickhouse",
|
||||
"schema_filter_pattern": {
|
||||
"excludes": ["system.*","information_schema.*","INFORMATION_SCHEMA.*"]
|
||||
"hostPort": "localhost:8123",
|
||||
"database": "default"
|
||||
}
|
||||
},
|
||||
"duration":2
|
||||
"sourceConfig": {
|
||||
"config": {
|
||||
"queryLogDuration": "1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"processor": {
|
||||
@ -24,17 +29,16 @@
|
||||
"filename": "/tmp/clickhouse_usage"
|
||||
}
|
||||
},
|
||||
"bulk_sink": {
|
||||
"bulkSink": {
|
||||
"type": "metadata-usage",
|
||||
"config": {
|
||||
"filename": "/tmp/clickhouse_usage"
|
||||
}
|
||||
},
|
||||
"metadata_server": {
|
||||
"type": "metadata-server",
|
||||
"config": {
|
||||
"api_endpoint": "http://localhost:8585/api",
|
||||
"auth_provider_type": "no-auth"
|
||||
"workflowConfig": {
|
||||
"openMetadataServerConfig": {
|
||||
"hostPort": "http://localhost:8585/api",
|
||||
"authProvider": "no-auth"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,16 +1,19 @@
|
||||
{
|
||||
"source": {
|
||||
"type": "mssql-usage",
|
||||
"serviceName": "local_mssql",
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"host_port": "localhost:1433",
|
||||
"service_name": "local_mssql",
|
||||
"type": "MSSQL",
|
||||
"database": "catalog_test",
|
||||
"query": "select top 50 * from [{}].[{}]",
|
||||
"username": "sa",
|
||||
"password": "test!Password",
|
||||
"duration":2,
|
||||
"table_filter_pattern": {
|
||||
"excludes": ["catalog_test.*"]
|
||||
"hostPort": "localhost:1433"
|
||||
}
|
||||
},
|
||||
"sourceConfig": {
|
||||
"config": {
|
||||
"queryLogDuration": "1"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -26,17 +29,16 @@
|
||||
"filename": "/tmp/mssql_usage"
|
||||
}
|
||||
},
|
||||
"bulk_sink": {
|
||||
"bulkSink": {
|
||||
"type": "metadata-usage",
|
||||
"config": {
|
||||
"filename": "/tmp/mssql_usage"
|
||||
}
|
||||
},
|
||||
"metadata_server": {
|
||||
"type": "metadata-server",
|
||||
"config": {
|
||||
"api_endpoint": "http://localhost:8585/api",
|
||||
"auth_provider_type": "no-auth"
|
||||
"workflowConfig": {
|
||||
"openMetadataServerConfig": {
|
||||
"hostPort": "http://localhost:8585/api",
|
||||
"authProvider": "no-auth"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ from typing import IO, Any, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
FQDN_SEPARATOR: str = ":"
|
||||
FQDN_SEPARATOR: str = "."
|
||||
|
||||
|
||||
class ConfigModel(BaseModel):
|
||||
|
@ -28,6 +28,7 @@ from metadata.ingestion.models.table_queries import (
|
||||
)
|
||||
from metadata.ingestion.ometa.client import APIError
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.utils.helpers import _get_formmated_table_name
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -72,12 +73,13 @@ class MetadataUsageBulkSink(BulkSink):
|
||||
for record in usage_records:
|
||||
table_usage = TableUsageCount(**json.loads(record))
|
||||
if "." in table_usage.table:
|
||||
table_usage.database, table_usage.table = table_usage.table.split(".")[
|
||||
-2:
|
||||
]
|
||||
(
|
||||
table_usage.database_schema,
|
||||
table_usage.table,
|
||||
) = table_usage.table.split(".")[-2:]
|
||||
self.service_name = table_usage.service_name
|
||||
table_entity = self.__get_table_entity(
|
||||
table_usage.database, table_usage.table
|
||||
table_usage.database, table_usage.database_schema, table_usage.table
|
||||
)
|
||||
if table_entity is not None:
|
||||
if not table_usage_map.get(table_entity.id.__root__):
|
||||
@ -168,11 +170,13 @@ class MetadataUsageBulkSink(BulkSink):
|
||||
else:
|
||||
column_joins_dict[column_join.table_column.column] = {}
|
||||
main_column_fqdn = self.__get_column_fqdn(
|
||||
table_usage.database, column_join.table_column
|
||||
table_usage.database,
|
||||
table_usage.database_schema,
|
||||
column_join.table_column,
|
||||
)
|
||||
for column in column_join.joined_with:
|
||||
joined_column_fqdn = self.__get_column_fqdn(
|
||||
table_usage.database, column
|
||||
table_usage.database, table_usage.database_schema, column
|
||||
)
|
||||
if str(joined_column_fqdn) in joined_with.keys():
|
||||
column_joined_with = joined_with[str(joined_column_fqdn)]
|
||||
@ -194,16 +198,23 @@ class MetadataUsageBulkSink(BulkSink):
|
||||
)
|
||||
return table_joins
|
||||
|
||||
def __get_column_fqdn(self, database: str, table_column: TableColumn):
|
||||
table_entity = self.__get_table_entity(database, table_column.table)
|
||||
def __get_column_fqdn(
|
||||
self, database: str, database_schema: str, table_column: TableColumn
|
||||
):
|
||||
table_entity = self.__get_table_entity(
|
||||
database, database_schema, table_column.table
|
||||
)
|
||||
if table_entity is None:
|
||||
return None
|
||||
for tbl_column in table_entity.columns:
|
||||
if table_column.column.lower() == tbl_column.name.__root__.lower():
|
||||
return tbl_column.fullyQualifiedName.__root__
|
||||
return tbl_column.fullyQualifiedName.__root__.__root__
|
||||
|
||||
def __get_table_entity(self, database_name: str, table_name: str) -> Table:
|
||||
table_fqn = f"{self.service_name}{FQDN_SEPARATOR}{database_name}{FQDN_SEPARATOR}{table_name}"
|
||||
def __get_table_entity(
|
||||
self, database_name: str, database_schema: str, table_name: str
|
||||
) -> Table:
|
||||
table_fqn = f"{self.service_name}{FQDN_SEPARATOR}{database_name}{FQDN_SEPARATOR}{database_schema}{FQDN_SEPARATOR}{table_name}"
|
||||
table_fqn = _get_formmated_table_name(table_fqn)
|
||||
table_entity = self.metadata.get_by_name(Table, fqdn=table_fqn)
|
||||
return table_entity
|
||||
|
||||
|
@ -61,6 +61,7 @@ class TableUsageCount(BaseModel):
|
||||
table: str
|
||||
date: str
|
||||
database: str
|
||||
database_schema: Optional[str]
|
||||
sql_queries: List[SqlQuery]
|
||||
count: int = 1
|
||||
joins: TableColumnJoins
|
||||
|
@ -12,19 +12,25 @@
|
||||
Clickhouse usage module
|
||||
"""
|
||||
|
||||
import ast
|
||||
from typing import Any, Dict, Iterable
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.database.clickhouseConnection import (
|
||||
ClickhouseConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.databaseService import (
|
||||
DatabaseServiceType,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataServerConfig,
|
||||
)
|
||||
from metadata.ingestion.api.source import Source, SourceStatus
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
Source as WorkflowSource,
|
||||
)
|
||||
from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
|
||||
|
||||
# This import verifies that the dependencies are available.
|
||||
from metadata.ingestion.models.table_queries import TableQuery
|
||||
from metadata.ingestion.source.clickhouse import ClickhouseConfig
|
||||
from metadata.ingestion.source.sql_alchemy_helper import (
|
||||
SQLAlchemyHelper,
|
||||
SQLSourceStatus,
|
||||
@ -50,16 +56,19 @@ class ClickhouseUsageSource(Source[TableQuery]):
|
||||
report:
|
||||
"""
|
||||
|
||||
def __init__(self, config, metadata_config):
|
||||
def __init__(
|
||||
self, config: WorkflowSource, metadata_config: OpenMetadataServerConfig
|
||||
):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
start, end = get_start_and_end(config.duration)
|
||||
self.connection = config.serviceConnection.__root__.config
|
||||
start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
|
||||
self.analysis_date = start
|
||||
self.sql_stmt = CLICKHOUSE_SQL_USAGE_STATEMENT.format(
|
||||
start_time=start, end_time=end
|
||||
)
|
||||
self.alchemy_helper = SQLAlchemyHelper(
|
||||
config,
|
||||
self.connection,
|
||||
metadata_config,
|
||||
DatabaseServiceType.ClickHouse.value,
|
||||
self.sql_stmt,
|
||||
@ -68,7 +77,13 @@ class ClickhouseUsageSource(Source[TableQuery]):
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict, metadata_config: OpenMetadataServerConfig):
|
||||
config = ClickhouseConfig.parse_obj(config_dict)
|
||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||
connection: ClickhouseConnection = config.serviceConnection.__root__.config
|
||||
if not isinstance(connection, ClickhouseConnection):
|
||||
raise InvalidSourceException(
|
||||
f"Expected ClickhouseConnection, but got {connection}"
|
||||
)
|
||||
|
||||
return cls(config, metadata_config)
|
||||
|
||||
def prepare(self):
|
||||
@ -90,6 +105,11 @@ class ClickhouseUsageSource(Source[TableQuery]):
|
||||
:return:
|
||||
"""
|
||||
for row in get_raw_extract_iter(self.alchemy_helper):
|
||||
database = "default"
|
||||
if row["database_name"]:
|
||||
database_list = ast.literal_eval(row["database_name"])
|
||||
database = database_list[0] if len(database_list) == 1 else "default"
|
||||
|
||||
table_query = TableQuery(
|
||||
query=row["query_id"],
|
||||
user_name=row["user_name"],
|
||||
@ -97,11 +117,9 @@ class ClickhouseUsageSource(Source[TableQuery]):
|
||||
endtime=str(row["end_time"]),
|
||||
analysis_date=self.analysis_date,
|
||||
aborted=row["aborted"],
|
||||
database=row["database_name"][0]
|
||||
if len(row["database_name"]) >= 1
|
||||
else "default",
|
||||
database=database,
|
||||
sql=row["query_text"],
|
||||
service_name=self.config.service_name,
|
||||
service_name=self.config.serviceName,
|
||||
)
|
||||
yield table_query
|
||||
|
||||
|
@ -14,17 +14,22 @@ MSSQL usage module
|
||||
|
||||
from typing import Any, Dict, Iterable
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
|
||||
MssqlConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.databaseService import (
|
||||
DatabaseServiceType,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataServerConfig,
|
||||
)
|
||||
from metadata.ingestion.api.source import Source, SourceStatus
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
Source as WorkflowSource,
|
||||
)
|
||||
from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
|
||||
|
||||
# This import verifies that the dependencies are available.
|
||||
from metadata.ingestion.models.table_queries import TableQuery
|
||||
from metadata.ingestion.source.mssql import MssqlConfig
|
||||
from metadata.ingestion.source.sql_alchemy_helper import (
|
||||
SQLAlchemyHelper,
|
||||
SQLSourceStatus,
|
||||
@ -49,20 +54,32 @@ class MssqlUsageSource(Source[TableQuery]):
|
||||
report:
|
||||
"""
|
||||
|
||||
def __init__(self, config, metadata_config):
|
||||
def __init__(
|
||||
self, config: WorkflowSource, metadata_config: OpenMetadataServerConfig
|
||||
):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
start, end = get_start_and_end(config.duration)
|
||||
self.connection = config.serviceConnection.__root__.config
|
||||
start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
|
||||
self.analysis_date = start
|
||||
self.sql_stmt = MSSQL_SQL_USAGE_STATEMENT.format(start_date=start, end_date=end)
|
||||
self.alchemy_helper = SQLAlchemyHelper(
|
||||
config, metadata_config, DatabaseServiceType.MSSQL.value, self.sql_stmt
|
||||
self.connection,
|
||||
metadata_config,
|
||||
DatabaseServiceType.MSSQL.value,
|
||||
self.sql_stmt,
|
||||
)
|
||||
self.report = SQLSourceStatus()
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict, metadata_config: OpenMetadataServerConfig):
|
||||
config = MssqlConfig.parse_obj(config_dict)
|
||||
"""Create class instance"""
|
||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||
connection: MssqlConnection = config.serviceConnection.__root__.config
|
||||
if not isinstance(connection, MssqlConnection):
|
||||
raise InvalidSourceException(
|
||||
f"Expected MssqlConnection, but got {connection}"
|
||||
)
|
||||
return cls(config, metadata_config)
|
||||
|
||||
def prepare(self):
|
||||
@ -93,7 +110,7 @@ class MssqlUsageSource(Source[TableQuery]):
|
||||
aborted=row["aborted"],
|
||||
database=row["database_name"],
|
||||
sql=row["query_text"],
|
||||
service_name=self.config.service_name,
|
||||
service_name=self.config.serviceName,
|
||||
)
|
||||
if row["schema_name"] is not None:
|
||||
self.report.scanned(f"{row['database_name']}.{row['schema_name']}")
|
||||
|
@ -15,7 +15,8 @@ from typing import Any, Iterable
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from ...generated.schema.metadataIngestion.workflow import OpenMetadataServerConfig
|
||||
from .sql_source import SQLConnectionConfig, SQLSourceStatus
|
||||
from ...utils.source_connections import get_connection_url
|
||||
from .sql_source import SQLSourceStatus
|
||||
|
||||
|
||||
class SQLAlchemyHelper:
|
||||
@ -23,7 +24,7 @@ class SQLAlchemyHelper:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: SQLConnectionConfig,
|
||||
config,
|
||||
metadata_config: OpenMetadataServerConfig,
|
||||
platform: str,
|
||||
query: str,
|
||||
@ -39,9 +40,11 @@ class SQLAlchemyHelper:
|
||||
Create a SQLAlchemy connection to Database
|
||||
"""
|
||||
engine = create_engine(
|
||||
self.config.get_connection_url(),
|
||||
**self.config.options,
|
||||
connect_args=self.config.connect_args
|
||||
get_connection_url(self.config),
|
||||
**self.config.connectionOptions if self.config.connectionOptions else {},
|
||||
connect_args=self.config.connectionArguments
|
||||
if self.config.connectionArguments
|
||||
else {}
|
||||
)
|
||||
conn = engine.connect()
|
||||
return conn
|
||||
|
@ -62,7 +62,6 @@ def get_connection_url_common(connection):
|
||||
|
||||
if connection.username:
|
||||
url += f"{connection.username}"
|
||||
if connection.password:
|
||||
url += (
|
||||
f":{quote_plus(connection.password.get_secret_value())}"
|
||||
if connection
|
||||
|
Loading…
x
Reference in New Issue
Block a user