mirror of https://github.com/open-metadata/OpenMetadata.git, synced 2025-10-29 17:49:14 +00:00
* Usage through query log file * Pick fields if available in file * Created Usage Source
This commit is contained in:
parent 400818d04d
commit fd521763d4
@@ -7,9 +7,7 @@
     "databaseUsageConfigType": {
       "description": "Database Source Config Usage Pipeline type",
       "type": "string",
-      "enum": [
-        "DatabaseUsage"
-      ],
+      "enum": ["DatabaseUsage"],
       "default": "DatabaseUsage"
     }
   },
@@ -33,6 +31,10 @@
       "description": "Configuration to set the limit for query logs",
       "type": "integer",
       "default": "100"
     },
+    "queryLogFilePath": {
+      "description": "Configuration to set the file path for query logs",
+      "type": "string"
+    }
   },
   "additionalProperties": false
@@ -13,7 +13,8 @@
     },
     "sourceConfig": {
       "config": {
-        "queryLogDuration": "1"
+        "queryLogDuration": "1",
+        "queryLogFilePath": "<path to query log file>"
       }
     }
   },
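When queryLogFilePath is set, the source reads queries from that file rather than executing a usage query against the database (see usage_source.py below); queryLogDuration still controls the analysis window.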
ingestion/examples/workflows/query_log_usage.json (new file, 46 lines)
@@ -0,0 +1,46 @@
{
  "source": {
    "type": "query-log-usage",
    "serviceName": "local_mysql",
    "serviceConnection": {
      "config": {
        "type": "Mysql",
        "username": "openmetadata_user",
        "password": "openmetadata_password",
        "hostPort": "localhost:3306",
        "connectionOptions": {},
        "connectionArguments": {}
      }
    },
    "sourceConfig": {
      "config": {
        "queryLogDuration": "1",
        "queryLogFilePath": "<path to query log file>"
      }
    }
  },
  "processor": {
    "type": "query-parser",
    "config": {
      "filter": ""
    }
  },
  "stage": {
    "type": "table-usage",
    "config": {
      "filename": "/tmp/query_log_usage"
    }
  },
  "bulkSink": {
    "type": "metadata-usage",
    "config": {
      "filename": "/tmp/query_log_usage"
    }
  },
  "workflowConfig": {
    "openMetadataServerConfig": {
      "hostPort": "http://localhost:8585/api",
      "authProvider": "no-auth"
    }
  }
}
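For reference, the query log is read as a CSV with a header row; the column names below mirror the keys UsageSource picks up from each record (only query is required, the other fields are used when present). The sample values are illustrative, not part of the commit:

query,user_name,start_time,end_time,aborted,database_name,schema_name
"select * from orders",openmetadata_user,2022-03-01 10:00:00,2022-03-01 10:00:02,False,openmetadata_db,public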
@@ -13,7 +13,7 @@ Clickhouse usage module
 """
 
 import ast
-from typing import Any, Dict, Iterable
+from typing import Iterable
 
 from metadata.generated.schema.entity.services.connections.database.clickhouseConnection import (
     ClickhouseConnection,
@@ -27,14 +27,15 @@ from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
 # This import verifies that the dependencies are available.
 from metadata.ingestion.models.table_queries import TableQuery
 from metadata.ingestion.source.sql_alchemy_helper import SQLSourceStatus
+from metadata.ingestion.source.usage_source import UsageSource
 from metadata.utils.connections import get_connection, test_connection
 from metadata.utils.helpers import get_start_and_end
 from metadata.utils.sql_queries import CLICKHOUSE_SQL_USAGE_STATEMENT
 
 
-class ClickhouseUsageSource(Source[TableQuery]):
+class ClickhouseUsageSource(UsageSource):
     def __init__(self, config: WorkflowSource, metadata_config: WorkflowConfig):
-        super().__init__()
+        super().__init__(config, metadata_config)
         self.config = config
         self.connection = config.serviceConnection.__root__.config
         start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
@@ -56,15 +57,6 @@ class ClickhouseUsageSource(Source[TableQuery]):
 
         return cls(config, metadata_config)
 
-    def prepare(self):
-        return super().prepare()
-
-    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
-
-        rows = self.engine.execute(self.sql_stmt)
-        for row in rows:
-            yield row
-
     def next_record(self) -> Iterable[TableQuery]:
         """
         Using itertools.groupby and raw level iterator,
@@ -89,20 +81,3 @@ class ClickhouseUsageSource(Source[TableQuery]):
                 service_name=self.config.serviceName,
             )
             yield table_query
-
-    def get_report(self):
-        """
-        get report
-
-        Returns:
-        """
-        return self.report
-
-    def close(self):
-        pass
-
-    def get_status(self) -> SourceStatus:
-        return self.report
-
-    def test_connection(self) -> None:
-        test_connection(self.engine)
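The boilerplate (prepare, the raw row iterator, the report/status accessors, close, and test_connection) now lives once in UsageSource; the Clickhouse source keeps only its constructor, its create factory, and its custom next_record grouping.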
@@ -11,36 +11,30 @@
 """
 MSSQL usage module
 """
 
-from typing import Any, Dict, Iterable
-
 from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
     MssqlConnection,
 )
+from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
+    OpenMetadataConnection,
+)
 from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
+from metadata.ingestion.api.source import InvalidSourceException
+from metadata.ingestion.source.usage_source import UsageSource
 
-# This import verifies that the dependencies are available.
-from metadata.ingestion.models.table_queries import TableQuery
-from metadata.ingestion.source.sql_alchemy_helper import SQLSourceStatus
-from metadata.utils.connections import get_connection, test_connection
 from metadata.utils.helpers import get_start_and_end
 from metadata.utils.sql_queries import MSSQL_SQL_USAGE_STATEMENT
 
 
-class MssqlUsageSource(Source[TableQuery]):
-    def __init__(self, config: WorkflowSource, metadata_config: WorkflowConfig):
-        super().__init__()
-        self.config = config
-        self.connection = config.serviceConnection.__root__.config
-        start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
+class MssqlUsageSource(UsageSource):
+    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
+        super().__init__(config, metadata_config)
+        start, end = get_start_and_end(config.sourceConfig.config.queryLogDuration)
         self.analysis_date = start
         self.sql_stmt = MSSQL_SQL_USAGE_STATEMENT.format(start_date=start, end_date=end)
-        self.report = SQLSourceStatus()
-        self.engine = get_connection(self.connection)
 
     @classmethod
     def create(cls, config_dict, metadata_config: WorkflowConfig):
@@ -52,53 +46,3 @@ class MssqlUsageSource(Source[TableQuery]):
                 f"Expected MssqlConnection, but got {connection}"
             )
         return cls(config, metadata_config)
-
-    def prepare(self):
-        return super().prepare()
-
-    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
-
-        rows = self.engine.execute(self.sql_stmt)
-        for row in rows:
-            yield row
-
-    def next_record(self) -> Iterable[TableQuery]:
-        """
-        Using itertools.groupby and raw level iterator,
-        it groups to table and yields TableMetadata
-        :return:
-        """
-        for row in self._get_raw_extract_iter():
-            table_query = TableQuery(
-                query=row["query_type"],
-                user_name=row["user_name"],
-                starttime=str(row["start_time"]),
-                endtime=str(row["end_time"]),
-                analysis_date=self.analysis_date,
-                aborted=row["aborted"],
-                database=row["database_name"],
-                sql=row["query_text"],
-                service_name=self.config.serviceName,
-            )
-            if row["schema_name"] is not None:
-                self.report.scanned(f"{row['database_name']}.{row['schema_name']}")
-            else:
-                self.report.scanned(f"{row['database_name']}")
-            yield table_query
-
-    def get_report(self):
-        """
-        get report
-
-        Returns:
-        """
-        return self.report
-
-    def close(self):
-        pass
-
-    def get_status(self) -> SourceStatus:
-        return self.report
-
-    def test_connection(self) -> None:
-        test_connection(self.engine)
ingestion/src/metadata/ingestion/source/query_log_usage.py (new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Common Query Log Connector
"""
from datetime import datetime

from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    Source as WorkflowSource,
)
from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
from metadata.ingestion.source.usage_source import UsageSource


class QueryLogUsageSource(UsageSource):
    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
        super().__init__(config, metadata_config)
        self.analysis_date = datetime.today().strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def create(cls, config_dict, metadata_config: WorkflowConfig):
        """Create class instance"""
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        return cls(config, metadata_config)
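With the source in place, the example workflow above can be run through the ingestion CLI, e.g. metadata ingest -c ingestion/examples/workflows/query_log_usage.json (assuming the standard metadata entrypoint and that the query-log-usage type is registered with it).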
@@ -13,7 +13,7 @@ Redshift usage module
 """
 
 # This import verifies that the dependencies are available.
-from typing import Any, Dict, Iterable, Iterator, Union
+from typing import Iterator, Union
 
 from metadata.generated.schema.entity.services.connections.database.redshiftConnection import (
     RedshiftConnection,
@@ -25,11 +25,8 @@ from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
-from metadata.ingestion.models.table_queries import TableQuery
-from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.ingestion.source.sql_alchemy_helper import SQLSourceStatus
-from metadata.utils.connections import get_connection, test_connection
+from metadata.ingestion.api.source import InvalidSourceException
+from metadata.ingestion.source.usage_source import UsageSource
 from metadata.utils.helpers import get_start_and_end
 
 # pylint: disable=useless-super-delegation
@@ -39,7 +36,7 @@ from metadata.utils.sql_queries import REDSHIFT_SQL_STATEMENT
 logger = ingestion_logger()
 
 
-class RedshiftUsageSource(Source[TableQuery]):
+class RedshiftUsageSource(UsageSource):
     # SELECT statement from mysql information_schema to extract table and column metadata
     SQL_STATEMENT = REDSHIFT_SQL_STATEMENT
     # CONFIG KEYS
@@ -52,11 +49,7 @@ class RedshiftUsageSource(Source[TableQuery]):
     DEFAULT_CLUSTER_SOURCE = "CURRENT_DATABASE()"
 
     def __init__(self, config: WorkflowSource, metadata_config: WorkflowConfig):
-        super().__init__()
-        self.config = config
-        self.service_connection = config.serviceConnection.__root__.config
-        self.metadata_config = metadata_config
-        self.metadata = OpenMetadata(metadata_config)
+        super().__init__(config, metadata_config)
         start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
         self.sql_stmt = RedshiftUsageSource.SQL_STATEMENT.format(
             start_time=start, end_time=end
@@ -64,8 +57,6 @@ class RedshiftUsageSource(Source[TableQuery]):
         )
         self.analysis_date = start
         self._extract_iter: Union[None, Iterator] = None
         self._database = "redshift"
-        self.status = SQLSourceStatus()
-        self.engine = get_connection(self.service_connection)
 
     @classmethod
     def create(cls, config_dict, metadata_config: WorkflowConfig):
@@ -76,40 +67,3 @@ class RedshiftUsageSource(Source[TableQuery]):
                 f"Expected RedshiftConnection, but got {connection}"
             )
         return cls(config, metadata_config)
-
-    def prepare(self):
-        pass
-
-    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
-
-        rows = self.engine.execute(self.sql_stmt)
-        for row in rows:
-            yield row
-
-    def next_record(self) -> Iterable[TableQuery]:
-        """
-        Using itertools.groupby and raw level iterator, it groups to table and yields TableMetadata
-        :return:
-        """
-        for row in self._get_raw_extract_iter():
-            tq = TableQuery(
-                query=row["query"],
-                user_name=row["usename"],
-                starttime=str(row["starttime"]),
-                endtime=str(row["endtime"]),
-                analysis_date=str(self.analysis_date),
-                database=self.service_connection.database,
-                aborted=row["aborted"],
-                sql=row["querytxt"],
-                service_name=self.config.serviceName,
-            )
-            yield tq
-
-    def close(self):
-        pass
-
-    def get_status(self) -> SourceStatus:
-        return self.status
-
-    def test_connection(self) -> None:
-        test_connection(self.engine)
@@ -14,7 +14,7 @@ Snowflake usage module
 
 import traceback
 from datetime import timedelta
-from typing import Any, Dict, Iterable, Iterator, Union
+from typing import Iterable, Iterator, Union
 
 from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
     SnowflakeConnection,
@@ -26,12 +26,11 @@ from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
+from metadata.ingestion.api.source import InvalidSourceException
 
 # This import verifies that the dependencies are available.
 from metadata.ingestion.models.table_queries import TableQuery
-from metadata.ingestion.source.sql_alchemy_helper import SQLSourceStatus
-from metadata.utils.connections import get_connection, test_connection
+from metadata.ingestion.source.usage_source import UsageSource
 from metadata.utils.helpers import get_start_and_end
 from metadata.utils.logger import ingestion_logger
 from metadata.utils.sql_queries import SNOWFLAKE_SQL_STATEMENT
@@ -39,8 +38,7 @@ from metadata.utils.sql_queries import SNOWFLAKE_SQL_STATEMENT
 logger = ingestion_logger()
 
 
-class SnowflakeUsageSource(Source[TableQuery]):
-
+class SnowflakeUsageSource(UsageSource):
     # SELECT statement from mysql information_schema
     # to extract table and column metadata
     SQL_STATEMENT = SNOWFLAKE_SQL_STATEMENT
@@ -55,13 +53,10 @@ class SnowflakeUsageSource(Source[TableQuery]):
     DEFAULT_CLUSTER_SOURCE = "CURRENT_DATABASE()"
 
     def __init__(self, config: WorkflowSource, metadata_config: WorkflowConfig):
-        super().__init__()
-        self.config = config
-        self.service_connection = config.serviceConnection.__root__.config
+        super().__init__(config, metadata_config)
         start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
         end = end + timedelta(days=1)
         self.analysis_date = start
-        self.metadata_config = metadata_config
         self.sql_stmt = SnowflakeUsageSource.SQL_STATEMENT.format(
             start_date=start,
             end_date=end,
@@ -69,8 +64,6 @@ class SnowflakeUsageSource(Source[TableQuery]):
         )
         self._extract_iter: Union[None, Iterator] = None
         self._database = "Snowflake"
-        self.report = SQLSourceStatus()
-        self.engine = get_connection(self.service_connection)
 
     @classmethod
     def create(cls, config_dict, metadata_config: WorkflowConfig):
@@ -82,15 +75,6 @@ class SnowflakeUsageSource(Source[TableQuery]):
             )
         return cls(config, metadata_config)
 
-    def prepare(self):
-        pass
-
-    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
-
-        rows = self.engine.execute(self.sql_stmt)
-        for row in rows:
-            yield row
-
     def next_record(self) -> Iterable[TableQuery]:
         """
         Using itertools.groupby and raw level iterator,
@@ -121,20 +105,3 @@ class SnowflakeUsageSource(Source[TableQuery]):
             except Exception as err:
                 logger.debug(traceback.format_exc())
                 logger.debug(repr(err))
-
-    def get_report(self):
-        """
-        get report
-
-        Returns:
-        """
-        return self.report
-
-    def test_connection(self) -> None:
-        test_connection(self.engine)
-
-    def close(self):
-        pass
-
-    def get_status(self) -> SourceStatus:
-        return self.report
ingestion/src/metadata/ingestion/source/usage_source.py (new file, 113 lines)
@@ -0,0 +1,113 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage Source Module
"""
import csv
from typing import Any, Dict, Iterable

from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    Source as WorkflowSource,
)
from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus

# This import verifies that the dependencies are available.
from metadata.ingestion.models.table_queries import TableQuery
from metadata.ingestion.source.sql_alchemy_helper import SQLSourceStatus
from metadata.utils.connections import get_connection, test_connection
from metadata.utils.helpers import get_start_and_end


class UsageSource(Source[TableQuery]):
    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
        super().__init__()
        self.config = config
        self.metadata_config = metadata_config
        self.connection = config.serviceConnection.__root__.config
        start, end = get_start_and_end(self.config.sourceConfig.config.queryLogDuration)
        self.analysis_date = start
        self.report = SQLSourceStatus()
        self.engine = get_connection(self.connection)

    def prepare(self):
        return super().prepare()

    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
        if self.config.sourceConfig.config.queryLogFilePath:
            # Read queries from the configured log file instead of the database
            with open(self.config.sourceConfig.config.queryLogFilePath, "r") as fin:
                for i in csv.DictReader(fin):
                    query_dict = dict(i)
                    row = {
                        "query_type": query_dict.get("query"),
                        "user_name": query_dict.get("user_name", ""),
                        "start_time": query_dict.get("start_time", ""),
                        "end_time": query_dict.get("end_time", ""),
                        "aborted": query_dict.get("aborted", False),
                        "database_name": query_dict.get(
                            "database_name",
                            self.connection.database
                            if self.connection.database
                            else "default",
                        ),
                        "query_text": query_dict.get("query"),
                        "schema_name": query_dict.get("schema_name"),
                    }
                    yield row
        else:
            # self.sql_stmt is set by the dialect subclasses
            rows = self.engine.execute(self.sql_stmt)
            for row in rows:
                yield row

    def next_record(self) -> Iterable[TableQuery]:
        """
        Using itertools.groupby and raw level iterator,
        it groups to table and yields TableMetadata
        :return:
        """
        for row in self._get_raw_extract_iter():
            table_query = TableQuery(
                query=row["query_type"],
                user_name=row["user_name"],
                starttime=str(row["start_time"]),
                endtime=str(row["end_time"]),
                analysis_date=self.analysis_date,
                aborted=row["aborted"],
                database=row["database_name"],
                sql=row["query_text"],
                service_name=self.config.serviceName,
            )
            if row["schema_name"]:
                self.report.scanned(f"{row['database_name']}.{row['schema_name']}")
            else:
                self.report.scanned(f"{row['database_name']}")
            yield table_query

    def get_report(self):
        """
        get report

        Returns:
        """
        return self.report

    def close(self):
        pass

    def get_status(self) -> SourceStatus:
        return self.report

    def test_connection(self) -> None:
        test_connection(self.engine)
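Taken together, the contract for a subclass is small: either the workflow supplies queryLogFilePath, or the subclass sets self.sql_stmt so the engine branch of _get_raw_extract_iter has something to execute. A minimal sketch of a hypothetical dialect source (the class name and SQL constant are illustrative, not part of this commit):

from metadata.ingestion.source.usage_source import UsageSource
from metadata.utils.helpers import get_start_and_end

# Hypothetical statement for illustration only; the real ones live in
# metadata.utils.sql_queries. The aliases match the row keys that
# UsageSource.next_record indexes.
MY_USAGE_STATEMENT = """
SELECT query query_type, query query_text, user_name, start_time, end_time,
       aborted, database_name, schema_name
FROM my_query_history
WHERE start_time BETWEEN '{start_date}' AND '{end_date}'
"""


class MyDialectUsageSource(UsageSource):
    def __init__(self, config, metadata_config):
        # UsageSource wires up the connection, engine, report and analysis date
        super().__init__(config, metadata_config)
        start, end = get_start_and_end(config.sourceConfig.config.queryLogDuration)
        # The only dialect-specific piece: the statement _get_raw_extract_iter executes
        self.sql_stmt = MY_USAGE_STATEMENT.format(start_date=start, end_date=end)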
@@ -2,16 +2,16 @@ import textwrap
 
 REDSHIFT_SQL_STATEMENT = """
 SELECT DISTINCT ss.userid,
-       ss.query,
-       sui.usename,
+       ss.query query_type,
+       sui.usename user_name,
        ss.tbl,
-       sq.querytxt,
-       sti.database,
-       sti.schema,
+       sq.querytxt query_text,
+       sti.database database_name,
+       sti.schema schema_name,
        sti.table,
-       sq.starttime,
-       sq.endtime,
-       sq.aborted
+       sq.starttime start_time,
+       sq.endtime end_time,
+       sq.aborted aborted
 FROM stl_scan ss
        JOIN svv_table_info sti ON ss.tbl = sti.table_id
        JOIN stl_query sq ON ss.query = sq.query
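These aliases are the point of the change: the Redshift result rows now carry exactly the keys that UsageSource.next_record indexes (query_type, user_name, query_text, database_name, schema_name, start_time, end_time, aborted), so the Redshift source no longer needs its own row-to-TableQuery mapping.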