mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-09 10:09:09 +00:00
* Fix #10292: Remove DB & Schema Usage Filters * Fix Lint * lint
This commit is contained in:
parent
6e6722b5e1
commit
92fee5cf4f
@ -29,7 +29,3 @@ class BigqueryLineageSource(BigqueryQueryParserSource, LineageSource):
|
|||||||
filters = """
|
filters = """
|
||||||
AND statement_type IN ("INSERT", "MERGE", "CREATE_TABLE_AS_SELECT", "UPDATE")
|
AND statement_type IN ("INSERT", "MERGE", "CREATE_TABLE_AS_SELECT", "UPDATE")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
database_field = "project_id"
|
|
||||||
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
@ -58,7 +58,7 @@ class BigqueryQueryParserSource(QueryParserSource, ABC):
|
|||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
region=self.service_connection.usageLocation,
|
region=self.service_connection.usageLocation,
|
||||||
filters=self.filters, # pylint: disable=no-member
|
filters=self.filters,
|
||||||
result_limit=self.source_config.resultLimit,
|
result_limit=self.source_config.resultLimit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -29,7 +29,3 @@ class BigqueryUsageSource(BigqueryQueryParserSource, UsageSource):
|
|||||||
filters = """
|
filters = """
|
||||||
AND statement_type = "SELECT"
|
AND statement_type = "SELECT"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
database_field = "project_id"
|
|
||||||
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
@ -21,7 +21,3 @@ class MssqlLineageSource(MssqlQueryParserSource, LineageSource):
|
|||||||
sql_stmt = MSSQL_SQL_STATEMENT
|
sql_stmt = MSSQL_SQL_STATEMENT
|
||||||
|
|
||||||
filters = "" # No filtering in the queries
|
filters = "" # No filtering in the queries
|
||||||
|
|
||||||
database_field = "db.NAME"
|
|
||||||
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
@ -21,7 +21,3 @@ class MssqlUsageSource(MssqlQueryParserSource, UsageSource):
|
|||||||
sql_stmt = MSSQL_SQL_STATEMENT
|
sql_stmt = MSSQL_SQL_STATEMENT
|
||||||
|
|
||||||
filters = "" # No filtering in the queries
|
filters = "" # No filtering in the queries
|
||||||
|
|
||||||
database_field = "db.NAME"
|
|
||||||
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
@ -37,8 +37,6 @@ class PostgresLineageSource(PostgresQueryParserSource, LineageSource):
|
|||||||
OR s.query ILIKE '%%insert%%'
|
OR s.query ILIKE '%%insert%%'
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
database_field = "d.datname"
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
|
||||||
def next_record(self) -> Iterable[AddLineageRequest]:
|
def next_record(self) -> Iterable[AddLineageRequest]:
|
||||||
"""
|
"""
|
||||||
|
@ -70,7 +70,7 @@ class PostgresQueryParserSource(QueryParserSource, ABC):
|
|||||||
"""
|
"""
|
||||||
return self.sql_stmt.format(
|
return self.sql_stmt.format(
|
||||||
result_limit=self.config.sourceConfig.config.resultLimit,
|
result_limit=self.config.sourceConfig.config.resultLimit,
|
||||||
filters=self.filters, # pylint: disable=no-member
|
filters=self.filters,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_table_query(self) -> Iterable[TableQuery]:
|
def get_table_query(self) -> Iterable[TableQuery]:
|
||||||
|
@ -25,5 +25,3 @@ class PostgresUsageSource(PostgresQueryParserSource, UsageSource):
|
|||||||
|
|
||||||
sql_stmt = POSTGRES_SQL_STATEMENT
|
sql_stmt = POSTGRES_SQL_STATEMENT
|
||||||
filters = ""
|
filters = ""
|
||||||
database_field = "d.datname"
|
|
||||||
schema_field = "" # schema filtering not available
|
|
||||||
|
@ -13,10 +13,9 @@ Usage Souce Module
|
|||||||
"""
|
"""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Iterator, List, Optional, Union
|
from typing import Iterator, Optional, Union
|
||||||
|
|
||||||
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
||||||
from metadata.generated.schema.entity.data.database import Database
|
|
||||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||||
OpenMetadataConnection,
|
OpenMetadataConnection,
|
||||||
)
|
)
|
||||||
@ -61,31 +60,8 @@ class QueryParserSource(Source[Union[TableQuery, AddLineageRequest]], ABC):
|
|||||||
|
|
||||||
def prepare(self):
|
def prepare(self):
|
||||||
"""
|
"""
|
||||||
Fetch queries only from DB that is ingested in OM
|
By default, there's nothing to prepare
|
||||||
"""
|
"""
|
||||||
databases: List[Database] = self.metadata.list_all_entities(
|
|
||||||
Database, ["databaseSchemas"], params={"service": self.config.serviceName}
|
|
||||||
)
|
|
||||||
database_name_list = []
|
|
||||||
schema_name_list = []
|
|
||||||
|
|
||||||
for database in databases:
|
|
||||||
database_name_list.append(database.name.__root__)
|
|
||||||
if self.schema_field and database.databaseSchemas:
|
|
||||||
for schema in database.databaseSchemas.__root__:
|
|
||||||
schema_name_list.append(schema.name)
|
|
||||||
|
|
||||||
if self.database_field and database_name_list:
|
|
||||||
self.filters += ( # pylint: disable=no-member
|
|
||||||
f" AND {self.database_field} IN ('"
|
|
||||||
+ "','".join(database_name_list)
|
|
||||||
+ "')"
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.schema_field and schema_name_list:
|
|
||||||
self.filters += ( # pylint: disable=no-member
|
|
||||||
f" AND {self.schema_field} IN ('" + "','".join(schema_name_list) + "')"
|
|
||||||
)
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_table_query(self) -> Optional[Iterator[TableQuery]]:
|
def get_table_query(self) -> Optional[Iterator[TableQuery]]:
|
||||||
@ -123,7 +99,7 @@ class QueryParserSource(Source[Union[TableQuery, AddLineageRequest]], ABC):
|
|||||||
return self.sql_stmt.format(
|
return self.sql_stmt.format(
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
filters=self.filters, # pylint: disable=no-member
|
filters=self.filters,
|
||||||
result_limit=self.source_config.resultLimit,
|
result_limit=self.source_config.resultLimit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -47,9 +47,3 @@ class RedshiftLineageSource(RedshiftQueryParserSource, LineageSource):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
sql_stmt = REDSHIFT_SQL_STATEMENT
|
sql_stmt = REDSHIFT_SQL_STATEMENT
|
||||||
|
|
||||||
database_field = "database_name"
|
|
||||||
|
|
||||||
schema_field = "schema_name"
|
|
||||||
|
|
||||||
db_filters = ""
|
|
||||||
|
@ -79,8 +79,6 @@ REDSHIFT_SQL_STATEMENT = textwrap.dedent(
|
|||||||
ON s.query = fq.query
|
ON s.query = fq.query
|
||||||
INNER JOIN pg_catalog.pg_user AS u
|
INNER JOIN pg_catalog.pg_user AS u
|
||||||
ON q.userid = u.usesysid
|
ON q.userid = u.usesysid
|
||||||
WHERE
|
|
||||||
{db_filters}
|
|
||||||
ORDER BY q.endtime DESC
|
ORDER BY q.endtime DESC
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
@ -13,9 +13,7 @@ Redshift usage module
|
|||||||
"""
|
"""
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.database import Database
|
|
||||||
from metadata.generated.schema.entity.services.connections.database.redshiftConnection import (
|
from metadata.generated.schema.entity.services.connections.database.redshiftConnection import (
|
||||||
RedshiftConnection,
|
RedshiftConnection,
|
||||||
)
|
)
|
||||||
@ -38,7 +36,6 @@ class RedshiftQueryParserSource(QueryParserSource, ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
filters: str
|
filters: str
|
||||||
db_filters: str
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
@ -50,32 +47,6 @@ class RedshiftQueryParserSource(QueryParserSource, ABC):
|
|||||||
)
|
)
|
||||||
return cls(config, metadata_config)
|
return cls(config, metadata_config)
|
||||||
|
|
||||||
def prepare(self):
|
|
||||||
"""
|
|
||||||
Fetch queries only from DB that is ingested in OM
|
|
||||||
"""
|
|
||||||
databases: List[Database] = self.metadata.list_all_entities(
|
|
||||||
Database, ["databaseSchemas"], params={"service": self.config.serviceName}
|
|
||||||
)
|
|
||||||
database_name_list = []
|
|
||||||
schema_name_list = []
|
|
||||||
|
|
||||||
for database in databases:
|
|
||||||
database_name_list.append(database.name.__root__)
|
|
||||||
if self.schema_field and database.databaseSchemas:
|
|
||||||
for schema in database.databaseSchemas.__root__:
|
|
||||||
schema_name_list.append(schema.name)
|
|
||||||
|
|
||||||
if self.database_field and database_name_list:
|
|
||||||
self.db_filters += ( # pylint: disable=no-member
|
|
||||||
f"{self.database_field} IN ('" + "','".join(database_name_list) + "')"
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.schema_field and schema_name_list:
|
|
||||||
self.db_filters += ( # pylint: disable=no-member
|
|
||||||
f" AND {self.schema_field} IN ('" + "','".join(schema_name_list) + "')"
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_sql_statement(self, start_time: datetime, end_time: datetime) -> str:
|
def get_sql_statement(self, start_time: datetime, end_time: datetime) -> str:
|
||||||
"""
|
"""
|
||||||
returns sql statement to fetch query logs
|
returns sql statement to fetch query logs
|
||||||
@ -83,7 +54,6 @@ class RedshiftQueryParserSource(QueryParserSource, ABC):
|
|||||||
return self.sql_stmt.format(
|
return self.sql_stmt.format(
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
filters=self.filters, # pylint: disable=no-member
|
filters=self.filters,
|
||||||
db_filters=self.db_filters, # pylint: disable=no-member
|
|
||||||
result_limit=self.source_config.resultLimit,
|
result_limit=self.source_config.resultLimit,
|
||||||
)
|
)
|
||||||
|
@ -29,9 +29,3 @@ class RedshiftUsageSource(RedshiftQueryParserSource, UsageSource):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
sql_stmt = REDSHIFT_SQL_STATEMENT
|
sql_stmt = REDSHIFT_SQL_STATEMENT
|
||||||
|
|
||||||
database_field = "database_name"
|
|
||||||
|
|
||||||
schema_field = "schema_name"
|
|
||||||
|
|
||||||
db_filters = ""
|
|
||||||
|
@ -29,7 +29,3 @@ class SnowflakeLineageSource(SnowflakeQueryParserSource, LineageSource):
|
|||||||
filters = """
|
filters = """
|
||||||
AND QUERY_TYPE IN ('INSERT', 'MERGE', 'UPDATE','CREATE_TABLE_AS_SELECT')
|
AND QUERY_TYPE IN ('INSERT', 'MERGE', 'UPDATE','CREATE_TABLE_AS_SELECT')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
database_field = "database_name"
|
|
||||||
|
|
||||||
schema_field = "schema_name"
|
|
||||||
|
@ -67,7 +67,7 @@ class SnowflakeQueryParserSource(QueryParserSource, ABC):
|
|||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
result_limit=self.config.sourceConfig.config.resultLimit,
|
result_limit=self.config.sourceConfig.config.resultLimit,
|
||||||
filters=self.filters, # pylint: disable=no-member
|
filters=self.filters,
|
||||||
)
|
)
|
||||||
|
|
||||||
def set_session_query_tag(self) -> None:
|
def set_session_query_tag(self) -> None:
|
||||||
|
@ -32,7 +32,3 @@ class SnowflakeUsageSource(SnowflakeQueryParserSource, UsageSource):
|
|||||||
ALTER_SESSION','COPY','UPDATE','COMMIT','SHOW','ALTER','DESCRIBE','CREATE_TABLE','PUT_FILES','GET_FILES',
|
ALTER_SESSION','COPY','UPDATE','COMMIT','SHOW','ALTER','DESCRIBE','CREATE_TABLE','PUT_FILES','GET_FILES',
|
||||||
'INSERT', 'MERGE', 'CREATE_TABLE_AS_SELECT')
|
'INSERT', 'MERGE', 'CREATE_TABLE_AS_SELECT')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
database_field = "database_name"
|
|
||||||
|
|
||||||
schema_field = "schema_name"
|
|
||||||
|
@ -30,68 +30,9 @@ from metadata.generated.schema.type.entityReference import (
|
|||||||
)
|
)
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.database.clickhouse.usage import ClickhouseUsageSource
|
from metadata.ingestion.source.database.clickhouse.usage import ClickhouseUsageSource
|
||||||
from metadata.ingestion.source.database.mssql.usage import MssqlUsageSource
|
|
||||||
from metadata.ingestion.source.database.redshift.usage import RedshiftUsageSource
|
|
||||||
|
|
||||||
T = TypeVar("T", bound=BaseModel)
|
T = TypeVar("T", bound=BaseModel)
|
||||||
|
|
||||||
|
|
||||||
mock_mssql_config = {
|
|
||||||
"source": {
|
|
||||||
"type": "mssql-usage",
|
|
||||||
"serviceName": "local_mssql",
|
|
||||||
"serviceConnection": {
|
|
||||||
"config": {
|
|
||||||
"type": "Mssql",
|
|
||||||
"username": "usernames",
|
|
||||||
"password": "password",
|
|
||||||
"hostPort": "localhost:1433",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"sourceConfig": {"config": {"queryLogDuration": "1"}},
|
|
||||||
},
|
|
||||||
"stage": {"type": "table-usage", "config": {"filename": "/tmp/mssql_usage"}},
|
|
||||||
"bulkSink": {"type": "metadata-usage", "config": {"filename": "/tmp/mssql_usage"}},
|
|
||||||
"workflowConfig": {
|
|
||||||
"openMetadataServerConfig": {
|
|
||||||
"hostPort": "http://localhost:8585/api",
|
|
||||||
"authProvider": "openmetadata",
|
|
||||||
"securityConfig": {
|
|
||||||
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
|
||||||
},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
mock_redshift_config = {
|
|
||||||
"source": {
|
|
||||||
"type": "redshift-usage",
|
|
||||||
"serviceName": "local_redshift",
|
|
||||||
"serviceConnection": {
|
|
||||||
"config": {
|
|
||||||
"hostPort": "localhost:5439",
|
|
||||||
"username": "usernames",
|
|
||||||
"password": "password",
|
|
||||||
"type": "Redshift",
|
|
||||||
"database": "dev",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"sourceConfig": {"config": {"queryLogDuration": "1"}},
|
|
||||||
},
|
|
||||||
"stage": {"type": "table-usage", "config": {"filename": "/tmp/mssql_usage"}},
|
|
||||||
"bulkSink": {"type": "metadata-usage", "config": {"filename": "/tmp/mssql_usage"}},
|
|
||||||
"workflowConfig": {
|
|
||||||
"openMetadataServerConfig": {
|
|
||||||
"hostPort": "http://localhost:8585/api",
|
|
||||||
"authProvider": "openmetadata",
|
|
||||||
"securityConfig": {
|
|
||||||
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
|
||||||
},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
mock_clickhouse_config = {
|
mock_clickhouse_config = {
|
||||||
"source": {
|
"source": {
|
||||||
"type": "clickhouse-usage",
|
"type": "clickhouse-usage",
|
||||||
@ -195,11 +136,6 @@ def mock_list_entities(
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
EXPECTED_MSSQL_FILTER = " AND db.NAME IN ('test_db_1','test_db_2')"
|
|
||||||
|
|
||||||
EXPECTED_REDSHIFT_FILTER = """database_name IN ('test_db_1','test_db_2') AND schema_name IN ('test_schema_1','test_schema_2','test_schema_3','test_schema_4','test_schema_5','test_schema_6')"""
|
|
||||||
|
|
||||||
|
|
||||||
EXPECTED_CLICKHOUSE_FILTER = """
|
EXPECTED_CLICKHOUSE_FILTER = """
|
||||||
and query_kind = 'Select'
|
and query_kind = 'Select'
|
||||||
AND hasAny(databases, ['test_schema_1','test_schema_2','test_schema_3','test_schema_4','test_schema_5','test_schema_6'])"""
|
AND hasAny(databases, ['test_schema_1','test_schema_2','test_schema_3','test_schema_4','test_schema_5','test_schema_6'])"""
|
||||||
@ -210,26 +146,6 @@ class UsageQueryFilterTests(TestCase):
|
|||||||
Usage filter tests for database and schema filters
|
Usage filter tests for database and schema filters
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@patch.object(OpenMetadata, "list_all_entities", mock_list_entities)
|
|
||||||
def test_prepare_mssql(self):
|
|
||||||
config = OpenMetadataWorkflowConfig.parse_obj(mock_mssql_config)
|
|
||||||
mssql_source = MssqlUsageSource.create(
|
|
||||||
mock_mssql_config["source"],
|
|
||||||
config.workflowConfig.openMetadataServerConfig,
|
|
||||||
)
|
|
||||||
mssql_source.prepare()
|
|
||||||
assert mssql_source.filters == EXPECTED_MSSQL_FILTER
|
|
||||||
|
|
||||||
@patch.object(OpenMetadata, "list_all_entities", mock_list_entities)
|
|
||||||
def test_prepare_redshift(self):
|
|
||||||
config = OpenMetadataWorkflowConfig.parse_obj(mock_redshift_config)
|
|
||||||
redshift_source = RedshiftUsageSource.create(
|
|
||||||
mock_redshift_config["source"],
|
|
||||||
config.workflowConfig.openMetadataServerConfig,
|
|
||||||
)
|
|
||||||
redshift_source.prepare()
|
|
||||||
assert redshift_source.db_filters == EXPECTED_REDSHIFT_FILTER
|
|
||||||
|
|
||||||
@patch.object(OpenMetadata, "list_all_entities", mock_list_entities)
|
@patch.object(OpenMetadata, "list_all_entities", mock_list_entities)
|
||||||
def test_prepare_clickhouse(self):
|
def test_prepare_clickhouse(self):
|
||||||
config = OpenMetadataWorkflowConfig.parse_obj(mock_clickhouse_config)
|
config = OpenMetadataWorkflowConfig.parse_obj(mock_clickhouse_config)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user