Mirror of https://github.com/open-metadata/OpenMetadata.git (synced 2025-10-27 00:31:42 +00:00)
Add Lineage Parsing timeout and filtering condition (#12560)
parent 3e17c984eb
commit c6de61f6bc
@@ -23,7 +23,7 @@ from metadata.generated.schema.type.entityLineage import (
 )
 from metadata.generated.schema.type.entityReference import EntityReference
 from metadata.ingestion.lineage.models import Dialect
-from metadata.ingestion.lineage.parser import LineageParser
+from metadata.ingestion.lineage.parser import LINEAGE_PARSING_TIMEOUT, LineageParser
 from metadata.ingestion.ometa.ometa_api import OpenMetadata
 from metadata.utils import fqn
 from metadata.utils.fqn import build_es_fqn_search_string

@@ -342,6 +342,7 @@ def get_lineage_by_query(
     schema_name: Optional[str],
     query: str,
     dialect: Dialect,
+    timeout_seconds: int = LINEAGE_PARSING_TIMEOUT,
 ) -> Optional[Iterator[AddLineageRequest]]:
     """
     This method parses the query to get source, target and intermediate table names to create lineage,

@@ -351,7 +352,7 @@ def get_lineage_by_query(

     try:
         logger.debug(f"Running lineage with query: {query}")
-        lineage_parser = LineageParser(query, dialect)
+        lineage_parser = LineageParser(query, dialect, timeout_seconds=timeout_seconds)

         raw_column_lineage = lineage_parser.column_lineage
         column_lineage.update(populate_column_lineage_map(raw_column_lineage))

@@ -405,6 +406,7 @@ def get_lineage_via_table_entity(
     service_name: str,
     query: str,
     dialect: Dialect,
+    timeout_seconds: int = LINEAGE_PARSING_TIMEOUT,
 ) -> Optional[Iterator[AddLineageRequest]]:
     """Get lineage from table entity

@@ -427,7 +429,7 @@ def get_lineage_via_table_entity(

     try:
         logger.debug(f"Getting lineage via table entity using query: {query}")
-        lineage_parser = LineageParser(query, dialect)
+        lineage_parser = LineageParser(query, dialect, timeout_seconds=timeout_seconds)
         to_table_name = table_entity.name.__root__

         for from_table_name in lineage_parser.source_tables:
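The two `LineageParser` call sites above now accept the caller's timeout. A minimal sketch of what the new surface looks like from caller code — the query string and the `Dialect.SNOWFLAKE` member are illustrative assumptions; the `timeout_seconds` keyword and the `LINEAGE_PARSING_TIMEOUT` default come from this diff:

```python
from metadata.ingestion.lineage.models import Dialect
from metadata.ingestion.lineage.parser import LINEAGE_PARSING_TIMEOUT, LineageParser

# Illustrative only: bound a single parse at 60 seconds instead of the
# LINEAGE_PARSING_TIMEOUT default (300 seconds per the schemas in this commit).
query = "INSERT INTO sales.orders_clean SELECT * FROM sales.orders_raw"
parser = LineageParser(query, Dialect.SNOWFLAKE, timeout_seconds=60)
print(parser.source_tables, parser.target_tables)
```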
@@ -57,7 +57,7 @@ class BigqueryQueryParserSource(QueryParserSource, ABC):
             start_time=start_time,
             end_time=end_time,
             region=self.service_connection.usageLocation,
-            filters=self.filters,
+            filters=self.get_filters(),
             result_limit=self.source_config.resultLimit,
         )

@@ -76,7 +76,7 @@ class ClickhouseQueryParserSource(QueryParserSource, ABC):
         return self.sql_stmt.format(
             start_time=start_time,
             end_time=end_time,
-            filters=self.filters,  # pylint: disable=no-member
+            filters=self.get_filters(),
             result_limit=self.source_config.resultLimit,
         )

@@ -436,6 +436,7 @@ class CommonDbSourceService(
                 metadata=self.metadata,
                 service_name=self.context.database_service.name.__root__,
                 connection_type=self.service_connection.type.value,
+                timeout_seconds=self.source_config.viewParsingTimeoutLimit,
             )

     def _get_foreign_constraints(

@@ -640,6 +640,7 @@ class DbtSource(DbtServiceSource):
                     database_name=source_elements[1],
                     schema_name=source_elements[2],
                     dialect=dialect,
+                    timeout_seconds=self.source_config.parsingTimeoutLimit,
                 )
                 for lineage_request in lineages or []:
                     yield lineage_request

@@ -113,6 +113,7 @@ class LineageSource(QueryParserSource, ABC):
                 database_name=table_query.databaseName,
                 schema_name=table_query.databaseSchema,
                 dialect=dialect,
+                timeout_seconds=self.source_config.parsingTimeoutLimit,
             )

             for lineage_request in lineages or []:

@@ -76,7 +76,7 @@ class PostgresQueryParserSource(QueryParserSource, ABC):
         """
         return self.sql_stmt.format(
             result_limit=self.config.sourceConfig.config.resultLimit,
-            filters=self.filters,
+            filters=self.get_filters(),
             time_column_name=self.get_postgres_time_column_name(),
         )

@@ -103,10 +103,15 @@ class QueryParserSource(Source[Union[TableQuery, AddLineageRequest]], ABC):
         return self.sql_stmt.format(
             start_time=start_time,
             end_time=end_time,
-            filters=self.filters,
+            filters=self.get_filters(),
             result_limit=self.source_config.resultLimit,
         )

+    def get_filters(self) -> str:
+        if self.source_config.filterCondition:
+            return f"{self.filters} AND {self.source_config.filterCondition}"
+        return self.filters
+
     def close(self):
         """
         By default, there is nothing to close
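The new `get_filters` hook is the whole filtering mechanism: when a `filterCondition` is configured, it is simply AND-ed onto the connector's built-in filter fragment before `sql_stmt.format(...)` runs. A self-contained sketch of that composition, with invented stand-in values:

```python
# Stand-ins for the connector's built-in filters and the user's filterCondition.
builtin_filters = "AND query_text NOT LIKE '--metabase %'"
filter_condition = "database_name = 'SALES'"

# Same logic as QueryParserSource.get_filters() above.
combined = (
    f"{builtin_filters} AND {filter_condition}" if filter_condition else builtin_filters
)
print(combined)
# AND query_text NOT LIKE '--metabase %' AND database_name = 'SALES'
```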
@@ -54,6 +54,6 @@ class RedshiftQueryParserSource(QueryParserSource, ABC):
         return self.sql_stmt.format(
             start_time=start_time,
             end_time=end_time,
-            filters=self.filters,
+            filters=self.get_filters(),
             result_limit=self.source_config.resultLimit,
         )

@@ -60,7 +60,7 @@ class SnowflakeQueryParserSource(QueryParserSource, ABC):
             start_time=start_time,
             end_time=end_time,
             result_limit=self.config.sourceConfig.config.resultLimit,
-            filters=self.filters,
+            filters=self.get_filters(),
         )

     def set_session_query_tag(self) -> None:

@@ -17,7 +17,7 @@ import traceback

 from metadata.generated.schema.entity.data.table import Table
 from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper
-from metadata.ingestion.lineage.parser import LineageParser
+from metadata.ingestion.lineage.parser import LINEAGE_PARSING_TIMEOUT, LineageParser
 from metadata.ingestion.lineage.sql_lineage import (
     get_lineage_by_query,
     get_lineage_via_table_entity,

@@ -39,7 +39,11 @@ def get_host_from_host_port(uri: str) -> str:


 def get_view_lineage(
-    view: TableView, metadata: OpenMetadata, service_name: str, connection_type: str
+    view: TableView,
+    metadata: OpenMetadata,
+    service_name: str,
+    connection_type: str,
+    timeout_seconds: int = LINEAGE_PARSING_TIMEOUT,
 ):
     """
     Method to generate view lineage

@@ -64,7 +68,9 @@ def get_view_lineage(
     try:
         connection_type = str(connection_type)
         dialect = ConnectionTypeDialectMapper.dialect_of(connection_type)
-        lineage_parser = LineageParser(view_definition, dialect)
+        lineage_parser = LineageParser(
+            view_definition, dialect, timeout_seconds=timeout_seconds
+        )
         if lineage_parser.source_tables and lineage_parser.target_tables:
             yield from get_lineage_by_query(
                 metadata,

@@ -73,6 +79,7 @@ def get_view_lineage(
                 database_name=db_name,
                 schema_name=schema_name,
                 dialect=dialect,
+                timeout_seconds=timeout_seconds,
             ) or []

         else:

@@ -84,6 +91,7 @@ def get_view_lineage(
                 schema_name=schema_name,
                 query=view_definition,
                 dialect=dialect,
+                timeout_seconds=timeout_seconds,
             ) or []
     except Exception as exc:
         logger.debug(traceback.format_exc())
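For reference, a hypothetical caller of the reworked generator. The argument values are placeholders; the keyword signature is the one introduced above, and `metadata.add_lineage` is the standard ometa API for pushing an `AddLineageRequest`:

```python
# Illustrative only: emit lineage for one view, bounding each parse at 120
# seconds instead of the 300-second LINEAGE_PARSING_TIMEOUT default.
for lineage_request in get_view_lineage(
    view=view,                  # a TableView collected during metadata ingestion
    metadata=metadata,          # an OpenMetadata client instance
    service_name="mysql_prod",  # placeholder service name
    connection_type="Mysql",    # resolved to a Dialect via ConnectionTypeDialectMapper
    timeout_seconds=120,
):
    metadata.add_lineage(lineage_request)
```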
@@ -0,0 +1,213 @@
---
title: Lineage Query Filtering
slug: /connectors/ingestion/workflows/lineage/filter-query-set
---

# Lineage Query Filtering

In order to extract usage information, OpenMetadata parses the queries that have run against the database. We fetch these queries from the query history table of the respective data source, covering all queries executed within the configured number of days. This guide explains how to filter the query history result set, which can help exclude queries matching a specific pattern, or queries executed on a specific schema or database, depending on the data source.

Query filtering is supported for both the Usage & Lineage workflows. While configuring either workflow you will find a `Filtering Condition` text field where you can provide a SQL condition; it will be added to the already existing conditions with an `AND` operation. Later in this document you will find how to write this condition for each supported data source.

{% image
  src="/images/v1.1.1/features/ingestion/workflows/lineage/filter-condition-field.png"
  alt="filter-condition-field"
  caption="Filter Condition Field"
/%}
## Snowflake Filter Condition

To fetch the query history log from Snowflake we execute the following query:

```
SELECT
  query_type,
  query_text,
  user_name,
  database_name,
  schema_name,
  start_time,
  end_time,
  total_elapsed_time/1000 duration
from snowflake.account_usage.query_history
WHERE query_text NOT LIKE '/* {"app": "OpenMetadata", %} */%'
AND query_text NOT LIKE '/* {"app": "dbt", %} */%'
AND start_time between to_timestamp_ltz('{start_time}') and to_timestamp_ltz('{end_time}')
AND QUERY_TYPE NOT IN ('ROLLBACK','CREATE_USER',....)

AND {**your filter condition**}

LIMIT {result_limit}
```

You can refer to [this Snowflake documentation](https://docs.snowflake.com/en/sql-reference/functions/query_history) to find out more about the query history table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query_text NOT LIKE '--metabase %'`.

If you also need to restrict the query log to queries executed on the `SALES` database, you can extend the condition to `query_text NOT LIKE '--metabase %' AND database_name='SALES'`.
## BigQuery Filter Condition

To fetch the query history log from BigQuery we execute the following query:

```
SELECT
  project_id as database_name,
  user_email as user_name,
  statement_type as query_type,
  start_time,
  end_time,
  query as query_text,
  null as schema_name,
  total_slot_ms/1000 as duration
FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE creation_time BETWEEN "{start_time}" AND "{end_time}"
AND statement_type IN ("SELECT",....)

AND {**your filter condition**}

AND job_type = "QUERY"
AND state = "DONE"
AND IFNULL(statement_type, "NO") not in ("NO", "DROP_TABLE", "CREATE_TABLE")
AND query NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND query NOT LIKE '/* {{"app": "dbt", %%}} */%%'
LIMIT {result_limit}
```

You can refer to [this BigQuery documentation](https://cloud.google.com/bigquery/docs/information-schema-jobs) to find out more about the JOBS_BY_PROJECT table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## MSSQL Filter Condition

To fetch the query history log from MSSQL we execute the following query:

```
SELECT TOP {result_limit}
  db.NAME database_name,
  t.text query_text,
  s.last_execution_time start_time,
  DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time,
  s.total_elapsed_time/1000 duration,
  NULL schema_name,
  NULL query_type,
  NULL user_name,
  NULL aborted
FROM sys.dm_exec_cached_plans AS p
INNER JOIN sys.dm_exec_query_stats AS s
  ON p.plan_handle = s.plan_handle
CROSS APPLY sys.Dm_exec_sql_text(p.plan_handle) AS t
INNER JOIN sys.databases db
  ON db.database_id = t.dbid
WHERE s.last_execution_time between '{start_time}' and '{end_time}'
AND t.text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND t.text NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND p.objtype != 'Prepared'

AND {**your filter condition**}

ORDER BY s.last_execution_time DESC
```

You can refer to [this MSSQL documentation](https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-cached-plans-transact-sql?view=sql-server-ver16) to find out more about the dm_exec_cached_plans table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `t.text NOT LIKE '--metabase %'`.

If you also need to restrict the query log to queries executed on the `SALES` database, you can extend the condition to `t.text NOT LIKE '--metabase %' AND db.NAME='SALES'`.
## ClickHouse Filter Condition

To fetch the query history log from ClickHouse we execute the following query:

```
Select
  query_start_time start_time,
  DATEADD(query_duration_ms, query_start_time) end_time,
  query_duration_ms/1000 duration,
  'default' database_name,
  user user_name,
  FALSE aborted,
  query_id query_id,
  query query_text,
  databases schema_name,
  tables tables
From system.query_log
Where start_time between '{start_time}' and '{end_time}'
and CAST(type,'Int8') <> 3
and CAST(type,'Int8') <> 4
and query NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
and query NOT LIKE '/* {{"app": "dbt", %%}} */%%'

AND {**your filter condition**}

and (`type`='QueryFinish' or `type`='QueryStart')
LIMIT {result_limit}
```

You can refer to [this ClickHouse documentation](https://clickhouse.com/docs/en/operations/system-tables/query_log) to find out more about the query_log table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## Vertica Filter Condition

To fetch the query history log from Vertica we execute the following query:

```
SELECT
  DBNAME() AS database_name,
  p.query AS query_text,
  r.start_timestamp AS start_time,
  r.end_timestamp AS end_time,
  p.schema_name,
  p.query_duration_us/1000 AS duration,
  p.query_type,
  p.user_name,
  NULL aborted
FROM query_profiles p
LEFT JOIN query_requests r
  ON p.TRANSACTION_ID = r.TRANSACTION_ID
  AND p.STATEMENT_ID = r.STATEMENT_ID
WHERE query_start between '{start_time}' and '{end_time}'
AND query NOT LIKE '%%/* {{"app": "OpenMetadata", %%}} */%%'
AND query NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND success = 1

AND {**your filter condition**}

ORDER BY query_start DESC
LIMIT {result_limit}
```

You can refer to [this Vertica documentation](https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/SQLReferenceManual/SystemTables/MONITOR/QUERY_PROFILES.htm) to find out more about the query_profiles table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## Redshift Filter Condition

To fetch the query history log from Redshift we execute the following query:

```
SELECT *
FROM pg_catalog.stl_query
WHERE userid > 1

AND {**your filter condition**}

-- Filter out all automated & cursor queries
AND querytxt NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND querytxt NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND aborted = 0
AND starttime >= '{start_time}'
AND starttime < '{end_time}'
LIMIT {result_limit}
```

You can refer to [this Redshift documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERY.html) to find out more about the stl_query table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `querytxt NOT LIKE '--metabase %'`.
@@ -0,0 +1,213 @@
---
title: Usage Query Filtering
slug: /connectors/ingestion/workflows/usage/filter-query-set
---

# Usage Query Filtering

In order to extract usage information, OpenMetadata parses the queries that have run against the database. We fetch these queries from the query history table of the respective data source, covering all queries executed within the configured number of days. This guide explains how to filter the query history result set, which can help exclude queries matching a specific pattern, or queries executed on a specific schema or database, depending on the data source.

Query filtering is supported for both the Usage & Lineage workflows. While configuring either workflow you will find a `Filtering Condition` text field where you can provide a SQL condition; it will be added to the already existing conditions with an `AND` operation. Later in this document you will find how to write this condition for each supported data source.

{% image
  src="/images/v1.1.1/features/ingestion/workflows/usage/filter-condition-field.png"
  alt="filter-condition-field"
  caption="Filter Condition Field"
/%}
## Snowflake Filter Condition

To fetch the query history log from Snowflake we execute the following query:

```
SELECT
  query_type,
  query_text,
  user_name,
  database_name,
  schema_name,
  start_time,
  end_time,
  total_elapsed_time/1000 duration
from snowflake.account_usage.query_history
WHERE query_text NOT LIKE '/* {"app": "OpenMetadata", %} */%'
AND query_text NOT LIKE '/* {"app": "dbt", %} */%'
AND start_time between to_timestamp_ltz('{start_time}') and to_timestamp_ltz('{end_time}')
AND QUERY_TYPE NOT IN ('ROLLBACK','CREATE_USER',....)

AND {**your filter condition**}

LIMIT {result_limit}
```

You can refer to [this Snowflake documentation](https://docs.snowflake.com/en/sql-reference/functions/query_history) to find out more about the query history table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query_text NOT LIKE '--metabase %'`.

If you also need to restrict the query log to queries executed on the `SALES` database, you can extend the condition to `query_text NOT LIKE '--metabase %' AND database_name='SALES'`.
## BigQuery Filter Condition

To fetch the query history log from BigQuery we execute the following query:

```
SELECT
  project_id as database_name,
  user_email as user_name,
  statement_type as query_type,
  start_time,
  end_time,
  query as query_text,
  null as schema_name,
  total_slot_ms/1000 as duration
FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE creation_time BETWEEN "{start_time}" AND "{end_time}"
AND statement_type IN ("SELECT",....)

AND {**your filter condition**}

AND job_type = "QUERY"
AND state = "DONE"
AND IFNULL(statement_type, "NO") not in ("NO", "DROP_TABLE", "CREATE_TABLE")
AND query NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND query NOT LIKE '/* {{"app": "dbt", %%}} */%%'
LIMIT {result_limit}
```

You can refer to [this BigQuery documentation](https://cloud.google.com/bigquery/docs/information-schema-jobs) to find out more about the JOBS_BY_PROJECT table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## MSSQL Filter Condition

To fetch the query history log from MSSQL we execute the following query:

```
SELECT TOP {result_limit}
  db.NAME database_name,
  t.text query_text,
  s.last_execution_time start_time,
  DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time,
  s.total_elapsed_time/1000 duration,
  NULL schema_name,
  NULL query_type,
  NULL user_name,
  NULL aborted
FROM sys.dm_exec_cached_plans AS p
INNER JOIN sys.dm_exec_query_stats AS s
  ON p.plan_handle = s.plan_handle
CROSS APPLY sys.Dm_exec_sql_text(p.plan_handle) AS t
INNER JOIN sys.databases db
  ON db.database_id = t.dbid
WHERE s.last_execution_time between '{start_time}' and '{end_time}'
AND t.text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND t.text NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND p.objtype != 'Prepared'

AND {**your filter condition**}

ORDER BY s.last_execution_time DESC
```

You can refer to [this MSSQL documentation](https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-cached-plans-transact-sql?view=sql-server-ver16) to find out more about the dm_exec_cached_plans table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `t.text NOT LIKE '--metabase %'`.

If you also need to restrict the query log to queries executed on the `SALES` database, you can extend the condition to `t.text NOT LIKE '--metabase %' AND db.NAME='SALES'`.
## ClickHouse Filter Condition

To fetch the query history log from ClickHouse we execute the following query:

```
Select
  query_start_time start_time,
  DATEADD(query_duration_ms, query_start_time) end_time,
  query_duration_ms/1000 duration,
  'default' database_name,
  user user_name,
  FALSE aborted,
  query_id query_id,
  query query_text,
  databases schema_name,
  tables tables
From system.query_log
Where start_time between '{start_time}' and '{end_time}'
and CAST(type,'Int8') <> 3
and CAST(type,'Int8') <> 4
and query NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
and query NOT LIKE '/* {{"app": "dbt", %%}} */%%'

AND {**your filter condition**}

and (`type`='QueryFinish' or `type`='QueryStart')
LIMIT {result_limit}
```

You can refer to [this ClickHouse documentation](https://clickhouse.com/docs/en/operations/system-tables/query_log) to find out more about the query_log table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## Vertica Filter Condition

To fetch the query history log from Vertica we execute the following query:

```
SELECT
  DBNAME() AS database_name,
  p.query AS query_text,
  r.start_timestamp AS start_time,
  r.end_timestamp AS end_time,
  p.schema_name,
  p.query_duration_us/1000 AS duration,
  p.query_type,
  p.user_name,
  NULL aborted
FROM query_profiles p
LEFT JOIN query_requests r
  ON p.TRANSACTION_ID = r.TRANSACTION_ID
  AND p.STATEMENT_ID = r.STATEMENT_ID
WHERE query_start between '{start_time}' and '{end_time}'
AND query NOT LIKE '%%/* {{"app": "OpenMetadata", %%}} */%%'
AND query NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND success = 1

AND {**your filter condition**}

ORDER BY query_start DESC
LIMIT {result_limit}
```

You can refer to [this Vertica documentation](https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/SQLReferenceManual/SystemTables/MONITOR/QUERY_PROFILES.htm) to find out more about the query_profiles table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `query NOT LIKE '--metabase %'`.
## Redshift Filter Condition

To fetch the query history log from Redshift we execute the following query:

```
SELECT *
FROM pg_catalog.stl_query
WHERE userid > 1

AND {**your filter condition**}

-- Filter out all automated & cursor queries
AND querytxt NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND querytxt NOT LIKE '/* {{"app": "dbt", %%}} */%%'
AND aborted = 0
AND starttime >= '{start_time}'
AND starttime < '{end_time}'
LIMIT {result_limit}
```

You can refer to [this Redshift documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERY.html) to find out more about the stl_query table.

For example, if you want to filter out queries executed by the Metabase client, i.e. queries starting with `-- metabase`, you can set the condition to `querytxt NOT LIKE '--metabase %'`.
@@ -513,10 +513,14 @@ site_menu:
     url: /connectors/ingestion/workflows/usage
   - category: Connectors / Ingestion / Workflows / Usage / Usage Workflow Through Query Logs
     url: /connectors/ingestion/workflows/usage/usage-workflow-query-logs
+  - category: Connectors / Ingestion / Workflows / Usage / Usage Query Filtering
+    url: /connectors/ingestion/workflows/usage/filter-query-set
   - category: Connectors / Ingestion / Workflows / Lineage
     url: /connectors/ingestion/workflows/lineage
   - category: Connectors / Ingestion / Workflows / Lineage / Lineage Workflow Through Query Logs
     url: /connectors/ingestion/workflows/lineage/lineage-workflow-query-logs
+  - category: Connectors / Ingestion / Workflows / Lineage / Lineage Query Filtering
+    url: /connectors/ingestion/workflows/lineage/filter-query-set
   - category: Connectors / Ingestion / Workflows / dbt
     url: /connectors/ingestion/workflows/dbt
   - category: Connectors / Ingestion / Workflows / dbt / Ingest dbt UI
Binary file not shown. After: 111 KiB
Binary file not shown. After: 106 KiB
@@ -48,6 +48,11 @@
       "type": "boolean",
       "default": false
     },
+    "viewParsingTimeoutLimit": {
+      "description": "Configuration to set the timeout for parsing view query in seconds.",
+      "type": "integer",
+      "default": "300"
+    },
     "schemaFilterPattern": {
       "description": "Regex to only fetch tables or databases that matches the pattern.",
       "$ref": "../type/filterPattern.json#/definitions/filterPattern"
@@ -31,6 +31,15 @@
       "type": "integer",
       "default": "1000"
     },
+    "parsingTimeoutLimit": {
+      "description": "Configuration to set the timeout for parsing the query in seconds.",
+      "type": "integer",
+      "default": "300"
+    },
+    "filterCondition": {
+      "description": "Configuration the condition to filter the query history.",
+      "type": "string"
+    },
     "schemaFilterPattern": {
       "description": "Regex to only fetch tables or databases that matches the pattern.",
       "$ref": "../type/filterPattern.json#/definitions/filterPattern"
@@ -27,6 +27,10 @@
       "type": "string",
       "default": "/tmp/query_log"
     },
+    "filterCondition": {
+      "description": "Configuration the condition to filter the query history.",
+      "type": "string"
+    },
     "resultLimit": {
       "description": "Configuration to set the limit for query logs",
       "type": "integer",
@@ -66,6 +66,11 @@
       "description": "Regex exclude tables or databases that matches the pattern.",
       "$ref": "../type/filterPattern.json#/definitions/filterPattern"
     },
+    "parsingTimeoutLimit": {
+      "description": "Configuration to set the timeout for parsing the query in seconds.",
+      "type": "integer",
+      "default": "300"
+    },
     "databaseFilterPattern": {
       "description": "Regex to only fetch databases that matches the pattern.",
       "$ref": "../type/filterPattern.json#/definitions/filterPattern"
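Together, these schema fields surface in workflow configs under `sourceConfig.config`. A hypothetical lineage workflow fragment — the surrounding structure is assumed from standard OpenMetadata workflow configs; the `parsingTimeoutLimit` and `filterCondition` names and the 300-second default come from the schemas above:

```json
{
  "source": {
    "sourceConfig": {
      "config": {
        "type": "DatabaseLineage",
        "queryLogDuration": 1,
        "resultLimit": 1000,
        "parsingTimeoutLimit": 300,
        "filterCondition": "query_text NOT LIKE '--metabase %'"
      }
    }
  }
}
```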
@@ -343,3 +343,10 @@ Option to include fetching the tags metadata from dbt.

 When enabled, OpenMetadata will fetch tags associated with tables and columns from dbt `manifest.json` and attach them to the corresponding tables in OpenMetadata.
 $$
+
+
+$$section
+### Query Parsing Timeout Limit $(id="parsingTimeoutLimit")
+
+Specify the timeout limit for parsing the SQL queries to perform the lineage analysis.
+$$
@@ -31,3 +31,20 @@ SELECT xyz FROM query_history limit <resultLimit>

 This value will take precedence over the `Query Log Duration`.
 $$
+
+
+$$section
+### Filtering Condition $(id="filterCondition")
+
+We execute a query on the query history table of the respective data source to perform the query analysis and extract the lineage and usage information. This field is useful when you want to exclude some queries from that analysis: you can specify a SQL condition that will be applied to the query history result set.
+
+For example: `query_text not ilike '--- metabase query %'`
+
+Check out [this](https://docs.open-metadata.org/connectors/ingestion/workflows/usage/filter-query-set) document for further examples of filter conditions.
+$$
+
+$$section
+### Query Parsing Timeout Limit $(id="parsingTimeoutLimit")
+
+Specify the timeout limit for parsing the SQL queries to perform the lineage analysis.
+$$
@@ -120,3 +120,10 @@ Here are some examples of scenarios where tables will get soft deleted if this f

 - If you already have `SchemaA` & `SchemaB` ingested in OpenMetadata, and you later apply a `Schema Filter Pattern` to exclude `SchemaB`, ALL tables from `SchemaB` will be deleted by this ingestion pipeline. This might be useful if you want to remove a full schema from OpenMetadata that you missed filtering out the first time.

 $$
+
+
+$$section
+### View Definition Parsing Timeout Limit $(id="viewParsingTimeoutLimit")
+
+Specify the timeout limit for parsing the view definition SQL queries to perform the lineage analysis.
+$$
@@ -33,3 +33,15 @@ SELECT xyz FROM query_history limit <resultLimit>

 This value will take precedence over the `Query Log Duration`.
 $$
+
+
+$$section
+### Filtering Condition $(id="filterCondition")
+
+We execute a query on the query history table of the respective data source to perform the query analysis and extract the lineage and usage information. This field is useful when you want to exclude some queries from that analysis: you can specify a SQL condition that will be applied to the query history result set.
+
+For example: `query_text not ilike '--- metabase query %'`
+
+Check out [this](https://docs.open-metadata.org/connectors/ingestion/workflows/usage/filter-query-set) document for further examples of filter conditions.
+$$
@@ -24,6 +24,7 @@ import React, {
 import { useTranslation } from 'react-i18next';
 import {
   DBT_CLASSIFICATION_DEFAULT_VALUE,
+  DEFAULT_PARSING_TIMEOUT_LIMIT,
   INITIAL_FILTER_PATTERN,
   STEPS_FOR_ADD_INGESTION,
 } from '../../constants/Ingestions.constant';

@@ -251,6 +252,11 @@ const AddIngestion = ({
       confidence: sourceConfig?.confidence,
       dbtClassificationName:
         sourceConfig?.dbtClassificationName ?? DBT_CLASSIFICATION_DEFAULT_VALUE, // default value from Json Schema
+      parsingTimeoutLimit:
+        sourceConfig?.parsingTimeoutLimit ?? DEFAULT_PARSING_TIMEOUT_LIMIT,
+      viewParsingTimeoutLimit:
+        sourceConfig?.viewParsingTimeoutLimit ?? DEFAULT_PARSING_TIMEOUT_LIMIT,
+      filterCondition: sourceConfig?.filterCondition ?? '',
     }),
     []
   );

@@ -395,6 +401,7 @@ const AddIngestion = ({
       topicFilterPattern,
       useFqnFilter,
       includeOwners,
+      viewParsingTimeoutLimit,
     } = state;

     switch (serviceCategory) {

@@ -418,6 +425,7 @@ const AddIngestion = ({
           markDeletedTables: markDeletedTables,
           markAllDeletedTables: markAllDeletedTables,
           type: ConfigType.DatabaseMetadata,
+          viewParsingTimeoutLimit: viewParsingTimeoutLimit,
         };
       }
       case ServiceCategory.MESSAGING_SERVICES: {

@@ -513,6 +521,8 @@ const AddIngestion = ({
       timeoutSeconds,
       processPii,
       confidence,
+      filterCondition,
+      parsingTimeoutLimit,
     } = state;
     switch (type) {
       case PipelineType.Usage: {

@@ -521,6 +531,7 @@ const AddIngestion = ({
           resultLimit: resultLimit,
           stageFileLocation: stageFileLocation,
           type: usageIngestionType,
+          filterCondition: filterCondition,
         };
       }
       case PipelineType.Lineage: {

@@ -528,6 +539,8 @@ const AddIngestion = ({
           queryLogDuration: queryLogDuration,
           resultLimit: resultLimit,
           type: lineageIngestionType,
+          filterCondition: filterCondition,
+          parsingTimeoutLimit: parsingTimeoutLimit,
         };
       }
       case PipelineType.Profiler: {

@@ -572,6 +585,7 @@ const AddIngestion = ({
           databaseFilterPattern: databaseFilterPattern,
           schemaFilterPattern: schemaFilterPattern,
           tableFilterPattern: tableFilterPattern,
+          parsingTimeoutLimit: parsingTimeoutLimit,
         };
       }
@@ -95,6 +95,9 @@ const ConfigureIngestion = ({
     processPii,
     confidence,
     includeOwners,
+    parsingTimeoutLimit,
+    filterCondition,
+    viewParsingTimeoutLimit,
   } = useMemo(
     () => ({
       dataModelFilterPattern: data.dataModelFilterPattern,

@@ -143,6 +146,9 @@ const ConfigureIngestion = ({
       markDeletedMlModels: data.markDeletedMlModels,
       markDeletedPipelines: data.markDeletedPipelines,
       confidence: data.confidence,
+      parsingTimeoutLimit: data.parsingTimeoutLimit,
+      filterCondition: data.filterCondition,
+      viewParsingTimeoutLimit: data.viewParsingTimeoutLimit,
     }),
     [data]
   );

@@ -218,6 +224,14 @@ const ConfigureIngestion = ({

   const handleConfidenceScore = handleIntValue('confidence');

+  const handleParsingTimeoutLimit = handleIntValue('parsingTimeoutLimit');
+
+  const handleViewParsingTimeoutLimit = handleIntValue(
+    'viewParsingTimeoutLimit'
+  );
+
+  const handleFilterCondition = handleValueChange('filterCondition');
+
   const handleProfileSampleTypeChange = (value: ProfileSampleType) => {
     onChange({
       profileSampleType: value,

@@ -252,6 +266,24 @@ const ConfigureIngestion = ({
       ],
     },
   ];

+  const filterConditionField: FieldProp = {
+    name: 'filterCondition',
+    label: t('label.filtering-condition'),
+    type: FieldTypes.TEXT,
+    required: false,
+    id: 'root/filterCondition',
+    hasSeparator: true,
+    formItemProps: {
+      initialValue: filterCondition,
+    },
+    props: {
+      'data-testid': 'filtering-condition',
+      value: filterCondition,
+      onChange: handleFilterCondition,
+    },
+  };
+
   const includeOwnersField: FieldProp = {
     name: 'includeOwners',
     label: t('label.include-owner'),

@@ -563,6 +595,22 @@ const ConfigureIngestion = ({
       },
     ] as FieldProp[])
       : []),
+    {
+      name: 'viewParsingTimeoutLimit',
+      label: t('label.view-parsing-timeout-limit'),
+      type: FieldTypes.NUMBER,
+      required: false,
+      id: 'root/viewParsingTimeoutLimit',
+      hasSeparator: true,
+      formItemProps: {
+        initialValue: viewParsingTimeoutLimit,
+      },
+      props: {
+        'data-testid': 'dbt-view-parsing-timeout-limit',
+        value: viewParsingTimeoutLimit,
+        onChange: handleViewParsingTimeoutLimit,
+      },
+    },
   ];

   const dashboardMetadataFields: FieldProp[] = [

@@ -899,6 +947,7 @@ const ConfigureIngestion = ({
       },
     },
     rateLimitField,
+    filterConditionField,
     loggerLevelField,
   ];

@@ -910,6 +959,23 @@ const ConfigureIngestion = ({
       queryLogDurationField,
       rateLimitField,
       loggerLevelField,
+      filterConditionField,
+      {
+        name: 'parsingTimeoutLimit',
+        label: t('label.parsing-timeout-limit'),
+        type: FieldTypes.NUMBER,
+        required: false,
+        id: 'root/parsingTimeoutLimit',
+        hasSeparator: true,
+        formItemProps: {
+          initialValue: parsingTimeoutLimit,
+        },
+        props: {
+          'data-testid': 'dbt-parsing-timeout-limit',
+          value: parsingTimeoutLimit,
+          onChange: handleParsingTimeoutLimit,
+        },
+      },
     ];

     return generateFormFields(fields);
@@ -114,6 +114,7 @@ export interface AddIngestionState {
   dbtConfigSourceType: DBT_SOURCES;
   description: string;
   enableDebugLog: boolean;
+  filterCondition: string;
   gcsConfigType: GCS_CONFIG | undefined;
   includeLineage: boolean;
   includeTags: boolean;

@@ -153,6 +154,8 @@ export interface AddIngestionState {
   timeoutSeconds: number;
   topicFilterPattern: FilterPattern;
   useFqnFilter: boolean;
+  viewParsingTimeoutLimit: number;
+  parsingTimeoutLimit: number;
   processPii: boolean;
   includeOwners: boolean;
   confidence?: number;
@@ -14,6 +14,7 @@
 import { render, screen } from '@testing-library/react';
 import React from 'react';
 import { DBTCloudConfig } from './DBTCloudConfig';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';

 const mockProps = {
   dbtCloudAccountId: '',

@@ -21,6 +22,7 @@ const mockProps = {
   dbtUpdateDescriptions: false,
   dbtCloudUrl: 'https://cloud.getdbt.com/',
   enableDebugLog: false,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 jest.mock('./DBTCommonFields.component', () =>

@@ -20,6 +20,7 @@ import { DbtConfigCloud } from './DBTConfigForm.interface';

 interface Props extends DbtConfigCloud {
   enableDebugLog: boolean;
+  parsingTimeoutLimit: number;
 }

 export const DBTCloudConfig: FunctionComponent<Props> = ({

@@ -32,6 +33,7 @@ export const DBTCloudConfig: FunctionComponent<Props> = ({
   dbtCloudUrl = 'https://cloud.getdbt.com/',
   dbtClassificationName,
   enableDebugLog,
+  parsingTimeoutLimit,
 }: Props) => {
   const cloudConfigFields: FieldProp[] = [
     {

@@ -110,6 +112,7 @@ export const DBTCloudConfig: FunctionComponent<Props> = ({
         descriptionId="cloud-update-description"
         enableDebugLog={enableDebugLog}
         includeTags={includeTags}
+        parsingTimeoutLimit={parsingTimeoutLimit}
       />
     </Fragment>
   );
@@ -22,6 +22,7 @@ interface Props {
   dbtUpdateDescriptions: boolean;
   enableDebugLog: boolean;
   includeTags: boolean;
+  parsingTimeoutLimit: number;
 }

 function DBTCommonFields({

@@ -30,6 +31,7 @@ function DBTCommonFields({
   dbtClassificationName,
   enableDebugLog,
   includeTags,
+  parsingTimeoutLimit,
 }: Props) {
   const { t } = useTranslation();

@@ -100,6 +102,20 @@ function DBTCommonFields({
         valuePropName: 'checked',
       },
     },
+    {
+      name: 'parsingTimeoutLimit',
+      label: t('label.parsing-timeout-limit'),
+      type: FieldTypes.NUMBER,
+      required: false,
+      props: {
+        'data-testid': 'dbt-parsing-timeout-limit',
+      },
+      id: 'root/parsingTimeoutLimit',
+      hasSeparator: true,
+      formItemProps: {
+        initialValue: parsingTimeoutLimit,
+      },
+    },
   ];

   return <Fragment>{generateFormFields(commonFields)}</Fragment>;
@@ -14,6 +14,7 @@
 import { render, screen } from '@testing-library/react';
 import React from 'react';
 import DBTCommonFields from './DBTCommonFields.component';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';

 const mockProps = {
   dbtUpdateDescriptions: false,

@@ -21,6 +22,7 @@ const mockProps = {
   descriptionId: 'test-id',
   dbtClassificationName: 'DBT',
   enableDebugLog: false,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 describe('DBTCommonFields', () => {

@@ -69,6 +69,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       dbtClassificationName: data.dbtClassificationName,
       dbtUpdateDescriptions: data.dbtUpdateDescriptions,
       includeTags: data.includeTags,
+      parsingTimeoutLimit: data.parsingTimeoutLimit,
     },
     databaseFilterPattern: data.databaseFilterPattern,
     schemaFilterPattern: data.schemaFilterPattern,

@@ -107,6 +108,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
         dbtUpdateDescriptions={dbtConfigSource?.dbtUpdateDescriptions}
         enableDebugLog={data.enableDebugLog}
         includeTags={dbtConfigSource?.includeTags}
+        parsingTimeoutLimit={dbtConfigSource?.parsingTimeoutLimit}
       />
     );
   }

@@ -120,6 +122,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
         dbtUpdateDescriptions={dbtConfigSource?.dbtUpdateDescriptions}
         enableDebugLog={data.enableDebugLog}
         includeTags={dbtConfigSource?.includeTags}
+        parsingTimeoutLimit={dbtConfigSource?.parsingTimeoutLimit}
       />
     );
   }

@@ -133,6 +136,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
         dbtUpdateDescriptions={dbtConfigSource?.dbtUpdateDescriptions}
         enableDebugLog={data.enableDebugLog}
         includeTags={dbtConfigSource?.includeTags}
+        parsingTimeoutLimit={dbtConfigSource?.parsingTimeoutLimit}
       />
     );
   }

@@ -145,6 +149,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
         dbtUpdateDescriptions={dbtConfigSource?.dbtUpdateDescriptions}
         enableDebugLog={data.enableDebugLog}
         includeTags={dbtConfigSource?.includeTags}
+        parsingTimeoutLimit={dbtConfigSource?.parsingTimeoutLimit}
       />
     );
   }

@@ -158,6 +163,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
         enableDebugLog={data.enableDebugLog}
         gcsType={gcsConfigType}
         includeTags={dbtConfigSource?.includeTags}
+        parsingTimeoutLimit={dbtConfigSource?.parsingTimeoutLimit}
       />
     );
   }

@@ -295,6 +301,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       },
       ingestionName: value?.name,
       enableDebugLog: value?.loggerLevel,
+      parsingTimeoutLimit: value?.parsingTimeoutLimit,
     });
     onSubmit();
   }

@@ -315,6 +322,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       },
       ingestionName: value?.name,
       enableDebugLog: value?.loggerLevel,
+      parsingTimeoutLimit: value?.parsingTimeoutLimit,
     });
     onSubmit();
   }

@@ -336,6 +344,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       },
       ingestionName: value?.name,
      enableDebugLog: value?.loggerLevel,
+      parsingTimeoutLimit: value?.parsingTimeoutLimit,
     });
     onSubmit();
   }

@@ -367,6 +376,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       },
       ingestionName: value?.name,
       enableDebugLog: value?.loggerLevel,
+      parsingTimeoutLimit: value?.parsingTimeoutLimit,
     });
     onSubmit();
   }

@@ -430,6 +440,7 @@ const DBTConfigFormBuilder: FunctionComponent<DBTConfigFormProps> = ({
       },
       ingestionName: value?.name,
       enableDebugLog: value?.loggerLevel,
+      parsingTimeoutLimit: value?.parsingTimeoutLimit,
     });
     onSubmit();
   }
@@ -95,3 +95,5 @@ export const rulesDBTGCSCredsFields: Record<
   email: ['clientEmail'],
   url: ['authUri', 'tokenUri', 'authProviderX509CertUrl', 'clientX509CertUrl'],
 };
+
+export const dbtParsingTimeoutLimit = 300;

@@ -17,6 +17,7 @@ import {
   DBTBucketDetails,
 } from 'generated/metadataIngestion/dbtPipeline';
 import React from 'react';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';
 import { GCS_CONFIG } from './DBTFormEnum';
 import { DBTGCSConfig } from './DBTGCSConfig';

@@ -27,6 +28,7 @@ const mockProps = {
   dbtClassificationName: '',
   dbtSecurityConfig: {} as Credentials,
   dbtPrefixConfig: {} as DBTBucketDetails,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 jest.mock('./DBTCommonFields.component', () =>

@@ -24,6 +24,7 @@ import { GCS_CONFIG } from './DBTFormEnum';
 interface Props extends DbtConfigS3GCS {
   gcsType?: GCS_CONFIG;
   enableDebugLog: boolean;
+  parsingTimeoutLimit: number;
 }

 export const DBTGCSConfig: FunctionComponent<Props> = ({

@@ -34,6 +35,7 @@ export const DBTGCSConfig: FunctionComponent<Props> = ({
   includeTags = true,
   dbtClassificationName,
   enableDebugLog,
+  parsingTimeoutLimit,
 }: Props) => {
   const dbtPrefixConfigFields: FieldProp[] = [
     {

@@ -252,6 +254,7 @@ export const DBTGCSConfig: FunctionComponent<Props> = ({
         descriptionId="gcs-update-description"
         enableDebugLog={enableDebugLog}
         includeTags={includeTags}
+        parsingTimeoutLimit={parsingTimeoutLimit}
       />
     </Fragment>
   );
@@ -13,6 +13,7 @@

 import { act, fireEvent, render, screen } from '@testing-library/react';
 import React from 'react';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';
 import { DBTHttpConfig } from './DBTHttpConfig';

 jest.mock('./DBTCommonFields.component', () =>

@@ -25,6 +26,7 @@ const mockProps = {
   dbtRunResultsHttpPath: '',
   dbtUpdateDescriptions: false,
   enableDebugLog: false,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 describe('Test DBT Http Config Form', () => {

@@ -20,6 +20,7 @@ import { DbtConfigHttp } from './DBTConfigForm.interface';

 interface Props extends DbtConfigHttp {
   enableDebugLog: boolean;
+  parsingTimeoutLimit: number;
 }

 export const DBTHttpConfig: FunctionComponent<Props> = ({

@@ -30,6 +31,7 @@ export const DBTHttpConfig: FunctionComponent<Props> = ({
   includeTags = true,
   dbtClassificationName,
   enableDebugLog,
+  parsingTimeoutLimit,
 }: Props) => {
   const { t } = useTranslation();

@@ -85,6 +87,7 @@ export const DBTHttpConfig: FunctionComponent<Props> = ({
         descriptionId="http-update-description"
         enableDebugLog={enableDebugLog}
         includeTags={includeTags}
+        parsingTimeoutLimit={parsingTimeoutLimit}
       />
     </Fragment>
   );

@@ -13,6 +13,7 @@

 import { act, fireEvent, render, screen } from '@testing-library/react';
 import React from 'react';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';
 import { DBTLocalConfig } from './DBTLocalConfig';

 const mockProps = {

@@ -21,6 +22,7 @@ const mockProps = {
   dbtRunResultsFilePath: '',
   dbtUpdateDescriptions: false,
   enableDebugLog: false,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 jest.mock('./DBTCommonFields.component', () =>

@@ -20,6 +20,7 @@ import { DbtConfigLocal } from './DBTConfigForm.interface';

 interface Props extends DbtConfigLocal {
   enableDebugLog: boolean;
+  parsingTimeoutLimit: number;
 }

 export const DBTLocalConfig: FunctionComponent<Props> = ({

@@ -30,6 +31,7 @@ export const DBTLocalConfig: FunctionComponent<Props> = ({
   includeTags = true,
   dbtClassificationName,
   enableDebugLog,
+  parsingTimeoutLimit,
 }: Props) => {
   const localConfigFields: FieldProp[] = [
     {

@@ -85,6 +87,7 @@ export const DBTLocalConfig: FunctionComponent<Props> = ({
         descriptionId="local-update-description"
         enableDebugLog={enableDebugLog}
         includeTags={includeTags}
+        parsingTimeoutLimit={parsingTimeoutLimit}
       />
     </Fragment>
   );

@@ -13,10 +13,12 @@

 import { render, screen } from '@testing-library/react';
 import React from 'react';
+import { dbtParsingTimeoutLimit } from './DBTFormConstants';
 import { DBTS3Config } from './DBTS3Config';

 const mockProps = {
   enableDebugLog: false,
+  parsingTimeoutLimit: dbtParsingTimeoutLimit,
 };

 jest.mock('./DBTCommonFields.component', () =>

@@ -20,6 +20,7 @@ import { DbtConfigS3GCS } from './DBTConfigForm.interface';

 interface Props extends DbtConfigS3GCS {
   enableDebugLog: boolean;
+  parsingTimeoutLimit: number;
 }

 export const DBTS3Config: FunctionComponent<Props> = ({

@@ -29,6 +30,7 @@ export const DBTS3Config: FunctionComponent<Props> = ({
   includeTags = true,
   dbtClassificationName,
   enableDebugLog,
+  parsingTimeoutLimit,
 }: Props) => {
   const s3ConfigFields: FieldProp[] = [
     {

@@ -187,6 +189,7 @@ export const DBTS3Config: FunctionComponent<Props> = ({
         descriptionId="s3-update-description"
         enableDebugLog={enableDebugLog}
         includeTags={includeTags}
+        parsingTimeoutLimit={parsingTimeoutLimit}
       />
     </Fragment>
   );

@@ -59,3 +59,5 @@ export const PIPELINE_TYPE_LOCALIZATION = {
 };

 export const DBT_CLASSIFICATION_DEFAULT_VALUE = 'dbtTags';
+
+export const DEFAULT_PARSING_TIMEOUT_LIMIT = 300;
@@ -378,6 +378,7 @@
   "filter": "Filter",
   "filter-pattern": "Filter Pattern",
   "filter-plural": "Filters",
+  "filtering-condition": "Filtering Condition",
   "first": "First",
   "first-lowercase": "first",
   "first-quartile": "First Quartile",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "Page Views by Data Assets",
   "parameter": "Parameter",
   "parent": "Parent",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "partitions",
   "partition-plural": "Partitions",
   "partitioned": "Partitioned",

@@ -1021,6 +1023,7 @@
   "view-entity": "View {{entity}}",
   "view-more": "View more",
   "view-new-count": "View {{count}} new",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "Views",
   "volume-change": "Volume Change",
   "warning": "Warning",
@@ -378,6 +378,7 @@
   "filter": "Filtro",
   "filter-pattern": "Patrón de Filtro",
   "filter-plural": "Filtros",
+  "filtering-condition": "Filtering Condition",
   "first": "Primero",
   "first-lowercase": "primero",
   "first-quartile": "Primer Cuartil",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "Vistas de página por activos de datos",
   "parameter": "Parámetro",
   "parent": "Padre",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "particiones",
   "partition-plural": "Particiones",
   "partitioned": "Partitioned",

@@ -1021,6 +1023,7 @@
   "view-entity": "Ver {{entity}}",
   "view-more": "Ver más",
   "view-new-count": "Ver {{count}} nuevo",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "Vistas",
   "volume-change": "Volume Change",
   "warning": "Warning",
@@ -378,6 +378,7 @@
   "filter": "Filtre",
   "filter-pattern": "Configuration du Filtre",
   "filter-plural": "Filtres",
+  "filtering-condition": "Filtering Condition",
   "first": "Premier",
   "first-lowercase": "premier",
   "first-quartile": "Premier Quartile",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "Page vues par actif de données",
   "parameter": "Paramètre",
   "parent": "Parent",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "partitions",
   "partition-plural": "Partitions",
   "partitioned": "Partitionné",

@@ -1021,6 +1023,7 @@
   "view-entity": "Voir {{entity}}",
   "view-more": "Voir plus",
   "view-new-count": "Voir {{count}} nouveau",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "Voir",
   "volume-change": "Volume Change",
   "warning": "Attention",
@@ -378,6 +378,7 @@
   "filter": "Filter",
   "filter-pattern": "フィルターのパターン",
   "filter-plural": "フィルター",
+  "filtering-condition": "Filtering Condition",
   "first": "最初",
   "first-lowercase": "最初",
   "first-quartile": "第1四分位数",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "データアセットごとのページビュー",
   "parameter": "パラメータ",
   "parent": "親",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "パーティション",
   "partition-plural": "パーティション",
   "partitioned": "Partitioned",

@@ -1021,6 +1023,7 @@
   "view-entity": "{{entity}}を見る",
   "view-more": "もっと見る",
   "view-new-count": "View {{count}} new",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "Views",
   "volume-change": "Volume Change",
   "warning": "Warning",
@@ -378,6 +378,7 @@
   "filter": "Filter",
   "filter-pattern": "Padrão de filtro",
   "filter-plural": "Filtros",
+  "filtering-condition": "Filtering Condition",
   "first": "Primeiro",
   "first-lowercase": "primeiro",
   "first-quartile": "Primeiro quartil",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "Visualizações da página por ativo de dados",
   "parameter": "Parâmetro",
   "parent": "Pai",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "partições",
   "partition-plural": "Partições",
   "partitioned": "Partitioned",

@@ -1021,6 +1023,7 @@
   "view-entity": "Ver {{entity}}",
   "view-more": "Ver mais",
   "view-new-count": "Ver {{count}} novo",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "Visualizações",
   "volume-change": "Volume Change",
   "warning": "Warning",
@@ -378,6 +378,7 @@
   "filter": "过滤",
   "filter-pattern": "过滤条件",
   "filter-plural": "过滤",
+  "filtering-condition": "Filtering Condition",
   "first": "第一",
   "first-lowercase": "第一",
   "first-quartile": "第一四分位数",

@@ -643,6 +644,7 @@
   "page-views-by-data-asset-plural": "数据资产页面浏览量",
   "parameter": "参数",
   "parent": "父级",
+  "parsing-timeout-limit": "Query Parsing Timeout Limit",
   "partition-lowercase-plural": "分区",
   "partition-plural": "分区",
   "partitioned": "已分区",

@@ -1021,6 +1023,7 @@
   "view-entity": "查看{{entity}}",
   "view-more": "查看更多",
   "view-new-count": "查看{{count}}个新的",
+  "view-parsing-timeout-limit": "View Definition Parsing Timeout Limit",
   "view-plural": "查看",
   "volume-change": "Volume Change",
   "warning": "警告",