Fix #9562: Add duration in usage ingestion (#9564)

This commit is contained in:
Mayur Singal 2023-01-05 12:03:07 +05:30 committed by GitHub
parent a813d25efa
commit 0a34e18e26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 36 additions and 7 deletions

View File

@ -17,3 +17,8 @@ JOIN dbservice_entity db
WHERE db.serviceType = 'SampleData'; WHERE db.serviceType = 'SampleData';
DELETE FROM dbservice_entity where serviceType = 'SampleData'; DELETE FROM dbservice_entity where serviceType = 'SampleData';
-- Delete supportsUsageExtraction from vertica
UPDATE dbservice_entity
SET json = JSON_REMOVE(json, '$.connection.config.supportsUsageExtraction')
WHERE serviceType = 'Vertica';

View File

@ -14,3 +14,8 @@ WHERE (db.id = er.fromId OR db.id = er.toId)
AND db.serviceType = 'SampleData'; AND db.serviceType = 'SampleData';
DELETE FROM dbservice_entity WHERE serviceType = 'SampleData'; DELETE FROM dbservice_entity WHERE serviceType = 'SampleData';
-- Delete supportsUsageExtraction from vertica
UPDATE dbservice_entity
SET json = json::jsonb #- '{connection,config,supportsUsageExtraction}'
WHERE serviceType = 'Vertica';

View File

@ -1,5 +1,5 @@
source: source:
type: clickhouse_usage type: clickhouse-usage
serviceName: local_clickhouse serviceName: local_clickhouse
serviceConnection: serviceConnection:
config: config:

View File

@ -58,6 +58,7 @@ def parse_sql_statement(record: TableQuery) -> Optional[ParsedData]:
userName=record.userName, userName=record.userName,
date=start_date.__root__.strftime("%Y-%m-%d"), date=start_date.__root__.strftime("%Y-%m-%d"),
serviceName=record.serviceName, serviceName=record.serviceName,
duration=record.duration,
) )

View File

@ -23,7 +23,8 @@ BIGQUERY_STATEMENT = textwrap.dedent(
start_time, start_time,
end_time, end_time,
query as query_text, query as query_text,
null as schema_name null as schema_name,
total_slot_ms/1000 as duration
FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE creation_time BETWEEN "{start_time}" AND "{end_time}" WHERE creation_time BETWEEN "{start_time}" AND "{end_time}"
{filters} {filters}

View File

@ -19,6 +19,7 @@ CLICKHOUSE_SQL_STATEMENT = textwrap.dedent(
Select Select
query_start_time start_time, query_start_time start_time,
DATEADD(query_duration_ms, query_start_time) end_time, DATEADD(query_duration_ms, query_start_time) end_time,
query_duration_ms/1000 duration,
'default' database_name, 'default' database_name,
user user_name, user user_name,
FALSE aborted, FALSE aborted,

View File

@ -102,6 +102,9 @@ class DatabricksUsageSource(DatabricksQueryParserSource, UsageSource):
endTime=row.get("execution_end_time_ms"), endTime=row.get("execution_end_time_ms"),
analysisDate=datetime.now(), analysisDate=datetime.now(),
serviceName=self.config.serviceName, serviceName=self.config.serviceName,
duration=row.get("duration") / 1000
if row.get("duration")
else None,
) )
) )
except Exception as err: except Exception as err:

View File

@ -21,6 +21,7 @@ MSSQL_SQL_STATEMENT = textwrap.dedent(
t.text query_text, t.text query_text,
s.last_execution_time start_time, s.last_execution_time start_time,
DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time, DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time,
s.total_elapsed_time/1000 duration,
NULL schema_name, NULL schema_name,
NULL query_type, NULL query_type,
NULL user_name, NULL user_name,

View File

@ -20,7 +20,7 @@ POSTGRES_SQL_STATEMENT = textwrap.dedent(
u.usename, u.usename,
d.datname database_name, d.datname database_name,
s.query query_text, s.query query_text,
s.total_exec_time s.total_exec_time/1000 duration
FROM FROM
pg_stat_statements s pg_stat_statements s
JOIN pg_catalog.pg_database d ON s.dbid = d.oid JOIN pg_catalog.pg_database d ON s.dbid = d.oid

View File

@ -154,6 +154,7 @@ class PostgresQueryParserSource(QueryParserSource, ABC):
databaseName=self.get_database_name(row), databaseName=self.get_database_name(row),
serviceName=self.config.serviceName, serviceName=self.config.serviceName,
databaseSchema=self.get_schema_name(row), databaseSchema=self.get_schema_name(row),
duration=row.get("duration"),
) )
) )
except Exception as err: except Exception as err:

View File

@ -63,6 +63,7 @@ REDSHIFT_SQL_STATEMENT = textwrap.dedent(
s.schema_name, s.schema_name,
q.starttime AS start_time, q.starttime AS start_time,
q.endtime AS end_time, q.endtime AS end_time,
datediff(second,q.starttime,q.endtime) AS duration,
q.aborted AS aborted q.aborted AS aborted
FROM scans AS s FROM scans AS s
INNER JOIN queries AS q INNER JOIN queries AS q

View File

@ -23,7 +23,8 @@ SNOWFLAKE_SQL_STATEMENT = textwrap.dedent(
database_name, database_name,
schema_name, schema_name,
start_time, start_time,
end_time end_time,
total_elapsed_time/1000 duration
from snowflake.account_usage.query_history from snowflake.account_usage.query_history
WHERE query_text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' WHERE query_text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND query_text NOT LIKE '/* {{"app": "dbt", %%}} */%%' AND query_text NOT LIKE '/* {{"app": "dbt", %%}} */%%'

View File

@ -57,6 +57,7 @@ class UsageSource(QueryParserSource, ABC):
userName=query_dict.get("user_name", ""), userName=query_dict.get("user_name", ""),
startTime=query_dict.get("start_time", ""), startTime=query_dict.get("start_time", ""),
endTime=query_dict.get("end_time", ""), endTime=query_dict.get("end_time", ""),
duration=query_dict.get("duration"),
analysisDate=analysis_date, analysisDate=analysis_date,
aborted=self.get_aborted_status(query_dict), aborted=self.get_aborted_status(query_dict),
databaseName=self.get_database_name(query_dict), databaseName=self.get_database_name(query_dict),
@ -94,6 +95,7 @@ class UsageSource(QueryParserSource, ABC):
analysisDate=row["start_time"], analysisDate=row["start_time"],
aborted=self.get_aborted_status(row), aborted=self.get_aborted_status(row),
databaseName=self.get_database_name(row), databaseName=self.get_database_name(row),
duration=row.get("duration"),
serviceName=self.config.serviceName, serviceName=self.config.serviceName,
databaseSchema=self.get_schema_name(row), databaseSchema=self.get_schema_name(row),
) )

View File

@ -99,6 +99,7 @@ class TableUsageStage(Stage[QueryParserData]):
query=record.sql, query=record.sql,
users=self._get_user_entity(record.userName), users=self._get_user_entity(record.userName),
queryDate=record.date, queryDate=record.date,
duration=record.duration,
) )
) )
else: else:
@ -107,6 +108,7 @@ class TableUsageStage(Stage[QueryParserData]):
query=record.sql, query=record.sql,
users=self._get_user_entity(record.userName), users=self._get_user_entity(record.userName),
queryDate=record.date, queryDate=record.date,
duration=record.duration,
) )
] ]

View File

@ -65,9 +65,6 @@
"title": "Supports Metadata Extraction", "title": "Supports Metadata Extraction",
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
}, },
"supportsUsageExtraction": {
"$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction"
},
"supportsDBTExtraction": { "supportsDBTExtraction": {
"$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction"
}, },

View File

@ -49,6 +49,10 @@
"databaseSchema": { "databaseSchema": {
"description": "Database schema of the associated with query", "description": "Database schema of the associated with query",
"type": "string" "type": "string"
},
"duration": {
"description": "How long the query took to run, in seconds.",
"type": "number"
} }
}, },
"required": ["sql", "serviceName", "tables"] "required": ["sql", "serviceName", "tables"]

View File

@ -42,6 +42,10 @@
"databaseSchema": { "databaseSchema": {
"description": "Database schema of the associated with query", "description": "Database schema of the associated with query",
"type": "string" "type": "string"
},
"duration": {
"description": "How long the query took to run, in seconds.",
"type": "number"
} }
}, },
"required": ["query", "serviceName"] "required": ["query", "serviceName"]