Fix #9562: Add duration in usage ingestion (#9564)

This commit is contained in:
Mayur Singal 2023-01-05 12:03:07 +05:30 committed by GitHub
parent a813d25efa
commit 0a34e18e26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 36 additions and 7 deletions

View File

@ -17,3 +17,8 @@ JOIN dbservice_entity db
WHERE db.serviceType = 'SampleData';
DELETE FROM dbservice_entity where serviceType = 'SampleData';
-- Remove supportsUsageExtraction from Vertica
UPDATE dbservice_entity
SET json = JSON_REMOVE(json, '$.connection.config.supportsUsageExtraction')
WHERE serviceType = 'Vertica';

View File

@ -14,3 +14,8 @@ WHERE (db.id = er.fromId OR db.id = er.toId)
AND db.serviceType = 'SampleData';
DELETE FROM dbservice_entity WHERE serviceType = 'SampleData';
-- Remove supportsUsageExtraction from Vertica
UPDATE dbservice_entity
SET json = json::jsonb #- '{connection,config,supportsUsageExtraction}'
WHERE serviceType = 'Vertica';

View File

@ -1,5 +1,5 @@
source:
type: clickhouse_usage
type: clickhouse-usage
serviceName: local_clickhouse
serviceConnection:
config:

View File

@ -58,6 +58,7 @@ def parse_sql_statement(record: TableQuery) -> Optional[ParsedData]:
userName=record.userName,
date=start_date.__root__.strftime("%Y-%m-%d"),
serviceName=record.serviceName,
duration=record.duration,
)

View File

@ -23,7 +23,8 @@ BIGQUERY_STATEMENT = textwrap.dedent(
start_time,
end_time,
query as query_text,
null as schema_name
null as schema_name,
total_slot_ms/1000 as duration
FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE creation_time BETWEEN "{start_time}" AND "{end_time}"
{filters}

View File

@ -19,6 +19,7 @@ CLICKHOUSE_SQL_STATEMENT = textwrap.dedent(
Select
query_start_time start_time,
DATEADD(query_duration_ms, query_start_time) end_time,
query_duration_ms/1000 duration,
'default' database_name,
user user_name,
FALSE aborted,

View File

@ -102,6 +102,9 @@ class DatabricksUsageSource(DatabricksQueryParserSource, UsageSource):
endTime=row.get("execution_end_time_ms"),
analysisDate=datetime.now(),
serviceName=self.config.serviceName,
duration=row.get("duration") / 1000
if row.get("duration")
else None,
)
)
except Exception as err:

View File

@ -21,6 +21,7 @@ MSSQL_SQL_STATEMENT = textwrap.dedent(
t.text query_text,
s.last_execution_time start_time,
DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time,
s.total_elapsed_time/1000 duration,
NULL schema_name,
NULL query_type,
NULL user_name,

View File

@ -20,7 +20,7 @@ POSTGRES_SQL_STATEMENT = textwrap.dedent(
u.usename,
d.datname database_name,
s.query query_text,
s.total_exec_time
s.total_exec_time/1000 duration
FROM
pg_stat_statements s
JOIN pg_catalog.pg_database d ON s.dbid = d.oid

View File

@ -154,6 +154,7 @@ class PostgresQueryParserSource(QueryParserSource, ABC):
databaseName=self.get_database_name(row),
serviceName=self.config.serviceName,
databaseSchema=self.get_schema_name(row),
duration=row.get("duration"),
)
)
except Exception as err:

View File

@ -63,6 +63,7 @@ REDSHIFT_SQL_STATEMENT = textwrap.dedent(
s.schema_name,
q.starttime AS start_time,
q.endtime AS end_time,
datediff(second,q.starttime,q.endtime) AS duration,
q.aborted AS aborted
FROM scans AS s
INNER JOIN queries AS q

View File

@ -23,7 +23,8 @@ SNOWFLAKE_SQL_STATEMENT = textwrap.dedent(
database_name,
schema_name,
start_time,
end_time
end_time,
total_elapsed_time/1000 duration
from snowflake.account_usage.query_history
WHERE query_text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%'
AND query_text NOT LIKE '/* {{"app": "dbt", %%}} */%%'

View File

@ -57,6 +57,7 @@ class UsageSource(QueryParserSource, ABC):
userName=query_dict.get("user_name", ""),
startTime=query_dict.get("start_time", ""),
endTime=query_dict.get("end_time", ""),
duration=query_dict.get("duration"),
analysisDate=analysis_date,
aborted=self.get_aborted_status(query_dict),
databaseName=self.get_database_name(query_dict),
@ -94,6 +95,7 @@ class UsageSource(QueryParserSource, ABC):
analysisDate=row["start_time"],
aborted=self.get_aborted_status(row),
databaseName=self.get_database_name(row),
duration=row.get("duration"),
serviceName=self.config.serviceName,
databaseSchema=self.get_schema_name(row),
)

View File

@ -99,6 +99,7 @@ class TableUsageStage(Stage[QueryParserData]):
query=record.sql,
users=self._get_user_entity(record.userName),
queryDate=record.date,
duration=record.duration,
)
)
else:
@ -107,6 +108,7 @@ class TableUsageStage(Stage[QueryParserData]):
query=record.sql,
users=self._get_user_entity(record.userName),
queryDate=record.date,
duration=record.duration,
)
]

View File

@ -65,9 +65,6 @@
"title": "Supports Metadata Extraction",
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
},
"supportsUsageExtraction": {
"$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction"
},
"supportsDBTExtraction": {
"$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction"
},

View File

@ -49,6 +49,10 @@
"databaseSchema": {
"description": "Database schema associated with the query",
"type": "string"
},
"duration": {
"description": "How long the query took to run, in seconds.",
"type": "number"
}
},
"required": ["sql", "serviceName", "tables"]

View File

@ -42,6 +42,10 @@
"databaseSchema": {
"description": "Database schema associated with the query",
"type": "string"
},
"duration": {
"description": "How long the query took to run, in seconds.",
"type": "number"
}
},
"required": ["query", "serviceName"]