diff --git a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql index 507377bb8b8..0577b7376b9 100644 --- a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql +++ b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql @@ -17,3 +17,8 @@ JOIN dbservice_entity db WHERE db.serviceType = 'SampleData'; DELETE FROM dbservice_entity where serviceType = 'SampleData'; + +-- Delete supportsUsageExtraction from vertica +UPDATE dbservice_entity +SET json = JSON_REMOVE(json, '$.connection.config.supportsUsageExtraction') +WHERE serviceType = 'Vertica'; diff --git a/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql b/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql index ec621f4fa8e..61f55fc4fc2 100644 --- a/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql +++ b/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql @@ -14,3 +14,8 @@ WHERE (db.id = er.fromId OR db.id = er.toId) AND db.serviceType = 'SampleData'; DELETE FROM dbservice_entity WHERE serviceType = 'SampleData'; + +-- Delete supportsUsageExtraction from vertica +UPDATE dbservice_entity +SET json = json::jsonb #- '{connection,config,supportsUsageExtraction}' +WHERE serviceType = 'Vertica'; diff --git a/ingestion/src/metadata/examples/workflows/clickhouse_usage.yaml b/ingestion/src/metadata/examples/workflows/clickhouse_usage.yaml index fd019c66e55..b8bb742d5c5 100644 --- a/ingestion/src/metadata/examples/workflows/clickhouse_usage.yaml +++ b/ingestion/src/metadata/examples/workflows/clickhouse_usage.yaml @@ -1,5 +1,5 @@ source: - type: clickhouse_usage + type: clickhouse-usage serviceName: local_clickhouse serviceConnection: config: diff --git a/ingestion/src/metadata/ingestion/processor/query_parser.py b/ingestion/src/metadata/ingestion/processor/query_parser.py index 97bfe147761..a2718d89814 100644 --- a/ingestion/src/metadata/ingestion/processor/query_parser.py +++ b/ingestion/src/metadata/ingestion/processor/query_parser.py @@ -58,6 +58,7 @@ def parse_sql_statement(record: TableQuery) -> Optional[ParsedData]: userName=record.userName, date=start_date.__root__.strftime("%Y-%m-%d"), serviceName=record.serviceName, + duration=record.duration, ) diff --git a/ingestion/src/metadata/ingestion/source/database/bigquery/queries.py b/ingestion/src/metadata/ingestion/source/database/bigquery/queries.py index 163e7a2ae03..d746c4b16ad 100644 --- a/ingestion/src/metadata/ingestion/source/database/bigquery/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/bigquery/queries.py @@ -23,7 +23,8 @@ BIGQUERY_STATEMENT = textwrap.dedent( start_time, end_time, query as query_text, - null as schema_name + null as schema_name, + total_slot_ms/1000 as duration FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT WHERE creation_time BETWEEN "{start_time}" AND "{end_time}" {filters} diff --git a/ingestion/src/metadata/ingestion/source/database/clickhouse/queries.py b/ingestion/src/metadata/ingestion/source/database/clickhouse/queries.py index d8d86087991..0a84620a185 100644 --- a/ingestion/src/metadata/ingestion/source/database/clickhouse/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/clickhouse/queries.py @@ -19,6 +19,7 @@ CLICKHOUSE_SQL_STATEMENT = textwrap.dedent( Select query_start_time start_time, DATEADD(query_duration_ms, query_start_time) end_time, + query_duration_ms/1000 duration, 'default' database_name, user user_name, FALSE aborted, diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/usage.py b/ingestion/src/metadata/ingestion/source/database/databricks/usage.py index 01e98ba235b..3f429ff67be 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/usage.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/usage.py @@ -102,6 +102,9 @@ class DatabricksUsageSource(DatabricksQueryParserSource, UsageSource): endTime=row.get("execution_end_time_ms"), analysisDate=datetime.now(), serviceName=self.config.serviceName, + duration=row.get("duration") / 1000 + if row.get("duration") + else None, ) ) except Exception as err: diff --git a/ingestion/src/metadata/ingestion/source/database/mssql/queries.py b/ingestion/src/metadata/ingestion/source/database/mssql/queries.py index c0822928c01..b62c698e989 100644 --- a/ingestion/src/metadata/ingestion/source/database/mssql/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/mssql/queries.py @@ -21,6 +21,7 @@ MSSQL_SQL_STATEMENT = textwrap.dedent( t.text query_text, s.last_execution_time start_time, DATEADD(s, s.total_elapsed_time/1000, s.last_execution_time) end_time, + s.total_elapsed_time/1000 duration, NULL schema_name, NULL query_type, NULL user_name, diff --git a/ingestion/src/metadata/ingestion/source/database/postgres/queries.py b/ingestion/src/metadata/ingestion/source/database/postgres/queries.py index 47d23a9421d..77cc2037ee3 100644 --- a/ingestion/src/metadata/ingestion/source/database/postgres/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/postgres/queries.py @@ -20,7 +20,7 @@ POSTGRES_SQL_STATEMENT = textwrap.dedent( u.usename, d.datname database_name, s.query query_text, - s.total_exec_time + s.total_exec_time/1000 duration FROM pg_stat_statements s JOIN pg_catalog.pg_database d ON s.dbid = d.oid diff --git a/ingestion/src/metadata/ingestion/source/database/postgres/query_parser.py b/ingestion/src/metadata/ingestion/source/database/postgres/query_parser.py index 77e50268fd7..4858134558a 100644 --- a/ingestion/src/metadata/ingestion/source/database/postgres/query_parser.py +++ b/ingestion/src/metadata/ingestion/source/database/postgres/query_parser.py @@ -154,6 +154,7 @@ class PostgresQueryParserSource(QueryParserSource, ABC): databaseName=self.get_database_name(row), serviceName=self.config.serviceName, databaseSchema=self.get_schema_name(row), + duration=row.get("duration"), ) ) except Exception as err: diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py index 4d9f73972cd..2401b619c9b 100644 --- a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py @@ -63,6 +63,7 @@ REDSHIFT_SQL_STATEMENT = textwrap.dedent( s.schema_name, q.starttime AS start_time, q.endtime AS end_time, + datediff(second,q.starttime,q.endtime) AS duration, q.aborted AS aborted FROM scans AS s INNER JOIN queries AS q diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py index 9490d5a42dc..94039a7947a 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/queries.py @@ -23,7 +23,8 @@ SNOWFLAKE_SQL_STATEMENT = textwrap.dedent( database_name, schema_name, start_time, - end_time + end_time, + total_elapsed_time/1000 duration from snowflake.account_usage.query_history WHERE query_text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' AND query_text NOT LIKE '/* {{"app": "dbt", %%}} */%%' diff --git a/ingestion/src/metadata/ingestion/source/database/usage_source.py b/ingestion/src/metadata/ingestion/source/database/usage_source.py index 8fdc752eabd..142ee81b709 100644 --- a/ingestion/src/metadata/ingestion/source/database/usage_source.py +++ b/ingestion/src/metadata/ingestion/source/database/usage_source.py @@ -57,6 +57,7 @@ class UsageSource(QueryParserSource, ABC): userName=query_dict.get("user_name", ""), startTime=query_dict.get("start_time", ""), endTime=query_dict.get("end_time", ""), + duration=query_dict.get("duration"), analysisDate=analysis_date, aborted=self.get_aborted_status(query_dict), databaseName=self.get_database_name(query_dict), @@ -94,6 +95,7 @@ class UsageSource(QueryParserSource, ABC): analysisDate=row["start_time"], aborted=self.get_aborted_status(row), databaseName=self.get_database_name(row), + duration=row.get("duration"), serviceName=self.config.serviceName, databaseSchema=self.get_schema_name(row), ) diff --git a/ingestion/src/metadata/ingestion/stage/table_usage.py b/ingestion/src/metadata/ingestion/stage/table_usage.py index 4d503698ad8..0a0e990148b 100644 --- a/ingestion/src/metadata/ingestion/stage/table_usage.py +++ b/ingestion/src/metadata/ingestion/stage/table_usage.py @@ -99,6 +99,7 @@ class TableUsageStage(Stage[QueryParserData]): query=record.sql, users=self._get_user_entity(record.userName), queryDate=record.date, + duration=record.duration, ) ) else: @@ -107,6 +108,7 @@ class TableUsageStage(Stage[QueryParserData]): query=record.sql, users=self._get_user_entity(record.userName), queryDate=record.date, + duration=record.duration, ) ] diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/verticaConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/verticaConnection.json index 88b89e1f422..71b84b3ed41 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/verticaConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/verticaConnection.json @@ -65,9 +65,6 @@ "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" }, - "supportsUsageExtraction": { - "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" - }, "supportsDBTExtraction": { "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" }, diff --git a/openmetadata-spec/src/main/resources/json/schema/type/queryParserData.json b/openmetadata-spec/src/main/resources/json/schema/type/queryParserData.json index f2c6cfa44e1..554894c7c9f 100644 --- a/openmetadata-spec/src/main/resources/json/schema/type/queryParserData.json +++ b/openmetadata-spec/src/main/resources/json/schema/type/queryParserData.json @@ -49,6 +49,10 @@ "databaseSchema": { "description": "Database schema of the associated with query", "type": "string" + }, + "duration": { + "description": "How long did the query took to run in seconds.", + "type": "number" } }, "required": ["sql", "serviceName", "tables"] diff --git a/openmetadata-spec/src/main/resources/json/schema/type/tableQuery.json b/openmetadata-spec/src/main/resources/json/schema/type/tableQuery.json index 3903b01f7a9..fb4acbd20d8 100644 --- a/openmetadata-spec/src/main/resources/json/schema/type/tableQuery.json +++ b/openmetadata-spec/src/main/resources/json/schema/type/tableQuery.json @@ -42,6 +42,10 @@ "databaseSchema": { "description": "Database schema of the associated with query", "type": "string" + }, + "duration": { + "description": "How long did the query took to run in seconds.", + "type": "number" } }, "required": ["query", "serviceName"]