mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 02:29:03 +00:00 
			
		
		
		
	Improve Redshift Usage query (#881)
* Improve Redshift Usage query
* Improve Redshift Usage query
* Improve Redshift Usage query
This commit is contained in:
		
							parent
							
								
									39a7b3e8c6
								
							
						
					
					
						commit
						29c87f77d0
					
				| @ -27,28 +27,20 @@ class TableQuery(JsonSerializable): | |||||||
|     def __init__( |     def __init__( | ||||||
|         self, |         self, | ||||||
|         query: str, |         query: str, | ||||||
|         label: str, |         user_name: str, | ||||||
|         userid: int, |  | ||||||
|         xid: int, |  | ||||||
|         pid: int, |  | ||||||
|         starttime: str, |         starttime: str, | ||||||
|         endtime: str, |         endtime: str, | ||||||
|         analysis_date: str, |         analysis_date: str, | ||||||
|         duration: int, |  | ||||||
|         database: str, |         database: str, | ||||||
|         aborted: bool, |         aborted: bool, | ||||||
|         sql: str, |         sql: str, | ||||||
|     ) -> None: |     ) -> None: | ||||||
|         """ """ |         """ """ | ||||||
|         self.query = query |         self.query = query | ||||||
|         self.label = label |         self.user_name = user_name | ||||||
|         self.userid = userid |  | ||||||
|         self.xid = xid |  | ||||||
|         self.pid = pid |  | ||||||
|         self.starttime = starttime |         self.starttime = starttime | ||||||
|         self.endtime = endtime |         self.endtime = endtime | ||||||
|         self.analysis_date = analysis_date |         self.analysis_date = analysis_date | ||||||
|         self.duration = duration |  | ||||||
|         self.database = database |         self.database = database | ||||||
|         self.aborted = aborted |         self.aborted = aborted | ||||||
|         self.sql = sql |         self.sql = sql | ||||||
|  | |||||||
| @ -15,6 +15,7 @@ | |||||||
| 
 | 
 | ||||||
| import datetime | import datetime | ||||||
| import logging | import logging | ||||||
|  | import traceback | ||||||
| from typing import Optional | from typing import Optional | ||||||
| 
 | 
 | ||||||
| from sql_metadata import Parser | from sql_metadata import Parser | ||||||
|  | |||||||
| @ -33,29 +33,26 @@ logger = logging.getLogger(__name__) | |||||||
| class RedshiftUsageSource(Source): | class RedshiftUsageSource(Source): | ||||||
|     # SELECT statement from mysql information_schema to extract table and column metadata |     # SELECT statement from mysql information_schema to extract table and column metadata | ||||||
|     SQL_STATEMENT = """ |     SQL_STATEMENT = """ | ||||||
|         WITH query_sql AS ( |         SELECT DISTINCT ss.userid, | ||||||
|                  SELECT |             ss.query, | ||||||
|                     query, |             sui.usename, | ||||||
|                     LISTAGG(text) WITHIN GROUP (ORDER BY sequence) AS sql |             ss.tbl, | ||||||
|                 FROM stl_querytext  |             sq.querytxt, | ||||||
|                 GROUP BY 1 |             sti.database, | ||||||
|         ) |             sti.schema, | ||||||
| 
 |             sti.table, | ||||||
|         SELECT |             sq.starttime, | ||||||
|             q.query,  q.label, userid,  xid,  pid,  starttime,  endtime, |             sq.endtime, | ||||||
|             DATEDIFF(milliseconds, starttime, endtime) AS duration, |             sq.aborted | ||||||
|             TRIM(database) AS database, |         FROM stl_scan ss | ||||||
|             '{start_date}' as analysis_date, |             JOIN svv_table_info sti ON ss.tbl = sti.table_id | ||||||
|             (CASE aborted WHEN 1 THEN TRUE ELSE FALSE END) AS aborted, |             JOIN stl_query sq ON ss.query = sq.query | ||||||
|         sql |             JOIN svl_user_info sui ON sq.userid = sui.usesysid | ||||||
|         FROM |         WHERE ss.starttime >= '{start_time}' | ||||||
|             stl_query q JOIN query_sql qs ON (q.query = qs.query) |             AND ss.starttime < '{end_time}' | ||||||
|         WHERE |             AND sq.aborted = 0 | ||||||
|         endtime between '{start_date}' and '{end_date}' |         ORDER BY ss.endtime DESC; | ||||||
|         {where_clause} |     """ | ||||||
|         ORDER BY starttime; |  | ||||||
|         """ |  | ||||||
| 
 |  | ||||||
|     # CONFIG KEYS |     # CONFIG KEYS | ||||||
|     WHERE_CLAUSE_SUFFIX_KEY = "where_clause" |     WHERE_CLAUSE_SUFFIX_KEY = "where_clause" | ||||||
|     CLUSTER_SOURCE = "cluster_source" |     CLUSTER_SOURCE = "cluster_source" | ||||||
| @ -69,8 +66,9 @@ class RedshiftUsageSource(Source): | |||||||
|         super().__init__(ctx) |         super().__init__(ctx) | ||||||
|         start, end = get_start_and_end(config.duration) |         start, end = get_start_and_end(config.duration) | ||||||
|         self.sql_stmt = RedshiftUsageSource.SQL_STATEMENT.format( |         self.sql_stmt = RedshiftUsageSource.SQL_STATEMENT.format( | ||||||
|             where_clause=config.where_clause, start_date=start, end_date=end |             start_time=start, end_time=end | ||||||
|         ) |         ) | ||||||
|  |         self.analysis_date = start | ||||||
|         self.alchemy_helper = SQLAlchemyHelper( |         self.alchemy_helper = SQLAlchemyHelper( | ||||||
|             config, metadata_config, ctx, "Redshift", self.sql_stmt |             config, metadata_config, ctx, "Redshift", self.sql_stmt | ||||||
|         ) |         ) | ||||||
| @ -103,18 +101,14 @@ class RedshiftUsageSource(Source): | |||||||
|         """ |         """ | ||||||
|         for row in self._get_raw_extract_iter(): |         for row in self._get_raw_extract_iter(): | ||||||
|             tq = TableQuery( |             tq = TableQuery( | ||||||
|                 row["query"], |                 query=row["query"], | ||||||
|                 row["label"], |                 user_name=row["usename"], | ||||||
|                 row["userid"], |                 starttime=str(row["starttime"]), | ||||||
|                 row["xid"], |                 endtime=str(row["endtime"]), | ||||||
|                 row["pid"], |                 analysis_date=str(self.analysis_date), | ||||||
|                 str(row["starttime"]), |                 database=row["database"], | ||||||
|                 str(row["endtime"]), |                 aborted=row["aborted"], | ||||||
|                 str(row["analysis_date"]), |                 sql=row["querytxt"], | ||||||
|                 row["duration"], |  | ||||||
|                 row["database"], |  | ||||||
|                 row["aborted"], |  | ||||||
|                 row["sql"], |  | ||||||
|             ) |             ) | ||||||
|             yield tq |             yield tq | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sriharsha Chintalapani
						Sriharsha Chintalapani