mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-04 04:29:13 +00:00 
			
		
		
		
	Improve Redshift Usage query (#881)
* Improve Redshift Usage query * Improve Redshift Usage query * Improve Redshift Usage query
This commit is contained in:
		
							parent
							
								
									39a7b3e8c6
								
							
						
					
					
						commit
						29c87f77d0
					
				@ -27,28 +27,20 @@ class TableQuery(JsonSerializable):
 | 
				
			|||||||
    def __init__(
 | 
					    def __init__(
 | 
				
			||||||
        self,
 | 
					        self,
 | 
				
			||||||
        query: str,
 | 
					        query: str,
 | 
				
			||||||
        label: str,
 | 
					        user_name: str,
 | 
				
			||||||
        userid: int,
 | 
					 | 
				
			||||||
        xid: int,
 | 
					 | 
				
			||||||
        pid: int,
 | 
					 | 
				
			||||||
        starttime: str,
 | 
					        starttime: str,
 | 
				
			||||||
        endtime: str,
 | 
					        endtime: str,
 | 
				
			||||||
        analysis_date: str,
 | 
					        analysis_date: str,
 | 
				
			||||||
        duration: int,
 | 
					 | 
				
			||||||
        database: str,
 | 
					        database: str,
 | 
				
			||||||
        aborted: bool,
 | 
					        aborted: bool,
 | 
				
			||||||
        sql: str,
 | 
					        sql: str,
 | 
				
			||||||
    ) -> None:
 | 
					    ) -> None:
 | 
				
			||||||
        """ """
 | 
					        """ """
 | 
				
			||||||
        self.query = query
 | 
					        self.query = query
 | 
				
			||||||
        self.label = label
 | 
					        self.user_name = user_name
 | 
				
			||||||
        self.userid = userid
 | 
					 | 
				
			||||||
        self.xid = xid
 | 
					 | 
				
			||||||
        self.pid = pid
 | 
					 | 
				
			||||||
        self.starttime = starttime
 | 
					        self.starttime = starttime
 | 
				
			||||||
        self.endtime = endtime
 | 
					        self.endtime = endtime
 | 
				
			||||||
        self.analysis_date = analysis_date
 | 
					        self.analysis_date = analysis_date
 | 
				
			||||||
        self.duration = duration
 | 
					 | 
				
			||||||
        self.database = database
 | 
					        self.database = database
 | 
				
			||||||
        self.aborted = aborted
 | 
					        self.aborted = aborted
 | 
				
			||||||
        self.sql = sql
 | 
					        self.sql = sql
 | 
				
			||||||
 | 
				
			|||||||
@ -15,6 +15,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import datetime
 | 
					import datetime
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					import traceback
 | 
				
			||||||
from typing import Optional
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from sql_metadata import Parser
 | 
					from sql_metadata import Parser
 | 
				
			||||||
 | 
				
			|||||||
@ -33,29 +33,26 @@ logger = logging.getLogger(__name__)
 | 
				
			|||||||
class RedshiftUsageSource(Source):
 | 
					class RedshiftUsageSource(Source):
 | 
				
			||||||
    # SELECT statement against Redshift system tables to extract query usage data
 | 
					    # SELECT statement against Redshift system tables to extract query usage data
 | 
				
			||||||
    SQL_STATEMENT = """
 | 
					    SQL_STATEMENT = """
 | 
				
			||||||
        WITH query_sql AS (
 | 
					        SELECT DISTINCT ss.userid,
 | 
				
			||||||
                 SELECT
 | 
					            ss.query,
 | 
				
			||||||
                    query,
 | 
					            sui.usename,
 | 
				
			||||||
                    LISTAGG(text) WITHIN GROUP (ORDER BY sequence) AS sql
 | 
					            ss.tbl,
 | 
				
			||||||
                FROM stl_querytext 
 | 
					            sq.querytxt,
 | 
				
			||||||
                GROUP BY 1
 | 
					            sti.database,
 | 
				
			||||||
        )
 | 
					            sti.schema,
 | 
				
			||||||
 | 
					            sti.table,
 | 
				
			||||||
        SELECT
 | 
					            sq.starttime,
 | 
				
			||||||
            q.query,  q.label, userid,  xid,  pid,  starttime,  endtime,
 | 
					            sq.endtime,
 | 
				
			||||||
            DATEDIFF(milliseconds, starttime, endtime) AS duration,
 | 
					            sq.aborted
 | 
				
			||||||
            TRIM(database) AS database,
 | 
					        FROM stl_scan ss
 | 
				
			||||||
            '{start_date}' as analysis_date,
 | 
					            JOIN svv_table_info sti ON ss.tbl = sti.table_id
 | 
				
			||||||
            (CASE aborted WHEN 1 THEN TRUE ELSE FALSE END) AS aborted,
 | 
					            JOIN stl_query sq ON ss.query = sq.query
 | 
				
			||||||
        sql
 | 
					            JOIN svl_user_info sui ON sq.userid = sui.usesysid
 | 
				
			||||||
        FROM
 | 
					        WHERE ss.starttime >= '{start_time}'
 | 
				
			||||||
            stl_query q JOIN query_sql qs ON (q.query = qs.query)
 | 
					            AND ss.starttime < '{end_time}'
 | 
				
			||||||
        WHERE
 | 
					            AND sq.aborted = 0
 | 
				
			||||||
        endtime between '{start_date}' and '{end_date}'
 | 
					        ORDER BY ss.endtime DESC;
 | 
				
			||||||
        {where_clause}
 | 
					 | 
				
			||||||
        ORDER BY starttime;
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					 | 
				
			||||||
    # CONFIG KEYS
 | 
					    # CONFIG KEYS
 | 
				
			||||||
    WHERE_CLAUSE_SUFFIX_KEY = "where_clause"
 | 
					    WHERE_CLAUSE_SUFFIX_KEY = "where_clause"
 | 
				
			||||||
    CLUSTER_SOURCE = "cluster_source"
 | 
					    CLUSTER_SOURCE = "cluster_source"
 | 
				
			||||||
@ -69,8 +66,9 @@ class RedshiftUsageSource(Source):
 | 
				
			|||||||
        super().__init__(ctx)
 | 
					        super().__init__(ctx)
 | 
				
			||||||
        start, end = get_start_and_end(config.duration)
 | 
					        start, end = get_start_and_end(config.duration)
 | 
				
			||||||
        self.sql_stmt = RedshiftUsageSource.SQL_STATEMENT.format(
 | 
					        self.sql_stmt = RedshiftUsageSource.SQL_STATEMENT.format(
 | 
				
			||||||
            where_clause=config.where_clause, start_date=start, end_date=end
 | 
					            start_time=start, end_time=end
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					        self.analysis_date = start
 | 
				
			||||||
        self.alchemy_helper = SQLAlchemyHelper(
 | 
					        self.alchemy_helper = SQLAlchemyHelper(
 | 
				
			||||||
            config, metadata_config, ctx, "Redshift", self.sql_stmt
 | 
					            config, metadata_config, ctx, "Redshift", self.sql_stmt
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
@ -103,18 +101,14 @@ class RedshiftUsageSource(Source):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        for row in self._get_raw_extract_iter():
 | 
					        for row in self._get_raw_extract_iter():
 | 
				
			||||||
            tq = TableQuery(
 | 
					            tq = TableQuery(
 | 
				
			||||||
                row["query"],
 | 
					                query=row["query"],
 | 
				
			||||||
                row["label"],
 | 
					                user_name=row["usename"],
 | 
				
			||||||
                row["userid"],
 | 
					                starttime=str(row["starttime"]),
 | 
				
			||||||
                row["xid"],
 | 
					                endtime=str(row["endtime"]),
 | 
				
			||||||
                row["pid"],
 | 
					                analysis_date=str(self.analysis_date),
 | 
				
			||||||
                str(row["starttime"]),
 | 
					                database=row["database"],
 | 
				
			||||||
                str(row["endtime"]),
 | 
					                aborted=row["aborted"],
 | 
				
			||||||
                str(row["analysis_date"]),
 | 
					                sql=row["querytxt"],
 | 
				
			||||||
                row["duration"],
 | 
					 | 
				
			||||||
                row["database"],
 | 
					 | 
				
			||||||
                row["aborted"],
 | 
					 | 
				
			||||||
                row["sql"],
 | 
					 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            yield tq
 | 
					            yield tq
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user