From d8674bd7f87b0648aadc0b8f8fd4a755e7b621bb Mon Sep 17 00:00:00 2001 From: Noe Alejandro Perez Dominguez <6936336+osocron@users.noreply.github.com> Date: Fri, 27 Jan 2023 10:36:49 +0100 Subject: [PATCH] Deduplicate stl_querytext rows, fixes #8867 (#9870) * Deduplicate stl_querytext rows, fixes #8867 * Update queries.py Filter down result-set by joining with queries * Reduce line character length --- .../ingestion/source/database/redshift/queries.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py index de611e00945..60ddf4be5b3 100644 --- a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py @@ -31,12 +31,19 @@ REDSHIFT_SQL_STATEMENT = textwrap.dedent( AND starttime < '{end_time}' LIMIT {result_limit} ), + deduped_querytext AS ( + -- Sometimes rows are duplicated, causing LISTAGG to fail in the full_queries CTE. + SELECT DISTINCT qt.* + FROM pg_catalog.stl_querytext AS qt + INNER JOIN queries AS q + ON qt.query = q.query + ), full_queries AS ( SELECT query, LISTAGG(CASE WHEN LEN(RTRIM(text)) = 0 THEN text ELSE RTRIM(text) END, '') WITHIN GROUP (ORDER BY sequence) AS query_text - FROM pg_catalog.stl_querytext + FROM deduped_querytext WHERE sequence < 327 -- each chunk contains up to 200, RS has a maximum str length of 65535. GROUP BY query ),