fix(ingestion): Handle Redshift string length limit in Serverless mode (#10051)

This commit is contained in:
skrydal 2024-03-19 18:54:41 +01:00 committed by GitHub
parent 59a26a6543
commit 9aa099f4b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -822,7 +822,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
WHERE
qs.step_name = 'scan' AND
qs.source = 'Redshift(local)' AND
-qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
+qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
sti.database = '{db_name}' AND -- this was required to not retrieve some internal redshift tables, try removing to see what happens
sui.user_name <> 'rdsdb' -- not entirely sure about this filter
GROUP BY sti.schema, sti.table, qs.table_id, qs.query_id, sui.user_name
@@ -909,7 +909,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
cluster = '{db_name}' AND
qd.start_time >= '{start_time}' AND
qd.start_time < '{end_time}' AND
-qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
+qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
ld.query_id IS NULL -- filter out queries which are also stored in SYS_LOAD_DETAIL
ORDER BY target_table ASC
)
@@ -996,7 +996,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
query_type IN ('DDL', 'CTAS', 'OTHER', 'COMMAND')
AND qh.start_time >= '{start_time_str}'
AND qh.start_time < '{end_time_str}'
-AND qt.sequence < 320
+AND qt.sequence < 16
GROUP BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id
ORDER BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id ASC
)