Mirror of https://github.com/datahub-project/datahub.git (synced 2025-11-04 12:51:23 +00:00)
			
		
		
		
	fix(ingestion): Handle Redshift string length limit in Serverless mode (#10051)
This commit is contained in:
		
parent 59a26a6543
commit 9aa099f4b0
					
				@ -822,7 +822,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
 | 
				
			|||||||
                WHERE
 | 
					                WHERE
 | 
				
			||||||
                    qs.step_name = 'scan' AND
 | 
					                    qs.step_name = 'scan' AND
 | 
				
			||||||
                    qs.source = 'Redshift(local)' AND
 | 
					                    qs.source = 'Redshift(local)' AND
 | 
				
			||||||
                    qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
 | 
					                    qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
 | 
				
			||||||
                    sti.database = '{db_name}' AND -- this was required to not retrieve some internal redshift tables, try removing to see what happens
 | 
					                    sti.database = '{db_name}' AND -- this was required to not retrieve some internal redshift tables, try removing to see what happens
 | 
				
			||||||
                    sui.user_name <> 'rdsdb' -- not entirely sure about this filter
 | 
					                    sui.user_name <> 'rdsdb' -- not entirely sure about this filter
 | 
				
			||||||
                GROUP BY sti.schema, sti.table, qs.table_id, qs.query_id, sui.user_name
 | 
					                GROUP BY sti.schema, sti.table, qs.table_id, qs.query_id, sui.user_name
 | 
				
			||||||
@ -909,7 +909,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
 | 
				
			|||||||
                    cluster = '{db_name}' AND
 | 
					                    cluster = '{db_name}' AND
 | 
				
			||||||
                    qd.start_time >= '{start_time}' AND
 | 
					                    qd.start_time >= '{start_time}' AND
 | 
				
			||||||
                    qd.start_time < '{end_time}' AND
 | 
					                    qd.start_time < '{end_time}' AND
 | 
				
			||||||
                    qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
 | 
					                    qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext
 | 
				
			||||||
                    ld.query_id IS NULL -- filter out queries which are also stored in SYS_LOAD_DETAIL
 | 
					                    ld.query_id IS NULL -- filter out queries which are also stored in SYS_LOAD_DETAIL
 | 
				
			||||||
                ORDER BY target_table ASC
 | 
					                ORDER BY target_table ASC
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
@ -996,7 +996,7 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
 | 
				
			|||||||
                                            query_type IN ('DDL', 'CTAS', 'OTHER', 'COMMAND')
 | 
					                                            query_type IN ('DDL', 'CTAS', 'OTHER', 'COMMAND')
 | 
				
			||||||
                                            AND qh.start_time >= '{start_time_str}'
 | 
					                                            AND qh.start_time >= '{start_time_str}'
 | 
				
			||||||
                                            AND qh.start_time < '{end_time_str}'
 | 
					                                            AND qh.start_time < '{end_time_str}'
 | 
				
			||||||
                                            AND qt.sequence < 320
 | 
					                                            AND qt.sequence < 16
 | 
				
			||||||
                                    GROUP BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id
 | 
					                                    GROUP BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id
 | 
				
			||||||
                                    ORDER BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id ASC
 | 
					                                    ORDER BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id ASC
 | 
				
			||||||
                            )
 | 
					                            )
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user