Fix: Filter Datalake empty files (#8677)

This commit is contained in:
Milan Bariya 2022-11-13 22:09:29 +05:30 committed by GitHub
parent a20945e13f
commit ebfb872e16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -301,20 +301,21 @@ class DatalakeSource(DatabaseServiceSource):
data_frame = self.get_gcs_files(key=table_name, bucket_name=schema_name) data_frame = self.get_gcs_files(key=table_name, bucket_name=schema_name)
if isinstance(self.service_connection.configSource, S3Config): if isinstance(self.service_connection.configSource, S3Config):
data_frame = self.get_s3_files(key=table_name, bucket_name=schema_name) data_frame = self.get_s3_files(key=table_name, bucket_name=schema_name)
columns = self.get_columns(data_frame) if not data_frame.empty:
table_request = CreateTableRequest( columns = self.get_columns(data_frame)
name=table_name, table_request = CreateTableRequest(
tableType=table_type, name=table_name,
description="", tableType=table_type,
columns=columns, description="",
tableConstraints=table_constraints if table_constraints else None, columns=columns,
databaseSchema=EntityReference( tableConstraints=table_constraints if table_constraints else None,
id=self.context.database_schema.id, databaseSchema=EntityReference(
type="databaseSchema", id=self.context.database_schema.id,
), type="databaseSchema",
) ),
yield table_request )
self.register_record(table_request=table_request) yield table_request
self.register_record(table_request=table_request)
except Exception as exc: except Exception as exc:
logger.debug(traceback.format_exc()) logger.debug(traceback.format_exc())