mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-24 14:45:55 +00:00
Fix failure in GCS datalake ingestions (#9336)
This commit is contained in:
parent
c758d83276
commit
50d1538374
@ -256,6 +256,14 @@ class DatalakeSource(DatabaseServiceSource): # pylint: disable=too-many-public-
|
|||||||
bucket = self.client.get_bucket(bucket_name)
|
bucket = self.client.get_bucket(bucket_name)
|
||||||
for key in bucket.list_blobs(prefix=prefix):
|
for key in bucket.list_blobs(prefix=prefix):
|
||||||
table_name = self.standardize_table_name(bucket_name, key.name)
|
table_name = self.standardize_table_name(bucket_name, key.name)
|
||||||
|
# adding this condition as the GCP blobs also contain directories, which we can filter out
|
||||||
|
if table_name.endswith("/") or not self.check_valid_file_type(
|
||||||
|
key.name
|
||||||
|
):
|
||||||
|
logger.debug(
|
||||||
|
f"Object filtered due to unsupported file type: {key.name}"
|
||||||
|
)
|
||||||
|
continue
|
||||||
table_fqn = fqn.build(
|
table_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Table,
|
entity_type=Table,
|
||||||
@ -264,6 +272,7 @@ class DatalakeSource(DatabaseServiceSource): # pylint: disable=too-many-public-
|
|||||||
schema_name=self.context.database_schema.name.__root__,
|
schema_name=self.context.database_schema.name.__root__,
|
||||||
table_name=table_name,
|
table_name=table_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if filter_by_table(
|
if filter_by_table(
|
||||||
self.config.sourceConfig.config.tableFilterPattern,
|
self.config.sourceConfig.config.tableFilterPattern,
|
||||||
table_fqn
|
table_fqn
|
||||||
@ -275,11 +284,6 @@ class DatalakeSource(DatabaseServiceSource): # pylint: disable=too-many-public-
|
|||||||
"Object Filtered Out",
|
"Object Filtered Out",
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
if not self.check_valid_file_type(key.name):
|
|
||||||
logger.debug(
|
|
||||||
f"Object filtered due to unsupported file type: {key.name}"
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
yield table_name, TableType.Regular
|
yield table_name, TableType.Regular
|
||||||
if isinstance(self.service_connection.configSource, S3Config):
|
if isinstance(self.service_connection.configSource, S3Config):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user