mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-25 18:30:00 +00:00
Enhance SQL column processing for BigQuery ingestion (#20408)
- Refactored the handling of nested columns in `sql_column_handler.py` to prioritize source-provided children, ensuring they override any derived children.
- Removed the overridden `_process_col_type` method in `bigquery/metadata.py` to streamline column type handling, enforcing the use of the standard path for BigQuery.

This update improves the accuracy of column metadata processing and simplifies the codebase.
This commit is contained in:
parent
d344caa8c7
commit
1434b5dba2
@ -1020,13 +1020,3 @@ class BigquerySource(LifeCycleQueryMixin, CommonDbSourceService, MultiDBSource):
|
||||
)
|
||||
else:
|
||||
yield from super().mark_tables_as_deleted()
|
||||
|
||||
def _process_col_type(self, column: dict, schema: str) -> Tuple:
|
||||
"""
|
||||
Override the parent method to always return parsed_string as None for BigQuery.
|
||||
This ensures we always use the standard column type handling path.
|
||||
"""
|
||||
data_type_display, arr_data_type, _ = super()._process_col_type(column, schema)
|
||||
# For BigQuery, we always want to force parsed_string to None
|
||||
# This ensures we use the standard column type handling path
|
||||
return data_type_display, arr_data_type, None
|
||||
|
@ -299,12 +299,6 @@ class SqlColumnHandlerMixin:
|
||||
arrayDataType=arr_data_type,
|
||||
ordinalPosition=column.get("ordinalPosition"),
|
||||
)
|
||||
if column.get("children"):
|
||||
om_column.children = [
|
||||
process_column(children) for children in column.get("children")
|
||||
]
|
||||
if not arr_data_type:
|
||||
om_column.arrayDataType = DataType.UNKNOWN.value
|
||||
if precision:
|
||||
# Precision and scale must be integer values
|
||||
om_column.precision = int(precision[0])
|
||||
@ -314,6 +308,15 @@ class SqlColumnHandlerMixin:
|
||||
column=column, parsed_string=parsed_string
|
||||
)
|
||||
om_column = col_obj
|
||||
|
||||
if column.get("children"):
|
||||
# Prioritize source-provided children for column processing.
|
||||
# If 'children' are directly provided in the source metadata,
|
||||
# process and assign them to the output column, overriding any derived children.
|
||||
# Currently, this is only used for BigQuery.
|
||||
om_column.children = [
|
||||
process_column(children) for children in column.get("children")
|
||||
]
|
||||
om_column.tags = self.get_column_tag_labels(
|
||||
table_name=table_name, column=column
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user