Fix column resize to limit to 128 chars (#11889)

* Fix column resize to limit to 128 chars

* Fix pyformat issue

* replace column_name with col_name

---------

Co-authored-by: Ayush Shah <ayush@getcollate.io>
This commit is contained in:
Sriharsha Chintalapani 2023-06-06 22:24:08 -07:00 committed by GitHub
parent 4ecbbcd43c
commit 2b2602b76b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 20 additions and 4 deletions

View File

@ -22,3 +22,12 @@ def remove_table_from_column_name(table_name: str, raw_column_name: str) -> str:
"." in the name, so we cannot just split.
"""
return raw_column_name.replace(table_name + ".", "")
def truncate_column_name(col_name: str, max_length: int = 128) -> str:
    """
    Truncate a column name so it fits the OpenMetadata column name limit.

    The OpenMetadata table column specification limits column names to 128
    characters. To allow ingestion of tables whose column names are longer,
    the name is cut to that limit; callers are expected to keep the raw
    column name in displayName.

    Args:
        col_name: raw column name as reported by the source.
        max_length: maximum number of characters to keep. Defaults to 128.

    Returns:
        col_name unchanged if it already fits, otherwise its first
        max_length characters.

    Raises:
        ValueError: if max_length is negative.
    """
    if max_length < 0:
        # A negative slice bound would trim characters from the end of the
        # name instead of enforcing an upper bound on its length.
        raise ValueError(f"max_length must be non-negative, got {max_length}")
    return col_name[:max_length]

View File

@ -53,6 +53,7 @@ from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection
from metadata.ingestion.source.database.column_helpers import truncate_column_name
from metadata.ingestion.source.database.database_service import DatabaseServiceSource
from metadata.ingestion.source.database.datalake.models import (
DatalakeTableSchemaWrapper,
@ -455,7 +456,8 @@ class DatalakeSource(DatabaseServiceSource):
parent_col = complex_col_dict.get(col_hierarchy[: index + 1])
else:
intermediate_column = Column(
name=col_name[:64],
name=truncate_column_name(col_name),
displayName=col_name,
dataType=DataType.RECORD.value,
children=[],
dataTypeDisplay=DataType.RECORD.value,
@ -536,7 +538,8 @@ class DatalakeSource(DatabaseServiceSource):
parsed_string = {
"dataTypeDisplay": data_type,
"dataType": data_type,
"name": column[:64],
"name": truncate_column_name(column),
"displayName": column,
}
parsed_string["dataLength"] = parsed_string.get("dataLength", 1)
cols.append(Column(**parsed_string))

View File

@ -38,6 +38,7 @@ from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection
from metadata.ingestion.source.database.column_helpers import truncate_column_name
from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
from metadata.ingestion.source.database.database_service import DatabaseServiceSource
from metadata.utils import fqn
@ -170,7 +171,8 @@ class DynamodbSource(DatabaseServiceSource):
parsed_string = {}
parsed_string["dataTypeDisplay"] = str(column["AttributeType"])
parsed_string["dataType"] = "UNION"
parsed_string["name"] = column["AttributeName"][:64]
parsed_string["name"] = truncate_column_name(column["AttributeName"])
parsed_string["displayName"] = column["AttributeName"]
parsed_string["dataLength"] = parsed_string.get("dataLength", 1)
parsed_string["dataTypeDisplay"] = str(column["AttributeType"])
yield Column(**parsed_string)

View File

@ -39,6 +39,7 @@ from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection
from metadata.ingestion.source.database.column_helpers import truncate_column_name
from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
from metadata.ingestion.source.database.database_service import DatabaseServiceSource
from metadata.ingestion.source.database.glue.models import Column as GlueColumn
@ -295,7 +296,8 @@ class GlueSource(DatabaseServiceSource):
parsed_string = {}
parsed_string["dataTypeDisplay"] = str(column.Type)
parsed_string["dataType"] = "UNION"
parsed_string["name"] = column.Name[:64]
parsed_string["name"] = truncate_column_name(column.Name)
parsed_string["displayName"] = column.Name
parsed_string["dataLength"] = parsed_string.get("dataLength", 1)
parsed_string["description"] = column.Comment
return Column(**parsed_string)