diff --git a/ingestion/src/metadata/ingestion/source/database/column_helpers.py b/ingestion/src/metadata/ingestion/source/database/column_helpers.py index 62255bf83aa..8dc260555fe 100644 --- a/ingestion/src/metadata/ingestion/source/database/column_helpers.py +++ b/ingestion/src/metadata/ingestion/source/database/column_helpers.py @@ -22,3 +22,12 @@ def remove_table_from_column_name(table_name: str, raw_column_name: str) -> str: "." in the name, so we cannot just split. """ return raw_column_name.replace(table_name + ".", "") + + +def truncate_column_name(col_name: str): + """ + OpenMetadata table column specification limits column name to 128 characters. + To allow ingestion of tables, we truncate the name to 128 characters if it is longer + and use displayName to hold the raw column name. + """ + return col_name[:128] diff --git a/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py b/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py index 541d5f9e631..6dd0fd2b0f2 100644 --- a/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py @@ -53,6 +53,7 @@ from metadata.ingestion.api.source import InvalidSourceException from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.connections import get_connection +from metadata.ingestion.source.database.column_helpers import truncate_column_name from metadata.ingestion.source.database.database_service import DatabaseServiceSource from metadata.ingestion.source.database.datalake.models import ( DatalakeTableSchemaWrapper, @@ -455,7 +456,8 @@ class DatalakeSource(DatabaseServiceSource): parent_col = complex_col_dict.get(col_hierarchy[: index + 1]) else: intermediate_column = Column( - name=col_name[:64], + name=truncate_column_name(col_name), + displayName=col_name, 
dataType=DataType.RECORD.value, children=[], dataTypeDisplay=DataType.RECORD.value, @@ -536,7 +538,8 @@ class DatalakeSource(DatabaseServiceSource): parsed_string = { "dataTypeDisplay": data_type, "dataType": data_type, - "name": column[:64], + "name": truncate_column_name(column), + "displayName": column, } parsed_string["dataLength"] = parsed_string.get("dataLength", 1) cols.append(Column(**parsed_string)) diff --git a/ingestion/src/metadata/ingestion/source/database/dynamodb/metadata.py b/ingestion/src/metadata/ingestion/source/database/dynamodb/metadata.py index c0443aab8ab..48db28ef723 100644 --- a/ingestion/src/metadata/ingestion/source/database/dynamodb/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/dynamodb/metadata.py @@ -38,6 +38,7 @@ from metadata.ingestion.api.source import InvalidSourceException from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.connections import get_connection +from metadata.ingestion.source.database.column_helpers import truncate_column_name from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser from metadata.ingestion.source.database.database_service import DatabaseServiceSource from metadata.utils import fqn @@ -170,7 +171,8 @@ class DynamodbSource(DatabaseServiceSource): parsed_string = {} parsed_string["dataTypeDisplay"] = str(column["AttributeType"]) parsed_string["dataType"] = "UNION" - parsed_string["name"] = column["AttributeName"][:64] + parsed_string["name"] = truncate_column_name(column["AttributeName"]) + parsed_string["displayName"] = column["AttributeName"] parsed_string["dataLength"] = parsed_string.get("dataLength", 1) parsed_string["dataTypeDisplay"] = str(column["AttributeType"]) yield Column(**parsed_string) diff --git a/ingestion/src/metadata/ingestion/source/database/glue/metadata.py 
b/ingestion/src/metadata/ingestion/source/database/glue/metadata.py index e5c472a97d0..4d7d3456a6b 100755 --- a/ingestion/src/metadata/ingestion/source/database/glue/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/glue/metadata.py @@ -39,6 +39,7 @@ from metadata.ingestion.api.source import InvalidSourceException from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.connections import get_connection +from metadata.ingestion.source.database.column_helpers import truncate_column_name from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser from metadata.ingestion.source.database.database_service import DatabaseServiceSource from metadata.ingestion.source.database.glue.models import Column as GlueColumn @@ -295,7 +296,8 @@ class GlueSource(DatabaseServiceSource): parsed_string = {} parsed_string["dataTypeDisplay"] = str(column.Type) parsed_string["dataType"] = "UNION" - parsed_string["name"] = column.Name[:64] + parsed_string["name"] = truncate_column_name(column.Name) + parsed_string["displayName"] = column.Name parsed_string["dataLength"] = parsed_string.get("dataLength", 1) parsed_string["description"] = column.Comment return Column(**parsed_string)