mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-24 05:58:31 +00:00
(cherry picked from commit e6b7b89f8634b2429e6f26e8809fa51ab295df47)
This commit is contained in:
parent
44856348bf
commit
d1f022b1d6
@ -13,6 +13,7 @@ Mixin class containing Table specific methods
|
||||
|
||||
To be used by OpenMetadata class
|
||||
"""
|
||||
import base64
|
||||
import traceback
|
||||
from typing import List, Optional, Type, TypeVar
|
||||
|
||||
@ -56,6 +57,7 @@ class OMetaTableMixin:
|
||||
|
||||
client: REST
|
||||
|
||||
# pylint: disable=too-many-nested-blocks
|
||||
def ingest_table_sample_data(
|
||||
self, table: Table, sample_data: TableData
|
||||
) -> Optional[TableData]:
|
||||
@ -67,9 +69,37 @@ class OMetaTableMixin:
|
||||
"""
|
||||
resp = None
|
||||
try:
|
||||
# Pre-process sample data to handle binary/non-UTF-8 data before serialization
|
||||
if sample_data and sample_data.rows:
|
||||
|
||||
for row in sample_data.rows:
|
||||
if not row:
|
||||
continue
|
||||
for col_idx, value in enumerate(row):
|
||||
# Handle binary data explicitly
|
||||
if isinstance(value, bytes):
|
||||
# Convert binary data to Base64-encoded string
|
||||
try:
|
||||
row[
|
||||
col_idx
|
||||
] = f"[base64]{base64.b64encode(value).decode('ascii', errors='ignore')}"
|
||||
except Exception as _:
|
||||
row[col_idx] = f"[binary]{value}"
|
||||
|
||||
try:
|
||||
data = sample_data.model_dump_json()
|
||||
except Exception as _:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.warning(
|
||||
f"Error serializing sample data for {table.fullyQualifiedName.root}"
|
||||
" please check if the data is valid"
|
||||
)
|
||||
return None
|
||||
|
||||
# Now safely serialize to JSON
|
||||
resp = self.client.put(
|
||||
f"{self.get_suffix(Table)}/{table.id.root}/sampleData",
|
||||
data=sample_data.model_dump_json(),
|
||||
data=data,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(traceback.format_exc())
|
||||
|
||||
@ -673,3 +673,24 @@ class OMetaTableTest(TestCase):
|
||||
)
|
||||
|
||||
assert res.name == name
|
||||
|
||||
def test_ingest_sample_data_with_binary_data(self):
    """
    Sample data whose cells contain raw bytes can be ingested.

    Two single-cell payloads are pushed to the same table, one after the
    other: the first mixes ASCII text with NUL and non-UTF-8 bytes, the
    second consists only of control bytes. For each payload, the value
    returned by ``ingest_table_sample_data`` must compare equal to the
    ``TableData`` object that was handed in.
    """
    create_request = get_create_entity(
        entity=Table,
        name="random",
        reference=self.create_schema_entity.fullyQualifiedName,
    )
    table: Table = self.metadata.create_or_update(data=create_request)

    binary_cells = (
        b"data\x00\x01\x02\x8e\xba\xab\xf0",
        b"\x00\x01\x02",
    )
    for cell in binary_cells:
        payload = TableData(columns=["id"], rows=[[cell]])
        ingested = self.metadata.ingest_table_sample_data(table, payload)
        # NOTE(review): the comparison uses `payload` as it is *after* the
        # call; presumably the ingest step has already rewritten bytes cells
        # in place by then - confirm against the mixin implementation.
        assert ingested == payload
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user