diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java
index 6255415bfe..f9ec2e34c0 100644
--- a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java
+++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java
@@ -352,6 +352,10 @@ class TableOpsDelegate extends TableOrViewOpsDelegate {
       if (totalRecordsStr != null) {
         dataSetProfile.setRowCount(Long.parseLong(totalRecordsStr));
       }
+      String totalFileSizeStr = currentSnapshot.summary().get(SnapshotSummary.TOTAL_FILE_SIZE_PROP);
+      if (totalFileSizeStr != null) {
+        dataSetProfile.setSizeInBytes(Long.parseLong(totalFileSizeStr));
+      }
     }
 
     return dataSetProfile;
diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/TableOpsDelegateTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/TableOpsDelegateTest.java
index 5783791a5b..6ea67c6a5c 100644
--- a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/TableOpsDelegateTest.java
+++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/TableOpsDelegateTest.java
@@ -32,9 +32,13 @@ import io.datahubproject.metadata.context.ActorContext;
 import io.datahubproject.metadata.context.OperationContext;
 import io.datahubproject.schematron.converters.avro.AvroSchemaConverter;
 import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotSummary;
 import org.apache.iceberg.TableMetadata;
 import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.avro.AvroSchemaUtil;
@@ -426,4 +430,68 @@ public class TableOpsDelegateTest {
     when(mockWarehouse.getIcebergMetadata(identifier)).thenReturn(Optional.empty());
     assertNull(tableDelegate.refresh());
   }
+
+  @Test
+  public void testGetDataSetProfileWithTotalFileSize() {
+    // Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
+    TableOpsDelegate realTableDelegate =
+        new TableOpsDelegate(
+            mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
+
+    // Mock TableMetadata with snapshot and summary
+    TableMetadata mockMetadata = mock(TableMetadata.class);
+    Schema schema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+    when(mockMetadata.schema()).thenReturn(schema);
+
+    // Mock Snapshot with summary containing total file size
+    Snapshot mockSnapshot = mock(Snapshot.class);
+    Map<String, String> mockSummary = new HashMap<>();
+    mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "1000");
+    mockSummary.put(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "5242880"); // 5MB in bytes
+    when(mockSnapshot.summary()).thenReturn(mockSummary);
+    when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
+
+    // Call the actual getDataSetProfile method
+    DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
+
+    // Verify the results
+    assertEquals(result.getColumnCount().longValue(), 2L);
+    assertEquals(result.getRowCount().longValue(), 1000L);
+    assertEquals(result.getSizeInBytes().longValue(), 5242880L);
+  }
+
+  @Test
+  public void testGetDataSetProfileWithoutTotalFileSize() {
+    // Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
+    TableOpsDelegate realTableDelegate =
+        new TableOpsDelegate(
+            mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
+
+    // Mock TableMetadata with snapshot but no file size in summary
+    TableMetadata mockMetadata = mock(TableMetadata.class);
+    Schema schema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+    when(mockMetadata.schema()).thenReturn(schema);
+
+    // Mock Snapshot with summary containing only row count, no file size
+    Snapshot mockSnapshot = mock(Snapshot.class);
+    Map<String, String> mockSummary = new HashMap<>();
+    mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "500");
+    // No TOTAL_FILE_SIZE_PROP in the map
+    when(mockSnapshot.summary()).thenReturn(mockSummary);
+    when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
+
+    // Call the actual getDataSetProfile method
+    DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
+
+    // Verify the results
+    assertEquals(result.getColumnCount().longValue(), 2L);
+    assertEquals(result.getRowCount().longValue(), 500L);
+    assertNull(result.getSizeInBytes()); // Should be null when no file size info
+  }
 }