Mirror of https://github.com/datahub-project/datahub.git, synced 2025-11-14 10:19:51 +00:00
feat(catalog): report total table files size in dataset profile (#14312)
This commit is contained in:
parent 38cd4d02d0
commit 59384dfc9b
@@ -352,6 +352,10 @@ class TableOpsDelegate extends TableOrViewOpsDelegate<TableMetadata> {
       if (totalRecordsStr != null) {
         dataSetProfile.setRowCount(Long.parseLong(totalRecordsStr));
       }
+      String totalFileSizeStr = currentSnapshot.summary().get(SnapshotSummary.TOTAL_FILE_SIZE_PROP);
+      if (totalFileSizeStr != null) {
+        dataSetProfile.setSizeInBytes(Long.parseLong(totalFileSizeStr));
+      }
     }

     return dataSetProfile;
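For context, a minimal standalone sketch (not part of the commit) of the guarded-parsing pattern this hunk adds. It assumes DataHub's generated com.linkedin.dataset.DatasetProfile model; SnapshotSummary.TOTAL_FILE_SIZE_PROP is Iceberg's "total-files-size" summary key, which a snapshot summary may omit, hence the null check rather than defaulting to 0.

import com.linkedin.dataset.DatasetProfile;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotSummary;

class ProfileSizeSketch {
  // Hypothetical helper for illustration: copies the snapshot's reported
  // total file size onto the profile, leaving sizeInBytes unset when the
  // summary does not carry the key.
  static void applySizeInBytes(DatasetProfile profile, Snapshot snapshot) {
    String totalFileSizeStr = snapshot.summary().get(SnapshotSummary.TOTAL_FILE_SIZE_PROP);
    if (totalFileSizeStr != null) {
      profile.setSizeInBytes(Long.parseLong(totalFileSizeStr));
    }
  }
}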
@@ -32,9 +32,13 @@ import io.datahubproject.metadata.context.ActorContext;
 import io.datahubproject.metadata.context.OperationContext;
 import io.datahubproject.schematron.converters.avro.AvroSchemaConverter;
 import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotSummary;
 import org.apache.iceberg.TableMetadata;
 import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.avro.AvroSchemaUtil;
@@ -426,4 +430,68 @@ public class TableOpsDelegateTest {
     when(mockWarehouse.getIcebergMetadata(identifier)).thenReturn(Optional.empty());
     assertNull(tableDelegate.refresh());
   }
+
+  @Test
+  public void testGetDataSetProfileWithTotalFileSize() {
+    // Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
+    TableOpsDelegate realTableDelegate =
+        new TableOpsDelegate(
+            mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
+
+    // Mock TableMetadata with snapshot and summary
+    TableMetadata mockMetadata = mock(TableMetadata.class);
+    Schema schema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+    when(mockMetadata.schema()).thenReturn(schema);
+
+    // Mock Snapshot with summary containing total file size
+    Snapshot mockSnapshot = mock(Snapshot.class);
+    Map<String, String> mockSummary = new HashMap<>();
+    mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "1000");
+    mockSummary.put(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "5242880"); // 5MB in bytes
+    when(mockSnapshot.summary()).thenReturn(mockSummary);
+    when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
+
+    // Call the actual getDataSetProfile method
+    DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
+
+    // Verify the results
+    assertEquals(result.getColumnCount().longValue(), 2L);
+    assertEquals(result.getRowCount().longValue(), 1000L);
+    assertEquals(result.getSizeInBytes().longValue(), 5242880L);
+  }
+
+  @Test
+  public void testGetDataSetProfileWithoutTotalFileSize() {
+    // Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
+    TableOpsDelegate realTableDelegate =
+        new TableOpsDelegate(
+            mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
+
+    // Mock TableMetadata with snapshot but no file size in summary
+    TableMetadata mockMetadata = mock(TableMetadata.class);
+    Schema schema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+    when(mockMetadata.schema()).thenReturn(schema);
+
+    // Mock Snapshot with summary containing only row count, no file size
+    Snapshot mockSnapshot = mock(Snapshot.class);
+    Map<String, String> mockSummary = new HashMap<>();
+    mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "500");
+    // No TOTAL_FILE_SIZE_PROP in the map
+    when(mockSnapshot.summary()).thenReturn(mockSummary);
+    when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
+
+    // Call the actual getDataSetProfile method
+    DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
+
+    // Verify the results
+    assertEquals(result.getColumnCount().longValue(), 2L);
+    assertEquals(result.getRowCount().longValue(), 500L);
+    assertNull(result.getSizeInBytes()); // Should be null when no file size info
+  }
 }
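As a usage aside, a short sketch of reading the same summary keys from a live Iceberg table; the catalog wiring and the "db.events" identifier are illustrative, not from this commit:

import java.util.Map;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;

class SummaryInspectionSketch {
  static void printSizeStats(Catalog catalog) {
    // Illustrative identifier; any Iceberg Catalog implementation works here.
    Table table = catalog.loadTable(TableIdentifier.of("db", "events"));
    Snapshot snapshot = table.currentSnapshot();
    if (snapshot != null) { // a freshly created table has no snapshot yet
      Map<String, String> summary = snapshot.summary();
      // Typical entries: "total-records" -> "1000", "total-files-size" -> "5242880"
      System.out.println(summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
      System.out.println(summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
    }
  }
}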