From c150ef04d6e2035ceaa15acbfff4ce55ec3ec3a5 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 2 Feb 2023 09:38:06 -0800 Subject: [PATCH] feat(ui): Supporting display of columns and storage count in previews (#7198) --- .../dataset/mappers/DatasetProfileMapper.java | 1 + .../src/main/resources/entity.graphql | 5 + .../mappers/DatasetProfileMapperTest.java | 187 ++++++++++++++++++ datahub-web-react/src/Mocks.tsx | 3 + .../src/app/entity/dataset/DatasetEntity.tsx | 2 + .../app/entity/dataset/preview/Preview.tsx | 6 + .../stats/DatasetStatsSummarySubHeader.tsx | 4 + .../dataset/shared/DatasetStatsSummary.tsx | 39 ++-- .../dataset/shared/FormattedBytesStat.tsx | 12 ++ .../app/shared/__tests__/formatNumber.test.ts | 33 ++++ .../src/app/shared/formatNumber.ts | 19 ++ datahub-web-react/src/graphql/dataset.graphql | 2 + datahub-web-react/src/graphql/search.graphql | 2 + .../examples/mce_files/bootstrap_mce.json | 2 +- 14 files changed, 303 insertions(+), 14 deletions(-) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetProfileMapperTest.java create mode 100644 datahub-web-react/src/app/entity/dataset/shared/FormattedBytesStat.tsx create mode 100644 datahub-web-react/src/app/shared/__tests__/formatNumber.test.ts diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetProfileMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetProfileMapper.java index 0ec4598994..dbaaf27a3f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetProfileMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetProfileMapper.java @@ -31,6 +31,7 @@ public class DatasetProfileMapper implements TimeSeriesAspectMapper( + ImmutableList.of( + new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field1", + 30L, + 30.5f, 
+ 20L, + 20.5f, + "5", + "1", + "2", + "4", + "3", + new ArrayList<>(ImmutableList.of("val1", "val2"))), + new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field2", + 40L, + 40.5f, + 30L, + 30.5f, + "6", + "2", + "3", + "5", + "4", + new ArrayList<>(ImmutableList.of("val3", "val4"))) + ) + )); + Assert.assertEquals(actual.getTimestampMillis(), expected.getTimestampMillis()); + Assert.assertEquals(actual.getRowCount(), expected.getRowCount()); + Assert.assertEquals(actual.getColumnCount(), expected.getColumnCount()); + Assert.assertEquals(actual.getSizeInBytes(), expected.getSizeInBytes()); + + Assert.assertEquals(actual.getFieldProfiles().get(0).getFieldPath(), expected.getFieldProfiles().get(0).getFieldPath()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMax(), expected.getFieldProfiles().get(0).getMax()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMean(), expected.getFieldProfiles().get(0).getMean()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMedian(), expected.getFieldProfiles().get(0).getMedian()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getNullCount(), expected.getFieldProfiles().get(0).getNullCount()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getNullProportion(), expected.getFieldProfiles().get(0).getNullProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getStdev(), expected.getFieldProfiles().get(0).getStdev()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getUniqueCount(), expected.getFieldProfiles().get(0).getUniqueCount()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getUniqueProportion(), expected.getFieldProfiles().get(0).getUniqueProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getSampleValues(), expected.getFieldProfiles().get(0).getSampleValues()); + + Assert.assertEquals(actual.getFieldProfiles().get(1).getFieldPath(), expected.getFieldProfiles().get(1).getFieldPath()); + 
Assert.assertEquals(actual.getFieldProfiles().get(1).getMax(), expected.getFieldProfiles().get(1).getMax()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getMean(), expected.getFieldProfiles().get(1).getMean()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getMedian(), expected.getFieldProfiles().get(1).getMedian()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getNullCount(), expected.getFieldProfiles().get(1).getNullCount()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getNullProportion(), expected.getFieldProfiles().get(1).getNullProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getStdev(), expected.getFieldProfiles().get(1).getStdev()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getUniqueCount(), expected.getFieldProfiles().get(1).getUniqueCount()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getUniqueProportion(), expected.getFieldProfiles().get(1).getUniqueProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getSampleValues(), expected.getFieldProfiles().get(1).getSampleValues()); + } + + @Test + public void testMapperPartialProfile() { + final com.linkedin.dataset.DatasetProfile input = new com.linkedin.dataset.DatasetProfile(); + input.setTimestampMillis(1L); + input.setRowCount(10L); + input.setColumnCount(45L); + input.setFieldProfiles(new DatasetFieldProfileArray(ImmutableList.of( + new DatasetFieldProfile().setFieldPath("/field1") + .setUniqueCount(30L) + .setUniqueProportion(30.5f), + new DatasetFieldProfile().setFieldPath("/field2") + .setMax("2") + .setMean("3") + .setStdev("4") + .setMedian("5") + .setMin("6") + .setUniqueCount(40L) + .setUniqueProportion(40.5f) + ))); + final EnvelopedAspect inputAspect = new EnvelopedAspect() + .setAspect(GenericRecordUtils.serializeAspect(input)); + final DatasetProfile actual = DatasetProfileMapper.map(inputAspect); + final DatasetProfile expected = new DatasetProfile(); + expected.setTimestampMillis(1L); + 
expected.setRowCount(10L); + expected.setColumnCount(45L); + expected.setFieldProfiles(new ArrayList<>( + ImmutableList.of( + new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field1", + 30L, + 30.5f, + null, + null, + null, + null, + null, + null, + null, + null), + new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field2", + 40L, + 40.5f, + null, + null, + "6", + "2", + "3", + "5", + "4", + null) + ) + )); + Assert.assertEquals(actual.getTimestampMillis(), expected.getTimestampMillis()); + Assert.assertEquals(actual.getRowCount(), expected.getRowCount()); + Assert.assertEquals(actual.getColumnCount(), expected.getColumnCount()); + Assert.assertEquals(actual.getSizeInBytes(), expected.getSizeInBytes()); + + Assert.assertEquals(actual.getFieldProfiles().get(0).getFieldPath(), expected.getFieldProfiles().get(0).getFieldPath()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMax(), expected.getFieldProfiles().get(0).getMax()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMean(), expected.getFieldProfiles().get(0).getMean()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getMedian(), expected.getFieldProfiles().get(0).getMedian()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getNullCount(), expected.getFieldProfiles().get(0).getNullCount()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getNullProportion(), expected.getFieldProfiles().get(0).getNullProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getStdev(), expected.getFieldProfiles().get(0).getStdev()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getUniqueCount(), expected.getFieldProfiles().get(0).getUniqueCount()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getUniqueProportion(), expected.getFieldProfiles().get(0).getUniqueProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(0).getSampleValues(), expected.getFieldProfiles().get(0).getSampleValues()); + + 
Assert.assertEquals(actual.getFieldProfiles().get(1).getFieldPath(), expected.getFieldProfiles().get(1).getFieldPath()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getMax(), expected.getFieldProfiles().get(1).getMax()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getMean(), expected.getFieldProfiles().get(1).getMean()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getMedian(), expected.getFieldProfiles().get(1).getMedian()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getNullCount(), expected.getFieldProfiles().get(1).getNullCount()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getNullProportion(), expected.getFieldProfiles().get(1).getNullProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getStdev(), expected.getFieldProfiles().get(1).getStdev()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getUniqueCount(), expected.getFieldProfiles().get(1).getUniqueCount()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getUniqueProportion(), expected.getFieldProfiles().get(1).getUniqueProportion()); + Assert.assertEquals(actual.getFieldProfiles().get(1).getSampleValues(), expected.getFieldProfiles().get(1).getSampleValues()); + } +} diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index dcab10cf52..67821c68c1 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -221,6 +221,7 @@ export const dataset1 = { timestampMillis: 0, rowCount: 10, columnCount: 5, + sizeInBytes: 10, fieldProfiles: [ { fieldPath: 'testColumn', @@ -307,6 +308,7 @@ export const dataset2 = { timestampMillis: 0, rowCount: 10, columnCount: 5, + sizeInBytes: 10000, fieldProfiles: [ { fieldPath: 'testColumn', @@ -517,6 +519,7 @@ export const dataset3 = { { rowCount: 10, columnCount: 5, + sizeInBytes: 10000, timestampMillis: 0, fieldProfiles: [ { diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx 
b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index f97810664e..6edcf2899b 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -285,6 +285,8 @@ export class DatasetEntity implements Entity { externalUrl={data.properties?.externalUrl} statsSummary={data.statsSummary} rowCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].rowCount} + columnCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].columnCount} + sizeInBytes={(data as any).lastProfile?.length && (data as any).lastProfile[0].sizeInBytes} lastUpdatedMs={ (data as any).lastOperation?.length && (data as any).lastOperation[0].lastUpdatedTimestamp } diff --git a/datahub-web-react/src/app/entity/dataset/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataset/preview/Preview.tsx index ba9677f0b6..ce8eacf412 100644 --- a/datahub-web-react/src/app/entity/dataset/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/dataset/preview/Preview.tsx @@ -41,6 +41,8 @@ export const Preview = ({ container, parentContainers, rowCount, + columnCount, + sizeInBytes, statsSummary, lastUpdatedMs, }: { @@ -65,6 +67,8 @@ export const Preview = ({ container?: Container | null; parentContainers?: ParentContainersResult | null; rowCount?: number | null; + columnCount?: number | null; + sizeInBytes?: number | null; statsSummary?: DatasetStatsSummary | null; lastUpdatedMs?: number | null; }): JSX.Element => { @@ -97,6 +101,8 @@ export const Preview = ({ subHeader={ { ((dataset?.operations?.length || 0) > 0 && (dataset?.operations![0] as Operation)) || undefined; const rowCount = maybeLastProfile?.rowCount; + const columnCount = maybeLastProfile?.columnCount; + const sizeInBytes = maybeLastProfile?.sizeInBytes; const queryCountLast30Days = maybeStatsSummary?.queryCountLast30Days; const uniqueUserCountLast30Days = maybeStatsSummary?.uniqueUserCountLast30Days; const lastUpdatedMs = 
maybeLastOperation?.lastUpdatedTimestamp; @@ -26,6 +28,8 @@ export const DatasetStatsSummarySubHeader = () => { return ( { const statsViews = [ - (!!rowCount && ( + !!rowCount && ( {formatNumberWithoutAbbreviation(rowCount)} rows + {!!columnCount && ( + <> + , `{formatNumberWithoutAbbreviation(columnCount)} columns + + )} - )) || - undefined, - (!!queryCountLast30Days && ( + ), + !!sizeInBytes && ( + + + + + ), + !!queryCountLast30Days && ( {formatNumberWithoutAbbreviation(queryCountLast30Days)} queries last month - )) || - undefined, - (!!uniqueUserCountLast30Days && ( + ), + !!uniqueUserCountLast30Days && ( {formatNumberWithoutAbbreviation(uniqueUserCountLast30Days)} unique users - )) || - undefined, - (!!lastUpdatedMs && ( + ), + !!lastUpdatedMs && ( @@ -73,9 +87,8 @@ export const DatasetStatsSummary = ({ Changed {toRelativeTimeString(lastUpdatedMs)} - )) || - undefined, - ].filter((stat) => stat !== undefined); + ), + ].filter((stat) => stat); return <>{statsViews.length > 0 && }; }; diff --git a/datahub-web-react/src/app/entity/dataset/shared/FormattedBytesStat.tsx b/datahub-web-react/src/app/entity/dataset/shared/FormattedBytesStat.tsx new file mode 100644 index 0000000000..0b8e41df44 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataset/shared/FormattedBytesStat.tsx @@ -0,0 +1,12 @@ +import React from 'react'; +import { Tooltip } from 'antd'; +import { formatBytes, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber'; + +export const FormattedBytesStat = ({ bytes }: { bytes: number }) => { + const formattedBytes = formatBytes(bytes); + return ( + + {formattedBytes.number} {formattedBytes.unit} + + ); +}; diff --git a/datahub-web-react/src/app/shared/__tests__/formatNumber.test.ts b/datahub-web-react/src/app/shared/__tests__/formatNumber.test.ts new file mode 100644 index 0000000000..2f09c1c46e --- /dev/null +++ b/datahub-web-react/src/app/shared/__tests__/formatNumber.test.ts @@ -0,0 +1,33 @@ +import { formatBytes } from 
'../formatNumber'; + +describe('formatBytes', () => { + it('should property format bytes counts', () => { + expect(formatBytes(0)).toStrictEqual({ number: 0, unit: 'Bytes' }); + // Whole Numbers + expect(formatBytes(1)).toStrictEqual({ number: 1, unit: 'Bytes' }); + expect(formatBytes(10)).toStrictEqual({ number: 10, unit: 'Bytes' }); + expect(formatBytes(100)).toStrictEqual({ number: 100, unit: 'Bytes' }); + expect(formatBytes(1000)).toStrictEqual({ number: 1, unit: 'KB' }); + expect(formatBytes(10000)).toStrictEqual({ number: 10, unit: 'KB' }); + expect(formatBytes(100000)).toStrictEqual({ number: 100, unit: 'KB' }); + expect(formatBytes(1000000)).toStrictEqual({ number: 1, unit: 'MB' }); + expect(formatBytes(10000000)).toStrictEqual({ number: 10, unit: 'MB' }); + expect(formatBytes(100000000)).toStrictEqual({ number: 100, unit: 'MB' }); + expect(formatBytes(1000000000)).toStrictEqual({ number: 1, unit: 'GB' }); + expect(formatBytes(10000000000)).toStrictEqual({ number: 10, unit: 'GB' }); + expect(formatBytes(100000000000)).toStrictEqual({ number: 100, unit: 'GB' }); + expect(formatBytes(1000000000000)).toStrictEqual({ number: 1, unit: 'TB' }); + expect(formatBytes(10000000000000)).toStrictEqual({ number: 10, unit: 'TB' }); + expect(formatBytes(100000000000000)).toStrictEqual({ number: 100, unit: 'TB' }); + expect(formatBytes(1000000000000000)).toStrictEqual({ number: 1, unit: 'PB' }); + // Decimal Numbers + expect(formatBytes(12)).toStrictEqual({ number: 12, unit: 'Bytes' }); + expect(formatBytes(1200)).toStrictEqual({ number: 1.2, unit: 'KB' }); + expect(formatBytes(1200000)).toStrictEqual({ number: 1.2, unit: 'MB' }); + expect(formatBytes(1200000000)).toStrictEqual({ number: 1.2, unit: 'GB' }); + expect(formatBytes(1200000000000)).toStrictEqual({ number: 1.2, unit: 'TB' }); + expect(formatBytes(1230000000000)).toStrictEqual({ number: 1.23, unit: 'TB' }); + expect(formatBytes(1200000000000000)).toStrictEqual({ number: 1.2, unit: 'PB' }); + 
expect(formatBytes(1230000000000000)).toStrictEqual({ number: 1.23, unit: 'PB' }); + }); +}); diff --git a/datahub-web-react/src/app/shared/formatNumber.ts b/datahub-web-react/src/app/shared/formatNumber.ts index e8535d146c..8f86350532 100644 --- a/datahub-web-react/src/app/shared/formatNumber.ts +++ b/datahub-web-react/src/app/shared/formatNumber.ts @@ -9,3 +9,30 @@ export function formatNumber(n) { export function formatNumberWithoutAbbreviation(n) { return n.toLocaleString(); } + +/** + * Converts a raw byte count into a display value and a decimal (base-1000) unit, e.g. 1200 -> { number: 1.2, unit: 'KB' }. + * @param bytes the size in bytes; zero, negative, NaN and non-finite values are reported as 0 Bytes. + * @param decimals the maximum number of fraction digits to keep (negative values are treated as 0). + * @returns the scaled number together with the unit label it is expressed in. + */ +export function formatBytes(bytes: number, decimals = 2): { number: number; unit: string } { + // Math.log is undefined (NaN / -Infinity) for non-positive input, so those sizes short-circuit to 0 Bytes. + if (!(bytes > 0) || !Number.isFinite(bytes)) + return { + number: 0, + unit: 'Bytes', + }; + + const k = 1000; // We use the decimal (SI) definition of byte units, where 1000 bytes = 1 KB. + const dm = decimals < 0 ? 0 : decimals; + const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']; + + // Clamp the unit index so sub-byte values and values beyond YB still resolve to a defined unit. + const i = Math.min(Math.max(Math.floor(Math.log(bytes) / Math.log(k)), 0), sizes.length - 1); + return { + // eslint-disable-next-line no-restricted-properties + number: parseFloat((bytes / Math.pow(k, i)).toFixed(dm)), + unit: sizes[i], + }; +} diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index 4ad1276b6b..235225bb31 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -4,6 +4,7 @@ query getDataProfiles($urn: String!, $limit: Int, $startTime: Long, $endTime: Lo datasetProfiles(limit: $limit, startTimeMillis: $startTime, endTimeMillis: $endTime) { rowCount columnCount + sizeInBytes timestampMillis fieldProfiles { fieldPath @@ -117,6 +118,7 @@ fragment nonSiblingDatasetFields on Dataset { datasetProfiles(limit: 1) { rowCount columnCount + sizeInBytes timestampMillis fieldProfiles { fieldPath diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index d8ae53e65c..bfea78128b 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -259,6
+259,8 @@ fragment searchResultFields on Entity { } lastProfile: datasetProfiles(limit: 1) { rowCount + columnCount + sizeInBytes timestampMillis } lastOperation: operations(limit: 1) { diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index 9a291368a5..05374c7e48 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3387,7 +3387,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1664352243000, \"rowCount\": 4500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", + "value":"{\"timestampMillis\": 1629097200000, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", "contentType":"application/json" }, "systemMetadata":null