feat(ui): Supporting display of columns and storage count in previews (#7198)

This commit is contained in:
John Joyce 2023-02-02 09:38:06 -08:00 committed by GitHub
parent 0d67e188ef
commit c150ef04d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 303 additions and 14 deletions

View File

@ -31,6 +31,7 @@ public class DatasetProfileMapper implements TimeSeriesAspectMapper<com.linkedin
result.setRowCount(gmsProfile.getRowCount());
result.setColumnCount(gmsProfile.getColumnCount());
result.setSizeInBytes(gmsProfile.getSizeInBytes());
result.setTimestampMillis(gmsProfile.getTimestampMillis());
if (gmsProfile.hasFieldProfiles()) {
result.setFieldProfiles(

View File

@ -6049,6 +6049,11 @@ type DatasetProfile implements TimeSeriesAspect {
"""
columnCount: Long
"""
The storage size in bytes
"""
sizeInBytes: Long
"""
An optional set of per field statistics obtained in the profile
"""

View File

@ -0,0 +1,187 @@
package com.linkedin.datahub.graphql.types.dataset.mappers;
import com.google.common.collect.ImmutableList;
import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.graphql.generated.DatasetProfile;
import com.linkedin.dataset.DatasetFieldProfile;
import com.linkedin.dataset.DatasetFieldProfileArray;
import com.linkedin.metadata.aspect.EnvelopedAspect;
import com.linkedin.metadata.utils.GenericRecordUtils;
import java.util.ArrayList;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Unit tests for {@link DatasetProfileMapper}.
 *
 * <p>Each test serializes a {@code com.linkedin.dataset.DatasetProfile} into an
 * {@link EnvelopedAspect}, maps it to the GraphQL {@link DatasetProfile}, and compares
 * the result field-by-field against a hand-built expected object.
 */
public class DatasetProfileMapperTest {

  /** Maps a profile in which every profile-level and field-level statistic is populated. */
  @Test
  public void testMapperFullProfile() {
    final com.linkedin.dataset.DatasetProfile input = new com.linkedin.dataset.DatasetProfile();
    input.setTimestampMillis(1L);
    input.setRowCount(10L);
    input.setColumnCount(45L);
    input.setSizeInBytes(15L);
    input.setFieldProfiles(new DatasetFieldProfileArray(ImmutableList.of(
        new DatasetFieldProfile().setFieldPath("/field1")
            .setMax("1")
            .setMean("2")
            .setStdev("3")
            .setMedian("4")
            .setMin("5")
            .setNullCount(20L)
            .setNullProportion(20.5f)
            .setUniqueCount(30L)
            .setUniqueProportion(30.5f)
            .setSampleValues(new StringArray(ImmutableList.of("val1", "val2"))),
        new DatasetFieldProfile().setFieldPath("/field2")
            .setMax("2")
            .setMean("3")
            .setStdev("4")
            .setMedian("5")
            .setMin("6")
            .setNullCount(30L)
            .setNullProportion(30.5f)
            .setUniqueCount(40L)
            .setUniqueProportion(40.5f)
            .setSampleValues(new StringArray(ImmutableList.of("val3", "val4")))
    )));

    final DatasetProfile actual = DatasetProfileMapper.map(toEnvelopedAspect(input));

    final DatasetProfile expected = new DatasetProfile();
    expected.setTimestampMillis(1L);
    expected.setRowCount(10L);
    expected.setColumnCount(45L);
    expected.setSizeInBytes(15L);
    expected.setFieldProfiles(new ArrayList<>(
        ImmutableList.of(
            new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field1",
                30L,
                30.5f,
                20L,
                20.5f,
                "5",
                "1",
                "2",
                "4",
                "3",
                new ArrayList<>(ImmutableList.of("val1", "val2"))),
            new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field2",
                40L,
                40.5f,
                30L,
                30.5f,
                "6",
                "2",
                "3",
                "5",
                "4",
                new ArrayList<>(ImmutableList.of("val3", "val4")))
        )
    ));

    assertProfileEquals(actual, expected);
  }

  /**
   * Maps a profile that omits {@code sizeInBytes} and several per-field statistics;
   * the omitted values are expected to come back as {@code null}.
   */
  @Test
  public void testMapperPartialProfile() {
    final com.linkedin.dataset.DatasetProfile input = new com.linkedin.dataset.DatasetProfile();
    input.setTimestampMillis(1L);
    input.setRowCount(10L);
    input.setColumnCount(45L);
    input.setFieldProfiles(new DatasetFieldProfileArray(ImmutableList.of(
        new DatasetFieldProfile().setFieldPath("/field1")
            .setUniqueCount(30L)
            .setUniqueProportion(30.5f),
        new DatasetFieldProfile().setFieldPath("/field2")
            .setMax("2")
            .setMean("3")
            .setStdev("4")
            .setMedian("5")
            .setMin("6")
            .setUniqueCount(40L)
            .setUniqueProportion(40.5f)
    )));

    final DatasetProfile actual = DatasetProfileMapper.map(toEnvelopedAspect(input));

    final DatasetProfile expected = new DatasetProfile();
    expected.setTimestampMillis(1L);
    expected.setRowCount(10L);
    expected.setColumnCount(45L);
    expected.setFieldProfiles(new ArrayList<>(
        ImmutableList.of(
            new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field1",
                30L,
                30.5f,
                null,
                null,
                null,
                null,
                null,
                null,
                null,
                null),
            new com.linkedin.datahub.graphql.generated.DatasetFieldProfile("/field2",
                40L,
                40.5f,
                null,
                null,
                "6",
                "2",
                "3",
                "5",
                "4",
                null)
        )
    ));

    assertProfileEquals(actual, expected);
  }

  /** Wraps the given profile in an {@link EnvelopedAspect}, the input type the mapper accepts. */
  private static EnvelopedAspect toEnvelopedAspect(final com.linkedin.dataset.DatasetProfile profile) {
    return new EnvelopedAspect().setAspect(GenericRecordUtils.serializeAspect(profile));
  }

  /** Asserts that the profile-level values and every field profile of both objects match. */
  private static void assertProfileEquals(final DatasetProfile actual, final DatasetProfile expected) {
    Assert.assertEquals(actual.getTimestampMillis(), expected.getTimestampMillis());
    Assert.assertEquals(actual.getRowCount(), expected.getRowCount());
    Assert.assertEquals(actual.getColumnCount(), expected.getColumnCount());
    Assert.assertEquals(actual.getSizeInBytes(), expected.getSizeInBytes());

    // Check the size first so a dropped or duplicated field profile fails with a clear
    // message instead of an IndexOutOfBoundsException (or not failing at all).
    Assert.assertEquals(actual.getFieldProfiles().size(), expected.getFieldProfiles().size());
    for (int i = 0; i < expected.getFieldProfiles().size(); i++) {
      assertFieldProfileEquals(actual.getFieldProfiles().get(i), expected.getFieldProfiles().get(i));
    }
  }

  /** Asserts that every statistic of a single mapped field profile matches the expectation. */
  private static void assertFieldProfileEquals(
      final com.linkedin.datahub.graphql.generated.DatasetFieldProfile actual,
      final com.linkedin.datahub.graphql.generated.DatasetFieldProfile expected) {
    Assert.assertEquals(actual.getFieldPath(), expected.getFieldPath());
    // min was populated in the fixtures but previously never verified.
    Assert.assertEquals(actual.getMin(), expected.getMin());
    Assert.assertEquals(actual.getMax(), expected.getMax());
    Assert.assertEquals(actual.getMean(), expected.getMean());
    Assert.assertEquals(actual.getMedian(), expected.getMedian());
    Assert.assertEquals(actual.getNullCount(), expected.getNullCount());
    Assert.assertEquals(actual.getNullProportion(), expected.getNullProportion());
    Assert.assertEquals(actual.getStdev(), expected.getStdev());
    Assert.assertEquals(actual.getUniqueCount(), expected.getUniqueCount());
    Assert.assertEquals(actual.getUniqueProportion(), expected.getUniqueProportion());
    Assert.assertEquals(actual.getSampleValues(), expected.getSampleValues());
  }
}

View File

@ -221,6 +221,7 @@ export const dataset1 = {
timestampMillis: 0,
rowCount: 10,
columnCount: 5,
sizeInBytes: 10,
fieldProfiles: [
{
fieldPath: 'testColumn',
@ -307,6 +308,7 @@ export const dataset2 = {
timestampMillis: 0,
rowCount: 10,
columnCount: 5,
sizeInBytes: 10000,
fieldProfiles: [
{
fieldPath: 'testColumn',
@ -517,6 +519,7 @@ export const dataset3 = {
{
rowCount: 10,
columnCount: 5,
sizeInBytes: 10000,
timestampMillis: 0,
fieldProfiles: [
{

View File

@ -285,6 +285,8 @@ export class DatasetEntity implements Entity<Dataset> {
externalUrl={data.properties?.externalUrl}
statsSummary={data.statsSummary}
rowCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].rowCount}
columnCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].columnCount}
sizeInBytes={(data as any).lastProfile?.length && (data as any).lastProfile[0].sizeInBytes}
lastUpdatedMs={
(data as any).lastOperation?.length && (data as any).lastOperation[0].lastUpdatedTimestamp
}

View File

@ -41,6 +41,8 @@ export const Preview = ({
container,
parentContainers,
rowCount,
columnCount,
sizeInBytes,
statsSummary,
lastUpdatedMs,
}: {
@ -65,6 +67,8 @@ export const Preview = ({
container?: Container | null;
parentContainers?: ParentContainersResult | null;
rowCount?: number | null;
columnCount?: number | null;
sizeInBytes?: number | null;
statsSummary?: DatasetStatsSummary | null;
lastUpdatedMs?: number | null;
}): JSX.Element => {
@ -97,6 +101,8 @@ export const Preview = ({
subHeader={
<DatasetStatsSummaryView
rowCount={rowCount}
columnCount={columnCount}
sizeInBytes={sizeInBytes}
queryCountLast30Days={statsSummary?.queryCountLast30Days}
uniqueUserCountLast30Days={statsSummary?.uniqueUserCountLast30Days}
lastUpdatedMs={lastUpdatedMs}

View File

@ -19,6 +19,8 @@ export const DatasetStatsSummarySubHeader = () => {
((dataset?.operations?.length || 0) > 0 && (dataset?.operations![0] as Operation)) || undefined;
const rowCount = maybeLastProfile?.rowCount;
const columnCount = maybeLastProfile?.columnCount;
const sizeInBytes = maybeLastProfile?.sizeInBytes;
const queryCountLast30Days = maybeStatsSummary?.queryCountLast30Days;
const uniqueUserCountLast30Days = maybeStatsSummary?.uniqueUserCountLast30Days;
const lastUpdatedMs = maybeLastOperation?.lastUpdatedTimestamp;
@ -26,6 +28,8 @@ export const DatasetStatsSummarySubHeader = () => {
return (
<DatasetStatsSummary
rowCount={rowCount}
columnCount={columnCount}
sizeInBytes={sizeInBytes}
queryCountLast30Days={queryCountLast30Days}
uniqueUserCountLast30Days={uniqueUserCountLast30Days}
lastUpdatedMs={lastUpdatedMs}

View File

@ -7,11 +7,13 @@ import {
TableOutlined,
TeamOutlined,
QuestionCircleOutlined,
HddOutlined,
} from '@ant-design/icons';
import { formatNumberWithoutAbbreviation } from '../../../shared/formatNumber';
import { ANTD_GRAY } from '../../shared/constants';
import { toLocalDateTimeString, toRelativeTimeString } from '../../../shared/time/timeUtils';
import { StatsSummary } from '../../shared/components/styled/StatsSummary';
import { FormattedBytesStat } from './FormattedBytesStat';
const StatText = styled.span`
color: ${ANTD_GRAY[8]};
@ -24,6 +26,8 @@ const HelpIcon = styled(QuestionCircleOutlined)`
type Props = {
rowCount?: number | null;
columnCount?: number | null;
sizeInBytes?: number | null;
queryCountLast30Days?: number | null;
uniqueUserCountLast30Days?: number | null;
lastUpdatedMs?: number | null;
@ -31,33 +35,43 @@ type Props = {
export const DatasetStatsSummary = ({
rowCount,
columnCount,
sizeInBytes,
queryCountLast30Days,
uniqueUserCountLast30Days,
lastUpdatedMs,
}: Props) => {
const statsViews = [
(!!rowCount && (
!!rowCount && (
<StatText>
<TableOutlined style={{ marginRight: 8, color: ANTD_GRAY[7] }} />
<b>{formatNumberWithoutAbbreviation(rowCount)}</b> rows
{!!columnCount && (
<>
, `<b>{formatNumberWithoutAbbreviation(columnCount)}</b> columns
</>
)}
</StatText>
)) ||
undefined,
(!!queryCountLast30Days && (
),
!!sizeInBytes && (
<StatText>
<HddOutlined style={{ marginRight: 8, color: ANTD_GRAY[7] }} />
<FormattedBytesStat bytes={sizeInBytes} />
</StatText>
),
!!queryCountLast30Days && (
<StatText>
<ConsoleSqlOutlined style={{ marginRight: 8, color: ANTD_GRAY[7] }} />
<b>{formatNumberWithoutAbbreviation(queryCountLast30Days)}</b> queries last month
</StatText>
)) ||
undefined,
(!!uniqueUserCountLast30Days && (
),
!!uniqueUserCountLast30Days && (
<StatText>
<TeamOutlined style={{ marginRight: 8, color: ANTD_GRAY[7] }} />
<b>{formatNumberWithoutAbbreviation(uniqueUserCountLast30Days)}</b> unique users
</StatText>
)) ||
undefined,
(!!lastUpdatedMs && (
),
!!lastUpdatedMs && (
<Popover
content={
<div>
@ -73,9 +87,8 @@ export const DatasetStatsSummary = ({
Changed {toRelativeTimeString(lastUpdatedMs)}
</StatText>
</Popover>
)) ||
undefined,
].filter((stat) => stat !== undefined);
),
].filter((stat) => stat);
return <>{statsViews.length > 0 && <StatsSummary stats={statsViews} />}</>;
};

View File

@ -0,0 +1,12 @@
import React from 'react';
import { Tooltip } from 'antd';
import { formatBytes, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber';
/**
 * Renders a byte count as a bolded scaled value followed by its unit, wrapped in a
 * tooltip that spells out the full, unabbreviated number of bytes.
 */
export const FormattedBytesStat = ({ bytes }: { bytes: number }) => {
    const { number: scaledValue, unit } = formatBytes(bytes);
    const tooltipTitle = `This dataset consumes ${formatNumberWithoutAbbreviation(bytes)} bytes of storage.`;

    return (
        <Tooltip title={tooltipTitle}>
            <b>{scaledValue}</b> {unit}
        </Tooltip>
    );
};

View File

@ -0,0 +1,33 @@
import { formatBytes } from '../formatNumber';
// Table-driven cases for formatBytes: [input bytes, expected number, expected unit].
// Running each row as its own test means one failure does not hide the others.
describe('formatBytes', () => {
    const cases: [number, number, string][] = [
        [0, 0, 'Bytes'],
        // Whole Numbers
        [1, 1, 'Bytes'],
        [10, 10, 'Bytes'],
        [100, 100, 'Bytes'],
        [1000, 1, 'KB'],
        [10000, 10, 'KB'],
        [100000, 100, 'KB'],
        [1000000, 1, 'MB'],
        [10000000, 10, 'MB'],
        [100000000, 100, 'MB'],
        [1000000000, 1, 'GB'],
        [10000000000, 10, 'GB'],
        [100000000000, 100, 'GB'],
        [1000000000000, 1, 'TB'],
        [10000000000000, 10, 'TB'],
        [100000000000000, 100, 'TB'],
        [1000000000000000, 1, 'PB'],
        // Decimal Numbers
        [12, 12, 'Bytes'],
        [1200, 1.2, 'KB'],
        [1200000, 1.2, 'MB'],
        [1200000000, 1.2, 'GB'],
        [1200000000000, 1.2, 'TB'],
        [1230000000000, 1.23, 'TB'],
        [1200000000000000, 1.2, 'PB'],
        [1230000000000000, 1.23, 'PB'],
    ];

    it.each(cases)('should properly format %d bytes as %d %s', (bytes, number, unit) => {
        expect(formatBytes(bytes)).toStrictEqual({ number, unit });
    });
});

View File

@ -9,3 +9,22 @@ export function formatNumber(n) {
/**
 * Returns the locale-formatted string for the given value without abbreviating it,
 * by delegating to the value's own `toLocaleString()`.
 */
export function formatNumberWithoutAbbreviation(n) {
    const localized = n.toLocaleString();
    return localized;
}
/**
 * Scales a raw byte count into a human-readable value and unit label.
 *
 * Units are decimal: each step up is a factor of 1000 (1000 bytes = 1 KB).
 *
 * @param bytes the raw number of bytes
 * @param decimals maximum number of fraction digits kept in the scaled value;
 *                 negative values are treated as 0
 * @returns the scaled value and its unit. Zero, NaN, missing and non-finite inputs yield
 *          `{ number: 0, unit: 'Bytes' }`. Negative inputs are scaled by magnitude and
 *          keep their sign. Values outside the unit table are clamped to the smallest
 *          ('Bytes') or largest ('YB') unit rather than producing an undefined unit.
 */
export function formatBytes(bytes: number, decimals = 2): { number: number; unit: string } {
    // Infinity would otherwise produce NaN (Infinity / Infinity) and an undefined unit.
    if (!bytes || !Number.isFinite(bytes)) {
        return {
            number: 0,
            unit: 'Bytes',
        };
    }

    const k = 1000; // Decimal units of bytes, where 1000 bytes = 1 KB.
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];
    const lastIndex = sizes.length - 1;

    // Work on the magnitude: Math.log of a negative number is NaN.
    const magnitude = Math.abs(bytes);

    // Clamp so that sub-byte values (negative log) and values beyond 'YB' still map to a unit.
    const rawIndex = Math.floor(Math.log(magnitude) / Math.log(k));
    let i = Math.min(Math.max(rawIndex, 0), lastIndex);

    // eslint-disable-next-line no-restricted-properties
    let scaled = parseFloat((magnitude / Math.pow(k, i)).toFixed(dm));

    // Rounding (or floating-point error in the log ratio) can land exactly on the next
    // unit boundary, e.g. 999999 -> "1000.00" KB; promote it to 1 MB instead.
    if (scaled >= k && i < lastIndex) {
        scaled = parseFloat((scaled / k).toFixed(dm));
        i += 1;
    }

    return {
        number: bytes < 0 ? -scaled : scaled,
        unit: sizes[i],
    };
}

View File

@ -4,6 +4,7 @@ query getDataProfiles($urn: String!, $limit: Int, $startTime: Long, $endTime: Lo
datasetProfiles(limit: $limit, startTimeMillis: $startTime, endTimeMillis: $endTime) {
rowCount
columnCount
sizeInBytes
timestampMillis
fieldProfiles {
fieldPath
@ -117,6 +118,7 @@ fragment nonSiblingDatasetFields on Dataset {
datasetProfiles(limit: 1) {
rowCount
columnCount
sizeInBytes
timestampMillis
fieldProfiles {
fieldPath

View File

@ -259,6 +259,8 @@ fragment searchResultFields on Entity {
}
lastProfile: datasetProfiles(limit: 1) {
rowCount
columnCount
sizeInBytes
timestampMillis
}
lastOperation: operations(limit: 1) {

View File

@ -3387,7 +3387,7 @@
"changeType":"UPSERT",
"aspectName":"datasetProfile",
"aspect":{
"value":"{\"timestampMillis\": 1664352243000, \"rowCount\": 4500, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}",
"value":"{\"timestampMillis\": 1629097200000, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}",
"contentType":"application/json"
},
"systemMetadata":null