ui: Add UI Graph for Histogram #10365 (#10493)

* added quartile metrics chart for column profiler

* translation key sync

* added histogram graph

* sync - translation file

* added histogram component and unit test for the same

* addressing comments

* addressing comment

* renaming date variable to graphDate
This commit is contained in:
Shailesh Parmar 2023-03-13 12:26:20 +05:30 committed by GitHub
parent 30e5d696a1
commit e916c85399
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 428 additions and 14 deletions

View File

@ -11,6 +11,7 @@
* limitations under the License.
*/
import { ColumnProfile } from 'generated/entity/data/table';
import { MetricChartType } from '../ProfilerDashboard/profilerDashboard.interface';
export interface CustomBarChartProps {
@ -18,3 +19,10 @@ export interface CustomBarChartProps {
name: string;
tickFormatter?: string;
}
/**
 * Props accepted by the data-distribution histogram component.
 */
export interface DataDistributionHistogramProps {
// Pair of column profiles to chart; either entry may be omitted.
data: {
// NOTE(review): presumably the earliest profile of the selected range —
// the visible caller passes `last(profilerData)`; confirm the list ordering.
firstDayData?: ColumnProfile;
// NOTE(review): presumably the most recent profile — the visible caller
// passes `first(profilerData)`; confirm the list ordering.
currentDayData?: ColumnProfile;
};
}

View File

@ -0,0 +1,132 @@
/*
* Copyright 2023 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Col, Row, Tag } from 'antd';
import ErrorPlaceHolder from 'components/common/error-with-placeholder/ErrorPlaceHolder';
import { GRAPH_BACKGROUND_COLOR } from 'constants/constants';
import { DEFAULT_HISTOGRAM_DATA } from 'constants/profiler.constant';
import { HistogramClass } from 'generated/entity/data/table';
import { isUndefined, map } from 'lodash';
import React from 'react';
import { useTranslation } from 'react-i18next';
import {
Bar,
BarChart,
CartesianGrid,
Legend,
ResponsiveContainer,
Tooltip,
XAxis,
YAxis,
} from 'recharts';
import { axisTickFormatter, tooltipFormatter } from 'utils/ChartUtils';
import { getFormattedDateFromSeconds } from 'utils/TimeUtils';
import { DataDistributionHistogramProps } from './Chart.interface';
/**
 * Renders the histogram bar chart(s) for the column profiles held in `data`.
 *
 * - When neither profile carries a `histogram`, a "no data" placeholder is
 *   rendered instead of the chart container.
 * - When only one profile carries a `histogram`, its chart spans the full
 *   width (24 columns).
 * - When both do, each chart gets half of the width (12 columns).
 *
 * Every chart is headed by the profile date (formatted with the `dd/MMM`
 * pattern) and a tag showing the profile's non-parametric skew.
 *
 * @param data - the `firstDayData` / `currentDayData` column profiles.
 */
const DataDistributionHistogram = ({
  data,
}: DataDistributionHistogramProps) => {
  const { t } = useTranslation();

  // Evaluate each availability check once and derive both layout flags from it.
  const isFirstDayMissing = isUndefined(data.firstDayData?.histogram);
  const isCurrentDayMissing = isUndefined(data.currentDayData?.histogram);
  const showSingleGraph = isFirstDayMissing || isCurrentDayMissing;

  if (isFirstDayMissing && isCurrentDayMissing) {
    return (
      <Row align="middle" className="h-full w-full" justify="center">
        <Col>
          <ErrorPlaceHolder>
            <p>{t('message.no-data-available')}</p>
          </ErrorPlaceHolder>
        </Col>
      </Row>
    );
  }

  return (
    <Row className="w-full" data-testid="chart-container">
      {map(data, (columnProfile, key) => {
        // Profiles without a histogram contribute no chart at all.
        if (isUndefined(columnProfile?.histogram)) {
          return null;
        }

        // The fallback covers a defined-but-falsy histogram (e.g. `null`).
        const histogramData =
          (columnProfile?.histogram as HistogramClass) ||
          DEFAULT_HISTOGRAM_DATA;

        // Pair every frequency with the boundary label at the same index.
        const graphData = histogramData.frequencies?.map((frequency, i) => ({
          name: histogramData?.boundaries?.[i],
          frequency,
        }));

        const graphDate = getFormattedDateFromSeconds(
          columnProfile?.timestamp || 0,
          'dd/MMM'
        );

        // `??` rather than `||`: a skew of exactly 0 is a valid measurement
        // and must be displayed as "0", not replaced by the "--" placeholder.
        const skewLabel = `${t('label.skew')}: ${
          columnProfile?.nonParametricSkew ?? '--'
        }`;

        return (
          <Col key={key} span={showSingleGraph ? 24 : 12}>
            <Row gutter={[8, 8]}>
              <Col
                data-testid="date"
                offset={showSingleGraph ? 1 : 2}
                span={24}>
                {graphDate}
              </Col>
              <Col offset={showSingleGraph ? 1 : 2} span={24}>
                <Tag data-testid="skew-tag">{skewLabel}</Tag>
              </Col>
              <Col span={24}>
                <ResponsiveContainer
                  debounce={200}
                  id={`${key}-histogram`}
                  minHeight={300}>
                  <BarChart
                    className="w-full"
                    data={graphData}
                    margin={{ left: 16 }}>
                    <CartesianGrid stroke={GRAPH_BACKGROUND_COLOR} />
                    <XAxis
                      dataKey="name"
                      interval={0}
                      padding={{ left: 16, right: 16 }}
                      tick={{ fontSize: 12 }}
                    />
                    <YAxis
                      allowDataOverflow
                      padding={{ top: 16, bottom: 16 }}
                      tick={{ fontSize: 12 }}
                      tickFormatter={(props) => axisTickFormatter(props)}
                    />
                    <Legend />
                    <Tooltip
                      formatter={(value: number) => tooltipFormatter(value)}
                    />
                    <Bar dataKey="frequency" fill="#1890FF" />
                  </BarChart>
                </ResponsiveContainer>
              </Col>
            </Row>
          </Col>
        );
      })}
    </Row>
  );
};
export default DataDistributionHistogram;

View File

@ -0,0 +1,164 @@
/*
* Copyright 2023 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { queryByAttribute, render, screen } from '@testing-library/react';
import React from 'react';
import DataDistributionHistogram from './DataDistributionHistogram.component';
// Bucket labels used by both mocked histograms; each profile gets its own copy.
const MOCK_HISTOGRAM_BOUNDARIES = [
  '5.00 to 100.00',
  '100.00 to 200.00',
  '200.00 to 300.00',
  '300.00 and up',
];

/**
 * Two column profiles for `shop_id` that both carry histogram data.
 * Index 0 has the later timestamp (1678375427), index 1 the earlier one
 * (1678202627).
 */
const MOCK_HISTOGRAM_DATA = [
  {
    name: 'shop_id',
    timestamp: 1678375427,
    valuesCount: 14567,
    nullCount: 0,
    nullProportion: 0,
    uniqueCount: 14567,
    uniqueProportion: 1,
    distinctCount: 14509,
    distinctProportion: 1,
    min: 1,
    max: 587,
    mean: 45,
    sum: 1367,
    stddev: 35,
    median: 7654,
    firstQuartile: 7.4,
    thirdQuartile: 8766.5,
    interQuartileRange: 8002.1,
    nonParametricSkew: -0.567,
    histogram: {
      boundaries: [...MOCK_HISTOGRAM_BOUNDARIES],
      frequencies: [101, 235, 123, 98],
    },
  },
  {
    name: 'shop_id',
    timestamp: 1678202627,
    valuesCount: 10256,
    nullCount: 0,
    nullProportion: 0,
    uniqueCount: 10098,
    uniqueProportion: 0.91,
    distinctCount: 10256,
    distinctProportion: 1,
    min: 1,
    max: 542,
    mean: 45,
    sum: 1367,
    stddev: 35,
    median: 7344,
    firstQuartile: 7.4,
    thirdQuartile: 8005.5,
    interQuartileRange: 8069.1,
    nonParametricSkew: -0.567,
    histogram: {
      boundaries: [...MOCK_HISTOGRAM_BOUNDARIES],
      frequencies: [56, 62, 66, 99],
    },
  },
];
/**
 * Column profile for `shop_id` that deliberately has no `histogram` entry,
 * used to exercise the single-chart rendering path.
 */
const COLUMN_PROFILER = {
  name: 'shop_id',
  timestamp: 1678169698,
  // Counts and proportions
  valuesCount: 10256,
  nullCount: 0,
  nullProportion: 0,
  uniqueCount: 10098,
  uniqueProportion: 0.91,
  distinctCount: 10256,
  distinctProportion: 1,
  // Summary statistics
  min: 1,
  max: 542,
  mean: 45,
  sum: 1367,
  stddev: 35,
  median: 7344,
};
// Swap the placeholder component for a bare <div> that renders its children,
// so the "no data" message can be found by text.
jest.mock('components/common/error-with-placeholder/ErrorPlaceHolder', () =>
  jest.fn().mockImplementation(({ children }) => <div>{children}</div>)
);
describe('DataDistributionHistogram component test', () => {
  // Looks up an element under `root` by its `id` attribute; null when absent.
  const queryById = (root: HTMLElement, id: string) =>
    queryByAttribute('id', root, id);

  it('Component should render', async () => {
    // Both profiles carry a histogram, so two charts are expected.
    const bothDays = {
      firstDayData: MOCK_HISTOGRAM_DATA[1],
      currentDayData: MOCK_HISTOGRAM_DATA[0],
    };
    const { container } = render(<DataDistributionHistogram data={bothDays} />);

    const skewTags = await screen.findAllByTestId('skew-tag');
    const date = await screen.findAllByTestId('date');
    const chartContainer = await screen.findByTestId('chart-container');

    expect(chartContainer).toBeInTheDocument();
    expect(queryById(container, 'firstDayData-histogram')).toBeInTheDocument();
    expect(
      queryById(container, 'currentDayData-histogram')
    ).toBeInTheDocument();
    expect(skewTags).toHaveLength(2);
    expect(date).toHaveLength(2);
  });

  it('Render one graph if histogram data is available in only one profile data', async () => {
    // The first-day profile has no histogram, so only one chart is expected.
    const onlyCurrentDay = {
      firstDayData: COLUMN_PROFILER,
      currentDayData: MOCK_HISTOGRAM_DATA[0],
    };
    const { container } = render(
      <DataDistributionHistogram data={onlyCurrentDay} />
    );

    const skewTags = await screen.findAllByTestId('skew-tag');
    const date = await screen.findAllByTestId('date');
    const chartContainer = await screen.findByTestId('chart-container');

    expect(chartContainer).toBeInTheDocument();
    expect(
      queryById(container, 'firstDayData-histogram')
    ).not.toBeInTheDocument();
    expect(
      queryById(container, 'currentDayData-histogram')
    ).toBeInTheDocument();
    expect(skewTags).toHaveLength(1);
    expect(date).toHaveLength(1);
  });

  it('No data placeholder should render when firstDay & currentDay data is undefined', async () => {
    const noData = { firstDayData: undefined, currentDayData: undefined };
    render(<DataDistributionHistogram data={noData} />);

    const placeholder = await screen.findByText('message.no-data-available');

    expect(placeholder).toBeInTheDocument();
  });
});

View File

@ -45,6 +45,11 @@ jest.mock('./ProfilerSummaryCard', () => {
jest.mock('./ProfilerDetailsCard', () => {
return jest.fn().mockImplementation(() => <div>ProfilerDetailsCard</div>);
});
// Stub the histogram chart with a text-only <div> so this suite does not
// render the real chart component.
jest.mock('components/Chart/DataDistributionHistogram.component', () =>
  jest.fn().mockImplementation(() => <div>DataDistributionHistogram</div>)
);
jest.mock('react-i18next', () => ({
// this mock makes sure any components using the translate hook can use it without a warning being shown
@ -65,6 +70,7 @@ describe('Test ProfilerTab component', () => {
const pageContainer = await screen.findByTestId('profiler-tab-container');
const description = await screen.findByTestId('description');
const histogram = await screen.findByTestId('histogram-metrics');
const dataTypeContainer = await screen.findByTestId('data-type-container');
const ProfilerSummaryCards = await screen.findAllByText(
'ProfilerSummaryCard'
@ -76,8 +82,9 @@ describe('Test ProfilerTab component', () => {
expect(pageContainer).toBeInTheDocument();
expect(description).toBeInTheDocument();
expect(dataTypeContainer).toBeInTheDocument();
expect(histogram).toBeInTheDocument();
expect(ProfilerSummaryCards).toHaveLength(2);
expect(ProfilerDetailsCards).toHaveLength(4);
expect(ProfilerDetailsCards).toHaveLength(5);
});
it('ProfilerTab component should render properly with empty data', async () => {
@ -97,6 +104,7 @@ describe('Test ProfilerTab component', () => {
const pageContainer = await screen.findByTestId('profiler-tab-container');
const description = await screen.findByTestId('description');
const dataTypeContainer = await screen.findByTestId('data-type-container');
const histogram = await screen.findByTestId('histogram-metrics');
const ProfilerSummaryCards = await screen.findAllByText(
'ProfilerSummaryCard'
);
@ -107,8 +115,9 @@ describe('Test ProfilerTab component', () => {
expect(pageContainer).toBeInTheDocument();
expect(description).toBeInTheDocument();
expect(dataTypeContainer).toBeInTheDocument();
expect(histogram).toBeInTheDocument();
expect(ProfilerSummaryCards).toHaveLength(2);
expect(ProfilerDetailsCards).toHaveLength(4);
expect(ProfilerDetailsCards).toHaveLength(5);
});
it('ProfilerTab component should render properly even if getListTestCase API fails', async () => {
@ -123,6 +132,7 @@ describe('Test ProfilerTab component', () => {
const pageContainer = await screen.findByTestId('profiler-tab-container');
const description = await screen.findByTestId('description');
const histogram = await screen.findByTestId('histogram-metrics');
const dataTypeContainer = await screen.findByTestId('data-type-container');
const ProfilerSummaryCards = await screen.findAllByText(
'ProfilerSummaryCard'
@ -134,7 +144,8 @@ describe('Test ProfilerTab component', () => {
expect(pageContainer).toBeInTheDocument();
expect(description).toBeInTheDocument();
expect(dataTypeContainer).toBeInTheDocument();
expect(histogram).toBeInTheDocument();
expect(ProfilerSummaryCards).toHaveLength(2);
expect(ProfilerDetailsCards).toHaveLength(4);
expect(ProfilerDetailsCards).toHaveLength(5);
});
});

View File

@ -13,7 +13,8 @@
import { Card, Col, Row, Statistic, Typography } from 'antd';
import { AxiosError } from 'axios';
import { sortBy } from 'lodash';
import DataDistributionHistogram from 'components/Chart/DataDistributionHistogram.component';
import { first, last, sortBy } from 'lodash';
import React, { useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useParams } from 'react-router-dom';
@ -23,6 +24,7 @@ import {
INITIAL_COUNT_METRIC_VALUE,
INITIAL_MATH_METRIC_VALUE,
INITIAL_PROPORTION_METRIC_VALUE,
INITIAL_QUARTILE_METRIC_VALUE,
INITIAL_SUM_METRIC_VALUE,
INITIAL_TEST_RESULT_SUMMARY,
} from '../../../constants/profiler.constant';
@ -58,6 +60,9 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
const [sumMetrics, setSumMetrics] = useState<MetricChartType>(
INITIAL_SUM_METRIC_VALUE
);
const [quartileMetrics, setQuartileMetrics] = useState<MetricChartType>(
INITIAL_QUARTILE_METRIC_VALUE
);
const [tableTests, setTableTests] = useState<TableTestsType>({
tests: [],
results: INITIAL_TEST_RESULT_SUMMARY,
@ -105,12 +110,20 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
];
}, [tableTests]);
const { firstDay, currentDay } = useMemo(() => {
return {
firstDay: last(profilerData),
currentDay: first(profilerData),
};
}, [profilerData]);
const createMetricsChartData = () => {
const updateProfilerData = sortBy(profilerData, 'timestamp');
const countMetricData: MetricChartType['data'] = [];
const proportionMetricData: MetricChartType['data'] = [];
const mathMetricData: MetricChartType['data'] = [];
const sumMetricData: MetricChartType['data'] = [];
const quartileMetricData: MetricChartType['data'] = [];
updateProfilerData.forEach((col) => {
const x = getFormattedDateFromSeconds(col.timestamp);
@ -135,7 +148,6 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
max: (col.max as number) || 0,
min: (col.min as number) || 0,
mean: col.mean || 0,
median: col.median || 0,
});
proportionMetricData.push({
@ -145,6 +157,15 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
nullProportion: Math.round((col.nullProportion || 0) * 100),
uniqueProportion: Math.round((col.uniqueProportion || 0) * 100),
});
quartileMetricData.push({
name: x,
timestamp: col.timestamp || 0,
firstQuartile: col.firstQuartile || 0,
thirdQuartile: col.thirdQuartile || 0,
interQuartileRange: col.interQuartileRange || 0,
median: col.median || 0,
});
});
const countMetricInfo = countMetrics.information.map((item) => ({
@ -171,6 +192,11 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
...item,
latestValue: sumMetricData[sumMetricData.length - 1]?.[item.dataKey] || 0,
}));
const quartileMetricInfo = quartileMetrics.information.map((item) => ({
...item,
latestValue:
quartileMetricData[quartileMetricData.length - 1]?.[item.dataKey] || 0,
}));
setCountMetrics((pre) => ({
...pre,
@ -192,6 +218,11 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
information: sumMetricInfo,
data: sumMetricData,
}));
setQuartileMetrics((pre) => ({
...pre,
information: quartileMetricInfo,
data: quartileMetricData,
}));
};
const fetchAllTests = async () => {
@ -227,7 +258,10 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
}, []);
return (
<Row data-testid="profiler-tab-container" gutter={[16, 16]}>
<Row
className="m-b-lg"
data-testid="profiler-tab-container"
gutter={[16, 16]}>
<Col span={8}>
<Card className="tw-rounded-md tw-border tw-h-full">
<Row gutter={16}>
@ -294,6 +328,30 @@ const ProfilerTab: React.FC<ProfilerTabProps> = ({
<Col span={24}>
<ProfilerDetailsCard chartCollection={sumMetrics} name="sum" />
</Col>
<Col span={24}>
<ProfilerDetailsCard
chartCollection={quartileMetrics}
name="quartile"
/>
</Col>
<Col span={24}>
<Card className="shadow-none" data-testid="histogram-metrics">
<Row gutter={[16, 16]}>
<Col span={4}>
<Typography.Text
className="text-grey-body"
data-testid="data-distribution-title">
{t('label.data-distribution')}
</Typography.Text>
</Col>
<Col span={20}>
<DataDistributionHistogram
data={{ firstDayData: firstDay, currentDayData: currentDay }}
/>
</Col>
</Row>
</Card>
</Col>
</Row>
);
};

View File

@ -13,6 +13,7 @@
import { t } from 'i18next';
import { StepperStepType } from 'Models';
import i18n from 'utils/i18next/LocalUtil';
import { CSMode } from '../enums/codemirror.enum';
import { DMLOperationType } from '../generated/api/data/createTableProfile';
import {
@ -170,25 +171,20 @@ export const INITIAL_PROPORTION_METRIC_VALUE = {
export const INITIAL_MATH_METRIC_VALUE = {
information: [
{
title: t('label.median'),
dataKey: 'median',
color: '#1890FF',
},
{
title: t('label.max'),
dataKey: 'max',
color: '#7147E8',
color: '#1890FF',
},
{
title: t('label.mean'),
dataKey: 'mean',
color: '#008376',
color: '#7147E8',
},
{
title: t('label.min'),
dataKey: 'min',
color: '#B02AAC',
color: '#008376',
},
],
data: [],
@ -204,6 +200,31 @@ export const INITIAL_SUM_METRIC_VALUE = {
],
data: [],
};
/**
 * Initial value for the quartile metrics chart: one `information` entry per
 * plotted series (translated title, data key and series color) and an empty
 * `data` list.
 */
export const INITIAL_QUARTILE_METRIC_VALUE = {
information: [
{
title: i18n.t('label.first-quartile'),
dataKey: 'firstQuartile',
color: '#1890FF',
},
{
title: i18n.t('label.median'),
dataKey: 'median',
color: '#7147E8',
},
{
title: i18n.t('label.inter-quartile-range'),
dataKey: 'interQuartileRange',
color: '#008376',
},
{
title: i18n.t('label.third-quartile'),
dataKey: 'thirdQuartile',
color: '#B02AAC',
},
],
// Starts empty here.
data: [],
};
export const INITIAL_ROW_METRIC_VALUE = {
information: [
@ -327,3 +348,8 @@ export const PROFILE_SAMPLE_OPTIONS = [
value: ProfileSampleType.Rows,
},
];
/**
 * Empty histogram: no frequencies and no boundary labels.
 */
export const DEFAULT_HISTOGRAM_DATA = {
  frequencies: [],
  boundaries: [],
};

View File

@ -165,6 +165,7 @@
"data-asset-type": "Data Asset Type",
"data-assets-report": "Data Assets Report",
"data-assets-with-tier-plural": "Data Assets with Tiers",
"data-distribution": "Data Distribution",
"data-entity": "Data {{entity}}",
"data-insight": "Data Insight",
"data-insight-active-user-summary": "Most Active Users",
@ -295,6 +296,7 @@
"filter-plural": "Filters",
"first": "First",
"first-lowercase": "first",
"first-quartile": "First Quartile",
"flush-interval-secs": "Flush Interval (secs)",
"follow": "Follow",
"followed-lowercase": "followed",
@ -365,6 +367,7 @@
"install-service-connectors": "Install Service Connectors",
"instance-lowercase": "instance",
"integration-plural": "Integrations",
"inter-quartile-range": "Inter Quartile Range",
"interval": "Interval",
"interval-type": "Interval Type",
"interval-unit": "Interval Unit",
@ -676,6 +679,7 @@
"show-deleted-team": "Show Deleted Team",
"show-or-hide-advanced-config": "{{showAdv}} Advanced Config",
"sign-in-with-sso": "Sign in with {{sso}}",
"skew": "Skew",
"slack": "Slack",
"soft-delete": "Soft Delete",
"soft-lowercase": "soft",
@ -746,6 +750,7 @@
"testing-connection": "Testing Connection",
"tests-summary": "Tests Summary",
"text": "Text",
"third-quartile": "Third Quartile",
"thread": "Thread",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",

View File

@ -165,6 +165,7 @@
"data-asset-type": "Type de Resources de Données",
"data-assets-report": "Data Assets Report",
"data-assets-with-tier-plural": "Data Assets with Tiers",
"data-distribution": "Data Distribution",
"data-entity": "Data {{entity}}",
"data-insight": "Data Insight",
"data-insight-active-user-summary": "Utilisateurs les plus Actfis",
@ -295,6 +296,7 @@
"filter-plural": "Filters",
"first": "First",
"first-lowercase": "first",
"first-quartile": "First Quartile",
"flush-interval-secs": "Flush Interval (secs)",
"follow": "Follow",
"followed-lowercase": "followed",
@ -365,6 +367,7 @@
"install-service-connectors": "Install Service Connectors",
"instance-lowercase": "instance",
"integration-plural": "Integrations",
"inter-quartile-range": "Inter Quartile Range",
"interval": "Interval",
"interval-type": "Type d'Interval",
"interval-unit": "Unité d'Interval",
@ -676,6 +679,7 @@
"show-deleted-team": "Show Deleted Team",
"show-or-hide-advanced-config": "{{showAdv}} Config Avancée",
"sign-in-with-sso": "Sign in with {{sso}}",
"skew": "Skew",
"slack": "Slack",
"soft-delete": "Suppression Logique (Soft delete)",
"soft-lowercase": "soft",
@ -746,6 +750,7 @@
"testing-connection": "Testing Connection",
"tests-summary": "Tests Summary",
"text": "Text",
"third-quartile": "Third Quartile",
"thread": "Thread",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",

View File

@ -165,6 +165,7 @@
"data-asset-type": "数据资产类型",
"data-assets-report": "数据资产报告",
"data-assets-with-tier-plural": "分层的数据资产",
"data-distribution": "Data Distribution",
"data-entity": "数据 {{entity}}",
"data-insight": "数据洞察",
"data-insight-active-user-summary": "最活跃用户",
@ -295,6 +296,7 @@
"filter-plural": "过滤器",
"first": "First",
"first-lowercase": "first",
"first-quartile": "First Quartile",
"flush-interval-secs": "刷新间隔 (secs)",
"follow": "Follow",
"followed-lowercase": "被关注",
@ -365,6 +367,7 @@
"install-service-connectors": "Install Service Connectors",
"instance-lowercase": "instance",
"integration-plural": "Integrations",
"inter-quartile-range": "Inter Quartile Range",
"interval": "间隔",
"interval-type": "间隔类型",
"interval-unit": "间隔单位",
@ -676,6 +679,7 @@
"show-deleted-team": "Show Deleted Team",
"show-or-hide-advanced-config": "{{showAdv}} 高级配置",
"sign-in-with-sso": "Sign in with {{sso}}",
"skew": "Skew",
"slack": "Slack",
"soft-delete": "软删除",
"soft-lowercase": "soft",
@ -746,6 +750,7 @@
"testing-connection": "Testing Connection",
"tests-summary": "Tests Summary",
"text": "Text",
"third-quartile": "Third Quartile",
"thread": "Thread",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",