mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-01 13:58:01 +00:00
159 lines
108 KiB
HTML
159 lines
108 KiB
HTML
![]() |
<!doctype html>
|
|||
|
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/managed-datahub/observe/volume-assertions" data-has-hydrated="false">
|
|||
|
<head>
|
|||
|
<meta charset="UTF-8">
|
|||
|
<meta name="generator" content="Docusaurus v2.4.3">
|
|||
|
<title data-rh="true">Volume Assertions | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Volume Assertions | DataHub"><meta data-rh="true" name="description" content="This page provides an overview of working with DataHub Volume Assertions"><meta data-rh="true" property="og:description" content="This page provides an overview of working with DataHub Volume Assertions"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
|
|||
|
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
|
|||
|
|
|||
|
<link rel="preconnect" href="https://www.google-analytics.com">
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
|
|||
|
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script>window.dataLayer=window.dataLayer||[]</script>
|
|||
|
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
|
|||
|
|
|||
|
|
|||
|
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<meta http-equiv="Content-Security-Policy" content="frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*">
|
|||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
|
|||
|
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
|
|||
|
<script src="/scripts/rb2b.js" async defer="defer"></script>
|
|||
|
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
|
|||
|
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
|
|||
|
<script src="/scripts/reo.js"></script>
|
|||
|
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
|
|||
|
<link rel="preload" href="/assets/js/runtime~main.ec69bdbc.js" as="script">
|
|||
|
<link rel="preload" href="/assets/js/main.89423a85.js" as="script">
|
|||
|
</head>
|
|||
|
<body class="navigation-with-keyboard">
|
|||
|
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
|
|||
|
|
|||
|
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
|
|||
|
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span> →</span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/managed-datahub/observe/volume-assertions">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/managed-datahub/observe/volume-assertions">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.cloud-cta {
|
|||
|
color: var(--ifm-menu-color-active);
|
|||
|
font-weight: 600;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
transition: background-image 0.3s ease;
|
|||
|
}
|
|||
|
.cloud-cta:hover {
|
|||
|
color: transparent;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
animation: gradientShift 3s ease infinite;
|
|||
|
}
|
|||
|
@keyframes gradientShift {
|
|||
|
0%, 100% { background-position: 0% 50%; }
|
|||
|
50% { background-position: 100% 50%; }
|
|||
|
}
|
|||
|
</style>
|
|||
|
<div class="cloud-cta">Get Cloud</div>
|
|||
|
</a><a href="https://datahub.com/slack?utm_source=docs&utm_medium=header&utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.slack-logo:hover {
|
|||
|
opacity: 0.8;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="slack" height="20" style="margin: 10px 0 0 0;">
|
|||
|
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category 'What Is DataHub?'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a 
class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category 'Features'" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" tabindex="0" href="/docs/managed-datahub/observe/assertions">Assertions (Data Quality)</a><button aria-label="Toggle the collapsible sidebar category 'Assertions (Data Quality)'" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/column-assertions">Column Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/custom-sql-assertions">Custom SQL Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/freshness-assertions">Freshness Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/schema-assertions">Schema Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/managed-datahub/observe/volume-assertions">Volume 
Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sideba
|
|||
|
If you are interested in learning more about <strong>DataHub Cloud Observe</strong> or trying it out, please <a href="https://datahub.com/products/data-observability/" target="_blank" rel="noopener noreferrer">visit our website</a>.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="introduction">Introduction<a href="#introduction" class="hash-link" aria-label="Direct link to Introduction" title="Direct link to Introduction"></a></h2><p>Can you remember a time when the meaning of a Data Warehouse Table that you depended on fundamentally changed, with little or no notice?
|
|||
|
If the answer is yes, how did you find out? We'll take a guess - someone looking at an internal reporting dashboard or worse, a user using your product, sounded an alarm when
|
|||
|
a number looked a bit out of the ordinary. Perhaps your table initially tracked purchases made on your company's e-commerce web store, but suddenly began to include purchases made
|
|||
|
through your company's new mobile app.</p><p>There are many reasons why an important Table on Snowflake, Redshift, BigQuery, or Databricks may change in its meaning - application code bugs, new feature rollouts,
|
|||
|
changes to key metric definitions, etc. Oftentimes, these changes break important assumptions made about the data used in building key downstream data products
|
|||
|
like reporting dashboards or data-driven product features.</p><p>What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data
|
|||
|
issues <em>before</em> anyone else? With DataHub Cloud <strong>Volume Assertions</strong>, you can.</p><p>DataHub Cloud allows users to define expectations about the normal volume, or size, of a particular warehouse Table,
|
|||
|
and then monitor those expectations over time as the table grows and changes.</p><p>In this article, we'll cover the basics of monitoring Volume Assertions - what they are, how to configure them, and more - so that you and your team can
|
|||
|
start building trust in your most important data assets.</p><p>Let's get started!</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="support">Support<a href="#support" class="hash-link" aria-label="Direct link to Support" title="Direct link to Support"></a></h2><p>Volume Assertions are currently supported for:</p><ol><li>Snowflake</li><li>Redshift</li><li>BigQuery</li><li>Databricks</li><li>DataHub Dataset Profile (collected via ingestion)</li></ol><p>Note that an Ingestion Source <em>must</em> be configured with the data platform of your choice in DataHub Cloud's <strong>Ingestion</strong>
|
|||
|
tab.</p><blockquote><p>Note that Volume Assertions are not yet supported if you are connecting to your warehouse
|
|||
|
using the DataHub CLI.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="what-is-a-volume-assertion">What is a Volume Assertion?<a href="#what-is-a-volume-assertion" class="hash-link" aria-label="Direct link to What is a Volume Assertion?" title="Direct link to What is a Volume Assertion?"></a></h2><p>A <strong>Volume Assertion</strong> is a configurable Data Quality rule used to monitor a Data Warehouse Table
|
|||
|
for unexpected or sudden changes in "volume", or row count. Volume Assertions can be particularly useful when you have frequently-changing
|
|||
|
Tables which have a relatively stable pattern of growth or decline.</p><p>For example, imagine that we work for a company with a Snowflake Table that stores user clicks collected from our e-commerce website.
|
|||
|
This table is updated with new data on a specific cadence: once per hour (In practice, daily or even weekly are also common).
|
|||
|
In turn, there is a downstream Business Analytics Dashboard in Looker that shows important metrics like
|
|||
|
the number of people clicking our "Daily Sale" banners, and this dashboard is generated from data stored in our "clicks" table.
|
|||
|
It is important that our clicks Table is updated with the correct number of rows each hour, else it could mean
|
|||
|
that our downstream metrics dashboard becomes incorrect. The risk of this situation is obvious: our organization
|
|||
|
may make bad decisions based on incomplete information.</p><p>In such cases, we can use a <strong>Volume Assertion</strong> that checks whether the Snowflake "clicks" Table is growing in an expected
|
|||
|
way, and that there are no sudden increases or sudden decreases in the rows being added or removed from the table.
|
|||
|
If too many rows are added or removed within an hour, we can notify key stakeholders and begin to root cause before the problem impacts stakeholders of the data.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="anatomy-of-a-volume-assertion">Anatomy of a Volume Assertion<a href="#anatomy-of-a-volume-assertion" class="hash-link" aria-label="Direct link to Anatomy of a Volume Assertion" title="Direct link to Anatomy of a Volume Assertion"></a></h3><p>At the most basic level, <strong>Volume Assertions</strong> consist of a few important parts:</p><ol><li>An <strong>Evaluation Schedule</strong></li><li>A <strong>Volume Condition</strong></li><li>A <strong>Volume Source</strong></li></ol><p>In this section, we'll give an overview of each.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-evaluation-schedule">1. Evaluation Schedule<a href="#1-evaluation-schedule" class="hash-link" aria-label="Direct link to 1. Evaluation Schedule" title="Direct link to 1. Evaluation Schedule"></a></h4><p>The <strong>Evaluation Schedule</strong>: This defines how often to check a given warehouse Table for its volume. This should usually
|
|||
|
be configured to match the expected change frequency of the Table, although it can also be less frequent depending
|
|||
|
on the requirements. You can also specify specific days of the week, hours in the day, or even
|
|||
|
minutes in an hour.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-volume-condition">2. Volume Condition<a href="#2-volume-condition" class="hash-link" aria-label="Direct link to 2. Volume Condition" title="Direct link to 2. Volume Condition"></a></h4><p>The <strong>Volume Condition</strong>: This defines the type of condition that we'd like to monitor, or when the Assertion
|
|||
|
should result in failure.</p><p>There are 2 different categories of conditions: <strong>Total</strong> Volume and <strong>Change</strong> Volume.</p><p><em>Total</em> volume conditions are those which are defined against the point-in-time total row count for a table. They allow you to specify conditions like:</p><ol><li><strong>Table has too many rows</strong>: The table should always have less than 1000 rows</li><li><strong>Table has too few rows</strong>: The table should always have more than 1000 rows</li><li><strong>Table row count is outside a range</strong>: The table should always have between 1000 and 2000 rows.</li></ol><p><em>Change</em> volume conditions are those which are defined against the growth or decline rate of a table, measured between subsequent checks
|
|||
|
of the table volume. They allow you to specify conditions like:</p><ol><li><strong>Table growth is too fast</strong>: When the table volume is checked, it should have &lt; 1000 more rows than it had during the previous check.</li><li><strong>Table growth is too slow</strong>: When the table volume is checked, it should have &gt; 1000 more rows than it had during the previous check.</li><li><strong>Table growth is outside a range</strong>: When the table volume is checked, it should have between 1000 and 2000 more rows than it had during the previous check.</li></ol><p>For change volume conditions, both <em>absolute</em> row count deltas and relative percentage deltas are supported for identifying
|
|||
|
tables that are following an abnormal pattern of growth.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-volume-source">3. Volume Source<a href="#3-volume-source" class="hash-link" aria-label="Direct link to 3. Volume Source" title="Direct link to 3. Volume Source"></a></h4><p>The <strong>Volume Source</strong>: This is the mechanism that DataHub Cloud should use to determine the table volume (row count). The supported
|
|||
|
source types vary by the platform, but generally fall into these categories:</p><ul><li><p><strong>Information Schema</strong>: A system Table that is exposed by the Data Warehouse which contains live information about the Databases
|
|||
|
and Tables stored inside the Data Warehouse, including their row count. It is usually efficient to check, but can in some cases be slightly delayed to update
|
|||
|
once a change has been made to a table.</p></li><li><p><strong>Query</strong>: A <code>COUNT(*)</code> query is used to retrieve the latest row count for a table, with optional SQL filters applied (depending on platform).
|
|||
|
This can be less efficient to check depending on the size of the table. This approach is more portable, as it does not involve
|
|||
|
system warehouse tables; it is also easily portable across Data Warehouse and Data Lake providers.</p></li><li><p><strong>DataHub Dataset Profile</strong>: The DataHub Dataset Profile aspect is used to retrieve the latest row count information for a table.
|
|||
|
Using this option avoids contacting your data platform, and instead uses the DataHub Dataset Profile metadata to evaluate Volume Assertions.
|
|||
|
Note that if you have not configured an ingestion source through DataHub, then this may be the only option available.</p></li></ul><p>Volume Assertions also have an off switch: they can be started or stopped at any time with the click of a button.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-a-volume-assertion">Creating a Volume Assertion<a href="#creating-a-volume-assertion" class="hash-link" aria-label="Direct link to Creating a Volume Assertion" title="Direct link to Creating a Volume Assertion"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="prerequisites">Prerequisites<a href="#prerequisites" class="hash-link" aria-label="Direct link to Prerequisites" title="Direct link to Prerequisites"></a></h3><ol><li><p><strong>Permissions</strong>: To create or delete Volume Assertions for a specific entity on DataHub, you'll need to be granted the
|
|||
|
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for the entity. This will be granted to Entity owners as part of the <code>Asset Owners - Metadata Policy</code>
|
|||
|
by default.</p></li><li><p>(Optional) <strong>Data Platform Connection</strong>: In order to create a Volume Assertion that queries the source data platform directly (instead of DataHub metadata), you'll need to have an <strong>Ingestion Source</strong> configured to your
|
|||
|
Data Platform: Snowflake, BigQuery, or Redshift under the <strong>Integrations</strong> tab.</p></li></ol><p>Once these are in place, you're ready to create your Volume Assertions!</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="steps">Steps<a href="#steps" class="hash-link" aria-label="Direct link to Steps" title="Direct link to Steps"></a></h3><ol><li>Navigate to the Table that you want to monitor for volume</li><li>Click the <strong>Quality</strong> tab</li></ol><p align="left"><img loading="lazy" width="80%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/freshness/profile-validation-tab.png" class="img_ev3q"></p><ol start="3"><li>Click <strong>+ Create Assertion</strong></li></ol><p align="left"><img loading="lazy" width="45%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-choose-type.png" class="img_ev3q"></p><ol start="4"><li><p>Choose <strong>Volume</strong></p></li><li><p>Configure the evaluation <strong>schedule</strong>. This is the frequency at which the assertion will be evaluated to produce a pass or fail result, and the times
|
|||
|
when the table volume will be checked.</p></li><li><p>Configure the evaluation <strong>condition type</strong>. This determines the cases in which the new assertion will fail when it is evaluated.</p></li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-condition-type.png" class="img_ev3q"></p><ol start="7"><li>(Optional) Click <strong>Advanced</strong> to customize the volume <strong>source</strong>. This is the mechanism that will be used to obtain the table
|
|||
|
row count metric. Each Data Platform supports different options including Information Schema, Query, and DataHub Dataset Profile.</li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-select-source-type.png" class="img_ev3q"></p><ul><li><strong>Information Schema</strong>: Check the Data Platform system metadata tables to determine the table row count.</li><li><strong>Query</strong>: Issue a <code>COUNT(*)</code> query to the table to determine the row count.</li><li><strong>DataHub Dataset Profile</strong>: Use the DataHub Dataset Profile metadata to determine the row count.</li></ul><ol start="8"><li>Configure actions that should be taken when the Volume Assertion passes or fails</li></ol><p align="left"><img loading="lazy" width="40%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/assertion-builder-actions.png" class="img_ev3q"></p><ul><li><p><strong>Raise incident</strong>: Automatically raise a new DataHub <code>Volume</code> Incident for the Table whenever the Volume Assertion is failing. This
|
|||
|
may indicate that the Table is unfit for consumption. Configure Slack Notifications under <strong>Settings</strong> to be notified when
|
|||
|
an incident is created due to an Assertion failure.</p></li><li><p><strong>Resolve incident</strong>: Automatically resolve any incidents that were raised due to failures in this Volume Assertion. Note that
|
|||
|
any other incidents will not be impacted.</p></li></ul><ol start="9"><li><p>Click <strong>Next</strong> and provide a description.</p></li><li><p>Click <strong>Save</strong>.</p></li></ol><p>And that's it! DataHub will now begin to monitor your Volume Assertion for the table.</p><p>Once your assertion has run, you will begin to see Success or Failure status for the Table</p><p align="left"><img loading="lazy" width="45%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/profile-passing-volume-assertions-expanded.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="anomaly-detection-with-smart-assertions-">Anomaly Detection with Smart Assertions ⚡<a href="#anomaly-detection-with-smart-assertions-" class="hash-link" aria-label="Direct link to Anomaly Detection with Smart Assertions ⚡" title="Direct link to Anomaly Detection with Smart Assertions ⚡"></a></h2><p>As part of the <strong>DataHub Cloud Observe</strong> module, DataHub Cloud also provides <strong>Smart Assertions</strong> out of the box. These are
|
|||
|
dynamic, AI-powered Volume Assertions that you can use to monitor the volume of important warehouse Tables, without
|
|||
|
requiring any manual setup.</p><p>You can create smart assertions by simply selecting the <code>Detect with AI</code> option in the UI:</p><p align="left"><img loading="lazy" width="90%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/volume-smart-assertion.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="stopping-a-volume-assertion">Stopping a Volume Assertion<a href="#stopping-a-volume-assertion" class="hash-link" aria-label="Direct link to Stopping a Volume Assertion" title="Direct link to Stopping a Volume Assertion"></a></h2><p>In order to temporarily stop the evaluation of the assertion:</p><ol><li>Navigate to the <strong>Quality</strong> tab of the Table with the assertion</li><li>Click <strong>Volume</strong> to open the Volume Assertions</li><li>Click the "Stop" button for the assertion you wish to pause.</li></ol><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/stop-assertion.png" class="img_ev3q"></p><p>To resume the assertion, simply click <strong>Start</strong>.</p><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/start-assertion.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-volume-assertions-via-api">Creating Volume Assertions via API<a href="#creating-volume-assertions-via-api" class="hash-link" aria-label="Direct link to Creating Volume Assertions via API" title="Direct link to Creating Volume Assertions via API"></a></h2><p>Under the hood, DataHub Cloud implements Volume Assertion Monitoring using two concepts:</p><ul><li><p><strong>Assertion</strong>: The specific expectation for volume, e.g. "The table was changed in the past 7 hours"
|
|||
|
or "The table is changed on a schedule of every day by 8am". This is the "what".</p></li><li><p><strong>Monitor</strong>: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific
|
|||
|
mechanisms. This is the "how".</p></li></ul><p>Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the
|
|||
|
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for it.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="graphql">GraphQL<a href="#graphql" class="hash-link" aria-label="Direct link to GraphQL" title="Direct link to GraphQL"></a></h4><p>In order to create or update a Volume Assertion, you can use the <code>upsertDatasetVolumeAssertionMonitor</code> mutation.</p><h5 class="anchor anchorWithStickyNavbar_LWe7" id="examples">Examples<a href="#examples" class="hash-link" aria-label="Direct link to Examples" title="Direct link to Examples"></a></h5><p>To create a Volume Assertion Entity that verifies that the row count for a table is between 10 and 20 rows, and runs every 8 hours:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">mutation</span><span class="token plain"> </span><span class="token definition-mutation function" style="color:rgb(130, 170, 255)">upsertDatasetVolumeAssertionMonitor</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">upsertDatasetVolumeAssertionMonitor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">entityUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 232, 141)">"</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)"><</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)">urn</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">of</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">entity</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">being</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">monitored</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">></span><span class="token description string" style="color:rgb(195, 232, 141)">"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span 
class="token plain"> </span><span class="token constant" style="color:r
|
|||
|
The supported operator types are <code>GREATER_THAN</code>, <code>GREATER_THAN_OR_EQUAL_TO</code>, <code>LESS_THAN</code>, <code>LESS_THAN_OR_EQUAL_TO</code>, and <code>BETWEEN</code> (requires minValue, maxValue).
|
|||
|
The supported parameter types are <code>NUMBER</code>.</p><p>You can use the same endpoint with the assertion urn input to update an existing Volume Assertion and its corresponding Monitor:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">mutation</span><span class="token plain"> </span><span class="token definition-mutation function" style="color:rgb(130, 170, 255)">upsertDatasetVolumeAssertionMonitor</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">upsertDatasetVolumeAssertionMonitor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">assertionUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 232, 141)">"</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)"><</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)">urn</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">of</span><span class="token description string 
language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">assertion</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">created</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">in</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">earlier</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">query</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">></span><span class="token description string" style="color:rgb(195, 232, 141)">"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">entityUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 
232, 141)">"</span><span class="token description string language-markdown ta
|
|||
|
<script src="/assets/js/runtime~main.ec69bdbc.js"></script>
|
|||
|
<script src="/assets/js/main.89423a85.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|