159 lines
108 KiB
HTML
Raw Normal View History

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/managed-datahub/observe/volume-assertions" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.3">
<title data-rh="true">Volume Assertions | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Volume Assertions | DataHub"><meta data-rh="true" name="description" content="This page provides an overview of working with DataHub Volume Assertions"><meta data-rh="true" property="og:description" content="This page provides an overview of working with DataHub Volume Assertions"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/volume-assertions" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
<link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<!-- Google Analytics: load gtag.js asynchronously for measurement ID G-PKGVLETT4C. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
<!-- Defines gtag() as a thin wrapper that pushes its arguments onto window.dataLayer
     (created here if absent), then queues the initial "js" timestamp and the "config"
     call for the same measurement ID with an empty options object. -->
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
<!-- NOTE(review): this preconnect hint repeats the one for the same origin a few lines above. -->
<link rel="preconnect" href="https://www.googletagmanager.com">
<!-- Google Tag Manager: make sure window.dataLayer exists before the loader below runs. -->
<script>window.dataLayer=window.dataLayer||[]</script>
<!-- GTM loader: pushes a "gtm.start" event with the current time onto dataLayer, then
     creates an async script element for gtm.js (container GTM-5M8T9HNN) and inserts it
     before the first script element in the document. The fifth parameter (g) is declared
     but never passed or used; the container ID is hard-coded in the URL. -->
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
<meta http-equiv="Content-Security-Policy" content="frame-ancestors &#39;self&#39; https://*.acryl.io https://acryldata.io http://localhost:*">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
<script src="/scripts/rb2b.js" async defer="defer"></script>
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
<script src="/scripts/reo.js"></script>
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
<link rel="preload" href="/assets/js/runtime~main.ec69bdbc.js" as="script">
<link rel="preload" href="/assets/js/main.89423a85.js" as="script">
</head>
<body class="navigation-with-keyboard">
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!-- Inline bootstrap placed ahead of the #__docusaurus container.
     1. Picks the colour theme from the "docusaurus-theme" query parameter, falling back to
        localStorage key "theme", and finally to "light"; writes it to the data-theme
        attribute on the root element.
     2. Sets data-announcement-bar-initially-dismissed on the root element to whether
        localStorage key "docusaurus.announcement.dismiss" equals "true".
     Errors from URLSearchParams / localStorage access are caught and treated as "no value". -->
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span></span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/managed-datahub/observe/volume-assertions">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/managed-datahub/observe/volume-assertions">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
  /*
   * "Get Cloud" navbar call-to-action.
   * Resting state: text is drawn in the active menu colour; a gradient whose two
   * stops are that same colour is clipped to the glyphs, so it is visually solid.
   */
  .cloud-cta {
    color: var(--ifm-menu-color-active);
    font-weight: 600;
    background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
    background-size: 200% 100%;
    -webkit-background-clip: text;
    background-clip: text;
    transition: background-image 0.3s ease;
  }

  /*
   * Hover state: the text colour becomes transparent so the clipped gradient
   * (active menu colour to #ff1493) shows through the glyphs, and the gradient
   * is slid back and forth by the gradientShift animation.
   */
  .cloud-cta:hover {
    color: transparent;
    background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
    background-size: 200% 100%;
    -webkit-background-clip: text;
    background-clip: text;
    animation: gradientShift 3s ease infinite;
  }

  /* Moves the double-width background from its left edge to its right edge and back. */
  @keyframes gradientShift {
    0%, 100% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
  }

  /* Respect the user's reduced-motion preference: keep the gradient, drop the endless animation. */
  @media (prefers-reduced-motion: reduce) {
    .cloud-cta:hover {
      animation: none;
    }
  }
</style>
<div class="cloud-cta">Get Cloud</div>
</a><a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=header&amp;utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
  /* Slightly dim the Slack icon while it is hovered. */
  .slack-logo:hover { opacity: 0.8; }
</style>
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="slack" height="20" style="margin: 10px 0 0 0;">
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category &#x27;What Is DataHub?&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category &#x27;Features&#x27;" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" tabindex="0" href="/docs/managed-datahub/observe/assertions">Assertions (Data Quality)</a><button aria-label="Toggle the collapsible sidebar category &#x27;Assertions (Data Quality)&#x27;" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/column-assertions">Column Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/custom-sql-assertions">Custom SQL Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/freshness-assertions">Freshness Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/schema-assertions">Schema Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" 
href="/docs/managed-datahub/observe/volume-assertions">Volume Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sideba
If you are interested in learning more about <strong>DataHub Cloud Observe</strong> or trying it out, please <a href="https://datahub.com/products/data-observability/" target="_blank" rel="noopener noreferrer">visit our website</a>.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="introduction">Introduction<a href="#introduction" class="hash-link" aria-label="Direct link to Introduction" title="Direct link to Introduction"></a></h2><p>Can you remember a time when the meaning of a Data Warehouse Table that you depended on fundamentally changed, with little or no notice?
If the answer is yes, how did you find out? We&#x27;ll take a guess - someone looking at an internal reporting dashboard or worse, a user using your product, sounded an alarm when
a number looked a bit out of the ordinary. Perhaps your table initially tracked purchases made on your company&#x27;s e-commerce web store, but suddenly began to include purchases made
through your company&#x27;s new mobile app.</p><p>There are many reasons why an important Table on Snowflake, Redshift, BigQuery, or Databricks may change in its meaning - application code bugs, new feature rollouts,
changes to key metric definitions, etc. Often times, these changes break important assumptions made about the data used in building key downstream data products
like reporting dashboards or data-driven product features.</p><p>What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data
issues <em>before</em> anyone else? With DataHub Cloud <strong>Volume Assertions</strong>, you can.</p><p>DataHub Cloud allows users to define expectations about the normal volume, or size, of a particular warehouse Table,
and then monitor those expectations over time as the table grows and changes.</p><p>In this article, we&#x27;ll cover the basics of monitoring Volume Assertions - what they are, how to configure them, and more - so that you and your team can
start building trust in your most important data assets.</p><p>Let&#x27;s get started!</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="support">Support<a href="#support" class="hash-link" aria-label="Direct link to Support" title="Direct link to Support"></a></h2><p>Volume Assertions are currently supported for:</p><ol><li>Snowflake</li><li>Redshift</li><li>BigQuery</li><li>Databricks</li><li>DataHub Dataset Profile (collected via ingestion)</li></ol><p>Note that an Ingestion Source <em>must</em> be configured with the data platform of your choice in DataHub Cloud&#x27;s <strong>Ingestion</strong>
tab.</p><blockquote><p>Note that Volume Assertions are not yet supported if you are connecting to your warehouse
using the DataHub CLI.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="what-is-a-volume-assertion">What is a Volume Assertion?<a href="#what-is-a-volume-assertion" class="hash-link" aria-label="Direct link to What is a Volume Assertion?" title="Direct link to What is a Volume Assertion?"></a></h2><p>A <strong>Volume Assertion</strong> is a configurable Data Quality rule used to monitor a Data Warehouse Table
for unexpected or sudden changes in &quot;volume&quot;, or row count. Volume Assertions can be particularly useful when you have frequently-changing
Tables which have a relatively stable pattern of growth or decline.</p><p>For example, imagine that we work for a company with a Snowflake Table that stores user clicks collected from our e-commerce website.
This table is updated with new data on a specific cadence: once per hour (In practice, daily or even weekly are also common).
In turn, there is a downstream Business Analytics Dashboard in Looker that shows important metrics like
the number of people clicking our &quot;Daily Sale&quot; banners, and this dashboard is generated from data stored in our &quot;clicks&quot; table.
It is important that our clicks Table is updated with the correct number of rows each hour, else it could mean
that our downstream metrics dashboard becomes incorrect. The risk of this situation is obvious: our organization
may make bad decisions based on incomplete information.</p><p>In such cases, we can use a <strong>Volume Assertion</strong> that checks whether the Snowflake &quot;clicks&quot; Table is growing in an expected
way, and that there are no sudden increases or sudden decreases in the rows being added or removed from the table.
If too many rows are added or removed within an hour, we can notify key stakeholders and begin to root cause before the problem impacts stakeholders of the data.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="anatomy-of-a-volume-assertion">Anatomy of a Volume Assertion<a href="#anatomy-of-a-volume-assertion" class="hash-link" aria-label="Direct link to Anatomy of a Volume Assertion" title="Direct link to Anatomy of a Volume Assertion"></a></h3><p>At the most basic level, <strong>Volume Assertions</strong> consist of a few important parts:</p><ol><li>An <strong>Evaluation Schedule</strong></li><li>A <strong>Volume Condition</strong></li><li>A <strong>Volume Source</strong></li></ol><p>In this section, we&#x27;ll give an overview of each.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-evaluation-schedule">1. Evaluation Schedule<a href="#1-evaluation-schedule" class="hash-link" aria-label="Direct link to 1. Evaluation Schedule" title="Direct link to 1. Evaluation Schedule"></a></h4><p>The <strong>Evaluation Schedule</strong>: This defines how often to check a given warehouse Table for its volume. This should usually
be configured to match the expected change frequency of the Table, although it can also be less frequent depending
on the requirements. You can also specify specific days of the week, hours in the day, or even
minutes in an hour.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-volume-condition">2. Volume Condition<a href="#2-volume-condition" class="hash-link" aria-label="Direct link to 2. Volume Condition" title="Direct link to 2. Volume Condition"></a></h4><p>The <strong>Volume Condition</strong>: This defines the type of condition that we&#x27;d like to monitor, or when the Assertion
should result in failure.</p><p>There are 2 different categories of conditions: <strong>Total</strong> Volume and <strong>Change</strong> Volume.</p><p><em>Total</em> volume conditions are those which are defined against the point-in-time total row count for a table. They allow you to specify conditions like:</p><ol><li><strong>Table has too many rows</strong>: The table should always have less than 1000 rows</li><li><strong>Table has too few rows</strong>: The table should always have more than 1000 rows</li><li><strong>Table row count is outside a range</strong>: The table should always have between 1000 and 2000 rows.</li></ol><p><em>Change</em> volume conditions are those which are defined against the growth or decline rate of a table, measured between subsequent checks
of the table volume. They allow you to specify conditions like:</p><ol><li><strong>Table growth is too fast</strong>: When the table volume is checked, it should have &lt; 1000 more rows than it had during the previous check.</li><li><strong>Table growth is too slow</strong>: When the table volume is checked, it should have &gt; 1000 more rows than it had during the previous check.</li><li><strong>Table growth is outside a range</strong>: When the table volume is checked, it should have between 1000 and 2000 more rows than it had during the previous check.</li></ol><p>For change volume conditions, both <em>absolute</em> row count deltas and relative percentage deltas are supported for identifying
tables that are following an abnormal pattern of growth.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-volume-source">3. Volume Source<a href="#3-volume-source" class="hash-link" aria-label="Direct link to 3. Volume Source" title="Direct link to 3. Volume Source"></a></h4><p>The <strong>Volume Source</strong>: This is the mechanism that DataHub Cloud should use to determine the table volume (row count). The supported
source types vary by the platform, but generally fall into these categories:</p><ul><li><p><strong>Information Schema</strong>: A system Table that is exposed by the Data Warehouse which contains live information about the Databases
and Tables stored inside the Data Warehouse, including their row count. It is usually efficient to check, but can in some cases be slightly delayed to update
once a change has been made to a table.</p></li><li><p><strong>Query</strong>: A <code>COUNT(*)</code> query is used to retrieve the latest row count for a table, with optional SQL filters applied (depending on platform).
This can be less efficient to check depending on the size of the table. This approach is more portable, as it does not involve
system warehouse tables; it is also easily portable across Data Warehouse and Data Lake providers.</p></li><li><p><strong>DataHub Dataset Profile</strong>: The DataHub Dataset Profile aspect is used to retrieve the latest row count information for a table.
Using this option avoids contacting your data platform, and instead uses the DataHub Dataset Profile metadata to evaluate Volume Assertions.
Note that if you have not configured an ingestion source through DataHub, then this may be the only option available.</p></li></ul><p>Volume Assertions also have an off switch: they can be started or stopped at any time with the click of a button.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-a-volume-assertion">Creating a Volume Assertion<a href="#creating-a-volume-assertion" class="hash-link" aria-label="Direct link to Creating a Volume Assertion" title="Direct link to Creating a Volume Assertion"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="prerequisites">Prerequisites<a href="#prerequisites" class="hash-link" aria-label="Direct link to Prerequisites" title="Direct link to Prerequisites"></a></h3><ol><li><p><strong>Permissions</strong>: To create or delete Volume Assertions for a specific entity on DataHub, you&#x27;ll need to be granted the
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for the entity. This will be granted to Entity owners as part of the <code>Asset Owners - Metadata Policy</code>
by default.</p></li><li><p>(Optional) <strong>Data Platform Connection</strong>: In order to create a Volume Assertion that queries the source data platform directly (instead of DataHub metadata), you&#x27;ll need to have an <strong>Ingestion Source</strong> configured to your
Data Platform: Snowflake, BigQuery, or Redshift under the <strong>Integrations</strong> tab.</p></li></ol><p>Once these are in place, you&#x27;re ready to create your Volume Assertions!</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="steps">Steps<a href="#steps" class="hash-link" aria-label="Direct link to Steps" title="Direct link to Steps"></a></h3><ol><li>Navigate to the Table that you want to monitor for volume</li><li>Click the <strong>Quality</strong> tab</li></ol><p align="left"><img loading="lazy" width="80%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/freshness/profile-validation-tab.png" class="img_ev3q"></p><ol start="3"><li>Click <strong>+ Create Assertion</strong></li></ol><p align="left"><img loading="lazy" width="45%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-choose-type.png" class="img_ev3q"></p><ol start="4"><li><p>Choose <strong>Volume</strong></p></li><li><p>Configure the evaluation <strong>schedule</strong>. This is the frequency at which the assertion will be evaluated to produce a pass or fail result, and the times
when the table volume will be checked.</p></li><li><p>Configure the evaluation <strong>condition type</strong>. This determines the cases in which the new assertion will fail when it is evaluated.</p></li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-condition-type.png" class="img_ev3q"></p><ol start="7"><li>(Optional) Click <strong>Advanced</strong> to customize the volume <strong>source</strong>. This is the mechanism that will be used to obtain the table
row count metric. Each Data Platform supports different options including Information Schema, Query, and DataHub Dataset Profile.</li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/assertion-builder-volume-select-source-type.png" class="img_ev3q"></p><ul><li><strong>Information Schema</strong>: Check the Data Platform system metadata tables to determine the table row count.</li><li><strong>Query</strong>: Issue a <code>COUNT(*)</code> query to the table to determine the row count.</li><li><strong>DataHub Dataset Profile</strong>: Use the DataHub Dataset Profile metadata to determine the row count.</li></ul><ol start="8"><li>Configure actions that should be taken when the Volume Assertion passes or fails</li></ol><p align="left"><img loading="lazy" width="40%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/assertion-builder-actions.png" class="img_ev3q"></p><ul><li><p><strong>Raise incident</strong>: Automatically raise a new DataHub <code>Volume</code> Incident for the Table whenever the Volume Assertion is failing. This
may indicate that the Table is unfit for consumption. Configure Slack Notifications under <strong>Settings</strong> to be notified when
an incident is created due to an Assertion failure.</p></li><li><p><strong>Resolve incident</strong>: Automatically resolve any incidents that were raised due to failures in this Volume Assertion. Note that
any other incidents will not be impacted.</p></li></ul><ol start="9"><li><p>Click <strong>Next</strong> and provide a description.</p></li><li><p>Click <strong>Save</strong>.</p></li></ol><p>And that&#x27;s it! DataHub will now begin to monitor your Volume Assertion for the table.</p><p>Once your assertion has run, you will begin to see Success or Failure status for the Table</p><p align="left"><img loading="lazy" width="45%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/profile-passing-volume-assertions-expanded.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="anomaly-detection-with-smart-assertions-">Anomaly Detection with Smart Assertions ⚡<a href="#anomaly-detection-with-smart-assertions-" class="hash-link" aria-label="Direct link to Anomaly Detection with Smart Assertions ⚡" title="Direct link to Anomaly Detection with Smart Assertions ⚡"></a></h2><p>As part of the <strong>DataHub Cloud Observe</strong> module, DataHub Cloud also provides <strong>Smart Assertions</strong> out of the box. These are
dynamic, AI-powered Volume Assertions that you can use to monitor the volume of important warehouse Tables, without
requiring any manual setup.</p><p>You can create smart assertions by simply selecting the <code>Detect with AI</code> option in the UI:</p><p align="left"><img loading="lazy" width="90%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/volume/volume-smart-assertion.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="stopping-a-volume-assertion">Stopping a Volume Assertion<a href="#stopping-a-volume-assertion" class="hash-link" aria-label="Direct link to Stopping a Volume Assertion" title="Direct link to Stopping a Volume Assertion"></a></h2><p>In order to temporarily stop the evaluation of the assertion:</p><ol><li>Navigate to the <strong>Quality</strong> tab of the Table with the assertion</li><li>Click <strong>Volume</strong> to open the Volume Assertions</li><li>Click the &quot;Stop&quot; button for the assertion you wish to pause.</li></ol><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/stop-assertion.png" class="img_ev3q"></p><p>To resume the assertion, simply click <strong>Start</strong>.</p><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/start-assertion.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-volume-assertions-via-api">Creating Volume Assertions via API<a href="#creating-volume-assertions-via-api" class="hash-link" aria-label="Direct link to Creating Volume Assertions via API" title="Direct link to Creating Volume Assertions via API"></a></h2><p>Under the hood, DataHub Cloud implements Volume Assertion Monitoring using two concepts:</p><ul><li><p><strong>Assertion</strong>: The specific expectation for volume, e.g. &quot;The table was changed in the past 7 hours&quot;
or &quot;The table is changed on a schedule of every day by 8am&quot;. This is the &quot;what&quot;.</p></li><li><p><strong>Monitor</strong>: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific
mechanisms. This is the &quot;how&quot;.</p></li></ul><p>Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you&#x27;ll need the
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for it.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="graphql">GraphQL<a href="#graphql" class="hash-link" aria-label="Direct link to GraphQL" title="Direct link to GraphQL"></a></h4><p>In order to create or update a Volume Assertion, you can use the <code>upsertDatasetVolumeAssertionMonitor</code> mutation.</p><h5 class="anchor anchorWithStickyNavbar_LWe7" id="examples">Examples<a href="#examples" class="hash-link" aria-label="Direct link to Examples" title="Direct link to Examples"></a></h5><p>To create a Volume Assertion Entity that verifies that the row count for a table is between 10 and 20 rows, and runs every 8 hours:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">mutation</span><span class="token plain"> </span><span class="token definition-mutation function" style="color:rgb(130, 170, 255)">upsertDatasetVolumeAssertionMonitor</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">upsertDatasetVolumeAssertionMonitor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">entityUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)">urn</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">of</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">entity</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">being</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">monitored</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">type</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">:</span><span class="token plain"> </span><span class="token constant" style="color:r
The supported operator types are <code>GREATER_THAN</code>, <code>GREATER_THAN_OR_EQUAL_TO</code>, <code>LESS_THAN</code>, <code>LESS_THAN_OR_EQUAL_TO</code>, and <code>BETWEEN</code> (requires minValue, maxValue).
The supported parameter types are <code>NUMBER</code>.</p><p>You can use the same endpoint with the assertion urn input to update an existing Volume Assertion and corresponding Monitor:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">mutation</span><span class="token plain"> </span><span class="token definition-mutation function" style="color:rgb(130, 170, 255)">upsertDatasetVolumeAssertionMonitor</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">upsertDatasetVolumeAssertionMonitor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">assertionUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)">urn</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">of</span><span class="token description string 
language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">assertion</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">created</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">in</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">earlier</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">query</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">entityUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" 
style="color:rgb(195, 232, 141)">&quot;</span><span class="token description string language-markdown ta
<script src="/assets/js/runtime~main.ec69bdbc.js"></script>
<script src="/assets/js/main.89423a85.js"></script>
</body>
</html>