167 lines
125 KiB
HTML
Raw Normal View History

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/managed-datahub/observe/column-assertions" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.3">
<title data-rh="true">Column Assertions | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/managed-datahub/observe/column-assertions"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Column Assertions | DataHub"><meta data-rh="true" name="description" content="This page provides an overview of working with DataHub Column Assertions"><meta data-rh="true" property="og:description" content="This page provides an overview of working with DataHub Column Assertions"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/managed-datahub/observe/column-assertions"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/column-assertions" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/managed-datahub/observe/column-assertions" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
<link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
<!-- gtag.js bootstrap: creates the shared dataLayer queue and records the initial "js" and "config" commands for measurement ID G-PKGVLETT4C. -->
<script>
  // Every gtag() call is queued by pushing its arguments object onto window.dataLayer.
  function gtag() {
    dataLayer.push(arguments);
  }
  // Reuse an existing queue if another snippet already created one.
  window.dataLayer = window.dataLayer || [];
  gtag("js", new Date());
  gtag("config", "G-PKGVLETT4C", {});
</script>
<link rel="preconnect" href="https://www.googletagmanager.com">
<!-- Google Tag Manager bootstrap: makes sure window.dataLayer exists, pushes the "gtm.start" timestamp event, then creates an async script element for gtm.js (container GTM-5M8T9HNN) and inserts it before the first script element in the document. -->
<script>window.dataLayer=window.dataLayer||[]</script>
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
<!-- Content-Security-Policy declared in markup. The attribute must be spelled "http-equiv" to be recognized as a pragma directive. NOTE(review): browsers ignore the frame-ancestors directive when a policy is delivered through a meta element, so the framing restriction also needs to be sent as an HTTP response header to be enforced. -->
<meta http-equiv="Content-Security-Policy" content="frame-ancestors &#39;self&#39; https://*.acryl.io https://acryldata.io http://localhost:*">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
<script src="/scripts/rb2b.js" async defer="defer"></script>
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
<script src="/scripts/reo.js"></script>
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
<link rel="preload" href="/assets/js/runtime~main.8ae4198a.js" as="script">
<link rel="preload" href="/assets/js/main.9d79f7e2.js" as="script">
</head>
<body class="navigation-with-keyboard">
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span></span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/managed-datahub/observe/column-assertions">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/managed-datahub/observe/column-assertions">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
/* Styles for elements with the cloud-cta class (the "Get Cloud" label). */
.cloud-cta {
color: var(--ifm-menu-color-active);
font-weight: 600;
/* Both gradient stops use the same color in the resting state. */
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
background-size: 200% 100%;
/* Clip the background to the text glyphs; the prefixed property is kept alongside the standard one. */
-webkit-background-clip: text;
background-clip: text;
transition: background-image 0.3s ease;
}
/* On hover the text color becomes transparent so the clipped gradient is what shows in the letters. */
.cloud-cta:hover {
color: transparent;
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
background-size: 200% 100%;
-webkit-background-clip: text;
background-clip: text;
/* Runs the gradientShift keyframes in a 3s loop for as long as the element is hovered. */
animation: gradientShift 3s ease infinite;
}
/* Animates background-position horizontally from 0% to 100% and back within each cycle. */
@keyframes gradientShift {
0%, 100% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
}
</style>
<div class="cloud-cta">Get Cloud</div>
</a><a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=header&amp;utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
/* Reduce the opacity of the Slack icon while it is hovered. */
.slack-logo:hover {
opacity: 0.8;
}
</style>
<!-- Slack icon. It is the only rendered content of the enclosing link, so its alt text doubles as the link's accessible name. -->
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="Join Slack" height="20" style="margin: 10px 0 0 0;">
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category &#x27;What Is DataHub?&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category &#x27;Features&#x27;" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" tabindex="0" href="/docs/managed-datahub/observe/assertions">Assertions (Data Quality)</a><button aria-label="Toggle the collapsible sidebar category &#x27;Assertions (Data Quality)&#x27;" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/managed-datahub/observe/column-assertions">Column Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/custom-sql-assertions">Custom SQL Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/freshness-assertions">Freshness Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" href="/docs/managed-datahub/observe/schema-assertions">Schema Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item saasOnly"><a class="menu__link" tabindex="0" 
href="/docs/managed-datahub/observe/volume-assertions">Volume Assertions</a></li><li class="theme-doc-sidebar-item-link theme-doc-sideba
If you are interested in learning more about <strong>DataHub Cloud Observe</strong> or trying it out, please <a href="https://datahub.com/products/data-observability/" target="_blank" rel="noopener noreferrer">visit our website</a>.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="introduction">Introduction<a href="#introduction" class="hash-link" aria-label="Direct link to Introduction" title="Direct link to Introduction"></a></h2><p>Can you remember a time when an important warehouse table column changed dramatically, with little or no notice? Perhaps the number of null values suddenly spiked, or a new value was added to a fixed set of possible values. If the answer is yes, how did you initially find out? We&#x27;ll take a guess - someone looking at an internal reporting dashboard or worse, a user using your product, sounded an alarm when a number looked a bit out of the ordinary.</p><p>There are many reasons why important columns in your Snowflake, Redshift, BigQuery, or Databricks tables may change - application code bugs, new feature rollouts, etc. Oftentimes, these changes break important assumptions made about the data used in building key downstream data products like reporting dashboards or data-driven product features.</p><p>What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data issues before anyone else? With DataHub Cloud Column Assertions, you can.</p><p>With DataHub Cloud, you can define <strong>Column Value</strong> assertions to ensure each value in a column matches specific constraints, and <strong>Column Metric</strong> assertions to ensure that computed metrics from columns align with your expectations. 
As soon as things go wrong, your team will be the first to know, before the data issue becomes a larger data incident.</p><p>In this guide, we&#x27;ll cover the basics of Column Assertions - what they are, how to configure them, and more - so that you and your team can start building trust in your most important data assets.</p><p>Let&#x27;s dive in!</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="support">Support<a href="#support" class="hash-link" aria-label="Direct link to Support" title="Direct link to Support"></a></h2><p>Column Assertions are currently supported for:</p><ol><li>Snowflake</li><li>Redshift</li><li>BigQuery</li><li>Databricks</li><li>DataHub Dataset Profile Metrics (collected via ingestion)</li></ol><p>Note that an Ingestion Source <em>must</em> be configured with the data platform of your choice in
DataHub Cloud&#x27;s <strong>Ingestion</strong> tab.</p><blockquote><p>Note that Column Assertions are not yet supported if you are connecting to your warehouse
using the DataHub CLI.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="what-is-a-column-assertion">What is a Column Assertion?<a href="#what-is-a-column-assertion" class="hash-link" aria-label="Direct link to What is a Column Assertion?" title="Direct link to What is a Column Assertion?"></a></h2><p>A <strong>Column Assertion</strong> is a highly configurable Data Quality rule used to monitor specific columns of a Data Warehouse table for unexpected changes.</p><p>Column Assertions are defined to validate a specific column, and can be used to</p><ol><li>Validate that the values of the column match some constraints (regex, allowed values, max, min, etc) across rows OR</li><li>Validate that specific column aggregation metrics match some expectations across rows.</li></ol><p>Column Assertions can be particularly useful for documenting and enforcing column-level &quot;contracts&quot;, i.e. formal specifications about the expected contents of a particular column that can be used for coordinating among producers and consumers of the data.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="anatomy-of-column-assertion">Anatomy of Column Assertion<a href="#anatomy-of-column-assertion" class="hash-link" aria-label="Direct link to Anatomy of Column Assertion" title="Direct link to Anatomy of Column Assertion"></a></h3><p>Column Assertions can be divided into two main types: <strong>Column Value</strong> and <strong>Column Metric</strong> Assertions.</p><p>A <strong>Column Value Assertion</strong> is used to monitor the value of a specific column in a table, and ensure that every row
adheres to a specific condition. In comparison, a <strong>Column Metric Assertion</strong> is used to compute a metric for that column,
and ensure that the value of that metric adheres to a specific condition.</p><p>At the most basic level, both types consist of a few important parts:</p><ol><li>An <strong>Evaluation Schedule</strong></li><li>A <strong>Column Selection</strong></li><li>An <strong>Evaluation Criteria</strong></li><li>A <strong>Row Evaluation Type</strong></li></ol><p>In this section, we&#x27;ll give an overview of each.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-evaluation-schedule">1. Evaluation Schedule<a href="#1-evaluation-schedule" class="hash-link" aria-label="Direct link to 1. Evaluation Schedule" title="Direct link to 1. Evaluation Schedule"></a></h4><p>The <strong>Evaluation Schedule</strong>: This defines how often to evaluate the Column Assertion against the given warehouse table.
This should usually be configured to match the expected change frequency of the table, although it can also be less
frequent depending on your requirements. You can also choose specific days of the week, hours in the day, or even
minutes in an hour.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-column-selection">2. Column Selection<a href="#2-column-selection" class="hash-link" aria-label="Direct link to 2. Column Selection" title="Direct link to 2. Column Selection"></a></h4><p>The <strong>Column Selection</strong>: This defines the column that should be monitored by the Column Assertion. You can choose from
any of the columns from the table listed in the dropdown. Note that columns of struct / object type are not currently supported.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-evaluation-criteria">3. Evaluation Criteria<a href="#3-evaluation-criteria" class="hash-link" aria-label="Direct link to 3. Evaluation Criteria" title="Direct link to 3. Evaluation Criteria"></a></h4><p>The <strong>Evaluation Criteria</strong>: This defines the condition that must be satisfied in order for the Column
Assertion to pass.</p><p>For <strong>Column Value Assertions</strong>, you will be able to choose from a set of operators that can be applied to the column
value. The options presented will vary based on the data type of the selected column. For example, if you&#x27;ve selected a numeric column, you
can verify that the column value is greater than a particular value. For string types, you can check that the column value
matches a particular regex pattern. Additionally, you are able to control the behavior of the check in the presence of NULL values. If the
<strong>Allow Nulls</strong> option is <em>disabled</em>, then any null values encountered will be reported as a failure when evaluating the
assertion. If <strong>Allow Nulls</strong> is enabled, then nulls will be ignored; the condition will be evaluated for rows where the column value is non-null.</p><p>For <strong>Column Metric Assertions</strong>, you will be able to choose from a list of common column metrics - MAX, MIN, MEAN, NULL COUNT, etc - and then compare these metric values to an expected value. The list of metrics will vary based on the type of the selected column. For example
if you&#x27;ve selected a numeric column, you can choose to compute the MEAN value of the column, and then assert that it is greater than a
specific number. For string types, you can choose to compute the MAX LENGTH of the string across all column values, and then assert that it
is less than a specific number.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="4-row-selection-set">4. Row Selection Set<a href="#4-row-selection-set" class="hash-link" aria-label="Direct link to 4. Row Selection Set" title="Direct link to 4. Row Selection Set"></a></h4><p>The <strong>Row Selection Set</strong>: This defines which rows in the table the Column Assertion will be evaluated across. You can choose
from the following options:</p><ul><li><p><strong>All Table Rows</strong>: Evaluate the Column Assertion across all rows in the table. This is the default option. Note that
this may not be desirable for large tables.</p></li><li><p><strong>Only Rows That Have Changed</strong>: Evaluate the Column Assertion only against rows that have changed since the last
evaluation of the assertion. If you choose this option, you will need to specify a <strong>High Watermark Column</strong> to help determine which rows
have changed. A <strong>High Watermark Column</strong> is a column that contains a constantly incrementing value - a date, a time, or
another always-increasing number - that can be used to find the &quot;new rows&quot; that were added since the previous evaluation. When selected, a query will be issued to the table to find only the rows that have changed since the previous assertion evaluation.</p></li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-a-column-assertion">Creating a Column Assertion<a href="#creating-a-column-assertion" class="hash-link" aria-label="Direct link to Creating a Column Assertion" title="Direct link to Creating a Column Assertion"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="prerequisites">Prerequisites<a href="#prerequisites" class="hash-link" aria-label="Direct link to Prerequisites" title="Direct link to Prerequisites"></a></h3><ol><li><p><strong>Permissions</strong>: To create or delete Column Assertions for a specific entity on DataHub, you&#x27;ll need to be granted the
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for the entity. This will be granted to Entity owners as part of the <code>Asset Owners - Metadata Policy</code>
by default.</p></li><li><p>(Optional) <strong>Data Platform Connection</strong>: In order to create a Column Assertion that queries the data source directly (instead of DataHub metadata), you&#x27;ll need to have an <strong>Ingestion Source</strong>
configured to your Data Platform: Snowflake, BigQuery, or Redshift under the <strong>Ingestion</strong> tab.</p></li></ol><p>Once these are in place, you&#x27;re ready to create your Column Assertions!</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="steps">Steps<a href="#steps" class="hash-link" aria-label="Direct link to Steps" title="Direct link to Steps"></a></h3><ol><li>Navigate to the Table that you want to monitor</li><li>Click the <strong>Quality</strong> tab</li></ol><p align="left"><img loading="lazy" width="90%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/freshness/profile-validation-tab.png" class="img_ev3q"></p><ol start="3"><li>Click <strong>+ Create Assertion</strong></li></ol><p align="left"><img loading="lazy" width="40%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/assertion-builder-column-choose-type.png" class="img_ev3q"></p><ol start="4"><li><p>Choose <strong>Column</strong></p></li><li><p>Configure the evaluation <strong>schedule</strong>. This is the frequency at which the assertion will be evaluated to produce a
pass or fail result, and the times when the column values will be checked.</p></li><li><p>Configure the <strong>column assertion type</strong>. You can choose from <strong>Column Value</strong> or <strong>Column Metric</strong>.
<strong>Column Value</strong> assertions are used to monitor the value of a specific column in a table, and ensure that every row
adheres to a specific condition. <strong>Column Metric</strong> assertions are used to compute a metric for that column, and then compare the value of that metric to your expectations.</p></li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/assertion-builder-column-assertion-type.png" class="img_ev3q"></p><ol start="7"><li>Configure the <strong>column selection</strong>. This defines the column that should be monitored by the Column Assertion.
You can choose from any of the columns from the table listed in the dropdown.</li></ol><p align="left"><img loading="lazy" width="30%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/assertion-builder-column-field-selection.png" class="img_ev3q"></p><ol start="8"><li><p>Configure the <strong>evaluation criteria</strong>. This step varies based on the type of assertion you chose in the previous step.</p><ul><li><p><strong>Column Value Assertions</strong>: You will be able to choose from a set of operators that can be applied to the column
value. The options presented will vary based on the data type of the selected column. For example, with numeric types, you
can check that the column value is greater than a specific value. For string types, you can check that the column value
matches a particular regex pattern. You will also be able to control the behavior of null values in the column. If the
<strong>Allow Nulls</strong> option is <em>disabled</em>, any null values encountered will be reported as a failure when evaluating the
assertion.</p></li><li><p><strong>Column Metric Assertions</strong>: You will be able to choose from a list of common metrics and then specify the operator
and value to compare against. The list of metrics will vary based on the data type of the selected column. For example,
with numeric types, you can choose to compute the average value of the column, and then assert that it is greater than a
specific number. For string types, you can choose to compute the max length of all column values, and then assert that it
is less than a specific number.</p></li></ul></li><li><p>Configure the <strong>row evaluation type</strong>. This defines which rows in the table the Column Assertion should evaluate. You can choose
from the following options:</p><ul><li><p><strong>All Table Rows</strong>: Evaluate the Column Assertion against all rows in the table. This is the default option. Note that
this may not be desirable for large tables.</p></li><li><p><strong>Only Rows That Have Changed</strong>: Evaluate the Column Assertion only against rows that have changed since the last
evaluation. If you choose this option, you will need to specify a <strong>High Watermark Column</strong> to help determine which rows
have changed. A <strong>High Watermark Column</strong> is a column that contains a constantly-incrementing value - a date, a time, or
another always-increasing number. When selected, a query will be issued to the table to find only the rows which have changed since the last assertion run.</p></li></ul></li></ol><p align="left"><img loading="lazy" width="60%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/assertion-builder-column-row-evaluation-type.png" class="img_ev3q"></p><ol start="10"><li><p>(Optional) Click <strong>Advanced</strong> to further customize the Column Assertion. The options listed here will vary based on the
type of assertion you chose in the previous step.</p><ul><li><p><strong>Invalid Values Threshold</strong>: For <strong>Column Value</strong> assertions, you can configure the number of invalid values
(i.e. rows) that are allowed to fail before the assertion is marked as failing. This is useful if you want to allow a limited number
of invalid values in the column. By default this is 0, meaning the assertion will fail if any rows have an invalid column value.</p></li><li><p><strong>Source</strong>: For <strong>Column Metric</strong> assertions, you can choose the mechanism that will be used to obtain the column
metric. <strong>Query</strong> will issue a query to the dataset to compute the metric. <strong>DataHub Dataset Profile</strong> will use the
DataHub Dataset Profile metadata to compute the metric. Note that this option requires that dataset profiling
statistics are up-to-date as of the assertion run time.</p></li><li><p><strong>Additional Filters</strong>: You can choose to add additional filters to the query that will be used to evaluate the
assertion. This is useful if you want to limit the assertion to a subset of rows in the table. Note this option will not
be available if you choose <strong>DataHub Dataset Profile</strong> as the <strong>source</strong>.</p></li></ul></li><li><p>Configure actions that should be taken when the Column Assertion passes or fails</p></li></ol><p align="left"><img loading="lazy" width="45%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/assertion-builder-actions.png" class="img_ev3q"></p><ul><li><strong>Raise incident</strong>: Automatically raise a new DataHub <code>Column</code> Incident for the Table whenever the Column Assertion is failing. This
may indicate that the Table is unfit for consumption. Configure Slack Notifications under <strong>Settings</strong> to be notified when
an incident is created due to an Assertion failure.</li><li><strong>Resolve incident</strong>: Automatically resolve any incidents that were raised due to failures in this Column Assertion. Note that
any other incidents will not be impacted.</li></ul><ol start="12"><li>Click <strong>Next</strong> and then <strong>Save</strong>.</li></ol><p>And that&#x27;s it! DataHub will now begin to monitor your Column Assertion for the table.</p><p>Once your assertion has run, you will begin to see Success or Failure status for the Table</p><p align="left"><img loading="lazy" width="40%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/profile-passing-column-assertions-expanded.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="anomaly-detection-with-smart-assertions-">Anomaly Detection with Smart Assertions ⚡<a href="#anomaly-detection-with-smart-assertions-" class="hash-link" aria-label="Direct link to Anomaly Detection with Smart Assertions ⚡" title="Direct link to Anomaly Detection with Smart Assertions ⚡"></a></h2><p>As part of the <strong>DataHub Cloud Observe</strong> module, DataHub Cloud also provides <a href="/docs/managed-datahub/observe/smart-assertions">Smart Assertions</a> out of the box. These are dynamic, AI-powered Column Metric Assertions that you can use to monitor anomalies on column metrics of important warehouse Tables, without requiring any manual setup.</p><p>You can create smart assertions by simply selecting the column and the metric you wish to monitor, and then clicking the <code>Detect with AI</code> option in the UI:</p><p align="left"><img loading="lazy" width="40%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/column/column-smart-assertion.png" class="img_ev3q"></p><p><em>Coming soon: we&#x27;re making it easier to create Smart Assertions for multiple fields on a table, across multiple metrics, all in one go. 
If you&#x27;re interested in this today, please let your DataHub representative know.</em></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="stopping-a-column-assertion">Stopping a Column Assertion<a href="#stopping-a-column-assertion" class="hash-link" aria-label="Direct link to Stopping a Column Assertion" title="Direct link to Stopping a Column Assertion"></a></h2><p>In order to temporarily stop the evaluation of the assertion:</p><ol><li>Navigate to the <strong>Quality</strong> tab of the Table with the assertion</li><li>Click <strong>Column</strong> to open the Column Assertions</li><li>Click the &quot;Stop&quot; button for the assertion you wish to pause.</li></ol><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/stop-assertion.png" class="img_ev3q"></p><p>To resume the assertion, simply click <strong>Start</strong>.</p><p align="left"><img loading="lazy" width="25%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/observe/shared/start-assertion.png" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="creating-column-assertions-via-api">Creating Column Assertions via API<a href="#creating-column-assertions-via-api" class="hash-link" aria-label="Direct link to Creating Column Assertions via API" title="Direct link to Creating Column Assertions via API"></a></h2><p>Under the hood, DataHub Cloud implements Column Assertion Monitoring using two concepts:</p><ul><li><strong>Assertion</strong>: The specific expectation for the column metric. e.g. &quot;The value of an integer column is greater than 10 for all rows in the table.&quot; This is the &quot;what&quot;.</li><li><strong>Monitor</strong>: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific
mechanisms. This is the &quot;how&quot;.</li></ul><p>Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you&#x27;ll need the
<code>Edit Assertions</code> and <code>Edit Monitors</code> privileges for it.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="graphql">GraphQL<a href="#graphql" class="hash-link" aria-label="Direct link to GraphQL" title="Direct link to GraphQL"></a></h4><p>In order to create or update a Column Assertion, you can use the <code>upsertDatasetFieldAssertionMonitor</code> mutation.</p><h5 class="anchor anchorWithStickyNavbar_LWe7" id="examples">Examples<a href="#examples" class="hash-link" aria-label="Direct link to Examples" title="Direct link to Examples"></a></h5><p>Creating a Field Values Column Assertion that runs every 8 hours:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">mutation</span><span class="token plain"> </span><span class="token definition-mutation function" style="color:rgb(130, 170, 255)">upsertDatasetFieldAssertionMonitor</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">upsertDatasetFieldAssertionMonitor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">entityUrn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&lt;</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)">urn</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">of</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">entity</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">being</span><span class="token description string language-markdown tag" style="color:rgb(255, 85, 114)"> </span><span class="token description string language-markdown tag attr-name" style="color:rgb(255, 203, 107)">monitored</span><span class="token description string language-markdown tag punctuation" style="color:rgb(199, 146, 234)">&gt;</span><span class="token description string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token 
constant" style="color:rgb(130, 170, 255)">FIELD_VALUES</span><span class="token plain"></span><
<script src="/assets/js/runtime~main.8ae4198a.js"></script>
<script src="/assets/js/main.9d79f7e2.js"></script>
</body>
</html>