<!-- Repository viewer residue (not page content): 175 lines | 143 KiB | HTML | Raw Permalink Normal View History -->

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/modeling/metadata-model" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.3">
<title data-rh="true">The Metadata Model | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/metadata-modeling/metadata-model"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="The Metadata Model | DataHub"><meta data-rh="true" name="description" content="DataHub takes a schema-first approach to modeling metadata. We use the open-source Pegasus schema language (PDL) extended with a custom set of annotations to model metadata. The DataHub storage, serving, indexing and ingestion layer operates directly on top of the metadata model and supports strong types all the way from the client to the storage layer."><meta data-rh="true" property="og:description" content="DataHub takes a schema-first approach to modeling metadata. We use the open-source Pegasus schema language (PDL) extended with a custom set of annotations to model metadata. 
The DataHub storage, serving, indexing and ingestion layer operates directly on top of the metadata model and supports strong types all the way from the client to the storage layer."><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/metadata-modeling/metadata-model"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/metadata-modeling/metadata-model" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/metadata-modeling/metadata-model" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
<!-- Open connections to the Google Analytics and Tag Manager origins early. -->
<link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<!-- Google Analytics (gtag.js): load the library asynchronously, then queue the initial "js" and "config" commands on window.dataLayer. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
<!-- Google Tag Manager: ensure dataLayer exists, push the gtm.start event, and insert the async gtm.js loader before the first script element in the document. -->
<script>window.dataLayer=window.dataLayer||[]</script>
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
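<!-- NOTE: browsers ignore the frame-ancestors directive when CSP is delivered via a meta element; send it as an HTTP response header for it to take effect. -->
<meta http-equiv="Content-Security-Policy" content="frame-ancestors &#39;self&#39; https://*.acryl.io https://acryldata.io http://localhost:*">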
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
<script src="/scripts/rb2b.js" async defer="defer"></script>
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
<script src="/scripts/reo.js"></script>
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
<link rel="preload" href="/assets/js/runtime~main.5b50c8a3.js" as="script">
<link rel="preload" href="/assets/js/main.edc0853c.js" as="script">
</head>
<body class="navigation-with-keyboard">
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span></span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/metadata-modeling/metadata-model">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/metadata-modeling/metadata-model">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
/* "Get Cloud" call-to-action label in the navbar. */
/* Resting state: text uses the active menu color; the gradient has identical
   start and end stops, so it renders as a single flat color. */
.cloud-cta {
color: var(--ifm-menu-color-active);
font-weight: 600;
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
/* Background is twice the element's width so its position can be shifted. */
background-size: 200% 100%;
/* Clip the background to the glyph shapes (prefixed form first, then standard). */
-webkit-background-clip: text;
background-clip: text;
transition: background-image 0.3s ease;
}
/* Hover state: the text color becomes transparent so the text-clipped gradient
   (active menu color to #ff1493) shows through the letters. */
.cloud-cta:hover {
color: transparent;
/* The background shorthand resets size and clip, so both are re-declared below. */
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
background-size: 200% 100%;
-webkit-background-clip: text;
background-clip: text;
animation: gradientShift 3s ease infinite;
}
/* Moves the oversized background from the left edge to the right edge and back. */
@keyframes gradientShift {
0%, 100% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
}
</style>
<div class="cloud-cta">Get Cloud</div>
</a><a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=header&amp;utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
/* Slightly fade the Slack icon while the pointer is over it. */
.slack-logo:hover {
opacity: 0.8;
}
</style>
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="slack" height="20" style="margin: 10px 0 0 0;">
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category &#x27;What Is DataHub?&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category &#x27;Features&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud Overview</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--subli
of an <strong>Entity Registry</strong>, a catalog of Entities that comprise the Metadata Graph along with the aspects associated with each. Put
simply, this is where the &quot;schema&quot; of the model is defined.</p><p>Traditionally, the Entity Registry was constructed using <a href="https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot" target="_blank" rel="noopener noreferrer">Snapshot</a> models, which are schemas that explicitly tie
an Entity to the Aspects associated with it. An example is <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DatasetSnapshot.pdl" target="_blank" rel="noopener noreferrer">DatasetSnapshot</a>, which defines the core <code>Dataset</code> Entity.
The Aspects of the Dataset entity are captured via a union field inside a special &quot;Aspect&quot; schema. An example is
<a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DatasetAspect.pdl" target="_blank" rel="noopener noreferrer">DatasetAspect</a>.
This file associates dataset-specific aspects (like <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetProperties.pdl" target="_blank" rel="noopener noreferrer">DatasetProperties</a>) and common aspects (like <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/Ownership.pdl" target="_blank" rel="noopener noreferrer">Ownership</a>,
<a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/InstitutionalMemory.pdl" target="_blank" rel="noopener noreferrer">InstitutionalMemory</a>,
and <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/Status.pdl" target="_blank" rel="noopener noreferrer">Status</a>)
to the Dataset Entity. This approach to defining Entities has been deprecated in favor of the approach described below.</p><p>As of January 2022, DataHub has deprecated support for Snapshot models as a means of adding new entities. Instead,
the Entity Registry is defined inside a YAML configuration file called <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/resources/entity-registry.yml" target="_blank" rel="noopener noreferrer">entity-registry.yml</a>,
which is provided to DataHub&#x27;s Metadata Service at start up. This file declares Entities and Aspects by referring to their <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/Ownership.pdl#L7" target="_blank" rel="noopener noreferrer">names</a>.
At boot time, DataHub validates the structure of the registry file and ensures that it can find PDL schemas associated with
each aspect name provided by configuration (via the <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/Ownership.pdl#L6" target="_blank" rel="noopener noreferrer">@Aspect</a> annotation).</p><p>By moving to this format, evolving the Metadata Model becomes much easier. Adding Entities &amp; Aspects becomes a matter of adding an entry
to the YAML configuration, instead of creating new Snapshot / Aspect files.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="exploring-datahubs-metadata-model">Exploring DataHub&#x27;s Metadata Model<a href="#exploring-datahubs-metadata-model" class="hash-link" aria-label="Direct link to Exploring DataHub&#x27;s Metadata Model" title="Direct link to Exploring DataHub&#x27;s Metadata Model"></a></h2><p>To explore the current DataHub metadata model, you can inspect this high-level picture that shows the different entities and the edges representing the relationships between them.</p><p align="center"><img loading="lazy" width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/datahub-metadata-model.png" alt="Diagram of the DataHub metadata model showing entities and the relationships between them" class="img_ev3q"></p><p>To navigate the aspect model for specific entities and explore relationships using the <code>foreign-key</code> concept, you can view them in our demo environment or navigate the auto-generated docs in the <strong>Metadata Modeling/Entities</strong> section on the left.</p><p>For example, here are helpful links to the most popular entities in DataHub&#x27;s metadata model:</p><ul><li><a href="/docs/generated/metamodel/entities/dataset">Dataset</a>: <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Schema?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Profile</a> <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Documentation?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Documentation</a></li><li><a href="/docs/generated/metamodel/entities/dashboard">Dashboard</a>: <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Schema?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Profile</a> <a 
href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Documentation?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Documentation</a></li><li><a href="/docs/generated/metamodel/entities/corpuser">User (a.k.a CorpUser)</a>: <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Schema?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Profile</a> <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Documentation?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Documentation</a></li><li><a href="/docs/generated/metamodel/entities/dataflow">Pipeline (a.k.a DataFlow)</a>: <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Schema?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Profile</a> <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Documentation?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Documentation</a></li><li><a href="/docs/generated/metamodel/entities/mlfeaturetable">Feature Table (a.k.a. 
MLFeatureTable)</a>: <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Schema?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Profile</a> <a href="https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Documentation?is_lineage_mode=false" target="_blank" rel="noopener noreferrer">Documentation</a></li><li>For the full list of entities in the metadata model, browse them <a href="https://demo.datahub.com/browse/dataset/prod/datahub/entities" target="_blank" rel="noopener noreferrer">here</a> or use the <strong>Metadata Modeling/Entities</strong> section on the left.</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="generating-documentation-for-the-metadata-model">Generating documentation for the Metadata Model<a href="#generating-documentation-for-the-metadata-model" class="hash-link" aria-label="Direct link to Generating documentation for th
urn of the entity to retrieve.</p><p>For example, to fetch a Chart entity, we can use the following <code>curl</code>:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl --location --request GET &#x27;http://localhost:8080/entities/urn%3Ali%3Achart%3Acustomers&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>This request will return a set of versioned aspects, each at the latest version.</p><p>As you&#x27;ll notice, we perform the lookup using the url-encoded <em>Urn</em> associated with an entity.
The response would be an &quot;Entity&quot; record containing the Entity Snapshot (which in turn contains the latest aspects associated with the Entity).</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="fetching-versioned-aspects">Fetching Versioned Aspects<a href="#fetching-versioned-aspects" class="hash-link" aria-label="Direct link to Fetching Versioned Aspects" title="Direct link to Fetching Versioned Aspects"></a></h4><p>DataHub also supports fetching individual pieces of metadata about an Entity, which we call aspects. To do so,
you&#x27;ll provide both an Entity&#x27;s primary key (urn) along with the aspect name and version that you&#x27;d like to retrieve.</p><p>For example, to fetch the latest version of a Dataset&#x27;s SchemaMetadata aspect, you would issue the following query:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl &#x27;http://localhost:8080/aspects/urn%3Ali%3Adataset%3A(urn%3Ali%3AdataPlatform%3Afoo%2Cbar%2CPROD)?aspect=schemaMetadata&amp;version=0&#x27;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;version&quot;:0,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;aspect&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;com.linkedin.schema.SchemaMetadata&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;created&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;actor&quot;:&quot;urn:li:corpuser:fbar&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;time&quot;:0</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;platformSchema&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain"> &quot;com.linkedin.schema.KafkaSchema&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;documentSchema&quot;:&quot;{\&quot;type\&quot;:\&quot;record\&quot;,\&quot;name\&quot;:\&quot;MetadataChangeEvent\&quot;,\&quot;namespace\&quot;:\&quot;com.linkedin.mxe\&quot;,\&quot;doc\&quot;:\&quot;Kafka event for proposing a metadata change for an entity.\&quot;,\&quot;fields\&quot;:[{\&quot;name\&quot;:\&quot;auditHeader\&quot;,\&quot;type\&quot;:{\&quot;type\&quot;:\&quot;record\&quot;,\&quot;name\&quot;:\&quot;KafkaAuditHeader\&quot;,\&quot;namespace\&quot;:\&quot;com.linkedin.avro2pegasus.events\&quot;,\&quot;doc\&quot;:\&quot;Header\&quot;}}]}&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;lastModified&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;actor&quot;:&quot;urn:li:corpuser:fbar&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;time&quot;:0</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;schemaName&quot;:&quot;FooEvent&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fields&quot;:[</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;fieldPath&quot;:&quot;foo&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token pl
to fetch recent profiling runs &amp; statistics about a Dataset. To do so, you can issue a &quot;get&quot; request against the <code>/aspects</code> endpoint.</p><p>For example, to fetch dataset profiles (ie. stats) for a Dataset, you would issue the following query:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl -X POST &#x27;http://localhost:8080/aspects?action=getTimeseriesAspectValues&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--data &#x27;{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;urn&quot;: &quot;urn:li:dataset:(urn:li:dataPlatform:redshift,global_dev.larxynx_carcinoma_data_2020,PROD)&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;entity&quot;: &quot;dataset&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;aspect&quot;: &quot;datasetProfile&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;startTimeMillis&quot;: 1625122800000,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;endTimeMillis&quot;: 1627455600000</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}&#x27;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;value&quot;:{</span><br></span><span 
class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;limit&quot;:2000,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;aspectName&quot;:&quot;datasetProfile&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;endTimeMillis&quot;:1627455600000,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;startTimeMillis&quot;:1625122800000,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;entityName&quot;:&quot;dataset&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;values&quot;:[</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;aspect&quot;:{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 
&quot;value&quot;:&quot;{\&quot;timestampMillis\&quot;:1626912000000,\&quot;fieldProfiles\&quot;:[{\&quot;uniqueProportion\&quot;:1.0,\&quot;sampleValues\&quot;:[\&quot;123MMKK12\&quot;,\&quot;13KDFMKML\&quot;,\&quot;123NNJJJL\&quot;],\&quot;fieldPath\&quot;:\&quot;id\&quot;,\&quot;nullCount\&quot;:0,\&quot;nullProportion\&quot;:0.0,\&quot;uniqueCount\&quot;:3742},{\&quot;uniqueProportion\&quot;:1.0,\&quot;min\&quot;:\&quot;1524406400000\&quot;,\&quot;max\&quot;:\&quot;1624406400000\&quot;,\&quot;sampleValues\&quot;:[\&quot;1640023230002\&quot;,\&quot;1640343012207\&quot;,\&quot;16303412330117\&quot;],\&quot;mean\&quot;:\&quot;1555406400000\&quot;,\&quot;fieldPath\&quot;:\&quot;date\&quot;,\&quot;nullCount\&quot;:0,\&quot;nullProportion\&quot;:0.0,\&quot;uniqueCount\&quot;:3742},{\&quot;uniqueProportion\&quot;:0.037,\&quot;min\&quot;:\&quot;21\&quot;,\&quot;median\&quot;:\&quot;68\&quot;,\&quot;max\&quot;:\&quot;92\&quot;,\&quot;sampleValues\&quot;:[\&quot;45
that permit serialization of aspects in different ways. By default, the content type will be JSON, and the aspect can be deserialized into a normal JSON object
in the language of your choice. Note that this will soon become the de-facto way to both write and read individual aspects.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="search-query">Search Query<a href="#search-query" class="hash-link" aria-label="Direct link to Search Query" title="Direct link to Search Query"></a></h3><p>A search query allows you to search for entities matching an arbitrary string.</p><p>For example, to search for entities matching the term &quot;customers&quot;, we can use the following CURL:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl --location --request POST &#x27;http://localhost:8080/entities?action=search&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header &#x27;X-RestLi-Protocol-Version: 2.0.0&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header &#x27;Content-Type: application/json&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--data-raw &#x27;{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;input&quot;: &quot;\&quot;customers\&quot;&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;entity&quot;: &quot;chart&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;start&quot;: 0,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;count&quot;: 10</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}&#x27;</span><br></span></code></pre><div 
class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>The notable parameters are <code>input</code> and <code>entity</code>. <code>input</code> specifies the query we are issuing and <code>entity</code> specifies the Entity Type we want to search over. This is the common name of the Entity as defined in the @Entity definition. The response contains a list of Urns that can be used to fetch the full entity.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="relationship-query">Relationship Query<a href="#relationship-query" class="hash-link" aria-label="Direct link to Relationship Query" title="Direct link to Relationship Query"></a></h3><p>A relationship query allows you to find Entities connected to a particular source Entity via an edge of a particular type.</p><p>For example, to find the owners of a particular Chart, we can use the following CURL:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl --location --request GET --header &#x27;X-RestLi-Protocol-Version: 2.0.0&#x27; &#x27;http://localhost:8080/relationships?direction=OUTGOING&amp;urn=urn%3Ali%3Achart%3Acustomers&amp;types=List(OwnedBy)&#x27;</span><br></span></code></pre><div 
class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" cl
to the primary entity (urn:li:chart:customers) by a relationship named &quot;OwnedBy&quot;. That is, it permits fetching the owners of a given
chart.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="special-aspects">Special Aspects<a href="#special-aspects" class="hash-link" aria-label="Direct link to Special Aspects" title="Direct link to Special Aspects"></a></h3><p>There are a few special aspects worth mentioning:</p><ol><li>Key aspects: Contain the properties that uniquely identify an Entity.</li><li>Browse Paths aspect: Represents a hierarchical path associated with an Entity.</li></ol><h4 class="anchor anchorWithStickyNavbar_LWe7" id="key-aspects">Key aspects<a href="#key-aspects" class="hash-link" aria-label="Direct link to Key aspects" title="Direct link to Key aspects"></a></h4><p>As introduced above, Key aspects are structs / records that contain the fields that uniquely identify an Entity. There are
some constraints about the fields that can be present in Key aspects:</p><ul><li>All fields must be of STRING or ENUM type</li><li>All fields must be REQUIRED</li></ul><p>Keys can be created from and turned into <em>Urns</em>, which represent the stringified version of the Key record.
The algorithm used to do the conversion is straightforward: the fields of the Key aspect are substituted into a
string template based on their index (order of definition) using the following template:</p><div class="language-aidl codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-aidl codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">// Case 1: # key fields == 1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">urn:li:&lt;entity-name&gt;:key-field-1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">// Case 2: # key fields &gt; 1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">urn:li:&lt;entity-name&gt;:(key-field-1, key-field-2, ... key-field-n)</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>By convention, key aspects are defined under <a href="https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/metadata/key" target="_blank" rel="noopener noreferrer">metadata-models/src/main/pegasus/com/linkedin/metadata/key</a>.</p><h5 class="anchor anchorWithStickyNavbar_LWe7" id="example">Example<a href="#example" 
class="hash-link" aria-label="Direct link to Example" title="Direct link to Example"></a></h5><p>A CorpUser can be uniquely identified by a &quot;username&quot;, which should typically correspond to an LDAP name.</p><p>Thus, its Key Aspect is defined as follows:</p><div class="language-aidl codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-aidl codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">namespace com.linkedin.metadata.key</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">/**</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> * Key for a CorpUser</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> */</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">@Aspect = {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;name&quot;: &quot;corpUserKey&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">record CorpUserKey {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> /**</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> * The name of the AD/LDAP user.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> */</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> username: string</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain">}</span><br></span></code></pre><div class="buttonGroup__atx"><
entities. They manifest within the &quot;Explore&quot; features on the UI, allowing users to navigate through trees of related entities of a given type.</p><p>To support browsing a particular entity, add the &quot;browsePaths&quot; aspect to the entity in your <code>entity-registry.yml</code> file.</p><div class="language-aidl codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-aidl codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">/// entity-registry.yml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">entities:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> - name: dataset</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> doc: Datasets represent logical or physical data assets stored or represented in various data platforms. 
Tables, Views, Streams are all instances of datasets.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> keyAspect: datasetKey</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> aspects:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> - browsePaths</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>By declaring this aspect, you can produce custom browse paths as well as query for browse paths manually using a CURL like the following:</p><div class="language-aidl codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-aidl codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl --location --request POST &#x27;http://localhost:8080/entities?action=browse&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header &#x27;X-RestLi-Protocol-Version: 2.0.0&#x27; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header &#x27;Content-Type: application/json&#x27; \</span><br></span><span 
class="token-line" style="color:#bfc7d5"><span class="token plain">--data-raw &#x27;{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;path&quot;: &quot;/my/custom/browse/path&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;entity&quot;: &quot;dataset&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;start&quot;: 0,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;limit&quot;: 10</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2
However, they differ in what they represent and how they are handled by DataHub&#x27;s Metadata Service.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-versioned-aspects">1. Versioned Aspects<a href="#1-versioned-aspects" class="hash-link" aria-label="Direct link to 1. Versioned Aspects" title="Direct link to 1. Versioned Aspects"></a></h4><p>Versioned Aspects each have a <strong>numeric version</strong> associated with them. When a field in an aspect changes, a new
version is automatically created and stored within DataHub&#x27;s backend. In practice, all versioned aspects are stored inside a relational database
that can be backed up and restored. Versioned aspects power much of the UI experience you&#x27;re used to, including Ownership, Descriptions,
Tags, Glossary Terms, and more. Examples include Ownership, Global Tags, and Glossary Terms.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-timeseries-aspects">2. Timeseries Aspects<a href="#2-timeseries-aspects" class="hash-link" aria-label="Direct link to 2. Timeseries Aspects" title="Direct link to 2. Timeseries Aspects"></a></h4><p>Timeseries Aspects each have a <strong>timestamp</strong> associated with them. They are useful for representing
time-ordered events about an Entity. For example, the results of profiling a Dataset, or a set of Data Quality checks that
run every day. It is important to note that Timeseries aspects are NOT persisted inside the relational store, and are instead
persisted only in the search index (e.g. Elasticsearch) and the message queue (Kafka). This makes restoring timeseries aspects
in a disaster scenario a bit more challenging. Timeseries aspects can be queried by time range, which is what makes them most different from Versioned Aspects.
A timeseries aspect can be identified by the &quot;timeseries&quot; <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetProfile.pdl#L10" target="_blank" rel="noopener noreferrer">type</a> in its <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetProfile.pdl#L8" target="_blank" rel="noopener noreferrer">@Aspect</a> annotation.
Examples include <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetProfile.pdl" target="_blank" rel="noopener noreferrer">DatasetProfile</a> &amp; <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetUsageStatistics.pdl" target="_blank" rel="noopener noreferrer">DatasetUsageStatistics</a>.</p><p>Timeseries aspects are aspects that have a timestampMillis field, and are meant for aspects that continuously change on a
regular basis, e.g. data profiles, usage statistics, etc.</p><p>Each timeseries aspect must be declared &quot;type&quot;: &quot;timeseries&quot; and must
include <a href="https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/timeseries/TimeseriesAspectBase.pdl" target="_blank" rel="noopener noreferrer">TimeseriesAspectBase</a>
, which contains a timestampMillis field.</p><p>Timeseries aspects can also have fields annotated with @Searchable and @Relationship.</p><p>Please refer
to <a href="https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetProfile.pdl" target="_blank" rel="noopener noreferrer">DatasetProfile</a>
to see an example of a timeseries aspect.</p><p>Because timeseries aspects are updated on a frequent basis, ingests of these aspects go straight to Elasticsearch (
instead of being stored in the local DB).</p><p>You can retrieve timeseries aspects using the &quot;aspects?action=getTimeseriesAspectValues&quot; endpoint.</p><h5 class="anchor anchorWithStickyNavbar_LWe7" id="aggregatable-timeseries-aspects">Aggregatable Timeseries aspects<a href="#aggregatable-timeseries-aspects" class="hash-link" aria-label="Direct link to Aggregatable Timeseries aspects" title="Direct link to Aggregatable Timeseries aspects"></a></h5><p>Being able to perform SQL-like <em>group by + aggregate</em> operations on the timeseries aspects is a very natural use-case for
this kind of data (dataset profiles, usage statistics etc.). This section describes how to define, ingest and perform an
aggregation query against a timeseries aspect.</p><h6 class="anchor anchorWithStickyNavbar_LWe7" id="defining-a-new-aggregatable-timeseries-aspect">Defining a new aggregatable Timeseries aspect.<a href="#defining-a-new-aggregatable-timeseries-aspect" class="hash-link" aria-label="Direct link to Defining a new aggregatable Timeseries aspect." title="Direct link to Defining a new aggregatable Timeseries aspect."></a></h6><p>The <em>@TimeseriesField</em> and the <em>@TimeseriesFieldCollection</em> are two new annotations that can be attached to a field of
a <em>Timeseries aspect</em> that allows it to be part of an aggregatable query. The kinds of aggregations allowed on these
annotated fields depend on the type of the field, as well as the kind of aggregation, as
described <a href="#performing-an-aggregation-on-a-timeseries-aspect">here</a>.</p><ul><li><p><code>@TimeseriesField = {}</code> - this annotation can be used with any type of non-collection type field of the aspect such as
primitive types and records (see the <em>stat</em>, <em>strStat</em> and <em>strArray</em> fields
of <a href="https://github.com/datahub-project/datahub/blob/master/test-models/src/main/pegasus/com/datahub/test/TestEntityProfile.pdl" target="_blank" rel="noopener noreferrer">TestEntityProfile.pdl</a>).</p></li><li><p>The <code>@TimeseriesFieldCollection = {&quot;key&quot;:&quot;&lt;name of the key field of collection item type&gt;&quot;}</code> annotation allows for
aggregation support on the items of a collection type (supported only for the array type collections for now), where the
value of <code>&quot;key&quot;</code> is the name of the field in the collection item type that will be used to specify the group-by clause (
see <em>userCounts</em> and <em>fieldCounts</em> fields of <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetUsageStatistics.pdl" target="_blank" rel="noopener noreferrer">DatasetUsageStatistics.pdl</a>).</p></li></ul><p>In addition to defining the new aspect with appropriate Timeseries annotations,
the <a href="https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/resources/entity-registry.yml" target="_blank" rel="noopener noreferrer">entity-registry.yml</a>
file needs to be updated as well. Just add the new aspect name under the list of aspects against the appropriate entity as shown below, such as <code>datasetUsageStatistics</code> for the aspect DatasetUsageStatistics.</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">entities</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">-</span><span class="token plain"> </span><span class="token key atrule">name</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dataset</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">keyAspect</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> datasetKey</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">aspects</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">-</span><span class="token plain"> datasetProfile</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">-</span><span class="token plain"> 
datasetUsageStatistics</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h6 class="anchor anchorWithStickyNavbar_LWe7" id="ingesting-a-timeseries-aspect">Ingesting a Timeseries aspect<a href="#ingesting-a-timeseries-aspect" class="hash-link" aria-label="Direct link to Ingesting a Timeseries aspect" title="Direct link to Ingesting a Timeseries aspect"></a></h6><p>The timeseries aspects can be ingested via the GMS REST endpoint <code>/aspects?action=ingestProposal</code> or via the python API.</p><p>Example1: Via GMS REST API using curl.</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">curl</span><span class="token plain"> --location --request POST </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;http://localhost:8080/aspects?action=ingestProposal&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header </span><span class="token string" style="color:rgb(195, 232, 
141)">&#x27;X-RestLi-Protocol-Version: 2.0.0&#x27;</span><span class="token plain"> </span><span class="token punctu
accepts the following params.</p><ul><li><code>entityName</code> - The name of the entity the aspect is associated with.</li><li><code>aspectName</code> - The name of the aspect.</li><li><code>filter</code> - Any pre-filtering criteria before grouping and aggregations are performed.</li><li><code>metrics</code> - A list of aggregation specifications. The <code>fieldPath</code> member of an aggregation specification refers to the
field name against which the aggregation needs to be performed, and the <code>aggregationType</code> specifies the kind of aggregation.</li><li><code>buckets</code> - A list of grouping bucket specifications. Each grouping bucket has a <code>key</code> field that refers to the field
to use for grouping. The <code>type</code> field specifies the kind of grouping bucket.</li></ul><p>We support three kinds of aggregations that can be specified in an aggregation query on the Timeseries annotated fields.
The values that <code>aggregationType</code> can take are:</p><ul><li><code>LATEST</code>: The latest value of the field in each bucket. Supported for any type of field.</li><li><code>SUM</code>: The cumulative sum of the field in each bucket. Supported only for integral types.</li><li><code>CARDINALITY</code>: The number of unique values or the cardinality of the set in each bucket. Supported for string and
record types.</li></ul><p>We support two types of grouping for defining the buckets to perform aggregations against:</p><ul><li><code>DATE_GROUPING_BUCKET</code>: Allows for creating time-based buckets such as by second, minute, hour, day, week, month,
quarter, year etc. Should be used in conjunction with a timestamp field whose value is in milliseconds since <em>epoch</em>.
The <code>timeWindowSize</code> param specifies the date histogram bucket width.</li><li><code>STRING_GROUPING_BUCKET</code>: Allows for creating buckets grouped by the unique values of a field. Should always be used in
conjunction with a string type field.</li></ul><p>The API returns a generic SQL-like table as the <code>table</code> member of the output that contains the results of
the <code>group-by/aggregate</code> query, in addition to echoing the input params.</p><ul><li><code>columnNames</code>: the names of the table columns. The group-by <code>key</code> names appear in the same order as they are specified
in the request. Aggregation specifications follow the grouping fields in the same order as specified in the request,
and will be named <code>&lt;agg_name&gt;_&lt;fieldPath&gt;</code>.</li><li><code>columnTypes</code>: the data types of the columns.</li><li><code>rows</code>: the data values, each row corresponding to the respective bucket(s).</li></ul><p>Example: Latest unique user count for each day.</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># QUERY</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">curl</span><span class="token plain"> --location --request POST </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;http://localhost:8080/analytics?action=getTimeseriesStats&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;X-RestLi-Protocol-Version: 2.0.0&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">--header </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;Content-Type: application/json&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain">--data-raw </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;entityName&quot;: &quot;dataset&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;aspectName&quot;: &quot;datasetUsageStatistics&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;filter&quot;: {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;criteria&quot;: []</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;metrics&quot;: [</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;fieldPath&quot;: &quot;uniqueUserCount&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;aggregationType&quot;: &quot;LATEST&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> }</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> ],</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"> &quot;buckets&quot;: [</span><br></span><span class="token-line" 
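style="color:#bfc7d5"><span class="token string" style="color:rgb(195, 232, 141)"></span></span></code></pre></div></div><!-- NOTE(review): the example request body is cut off after "buckets": [ in this copy of the page; restore the remainder from the original documentation source. -->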
<script src="/assets/js/runtime~main.5b50c8a3.js"></script>
<script src="/assets/js/main.edc0853c.js"></script>
</body>
</html>