mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-23 09:32:04 +00:00
160 lines
165 KiB
HTML
160 lines
165 KiB
HTML
![]() |
<!doctype html>
|
|||
|
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/advanced/monitoring" data-has-hydrated="false">
|
|||
|
<head>
|
|||
|
<meta charset="UTF-8">
|
|||
|
<meta name="generator" content="Docusaurus v2.4.3">
|
|||
|
<title data-rh="true">Monitoring DataHub | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/advanced/monitoring"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Monitoring DataHub | DataHub"><meta data-rh="true" name="description" content="Overview"><meta data-rh="true" property="og:description" content="Overview"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/advanced/monitoring"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/advanced/monitoring" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/advanced/monitoring" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
|
|||
|
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
|
|||
|
|
|||
|
<link rel="preconnect" href="https://www.google-analytics.com">
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
|
|||
|
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script>window.dataLayer=window.dataLayer||[]</script>
|
|||
|
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
|
|||
|
|
|||
|
|
|||
|
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<meta http-equiv="Content-Security-Policy" content="frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*">
|
|||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
|
|||
|
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
|
|||
|
<script src="/scripts/rb2b.js" async defer="defer"></script>
|
|||
|
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
|
|||
|
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
|
|||
|
<script src="/scripts/reo.js"></script>
|
|||
|
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
|
|||
|
<link rel="preload" href="/assets/js/runtime~main.310f59c4.js" as="script">
|
|||
|
<link rel="preload" href="/assets/js/main.49198d73.js" as="script">
|
|||
|
</head>
|
|||
|
<body class="navigation-with-keyboard">
|
|||
|
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
|
|||
|
|
|||
|
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
|
|||
|
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span> →</span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/advanced/monitoring">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/advanced/monitoring">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.cloud-cta {
|
|||
|
color: var(--ifm-menu-color-active);
|
|||
|
font-weight: 600;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
transition: background-image 0.3s ease;
|
|||
|
}
|
|||
|
.cloud-cta:hover {
|
|||
|
color: transparent;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
animation: gradientShift 3s ease infinite;
|
|||
|
}
|
|||
|
@keyframes gradientShift {
|
|||
|
0%, 100% { background-position: 0% 50%; }
|
|||
|
50% { background-position: 100% 50%; }
|
|||
|
}
|
|||
|
</style>
|
|||
|
<div class="cloud-cta">Get Cloud</div>
|
|||
|
</a><a href="https://datahub.com/slack?utm_source=docs&utm_medium=header&utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.slack-logo:hover {
|
|||
|
opacity: 0.8;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="slack" height="20" style="margin: 10px 0 0 0;">
|
|||
|
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category 'What Is DataHub?'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category 'Features'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud Overview</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--subli
|
|||
|
and ensuring system reliability. This comprehensive guide covers how to implement observability in DataHub through tracing and metrics,
|
|||
|
and how to extract valuable insights from your running instances.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="why-monitor-datahub">Why Monitor DataHub?<a href="#why-monitor-datahub" class="hash-link" aria-label="Direct link to Why Monitor DataHub?" title="Direct link to Why Monitor DataHub?"></a></h2><p>Effective monitoring enables you to:</p><ul><li>Identify Performance Bottlenecks: Pinpoint slow queries or API endpoints</li><li>Debug Issues Faster: Trace requests across distributed components to locate failures</li><li>Meet SLAs: Track and alert on key performance indicators</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="observability-components">Observability Components<a href="#observability-components" class="hash-link" aria-label="Direct link to Observability Components" title="Direct link to Observability Components"></a></h2><p>DataHub's observability strategy consists of two complementary approaches:</p><ol><li><p>Metrics Collection</p><p><strong>Purpose:</strong> Aggregate statistical data about system behavior over time
|
|||
|
<strong>Technology:</strong> Transitioning from DropWizard/JMX to Micrometer</p><p><strong>Current State:</strong> DropWizard metrics exposed via JMX, collected by Prometheus
|
|||
|
<strong>Future Direction:</strong> Native Micrometer integration for Spring-based metrics
|
|||
|
<strong>Compatibility:</strong> Prometheus-compatible format with support for other metrics backends</p><p>Key Metrics Categories:</p><ul><li>Performance Metrics: Request latency, throughput, error rates</li><li>Resource Metrics: CPU, memory utilization</li><li>Application Metrics: Cache hit rates, queue depths, processing times</li><li>Business Metrics: Entity counts, ingestion rates, search performance</li></ul></li><li><p>Distributed Tracing</p><p><strong>Purpose:</strong> Track individual requests as they flow through multiple services and components
|
|||
|
<strong>Technology:</strong> OpenTelemetry-based instrumentation</p><ul><li>Provides end-to-end visibility of request lifecycles</li><li>Automatically instruments popular libraries (Kafka, JDBC, Elasticsearch)</li><li>Supports multiple backend systems (Jaeger, Zipkin, etc.)</li><li>Enables custom span creation with minimal code changes</li></ul><p>Key Benefits:</p><ul><li>Visualize request flow across microservices</li><li>Identify latency hotspots</li><li>Understand service dependencies</li><li>Debug complex distributed transactions</li></ul></li></ol><h2 class="anchor anchorWithStickyNavbar_LWe7" id="graphql-instrumentation-micrometer">GraphQL Instrumentation (Micrometer)<a href="#graphql-instrumentation-micrometer" class="hash-link" aria-label="Direct link to GraphQL Instrumentation (Micrometer)" title="Direct link to GraphQL Instrumentation (Micrometer)"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="overview-1">Overview<a href="#overview-1" class="hash-link" aria-label="Direct link to Overview" title="Direct link to Overview"></a></h3><p>DataHub provides comprehensive instrumentation for its GraphQL API through Micrometer metrics, enabling detailed performance
|
|||
|
monitoring and debugging capabilities. The instrumentation system offers flexible configuration options to balance between
|
|||
|
observability depth and performance overhead.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="why-path-level-graphql-instrumentation-matters">Why Path-Level GraphQL Instrumentation Matters<a href="#why-path-level-graphql-instrumentation-matters" class="hash-link" aria-label="Direct link to Why Path-Level GraphQL Instrumentation Matters" title="Direct link to Why Path-Level GraphQL Instrumentation Matters"></a></h3><p>Traditional GraphQL monitoring only tells you "the search query is slow" but not <strong>why</strong>. Without path-level instrumentation,
|
|||
|
you're blind to which specific fields are causing performance bottlenecks in complex nested queries.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="real-world-example">Real-World Example<a href="#real-world-example" class="hash-link" aria-label="Direct link to Real-World Example" title="Direct link to Real-World Example"></a></h3><p>Consider this GraphQL query:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">query</span><span class="token plain"> </span><span class="token definition-query function" style="color:rgb(130, 170, 255)">getSearchResults</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">search</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">query</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"sales data"</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span 
class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token object">searchResults</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token object">entity</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">...</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">on</span><span class="token plain"> </span><span class="token class-name" style="color:rgb(255, 203, 107)">Dataset</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">name</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token object">owner</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Path: /search/searchResults/entity/owner</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token 
object">corpUser</span><
|
|||
|
<strong>With path metrics</strong>: "Lineage resolution is the bottleneck"</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="key-benefits">Key Benefits<a href="#key-benefits" class="hash-link" aria-label="Direct link to Key Benefits" title="Direct link to Key Benefits"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-surgical-optimization">1. <strong>Surgical Optimization</strong><a href="#1-surgical-optimization" class="hash-link" aria-label="Direct link to 1-surgical-optimization" title="Direct link to 1-surgical-optimization"></a></h4><p>Instead of guessing, you know exactly which resolver needs optimization. Maybe lineage needs better caching or pagination.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-smart-query-patterns">2. <strong>Smart Query Patterns</strong><a href="#2-smart-query-patterns" class="hash-link" aria-label="Direct link to 2-smart-query-patterns" title="Direct link to 2-smart-query-patterns"></a></h4><p>Identify expensive patterns like:</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># These paths consistently slow:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">/</span><span class="token important">*/lineage/upstreamEntities/*</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">/</span><span class="token important">*/siblings/*/platform</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 
112, 152);font-style:italic"># Action: Add field-level caching or lazy loading</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-client-specific-debugging">3. <strong>Client-Specific Debugging</strong><a href="#3-client-specific-debugging" class="hash-link" aria-label="Direct link to 3-client-specific-debugging" title="Direct link to 3-client-specific-debugging"></a></h4><p>Different clients request different fields. Path instrumentation shows:</p><ul><li>Web UI requests are slow (requesting everything)</li><li>API integrations timeout (requesting deep lineage)</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="4-n1-query-detection">4. 
<strong>N+1 Query Detection</strong><a href="#4-n1-query-detection" class="hash-link" aria-label="Direct link to 4-n1-query-detection" title="Direct link to 4-n1-query-detection"></a></h4><p>Spot resolver patterns that indicate N+1 problems:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">/users/0/permissions - 10ms</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">/users/1/permissions - 10ms</span><br></span><span class="token-line" style="color:#bfc7d5"><span cl
|
|||
|
and efficiency. This instrumentation is crucial for DataHub, where caching significantly impacts query performance and system load.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="automatic-cache-metrics">Automatic Cache Metrics<a href="#automatic-cache-metrics" class="hash-link" aria-label="Direct link to Automatic Cache Metrics" title="Direct link to Automatic Cache Metrics"></a></h3><p>When caches are registered with Micrometer, comprehensive metrics are automatically collected without code changes:</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="core-metrics">Core Metrics<a href="#core-metrics" class="hash-link" aria-label="Direct link to Core Metrics" title="Direct link to Core Metrics"></a></h4><ul><li><strong><code>cache.size</code></strong> (Gauge) - Current number of entries in the cache</li><li><strong><code>cache.gets</code></strong> (Counter) - Cache access attempts, tagged with:<ul><li><code>result=hit</code> - Successful cache hits</li><li><code>result=miss</code> - Cache misses requiring backend fetch</li></ul></li><li><strong><code>cache.puts</code></strong> (Counter) - Number of entries added to cache</li><li><strong><code>cache.evictions</code></strong> (Counter) - Number of entries evicted</li><li><strong><code>cache.eviction.weight</code></strong> (Counter) - Total weight of evicted entries (for size-based eviction)</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="derived-metrics">Derived Metrics<a href="#derived-metrics" class="hash-link" aria-label="Direct link to Derived Metrics" title="Direct link to Derived Metrics"></a></h4><p>Calculate key performance indicators using Prometheus queries:</p><div class="language-promql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-promql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"># Cache hit rate (should be >80% for hot caches)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">sum(rate(cache_gets_total{result="hit"}[5m])) by (cache) /</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">sum(rate(cache_gets_total[5m])) by (cache)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Cache miss rate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">1 - (cache_hit_rate)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Eviction rate (indicates cache pressure)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">rate(cache_evictions_total[5m])</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="datahub-cache-configuration">DataHub Cache Configuration<a href="#datahub-cache-configuration" class="hash-link" aria-label="Direct link to DataHub Cache Configuration" title="Direct link to DataHub Cache Configuration"></a></h3><p>DataHub 
uses multiple cache layers, each automatically instrumented:</p><h4 class="anchor anchorWithS
|
|||
|
bottlenecks and resource utilization. For DataHub's concurrent operations, this monitoring is essential for maintaining
|
|||
|
performance under load.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="automatic-executor-metrics">Automatic Executor Metrics<a href="#automatic-executor-metrics" class="hash-link" aria-label="Direct link to Automatic Executor Metrics" title="Direct link to Automatic Executor Metrics"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="pool-state-metrics">Pool State Metrics<a href="#pool-state-metrics" class="hash-link" aria-label="Direct link to Pool State Metrics" title="Direct link to Pool State Metrics"></a></h4><ul><li><strong><code>executor.pool.size</code></strong> (Gauge) - Current number of threads in pool</li><li><strong><code>executor.pool.core</code></strong> (Gauge) - Core (minimum) pool size</li><li><strong><code>executor.pool.max</code></strong> (Gauge) - Maximum allowed pool size</li><li><strong><code>executor.active</code></strong> (Gauge) - Threads actively executing tasks</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="queue-metrics">Queue Metrics<a href="#queue-metrics" class="hash-link" aria-label="Direct link to Queue Metrics" title="Direct link to Queue Metrics"></a></h4><ul><li><strong><code>executor.queued</code></strong> (Gauge) - Tasks waiting in queue</li><li><strong><code>executor.queue.remaining</code></strong> (Gauge) - Available queue capacity</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="performance-metrics">Performance Metrics<a href="#performance-metrics" class="hash-link" aria-label="Direct link to Performance Metrics" title="Direct link to Performance Metrics"></a></h4><ul><li><strong><code>executor.completed</code></strong> (Counter) - Total completed tasks</li><li><strong><code>executor.seconds</code></strong> (Timer) - Task execution time distribution</li><li><strong><code>executor.rejected</code></strong> (Counter) - Tasks rejected due to saturation</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="datahub-executor-configurations">DataHub Executor Configurations<a 
href="#datahub-executor-configurations" class="hash-link" aria-label="Direct link to DataHub Executor Configurations" title="Direct link to DataHub Executor Configurations"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-graphql-query-executor">1. GraphQL Query Executor<a href="#1-graphql-query-executor" class="hash-link" aria-label="Direct link to 1. GraphQL Query Executor" title="Direct link to 1. GraphQL Query Executor"></a></h4><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">graphQL.concurrency</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">separateThreadPool</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token boolean important" style="color:rgb(255, 88, 116)">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">corePoolSize</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">20</span><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Base threads</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">maxPoolSize</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">:</span><span class="token plain"> </span><span class="token number" style="color:rg
|
|||
|
are units of work, containing various context about the work being done as well as time taken to finish the work. By
|
|||
|
looking at the trace, we can more easily identify performance bottlenecks.</p><p>We enable tracing by using the <a href="https://github.com/open-telemetry/opentelemetry-java-instrumentation" target="_blank" rel="noopener noreferrer">OpenTelemetry java instrumentation library</a>.
|
|||
|
This project provides a Java agent JAR that is attached to java applications. The agent injects bytecode to capture
|
|||
|
telemetry from popular libraries.</p><p>Using the agent we are able to</p><ol><li>Plug and play different tracing tools based on the user's setup: Jaeger, Zipkin, or other tools</li><li>Get traces for Kafka, JDBC, and Elasticsearch without any additional code</li><li>Track traces of any function with a simple <code>@WithSpan</code> annotation</li></ol><p>You can enable the agent by setting env variable <code>ENABLE_OTEL</code> to <code>true</code> for GMS and MAE/MCE consumers. In our
|
|||
|
example <a href="https://github.com/datahub-project/datahub/blob/master/docker/monitoring/docker-compose.monitoring.yml" target="_blank" rel="noopener noreferrer">docker-compose</a>, we export traces to a local Jaeger
|
|||
|
instance by setting env variable <code>OTEL_TRACES_EXPORTER</code> to <code>jaeger</code>
|
|||
|
and <code>OTEL_EXPORTER_JAEGER_ENDPOINT</code> to <code>http://jaeger-all-in-one:14250</code>, but you can easily change this behavior by
|
|||
|
setting the correct env variables. Refer to
|
|||
|
this <a href="https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk-extensions/autoconfigure/README.md" target="_blank" rel="noopener noreferrer">doc</a> for
|
|||
|
all configs.</p><p>Once the above is set up, you should be able to see a detailed trace as a request is sent to GMS. We added
|
|||
|
the <code>@WithSpan</code> annotation in various places to make the trace more readable. You should start to see traces in the
|
|||
|
tracing collector of choice. Our example <a href="https://github.com/datahub-project/datahub/blob/master/docker/monitoring/docker-compose.monitoring.yml" target="_blank" rel="noopener noreferrer">docker-compose</a> deploys
|
|||
|
an instance of Jaeger with port 16686. The traces should be available at http://localhost:16686.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="configuration-note">Configuration Note<a href="#configuration-note" class="hash-link" aria-label="Direct link to Configuration Note" title="Direct link to Configuration Note"></a></h3><p>We recommend using either <code>grpc</code> or <code>http/protobuf</code>, configured using <code>OTEL_EXPORTER_OTLP_PROTOCOL</code>. Avoid using <code>http</code>, which will not work as expected due to the size of
|
|||
|
the generated spans.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="micrometer">Micrometer<a href="#micrometer" class="hash-link" aria-label="Direct link to Micrometer" title="Direct link to Micrometer"></a></h2><p>DataHub is transitioning to Micrometer as its primary metrics framework, representing a significant upgrade in observability
|
|||
|
capabilities. Micrometer is a vendor-neutral application metrics facade that provides a simple, consistent API for the most
|
|||
|
popular monitoring systems, allowing you to instrument your JVM-based application code without vendor lock-in.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="why-micrometer">Why Micrometer?<a href="#why-micrometer" class="hash-link" aria-label="Direct link to Why Micrometer?" title="Direct link to Why Micrometer?"></a></h3><ol><li><p>Native Spring Integration</p><p>As DataHub uses Spring Boot, Micrometer provides seamless integration with:</p><ul><li>Auto-configuration of common metrics</li><li>Built-in metrics for HTTP requests, JVM, caches, and more</li><li>Spring Boot Actuator endpoints for metrics exposure</li><li>Automatic instrumentation of Spring components</li></ul></li><li><p>Multi-Backend Support</p><p>Unlike the legacy DropWizard approach that primarily targets JMX, Micrometer natively supports:</p><ul><li>Prometheus (recommended for cloud-native deployments)</li><li>JMX (for backward compatibility)</li><li>StatsD</li><li>CloudWatch</li><li>Datadog</li><li>New Relic</li><li>And many more...</li></ul></li><li><p>Dimensional Metrics</p><p>Micrometer embraces modern dimensional metrics with <strong>labels/tags</strong>, enabling:</p><ul><li>Rich querying and aggregation capabilities</li><li>Better cardinality control</li><li>More flexible dashboards and alerts</li><li>Natural integration with cloud-native monitoring systems</li></ul></li></ol><h2 class="anchor anchorWithStickyNavbar_LWe7" id="micrometer-transition-plan">Micrometer Transition Plan<a href="#micrometer-transition-plan" class="hash-link" aria-label="Direct link to Micrometer Transition Plan" title="Direct link to Micrometer Transition Plan"></a></h2><p>DataHub is undertaking a strategic transition from DropWizard metrics (exposed via JMX) to Micrometer, a modern vendor-neutral metrics facade.
|
|||
|
This transition aims to provide better cloud-native monitoring capabilities while maintaining backward compatibility for existing
|
|||
|
monitoring infrastructure.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="current-state">Current State<a href="#current-state" class="hash-link" aria-label="Direct link to Current State" title="Direct link to Current State"></a></h3><p>What We Have Now:</p><ul><li>Primary System: DropWizard metrics exposed through JMX</li><li>Collection Method: Prometheus-JMX exporter scrapes JMX metrics</li><li>Dashboards: Grafana dashboards consuming JMX-sourced metrics</li><li>Code Pattern: MetricUtils class for creating counters and timers</li><li>Integration: Basic Spring integration with manual metric creation</li></ul><p align="center"><img loading="lazy" width="80%" src="https://raw.githubusercontent.com/datahub-project/static-assets/0f6ae5ae889ee4e780504ca566670867acf975ff/imgs/advanced/monitoring/monitoring_current.svg" class="img_ev3q"></p><p>Limitations:</p><ul><li>JMX-centric approach limits monitoring backend options</li><li>No unified observability (separate instrumentation for metrics and traces)</li><li>No support for dimensional metrics and tags</li><li>Manual instrumentation required for most components</li><li>Legacy naming conventions without proper tagging</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="transition-state">Transition State<a href="#transition-state" class="hash-link" aria-label="Direct link to Transition State" title="Direct link to Transition State"></a></h3><p>What We're Building:</p><ul><li>Primary System: Micrometer with native Prometheus support</li><li>Collection Method: Direct Prometheus scraping via /actuator/prometheus</li><li>Unified Telemetry: Single instrumentation point for both metrics and traces</li><li>Modern Patterns: Dimensional metrics with rich tagging</li><li>Multi-Backend: Support for Prometheus, StatsD, CloudWatch, Datadog, etc.</li><li>Auto-Instrumentation: Automatic metrics for Spring components</li></ul><p align="center"><img loading="lazy" width="80%" 
src="https://raw.githubusercontent.com/datahub-project/static-assets/0f6ae5ae889ee4e780504ca566670867acf975ff/imgs/advanced/monitoring/monitoring_transition.svg" class="img_ev3q"></p><p>Key Decisions and Rationale:</p><ol><li><p>Dual Registry Approach</p><p><strong>Decision:</strong> Run both systems in parallel with tag-based routing</p><p><strong>Rationale:</strong></p><ul><li>Zero downtime or disruption</li><li>Gradual migration at component level</li><li>Easy rollback if issues arise</li></ul></li><li><p>Prometheus as Primary Target</p><p><strong>Decision:</strong> Focus on Prometheus for new metrics</p><p><strong>Rationale:</strong></p><ul><li>Industry standard for cloud-native applications</li><li>Rich query language and ecosystem</li><li>Better suited for dimensional metrics</li></ul></li><li><p>Observation API Adoption</p><p><strong>Decision:</strong> Promote Observation API for new instrumentation</p><p><strong>Rationale:</strong></p><ul><li>Single instrumentation for metrics + traces</li><li>Reduced code complexity</li><li>Consistent naming across telemetry types</li></ul></li></ol><h3 class="anchor anchorWithStickyNavbar_LWe7" id="future-state">Future State<a href="#future-state" class="hash-link" aria-label="Direct link to Future State" title="Direct link to Future State"></a></h3><p align="center"><img loading="lazy" width="80%" src="https://raw.githubusercontent.com/datahub-project/static-assets/0f6ae5ae889ee4e780504ca566670867acf975ff/imgs/advanced/monitoring/monitoring_future.svg" class="img_ev3q"></p><p>Once fully adopted, Micrometer will transform DataHub's observability from a collection of separate tools into a unified platform.
|
|||
|
This means developers can focus on building features while getting comprehensive telemetry "for free."</p><p>Intelligent and Adaptive Monitoring</p><ul><li>Dynamic Instrumentation: Enable detailed metrics for specific entities or operations on-demand without code changes</li><li>Environment-Aware Metrics: Automatically route metrics to Prometheus in Kubernetes, CloudWatch in AWS, or Azure Monitor in Azure</li><li>Built-in SLO Tracking: Define Service Level Objectives declaratively and automatically track error budgets</li></ul><p>Developer and Operator Experience</p><ul><li>Adding @Observed to a method automatically generates latency percentiles, error rates, and distributed trace spans</li><li>Every service exposes golden signals (latency, traffic, errors, saturation) out-of-the-box</li><li>Business metrics (entity ingestion rates, search performance) seamlessly correlate with system metrics</li><li>Self-documenting telemetry where metrics, traces, and logs tell a coherent operational story</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="dropwizard--jmx">DropWizard & JMX<a href="#dropwizard--jmx" class="hash-link" aria-label="Direct link to DropWizard & JMX" title="Direct link to DropWizard & JMX"></a></h2><p>We originally decided to use <a href="https://metrics.dropwizard.io/4.2.0/" target="_blank" rel="noopener noreferrer">Dropwizard Metrics</a> to export custom metrics to JMX,
|
|||
|
and then use <a href="https://github.com/prometheus/jmx_exporter" target="_blank" rel="noopener noreferrer">Prometheus-JMX exporter</a> to export all JMX metrics to
|
|||
|
Prometheus. This allows our code base to be independent of the metrics collection tool, making it easy for people to use
|
|||
|
their tool of choice. You can enable the agent by setting env variable <code>ENABLE_PROMETHEUS</code> to <code>true</code> for GMS and MAE/MCE
|
|||
|
consumers. Refer to this example <a href="https://github.com/datahub-project/datahub/blob/master/docker/monitoring/docker-compose.monitoring.yml" target="_blank" rel="noopener noreferrer">docker-compose</a> for setting the
|
|||
|
variables.</p><p>In our example <a href="https://github.com/datahub-project/datahub/blob/master/docker/monitoring/docker-compose.monitoring.yml" target="_blank" rel="noopener noreferrer">docker-compose</a>, we have configured prometheus to
|
|||
|
scrape port 4318 of each container, which the JMX exporter uses to export metrics. We also configured Grafana to
|
|||
|
listen to prometheus and create useful dashboards. By default, we provide two
|
|||
|
dashboards: <a href="https://grafana.com/grafana/dashboards/14845" target="_blank" rel="noopener noreferrer">JVM dashboard</a> and DataHub dashboard.</p><p>In the JVM dashboard, you can find detailed charts based on JVM metrics like CPU/memory/disk usage. In the DataHub
|
|||
|
dashboard, you can find charts to monitor each endpoint and the kafka topics. Using the example implementation, go
|
|||
|
to http://localhost:3001 to find the Grafana dashboards! (Username: admin, PW: admin)</p><p>To make it easy to track various metrics within the code base, we created the MetricUtils class. This util class creates a
|
|||
|
central metric registry, sets up the JMX reporter, and provides convenient functions for setting up counters and timers.
|
|||
|
You can run the following to create a counter and increment.</p><div class="language-java codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-java codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">metricUtils</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255)">counter</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token keyword" style="font-style:italic">this</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255)">getClass</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">"metricName"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255)">increment</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>You can run the following to time a block of code.</p><div class="language-java codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-java codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token class-name" style="color:rgb(255, 203, 107)">Timer</span><span class="token class-name punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token class-name" style="color:rgb(255, 203, 107)">Context</span><span class="token plain"> ignored</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">metricUtils</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255)">timer</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token keyword" style="font-style:italic">this</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255)">getClass</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token string" style="color:rgb(195, 232, 141)">"timerName"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 
146, 234)">.</span><span class="token function" style="color:rgb(130, 170, 255
|
|||
|
this <a href="https://github.com/datahub-project/datahub/tree/master/docker/monitoring" target="_blank" rel="noopener noreferrer">directory</a>. Take a look at the docker-compose
|
|||
|
files, which add the necessary env variables to existing containers and spawn new containers (Jaeger, Prometheus,
|
|||
|
Grafana).</p><p>You can add the above docker-compose file using the <code>-f &lt;&lt;path-to-compose-file&gt;&gt;</code> option when running docker-compose commands.
|
|||
|
For instance,</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token function" style="color:rgb(130, 170, 255)">docker-compose</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -f quickstart/docker-compose.quickstart.yml </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -f monitoring/docker-compose.monitoring.yml </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> pull </span><span class="token operator" style="color:rgb(137, 221, 255)">&&</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">docker-compose</span><span class="token plain"> -p datahub </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -f quickstart/docker-compose.quickstart.yml </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain"> -f monitoring/docker-compose.monitoring.yml </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> up</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>We set up quickstart.sh, dev.sh, and dev-without-neo4j.sh to add the above docker-compose when MONITORING=true. For
|
|||
|
instance <code>MONITORING=true ./docker/quickstart.sh</code> will add the correct env variables to start collecting traces and
|
|||
|
metrics, and also deploy Jaeger, Prometheus, and Grafana. We will soon support this as a flag during quickstart.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="health-check-endpoint">Health check endpoint<a href="#health-check-endpoint" class="hash-link" aria-label="Direct link to Health check endpoint" title="Direct link to Health check endpoint"></a></h2><p>For monitoring healthiness of your DataHub service, <code>/admin</code> endpoint can be used.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="slackUtm_uoBr"><div class="slackUtm_uoBr"><hr>Need more help? Join the conversation in <a href="https://datahub.com/slack?utm_source=docs&utm_medium=footer&utm_campaign=docs_footer&utm_content=docs/advanced/monitoring">Slack!</a></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/datahub-project/datahub/blob/master/docs/advanced/monitoring.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_VsjB"></div></div></footer><div class="feedbackWrapper_mUHF"><div class="feedbackWidget_PX4d"><div class="feedbackButtons_wn3V"><strong>Is this page helpful?</strong><div><button class="feedbackButton_UgQs"><span role="img" aria-label="like" class="anticon anticon-like"><svg viewBox="64 64 896 896" focusable="false" data-icon="like" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 533.7c16.8-22.2 26.1-49.4 26.1-77.7 0-44.9-25.1-87.4-65.5-111.1a67.67 67.67 0 00-34.3-9.3H572.4l6-122.9c1.4-29.7-9.1-57.9-29.5-79.4A106.62 106.62 0 00471 99.9c-52 0-98 35-111.8 85.1l-85.9 311H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 
32h601.3c9.2 0 18.2-1.8 26.5-5.4 47.6-20.3 78.3-66.8 78.3-118.4 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7-.2-12.6-2-25.1-5.6-37.1zM184 852V568h81v284h-81zm636.4-353l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 22.4-13.2 42.6-33.6 51.8H329V564.8l99.5-360.5a44.1 44.1 0 0142.2-32.3c7.6 0 15.1 2.2 21.1 6.7 9.9 7.4 15.2 18.6 14.6 30.5l-9.6 198.4h314.4C829 418.5 840 436.9 840 456c0 16.5-7.2 32.1-19.6 43z"></path></svg></span></button><button class="feedbackButton_UgQs"><span role="img" aria-label="dislike" class="anticon anticon-dislike"><svg viewBox="64 64 896 896" focusable="false" data-icon="dislike" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 490.3c3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-51.6-30.7-98.1-78.3-118.4a66.1 66.1 0 00-26.5-5.4H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h129.3l85.8 310.8C372.9 889 418.9 924 470.9 924c29.7 0 57.4-11.8 77.9-33.4 20.5-21.5 31-49.7 29.5-79.4l-6-122.9h239.9c12.1 0 23.9-3.2 34.3-9.3 40.4-23.5 65.5-66.1 65.5-111 0-28.3-9.3-55.5-26.1-77.7zM184 456V172h81v284h-81zm627.2 160.4H496.8l9.6 198.4c.6 11.9-4.7 23.1-14.6 30.5-6.1 4.5-13.6 6.8-21.1 6.7a44.28 44.28 0 01-42.2-32.3L329 459.2V172h415.4a56.85 56.85 0 0133.6 51.8c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-14 25.5 21.9 19a56.76 56.76 0 0119.6 43c0 19.1-11 37.5-28.8 48.4z"></path></svg></span></button></div></div></div></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/advanced/db-retention"><div class="pagination-nav__sublabel">Previous</div><div 
class="pagination-nav__label">Configuring Database Retention</div></a><a class="pagination-nav__li
|
|||
|
<script src="/assets/js/runtime~main.310f59c4.js"></script>
|
|||
|
<script src="/assets/js/main.49198d73.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|