92 lines
114 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/how/load-indices" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.3">
<title data-rh="true">Load Indices: High-Performance Bulk Index Loading | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/how/load-indices"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Load Indices: High-Performance Bulk Index Loading | DataHub"><meta data-rh="true" name="description" content="LoadIndices is a high-performance upgrade task designed for bulk loading metadata aspects directly from the database into Elasticsearch/OpenSearch indices. Unlike RestoreIndices which focuses on correctness and consistency, LoadIndices is optimized for speed and throughput during initial deployments or large-scale data migrations."><meta data-rh="true" property="og:description" content="LoadIndices is a high-performance upgrade task designed for bulk loading metadata aspects directly from the database into Elasticsearch/OpenSearch indices. 
Unlike RestoreIndices which focuses on correctness and consistency, LoadIndices is optimized for speed and throughput during initial deployments or large-scale data migrations."><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/how/load-indices"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/how/load-indices" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/how/load-indices" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
<link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
<link rel="preconnect" href="https://www.googletagmanager.com">
<script>window.dataLayer=window.dataLayer||[]</script>
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
<!-- Declares which origins may embed this page in a frame. NOTE: browsers ignore the frame-ancestors directive when a policy is delivered via a meta element, so the same policy must also be sent as a Content-Security-Policy HTTP response header to be enforced. -->
<meta http-equiv="Content-Security-Policy" content="frame-ancestors &#39;self&#39; https://*.acryl.io https://acryldata.io http://localhost:*">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
<!-- Third-party/session scripts loaded without blocking the parser. Both async and defer are set; defer is a boolean attribute, so it is written without a value. -->
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer></script>
<script src="/scripts/rb2b.js" async defer></script>
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
<script src="/scripts/reo.js"></script>
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.1e47f27f.css">
<link rel="preload" href="/assets/js/runtime~main.76cdbaa9.js" as="script">
<link rel="preload" href="/assets/js/main.b7ce4b14.js" as="script">
</head>
<body class="navigation-with-keyboard">
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!--
  Inline bootstrap script (minified).
  1. Picks the colour theme: the "docusaurus-theme" URL query parameter if present,
     otherwise the "theme" entry in localStorage, otherwise "light", and writes it to
     the data-theme attribute on the root html element.
  2. Sets data-announcement-bar-initially-dismissed on the root element to true/false
     depending on whether localStorage "docusaurus.announcement.dismiss" equals "true".
  Errors thrown while reading the URL or localStorage are caught and ignored, so the
  defaults apply when storage is unavailable.
-->
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p><strong>CONTEXT:</strong> The Future of Agentic AI is On Demand</p><a href="https://datahub.com/resources/context/?utm_source=event&utm_medium=&utm_campaign=16839686-FY25-Q4-EVENT-CONTEXT-MAIS&utm_term=docs" target="_blank" class="button"><div>Watch Now<span></span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/how/load-indices">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/features">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=header&amp;utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<!-- Slack icon shown inside the navbar link to the community Slack; the alt text names the link destination because the image is the link's only content. -->
<style>
/* Slightly fade the icon on hover to signal that it is clickable. */
.slack-logo:hover {
opacity: 0.8;
}
</style>
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="DataHub Slack community" height="20" style="margin: 10px 0 0 0;">
</a><a href="https://github.com/datahub-project/datahub" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<!-- GitHub icon shown inside the navbar link to the project repository; the alt text names the link destination because the image is the link's only content. -->
<style>
/* Slightly fade the icon on hover to signal that it is clickable. */
.github-logo:hover {
opacity: 0.8;
}
</style>
<img class="github-logo" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="DataHub on GitHub" height="20" style="margin: 10px 0 0 0;">
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category &#x27;What Is DataHub?&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category &#x27;Features&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud vs. DataHub OSS</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/upgrade_core_to_cloud">Upgrading from DataHub Core to Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" 
href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/slack/saas-slack-app">Slack</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/teams/saas-teams-app">Teams</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/operator-guide/setting-up-events-api-on-aws-eventbridge">Operator Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item saasOnly"><a class="menu__link" href="/docs/managed-datahub/change-proposals">Change Proposals</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/chrome-extension">Cloud Chrome Extension</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item saasOnly"><a class="menu__link" href="/docs/managed-datahub/subscription-and-notification">Subscriptions &amp; Notifications</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" 
href="/docs/managed-datahub/release-notes/v_0_3_15">DataHub Cloud Release History</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Integrations</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/metadata-ingestion">Overview</a><button aria-label="Toggle the collapsible sidebar category &#x27;Overview&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/cli-ingestion">Quickstart Guides</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/metadata-ingestion/source_overview">Sources</a><button aria-label="Toggle the collapsible sidebar category &#x27;Sources&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/schedule_docs/intro">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>API & SDKs</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 
menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="true" href="/docs/metadata-modeling/metadata-model">Open Source DataHub Metadata Standard</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/metadata-modeling/metadata-model">The Metadata Model</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/what/mxe">Core Metadata Events</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/generated/metamodel/entities/role">Entity Docs</a></div></li></ul></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/what-is-datahub/datahub-concepts">Concepts</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/metadata-standards">Metadata Standards</a><button aria-label="Toggle the collapsible sidebar category &#x27;Metadata Standards&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/api/datahub-apis">APIs and SDKs Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/api/graphql/overview">API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/as-a-library">Python SDK</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/metadata-integration/java/as-a-library">Java SDK</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/cli">DataHub CLI</a><button aria-label="Toggle the collapsible sidebar category &#x27;DataHub CLI&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/act-on-metadata">DataHub Actions</a><button aria-label="Toggle the collapsible sidebar category &#x27;DataHub Actions&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/api/tutorials/datasets">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Admin</div></li><li 
class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication">Authentication</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authorization">Authorization</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/how/delete-metadata">Advanced Guides</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/delete-metadata">Removing Metadata from DataHub</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/configuring-authorization-with-apache-ranger">Configuring Authorization with Apache Ranger</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/managed-datahub/configuring-identity-provisioning-with-ms-entra">SCIM Provisioning</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/backup-datahub">Backup &amp; 
Restore</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/restore-indices">Search and Graph Reindexing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/how/load-indices">Load Indices: High-Performance Bulk Index Loading</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/advanced/db-retention">Configuring Database Retention</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/advanced/monitoring">Monitoring DataHub</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/deploy/telemetry">Telemetry</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/kafka-config">Configuring Kafka</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/configure-cdc">Configuring Change Data Capture (CDC) Mode</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/how/jattach-guide">Debugging by Jattach</a></li></ul></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Deployment</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" 
href="/docs/category/deployment-guides">Deployment Guides</a><button aria-label="Toggle the collapsible sidebar category &#x27;Deployment Guides&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/deploy/confluent-cloud">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Developers</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/architecture/architecture">Architecture</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/developers">Developing on DataHub</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/advanced/mcp-mcl">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Community</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" 
aria-expanded="false" href="/docs/category/community">Community</a><button aria-label="Toggle the collapsible sidebar category &#x27;Community&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/releases">Release History</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Advanced Guides</span><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Load Indices: High-Performance Bulk Index Loading</span><meta itemprop="position" content="2"></li></ul></nav><span class="theme-doc-version-badge badge badge--secondary">Version: Next</span><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown 
markdown"><h1>Load Indices: High-Performance Bulk Index Loading</h1><p>LoadIndices is a high-performance upgrade task designed for bulk loading metadata aspects directly from the database into Elasticsearch/OpenSearch indices. Unlike RestoreIndices which focuses on correctness and consistency, LoadIndices is optimized for speed and throughput during initial deployments or large-scale data migrations.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="overview">Overview<a href="#overview" class="hash-link" aria-label="Direct link to Overview" title="Direct link to Overview"></a></h2><p>LoadIndices bypasses the standard event-driven processing pipeline to directly stream data from the <code>metadata_aspect_v2</code> table into search indices using optimized bulk operations. This approach provides significant performance improvements for large installations while making specific architectural trade-offs that prioritize speed over consistency.</p><p>🚨 <strong>CRITICAL WARNING</strong>: LoadIndices is designed for specific use cases only and should <strong>NEVER</strong> be used in production environments with active concurrent writes, MCL-dependent systems, or real-time consistency requirements. 
See <a href="#performance-trade-offs--implications">Performance Trade-offs &amp; Implications</a> for complete details.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="key-features">Key Features<a href="#key-features" class="hash-link" aria-label="Direct link to Key Features" title="Direct link to Key Features"></a></h3><ul><li><strong>🚀 High Performance</strong>: Direct streaming from database with optimized bulk operations</li><li><strong>⚡ Fast Bulk Loading</strong>: Optimized for speed over consistency during initial loads</li><li><strong>🔧 Refresh Management</strong>: Automatically disables refresh intervals during loading for optimal performance</li><li><strong>📊 Comprehensive Monitoring</strong>: Real-time progress reporting and performance metrics</li><li><strong>⚙️ Configurable Isolation</strong>: Utilizes READ_UNCOMMITTED transactions for faster scanning</li></ul><hr><h2 class="anchor anchorWithStickyNavbar_LWe7" id="performance-trade-offs--implications">Performance Trade-offs &amp; Implications<a href="#performance-trade-offs--implications" class="hash-link" aria-label="Direct link to Performance Trade-offs &amp; Implications" title="Direct link to Performance Trade-offs &amp; Implications"></a></h2><p>⚠️ <strong>Critical Understanding</strong>: LoadIndices prioritizes <strong>performance over consistency</strong> by making several architectural trade-offs. Understanding these implications is crucial before using LoadIndices in production environments.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-key-trade-offs-made">🚨 Key Trade-offs Made<a href="#-key-trade-offs-made" class="hash-link" aria-label="Direct link to 🚨 Key Trade-offs Made" title="Direct link to 🚨 Key Trade-offs Made"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-bypass-kafkamcl-event-pipeline"><strong>1. 
BYPASS Kafka/MCL Event Pipeline</strong><a href="#1-bypass-kafkamcl-event-pipeline" class="hash-link" aria-label="Direct link to 1-bypass-kafkamcl-event-pipeline" title="Direct link to 1-bypass-kafkamcl-event-pipeline"></a></h4><ul><li><strong>What</strong>: LoadIndices completely bypasses Kafka MCL (Metadata Change Log) topics that normally propagate all metadata changes</li><li><strong>Architecture</strong>: <code>Database → LoadIndices → Elasticsearch</code> <strong>vs</strong> normal flow of <code>Database → Kafka MCL → Multiple Consumers → Elasticsearch/Graph/etc</code></li><li><strong>Impact</strong>: <strong>No MCL events published</strong> - downstream systems lose visibility into metadata changes</li><li><strong>Critical Implication</strong>:<ul><li><strong>MCL-Dependent Analytics</strong>: Won&#x27;t have audit trail of metadata changes</li><li><strong>Integrations</strong>: External systems won&#x27;t be notified of changes</li><li><strong>Custom MCL Consumers</strong>: Any custom consumers will miss these events entirely</li><li><strong>✅ Graph Service</strong>: WILL be updated (UpdateIndicesService handles graph indices) <strong>⚠️ Only when Elasticsearch is used for graph storage</strong></li></ul></li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-broken-datahub-event-architecture"><strong>2. 
BROKEN DataHub Event Architecture</strong><a href="#2-broken-datahub-event-architecture" class="hash-link" aria-label="Direct link to 2-broken-datahub-event-architecture" title="Direct link to 2-broken-datahub-event-architecture"></a></h4><ul><li><strong>What</strong>: Violates DataHub&#x27;s core design principle that &quot;all metadata changes flow through Kafka MCL&quot;</li><li><strong>Normal Flow</strong>: <code>Metadata Change → MCL Event → Kafka → Multiple Consumers → Various Stores</code></li><li><strong>LoadIndices Flow</strong>: <code>Metadata Change → LoadIndices → Direct ES Write</code> (<strong>Skips Kafka entirely</strong>)</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-read_uncommitted-isolation"><strong>3. READ_UNCOMMITTED Isolation</strong><a href="#3-read_uncommitted-isolation" class="hash-link" aria-label="Direct link to 3-read_uncommitted-isolation" title="Direct link to 3-read_uncommitted-isolation"></a></h4><ul><li><strong>What</strong>: Uses <code>TxIsolation.READ_UNCOMMITTED</code> for faster database scanning</li><li><strong>Impact</strong>: May read <strong>uncommitted changes</strong> or <strong>dirty reads</strong> from concurrent transactions</li><li><strong>Implication</strong>: Data consistency not guaranteed during active writes to database</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="4-refresh-interval-manipulation"><strong>4. 
Refresh Interval Manipulation</strong><a href="#4-refresh-interval-manipulation" class="hash-link" aria-label="Direct link to 4-refresh-interval-manipulation" title="Direct link to 4-refresh-interval-manipulation"></a></h4><ul><li><strong>What</strong>: Automatically disables refresh intervals during bulk operations</li><li><strong>Impact</strong>: <strong>Recent updates may not be immediately searchable</strong></li><li><strong>Implication</strong>: Users won&#x27;t see real-time updates in search until refresh intervals are restored</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="5-no-write-concurrency-controls"><strong>5. No Write Concurrency Controls</strong><a href="#5-no-write-concurrency-controls" class="hash-link" aria-label="Direct link to 5-no-write-concurrency-controls" title="Direct link to 5-no-write-concurrency-controls"></a></h4><ul><li><strong>What</strong>: No coordination with concurrent Elasticsearch writes from live ingestion</li><li><strong>Impact</strong>: <strong>Potential conflicts</strong> with active ingestion pipelines</li><li><strong>Implication</strong>: Concurrent writes may cause data inconsistency or operation failures</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-when-not-to-use-loadindices">⚠️ When NOT to Use LoadIndices<a href="#-when-not-to-use-loadindices" class="hash-link" aria-label="Direct link to ⚠️ When NOT to Use LoadIndices" title="Direct link to ⚠️ When NOT to Use LoadIndices"></a></h3><p><strong>❌ DO NOT use LoadIndices if you have:</strong></p><ul><li><strong>Active ingestion pipelines</strong> writing to Elasticsearch simultaneously</li><li><strong>MCL-dependent systems</strong> that need event notifications</li><li><strong>Neo4j-based graph storage</strong> (graph updates will be missing)</li><li><strong>Real-time search requirements</strong> during the loading process</li><li><strong>Production traffic</strong> that requires immediate search consistency</li></ul><h3 class="anchor 
anchorWithStickyNavbar_LWe7" id="-when-loadindices-is-appropriate">✅ When LoadIndices is Appropriate<a href="#-when-loadindices-is-appropriate" class="hash-link" aria-label="Direct link to ✅ When LoadIndices is Appropriate" title="Direct link to ✅ When LoadIndices is Appropriate"></a></h3><p><strong>✅ Safe to use LoadIndices when:</strong></p><ul><li><strong>Fresh deployment</strong> with empty Elasticsearch cluster</li><li><strong>Offline migration</strong> with no concurrent users</li><li><strong>Standalone indexing</strong> without DataHub services running</li><li><strong>Read-only replica environments</strong> with no active writes</li><li><strong>Development/testing</strong> environments</li><li><strong>Disaster recovery</strong> scenarios where faster restoration is prioritized</li><li><strong>Independent cluster setup</strong> where you need to populate indices before services start</li><li><strong>Elasticsearch-based graph storage</strong> (graph gets updated automatically)</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-safety-requirements">🔒 Safety Requirements<a href="#-safety-requirements" class="hash-link" aria-label="Direct link to 🔒 Safety Requirements" title="Direct link to 🔒 Safety Requirements"></a></h3><p>Before using LoadIndices in any environment:</p><ol><li><p><strong>Verify Minimal Infrastructure</strong>:</p><ul><li><strong>Database</strong>: MySQL/PostgreSQL with <code>metadata_aspect_v2</code> table accessible (via Ebean ORM)</li><li><strong>Elasticsearch</strong>: Running cluster accessible via HTTP/HTTPS</li><li><strong>DataHub Services</strong>: ✅ <strong>NOT required</strong> - LoadIndices can run independently</li><li><strong>⚠️ Check Graph Storage</strong>: Verify if using Elasticsearch-based graph storage</li><li><strong>⚠️ Check Database Type</strong>: Confirm NOT using Cassandra (not supported)</li></ul></li><li><p><strong>Stop All Ingestion</strong> (if DataHub is running):</p><div class="language-bash 
codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Disable all Kafka consumers</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl scale deployment --replicas</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"> datahub-mae-consumer</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl scale deployment --replicas</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"> datahub-mce-consumer</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl scale deployment --replicas</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"> datahub-gms</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></li><li><p><strong>Check Database Configuration</strong>:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Check if using Cassandra (LoadIndices NOT supported)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> -i cassandra /path/to/datahub/docker/docker-compose.yml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Verify MySQL/PostgreSQL database is configured</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> -E </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;mysql|postgres&quot;</span><span class="token plain"> /path/to/datahub/docker/docker-compose.yml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># ⚠️ If Cassandra detected, 
LoadIndices is NOT available</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Must use RestoreIndices instead</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></li><li><p><strong>Check Graph Storage Configuration</strong>:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Check if using Neo4j (graph updates will be MISSING)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> -r </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;neo4j&quot;</span><span class="token plain"> /path/to/datahub/docker/docker-compose.yml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Check DataHub configuration for graph service selection</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> -i </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;graph.*elasticsearch\|neo4j&quot;</span><span class="token plain"> /path/to/datahub/conf/application.yml</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># ⚠️ If Neo4j is detected, LoadIndices will NOT update graph</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></li><li><p><strong>Verify No Concurrent Writes</strong>:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 
152);font-style:italic"># Check for active Elasticsearch indexing</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">curl</span><span class="token plain"> -s </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;localhost:9200/_nodes/stats&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">|</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;index_current&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Should show &quot;index_current&quot;: 0</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></li><li><p><strong>Index Clean State</strong>:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span 
class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Ensure clean indexing state</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">curl</span><span class="token plain"> -s </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;localhost:9200/_nodes/stats&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">|</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">grep</span><span class="token plain"> -E </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;refresh.*active&quot;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></li><li><p><strong>Coordinate with Operations</strong>:</p><ul><li><strong>Maintenance window</strong> scheduling</li><li><strong>User notification</strong> of search unavailability</li><li><strong>Monitoring</strong> of downstream system dependencies</li></ul></li></ol><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-consistency-guarantees">📊 Consistency Guarantees<a href="#-consistency-guarantees" class="hash-link" aria-label="Direct link to 📊 Consistency Guarantees" title="Direct link to 📊 Consistency 
Guarantees"></a></h3><table><thead><tr><th>Level</th><th>LoadIndices</th><th>RestoreIndices</th></tr></thead><tbody><tr><td><strong>URN-level Ordering</strong></td><td>✅ Guaranteed</td><td>✅ Guaranteed</td></tr><tr><td><strong>Real-time Searchability</strong></td><td>❌ Delayed</td><td>✅ Immediate</td></tr><tr><td><strong>Graph Service Updates (ES-based)</strong></td><td>✅ Updated</td><td>✅ Updated</td></tr><tr><td><strong>Graph Service Updates (Neo4j-based)</strong></td><td>❌ Missing</td><td>✅ Updated</td></tr><tr><td><strong>MCL Event Propagation</strong></td><td>❌ Bypassed</td><td>✅ Full propagation</td></tr><tr><td><strong>Concurrent Write Safety</strong></td><td>❌ Not safe</td><td>✅ Safe</td></tr></tbody></table><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-restore-normal-operations"><strong>2. Restore Normal Operations</strong><a href="#2-restore-normal-operations" class="hash-link" aria-label="Direct link to 2-restore-normal-operations" title="Direct link to 2-restore-normal-operations"></a></h4><ul><li><strong>Re-enable ingestion</strong> pipelines gradually</li><li><strong>Monitor Elasticsearch</strong> for conflicts</li><li><strong>Validate downstream systems</strong> are synchronized</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="3-emergency-rollback-plan"><strong>3. 
Emergency Rollback Plan</strong><a href="#3-emergency-rollback-plan" class="hash-link" aria-label="Direct link to 3-emergency-rollback-plan" title="Direct link to 3-emergency-rollback-plan"></a></h4><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># If issues arise, prepare rollback:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 1. Stop LoadIndices immediately</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 2. Restore from backup indices</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 3. 
Re-run with RestoreIndices for correctness</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><hr><h2 class="anchor anchorWithStickyNavbar_LWe7" id="how-loadindices-works">How LoadIndices Works<a href="#how-loadindices-works" class="hash-link" aria-label="Direct link to How LoadIndices Works" title="Direct link to How LoadIndices Works"></a></h2><p>LoadIndices operates as an upgrade task that can run <strong>independently</strong> without requiring DataHub services to be running. It consists of two main steps:</p><ol><li><strong>BuildIndicesStep</strong>: Creates and configures Elasticsearch indices (creates indices if they don&#x27;t exist)</li><li><strong>LoadIndicesStep</strong>: Streams aspects from database and bulk loads them into indices</li></ol><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-independent-operation-mode">🔧 Independent Operation Mode<a href="#-independent-operation-mode" class="hash-link" aria-label="Direct link to 🔧 Independent Operation Mode" title="Direct link to 🔧 Independent Operation Mode"></a></h3><p><strong>Key Advantage</strong>: LoadIndices only requires:</p><ul><li><strong>MySQL/PostgreSQL</strong> source database (via Ebean ORM)</li><li><strong>Elasticsearch/OpenSearch</strong> destination cluster</li><li><strong>No DataHub services</strong> (GMS, frontend, etc.) 
required</li><li><strong>Cassandra</strong>: ⚠️ <strong>NOT supported</strong> (Ebean doesn&#x27;t support Cassandra)</li></ul><p>This enables <strong>offline bulk operations</strong> during maintenance windows or initial deployments where DataHub infrastructure is being set up incrementally.</p><p><strong>Index Creation</strong>: The BuildIndicesStep automatically creates all required Elasticsearch indices based on <code>IndexConvention</code> patterns, so empty Elasticsearch clusters are fully supported.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="architecture-flow">Architecture Flow<a href="#architecture-flow" class="hash-link" aria-label="Direct link to Architecture Flow" title="Direct link to Architecture Flow"></a></h3><div class="language-mermaid codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-mermaid codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">graph TD</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> A[LoadIndices Upgrade] --&gt; B[BuildIndicesStep]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> B --&gt; C[Create/Configure Indices]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> C --&gt; D[LoadIndicesStep]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> D --&gt; E[Disable Refresh Intervals]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> E --&gt; F[Stream Aspects from DB]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> F --&gt; G[Batch Processing]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> G --&gt; H[Convert to MCL 
Events]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> H --&gt; I[Bulk Write to ES]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> I --&gt; J[Restore Refresh Intervals]</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="key-differences-from-restoreindices">Key Differences from RestoreIndices<a href="#key-differences-from-restoreindices" class="hash-link" aria-label="Direct link to Key Differences from RestoreIndices" title="Direct link to Key Differences from RestoreIndices"></a></h3><table><thead><tr><th>Aspect</th><th>RestoreIndices</th><th>LoadIndices</th></tr></thead><tbody><tr><td><strong>Purpose</strong></td><td>Correctness &amp; consistency</td><td>Speed &amp; throughput</td></tr><tr><td><strong>Processing</strong></td><td>Event-driven via MCL events</td><td>Direct bulk operations</td></tr><tr><td><strong>Isolation</strong></td><td>READ_COMMITTED</td><td>READ_UNCOMMITTED</td></tr><tr><td><strong>Refresh Management</strong></td><td>Static configuration</td><td>Dynamic disable/restore</td></tr><tr><td><strong>Performance Focus</strong></td><td>Accurate replay</td><td>Maximal speed</td></tr><tr><td><strong>Use Case</strong></td><td>Recovery from inconsistencies</td><td>Initial loads &amp; migrations</td></tr></tbody></table><hr><h2 class="anchor 
anchorWithStickyNavbar_LWe7" id="deployment--execution">Deployment &amp; Execution<a href="#deployment--execution" class="hash-link" aria-label="Direct link to Deployment &amp; Execution" title="Direct link to Deployment &amp; Execution"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-standalone-deployment-advantage">🚀 Standalone Deployment Advantage<a href="#-standalone-deployment-advantage" class="hash-link" aria-label="Direct link to 🚀 Standalone Deployment Advantage" title="Direct link to 🚀 Standalone Deployment Advantage"></a></h3><p><strong>Key Benefit</strong>: LoadIndices can run with <strong>minimal infrastructure</strong> without requiring DataHub services to be running:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Minimal requirements</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">✅ MySQL/PostgreSQL database </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">with metadata_aspect_v2 table</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">✅ Elasticsearch/OpenSearch cluster</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">❌ DataHub GMS/MAE consumer services - NOT needed</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">❌ Kafka cluster - NOT needed</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">❌ 
Frontend services - NOT needed</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-execution-methods">🔧 Execution Methods<a href="#-execution-methods" class="hash-link" aria-label="Direct link to 🔧 Execution Methods" title="Direct link to 🔧 Execution Methods"></a></h3><p>LoadIndices can be executed via:</p><ol><li><strong>Gradle Task</strong> (Recommended)</li></ol><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># From datahub-upgrade directory</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew runLoadIndices</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># With custom thread count</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain">./gradlew runLoadIndices -PesThreadCount</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">6</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><ol start="2"><li><p><strong>IDE Execution</strong>: Run <code>UpgradeTask.main()</code> with LoadIndices arguments</p></li><li><p><strong>Standalone JAR</strong>: Build and run datahub-upgrade JAR independently</p></li></ol><hr><h2 class="anchor anchorWithStickyNavbar_LWe7" id="loadindices-configuration-options">LoadIndices Configuration Options<a href="#loadindices-configuration-options" class="hash-link" aria-label="Direct link to LoadIndices Configuration Options" title="Direct link to LoadIndices Configuration Options"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-performance--throttling">🔄 Performance &amp; Throttling<a href="#-performance--throttling" class="hash-link" aria-label="Direct link to 🔄 Performance &amp; Throttling" title="Direct link to 🔄 Performance &amp; Throttling"></a></h3><table><thead><tr><th>Argument</th><th>Description</th><th>Default</th><th>Example</th></tr></thead><tbody><tr><td><code>batchSize</code></td><td>Number of aspects per batch for processing</td><td><code>10000</code></td><td><code>-a batchSize=5000</code></td></tr><tr><td><code>limit</code></td><td>Maximum number of aspects to 
process</td><td><code>Integer.MAX_VALUE</code> (no limit)</td><td><code>-a limit=50000</code></td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-time-filtering">📅 Time Filtering<a href="#-time-filtering" class="hash-link" aria-label="Direct link to 📅 Time Filtering" title="Direct link to 📅 Time Filtering"></a></h3><table><thead><tr><th>Argument</th><th>Description</th><th>Example</th></tr></thead><tbody><tr><td><code>gePitEpochMs</code></td><td>Only process aspects created <strong>after</strong> this timestamp (milliseconds)</td><td><code>-a gePitEpochMs=1609459200000</code></td></tr><tr><td><code>lePitEpochMs</code></td><td>Only process aspects created <strong>before</strong> this timestamp (milliseconds)</td><td><code>-a lePitEpochMs=1640995200000</code></td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-content-filtering">🔍 Content Filtering<a href="#-content-filtering" class="hash-link" aria-label="Direct link to 🔍 Content Filtering" title="Direct link to 🔍 Content Filtering"></a></h3><table><thead><tr><th>Argument</th><th>Description</th><th>Example</th></tr></thead><tbody><tr><td><code>urnLike</code></td><td>SQL LIKE pattern to filter URNs</td><td><code>-a urnLike=urn:li:dataset:%</code></td></tr><tr><td><code>aspectNames</code></td><td>Comma-separated list of aspect names to process</td><td><code>-a aspectNames=ownership,schemaMetadata</code></td></tr><tr><td><code>lastUrn</code></td><td>Resume processing from this URN (inclusive)</td><td><code>-a lastUrn=urn:li:dataset:my-dataset</code></td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-system-configuration">⚙️ System Configuration<a href="#-system-configuration" class="hash-link" aria-label="Direct link to ⚙️ System Configuration" title="Direct link to ⚙️ System Configuration"></a></h3><table><thead><tr><th>Environment 
Variable</th><th>Description</th><th>Default</th><th>Example</th></tr></thead><tbody><tr><td><code>ELASTICSEARCH_THREAD_COUNT</code></td><td>Number of I/O threads for BulkProcessor</td><td><code>2</code> (app config), <code>4</code> (Gradle task)</td><td><code>ELASTICSEARCH_THREAD_COUNT=4</code></td></tr><tr><td><code>ES_BULK_ASYNC</code></td><td>Enable asynchronous bulk operations</td><td><code>true</code></td><td><code>ES_BULK_ASYNC=true</code></td></tr><tr><td><code>ES_BULK_REQUESTS_LIMIT</code></td><td>Maximum bulk requests per buffer</td><td><code>10000</code></td><td><code>ES_BULK_REQUESTS_LIMIT=15000</code></td></tr><tr><td><code>ES_BULK_FLUSH_PERIOD</code></td><td>Bulk flush interval in seconds</td><td><code>300</code> (5 minutes)</td><td><code>ES_BULK_FLUSH_PERIOD=300</code></td></tr></tbody></table><hr><h2 class="anchor anchorWithStickyNavbar_LWe7" id="running-loadindices">Running LoadIndices<a href="#running-loadindices" class="hash-link" aria-label="Direct link to Running LoadIndices" title="Direct link to Running LoadIndices"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-docker-compose">🐳 Docker Compose<a href="#-docker-compose" class="hash-link" aria-label="Direct link to 🐳 Docker Compose" title="Direct link to 🐳 Docker Compose"></a></h3><p>If you&#x27;re using Docker Compose with the DataHub source repository:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Basic LoadIndices execution</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./docker/datahub-upgrade/datahub-upgrade.sh -u 
LoadIndices</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># LoadIndices with performance tuning</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./docker/datahub-upgrade/datahub-upgrade.sh -u LoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">batchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">15000</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">limit</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">100000</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-gradle-task-development">🎯 Gradle Task (Development)<a href="#-gradle-task-development" class="hash-link" aria-label="Direct link to 🎯 Gradle Task (Development)" title="Direct link to 🎯 Gradle Task (Development)"></a></h3><p>For development and testing environments:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Run LoadIndices with default settings</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew :datahub-upgrade:runLoadIndices</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Run with custom thread count and batch size</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew :datahub-upgrade:runLoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PesThreadCount</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">4</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 
234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PbatchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">15000</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -Plimit</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">50000</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>The Gradle task supports these parameters:</p><ul><li><code>esThreadCount</code>: Set <code>ELASTICSEARCH_THREAD_COUNT</code> (default: <code>4</code>)</li><li><code>batchSize</code>: Override batch size (default: <code>10000</code>)</li><li><code>limit</code>: Set processing limit</li><li><code>urnLike</code>: Filter by URN pattern</li><li><code>aspectNames</code>: Filter by aspect names</li><li><code>lePitEpochMs</code>: Process records created before this timestamp</li><li><code>gePitEpochMs</code>: Process records created after this timestamp</li><li><code>lastUrn</code>: Resume processing from this URN (inclusive)</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" 
id="-docker-environment-variables">🐳 Docker Environment Variables<a href="#-docker-environment-variables" class="hash-link" aria-label="Direct link to 🐳 Docker Environment Variables" title="Direct link to 🐳 Docker Environment Variables"></a></h3><p>Configure LoadIndices through Docker environment:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Target specific entity types</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> run --rm datahub-upgrade </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -u LoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">urnLike</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">urn:li:dataset:% </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">batchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span 
class="token number" style="color:rgb(247, 140, 108)">20000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Process specific aspects only</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> run --rm datahub-upgrade </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -u LoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">aspectNames</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">ownership,status,schemaMetadata </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">batchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">15000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Time-based filtering</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> run --rm datahub-upgrade </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -u LoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">gePitEpochMs</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">1640995200000</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">limit</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">50000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Resume from a specific URN</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span 
class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> run --rm datahub-upgrade </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -u LoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">lastUrn</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">urn:li:dataset:my-dataset </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -a </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">batchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">10000</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-resume-functionality">🔄 Resume Functionality<a href="#-resume-functionality" class="hash-link" aria-label="Direct link to 🔄 Resume Functionality" 
title="Direct link to 🔄 Resume Functionality"></a></h3><p>LoadIndices supports resuming from a specific URN when processing is interrupted:</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="resume-from-last-processed-urn"><strong>Resume from Last Processed URN</strong><a href="#resume-from-last-processed-urn" class="hash-link" aria-label="Direct link to Resume from Last Processed URN" title="Direct link to Resume from Last Processed URN"></a></h4><p>When LoadIndices runs, it logs the last URN processed in each batch:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">Batch completed - Last URN processed: urn:li:dataset:my-dataset</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Processed 10000 aspects - 150.2 aspects/sec - Last URN: urn:li:dataset:my-dataset</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>To resume from where you left off:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V 
thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Resume from the last URN that was successfully processed</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew :datahub-upgrade:runLoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PlastUrn</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">urn:li:dataset:my-dataset </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PbatchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">10000</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="resume-best-practices"><strong>Resume Best Practices</strong><a 
href="#resume-best-practices" class="hash-link" aria-label="Direct link to Resume Best Practices" title="Direct link to Resume Best Practices"></a></h4><ul><li><strong>Use the exact URN</strong>: Copy the URN exactly as logged (including any URL encoding)</li><li><strong>Inclusive processing</strong>: The <code>lastUrn</code> parameter processes from the specified URN onwards (inclusive)</li><li><strong>Monitor progress</strong>: Watch the logs for the &quot;Last URN processed&quot; messages to track progress</li><li><strong>Batch boundaries</strong>: Resume works at the URN level, not batch level - some aspects may be reprocessed</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="example-resume-workflow"><strong>Example Resume Workflow</strong><a href="#example-resume-workflow" class="hash-link" aria-label="Direct link to Example Resume Workflow" title="Direct link to Example Resume Workflow"></a></h4><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 1. 
Start initial processing</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew :datahub-upgrade:runLoadIndices -PbatchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">5000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 2. If interrupted, check logs for last URN:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># &quot;Batch completed - Last URN processed: urn:li:dataset:my-dataset&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 3. 
Resume from that URN</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./gradlew :datahub-upgrade:runLoadIndices </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PlastUrn</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">urn:li:dataset:my-dataset </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> -PbatchSize</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">5000</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><hr><h2 class="anchor anchorWithStickyNavbar_LWe7" id="performance-optimization">Performance Optimization<a href="#performance-optimization" class="hash-link" aria-label="Direct link to Performance Optimization" title="Direct link to Performance Optimization"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" 
id="-elasticsearchopensearch-configuration">🚀 Elasticsearch/OpenSearch Configuration<a href="#-elasticsearchopensearch-configuration" class="hash-link" aria-label="Direct link to 🚀 Elasticsearch/OpenSearch Configuration" title="Direct link to 🚀 Elasticsearch/OpenSearch Configuration"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="bulk-processing-tuning">Bulk Processing Tuning<a href="#bulk-processing-tuning" class="hash-link" aria-label="Direct link to Bulk Processing Tuning" title="Direct link to Bulk Processing Tuning"></a></h4><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Optimize bulk settings for LoadIndices</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">ES_BULK_REQUESTS_LIMIT</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">15000</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">ES_BULK_FLUSH_PERIOD</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">10</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">ES_BULK_ASYNC</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">true</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">ELASTICSEARCH_THREAD_COUNT</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">4</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="connection-pool-optimization">Connection Pool Optimization<a href="#connection-pool-optimization" class="hash-link" aria-label="Direct link to Connection Pool Optimization" title="Direct link to Connection Pool Optimization"></a></h4><p>LoadIndices automatically configures connection pooling based on thread count:</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div 
class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># datahub-upgrade/build.gradle configuration</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">environment &quot;ELASTICSEARCH_THREAD_COUNT&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;4&quot;</span><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Auto-adjusts maxConnectionsPerRoute</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="comparison-with-restoreindices">Comparison with RestoreIndices<a href="#comparison-with-restoreindices" class="hash-link" aria-label="Direct link to Comparison with RestoreIndices" title="Direct link to Comparison with RestoreIndices"></a></h2><p>Understanding when to use LoadIndices vs RestoreIndices is crucial for optimal performance and data consistency.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-purpose--design-philosophy">🎯 Purpose &amp; Design Philosophy<a 
href="#-purpose--design-philosophy" class="hash-link" aria-label="Direct link to 🎯 Purpose &amp; Design Philosophy" title="Direct link to 🎯 Purpose &amp; Design Philosophy"></a></h3><table><thead><tr><th>Aspect</th><th>RestoreIndices</th><th>LoadIndices</th></tr></thead><tbody><tr><td><strong>Primary Purpose</strong></td><td>Data consistency &amp; correctness</td><td>Speed &amp; throughput</td></tr><tr><td><strong>Design Philosophy</strong></td><td>Event-driven precision</td><td>Performance optimization</td></tr><tr><td><strong>Consistency Model</strong></td><td>Full consistency guarantee</td><td>Speed-optimized trade-offs</td></tr><tr><td><strong>Use Case</strong></td><td>Production recovery</td><td>Bulk migrations &amp; initial loads</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" id="-technical-comparison">📊 Technical Comparison<a href="#-technical-comparison" class="hash-link" aria-label="Direct link to 📊 Technical Comparison" title="Direct link to 📊 Technical Comparison"></a></h3><table><thead><tr><th>Feature</th><th>RestoreIndices</th><th>LoadIndices</th></tr></thead><tbody><tr><td><strong>Database Isolation</strong></td><td>READ_COMMITTED</td><td>READ_UNCOMMITTED</td></tr><tr><td><strong>MCL Events</strong></td><td>✅ Full MCL pipeline</td><td>❌ Bypasses MCL entirely</td></tr><tr><td><strong>Graph Updates (Elasticsearch)</strong></td><td>✅ Updated</td><td>✅ Updated</td></tr><tr><td><strong>Graph Updates (Neo4j)</strong></td><td>✅ Updated</td><td>❌ Not updated</td></tr><tr><td><strong>Database Support</strong></td><td>MySQL, PostgreSQL, Cassandra</td><td>MySQL, PostgreSQL only</td></tr><tr><td><strong>Performance</strong></td><td>Slower, safer</td><td>Faster, optimized</td></tr><tr><td><strong>Real-time Consistency</strong></td><td>✅ Immediate</td><td>❌ Delayed until refresh</td></tr><tr><td><strong>Concurrency Safety</strong></td><td>✅ Safe</td><td>❌ Not safe</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_LWe7" 
id="-when-to-use-each-tool">🚀 When to Use Each Tool<a href="#-when-to-use-each-tool" class="hash-link" aria-label="Direct link to 🚀 When to Use Each Tool" title="Direct link to 🚀 When to Use Each Tool"></a></h3><h4 class="anchor anchorWithStickyNavbar_LWe7" id="-use-restoreindices-for"><strong>Use RestoreIndices For:</strong><a href="#-use-restoreindices-for" class="hash-link" aria-label="Direct link to Use RestoreIndices For:" title="Direct link to Use RestoreIndices For:"></a></h4><ul><li><strong>Production recovery</strong> from inconsistencies</li><li><strong>Neo4j-based graph storage</strong> deployments</li><li><strong>Cassandra-based</strong> metadata storage</li><li><strong>Active ingestion</strong> pipelines running</li><li><strong>MCL-dependent systems</strong> requiring event notifications</li><li><strong>Precise event replay</strong> scenarios</li></ul><h4 class="anchor anchorWithStickyNavbar_LWe7" id="-use-loadindices-for"><strong>Use LoadIndices For:</strong><a href="#-use-loadindices-for" class="hash-link" aria-label="Direct link to Use LoadIndices For:" title="Direct link to Use LoadIndices For:"></a></h4><ul><li><strong>Fresh deployments</strong> with empty clusters</li><li><strong>Bulk migrations</strong> during maintenance windows</li><li><strong>MySQL/PostgreSQL + Elasticsearch</strong> configurations</li><li><strong>Offline scenarios</strong> with no concurrent writes</li><li><strong>Development/testing</strong> environments</li><li><strong>Performance-critical</strong> initial data loads</li></ul></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="slackUtm_uoBr"><div class="slackUtm_uoBr"><hr>Need more help? 
Join the conversation in <a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=footer&amp;utm_campaign=docs_footer&amp;utm_content=docs/how/load-indices">Slack!</a></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/datahub-project/datahub/blob/master/docs/how/load-indices.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_VsjB"></div></div></footer><div class="feedbackWrapper_mUHF"><div class="feedbackWidget_PX4d"><div class="feedbackButtons_wn3V"><strong>Is this page helpful?</strong><div><button class="feedbackButton_UgQs"><span role="img" aria-label="like" class="anticon anticon-like"><svg viewBox="64 64 896 896" focusable="false" data-icon="like" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 533.7c16.8-22.2 26.1-49.4 26.1-77.7 0-44.9-25.1-87.4-65.5-111.1a67.67 67.67 0 00-34.3-9.3H572.4l6-122.9c1.4-29.7-9.1-57.9-29.5-79.4A106.62 106.62 0 00471 99.9c-52 0-98 35-111.8 85.1l-85.9 311H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h601.3c9.2 0 18.2-1.8 26.5-5.4 47.6-20.3 78.3-66.8 78.3-118.4 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7-.2-12.6-2-25.1-5.6-37.1zM184 852V568h81v284h-81zm636.4-353l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 22.4-13.2 42.6-33.6 51.8H329V564.8l99.5-360.5a44.1 44.1 0 0142.2-32.3c7.6 0 15.1 2.2 21.1 6.7 9.9 7.4 15.2 18.6 14.6 30.5l-9.6 198.4h314.4C829 418.5 840 436.9 840 456c0 16.5-7.2 32.1-19.6 
43z"></path></svg></span></button><button class="feedbackButton_UgQs"><span role="img" aria-label="dislike" class="anticon anticon-dislike"><svg viewBox="64 64 896 896" focusable="false" data-icon="dislike" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 490.3c3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-51.6-30.7-98.1-78.3-118.4a66.1 66.1 0 00-26.5-5.4H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h129.3l85.8 310.8C372.9 889 418.9 924 470.9 924c29.7 0 57.4-11.8 77.9-33.4 20.5-21.5 31-49.7 29.5-79.4l-6-122.9h239.9c12.1 0 23.9-3.2 34.3-9.3 40.4-23.5 65.5-66.1 65.5-111 0-28.3-9.3-55.5-26.1-77.7zM184 456V172h81v284h-81zm627.2 160.4H496.8l9.6 198.4c.6 11.9-4.7 23.1-14.6 30.5-6.1 4.5-13.6 6.8-21.1 6.7a44.28 44.28 0 01-42.2-32.3L329 459.2V172h415.4a56.85 56.85 0 0133.6 51.8c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-14 25.5 21.9 19a56.76 56.76 0 0119.6 43c0 19.1-11 37.5-28.8 48.4z"></path></svg></span></button></div></div></div></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/how/restore-indices"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Search and Graph Reindexing</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/advanced/db-retention"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Configuring Database Retention</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#overview" class="table-of-contents__link toc-highlight">Overview</a><ul><li><a href="#key-features" 
class="table-of-contents__link toc-highlight">Key Features</a></li></ul></li><li><a href="#performance-trade-offs--implications" class="table-of-contents__link toc-highlight">Performance Trade-offs &amp; Implications</a><ul><li><a href="#-key-trade-offs-made" class="table-of-contents__link toc-highlight">🚨 Key Trade-offs Made</a></li><li><a href="#-when-not-to-use-loadindices" class="table-of-contents__link toc-highlight">⚠️ When NOT to Use LoadIndices</a></li><li><a href="#-when-loadindices-is-appropriate" class="table-of-contents__link toc-highlight">✅ When LoadIndices is Appropriate</a></li><li><a href="#-safety-requirements" class="table-of-contents__link toc-highlight">🔒 Safety Requirements</a></li><li><a href="#-consistency-guarantees" class="table-of-contents__link toc-highlight">📊 Consistency Guarantees</a></li></ul></li><li><a href="#how-loadindices-works" class="table-of-contents__link toc-highlight">How LoadIndices Works</a><ul><li><a href="#-independent-operation-mode" class="table-of-contents__link toc-highlight">🔧 Independent Operation Mode</a></li><li><a href="#architecture-flow" class="table-of-contents__link toc-highlight">Architecture Flow</a></li><li><a href="#key-differences-from-restoreindices" class="table-of-contents__link toc-highlight">Key Differences from RestoreIndices</a></li></ul></li><li><a href="#deployment--execution" class="table-of-contents__link toc-highlight">Deployment &amp; Execution</a><ul><li><a href="#-standalone-deployment-advantage" class="table-of-contents__link toc-highlight">🚀 Standalone Deployment Advantage</a></li><li><a href="#-execution-methods" class="table-of-contents__link toc-highlight">🔧 Execution Methods</a></li></ul></li><li><a href="#loadindices-configuration-options" class="table-of-contents__link toc-highlight">LoadIndices Configuration Options</a><ul><li><a href="#-performance--throttling" class="table-of-contents__link toc-highlight">🔄 Performance &amp; Throttling</a></li><li><a href="#-time-filtering" 
class="table-of-contents__link toc-highlight">📅 Time Filtering</a></li><li><a href="#-content-filtering" class="table-of-contents__link toc-highlight">🔍 Content Filtering</a></li><li><a href="#-system-configuration" class="table-of-contents__link toc-highlight">⚙️ System Configuration</a></li></ul></li><li><a href="#running-loadindices" class="table-of-contents__link toc-highlight">Running LoadIndices</a><ul><li><a href="#-docker-compose" class="table-of-contents__link toc-highlight">🐳 Docker Compose</a></li><li><a href="#-gradle-task-development" class="table-of-contents__link toc-highlight">🎯 Gradle Task (Development)</a></li><li><a href="#-docker-environment-variables" class="table-of-contents__link toc-highlight">🐳 Docker Environment Variables</a></li><li><a href="#-resume-functionality" class="table-of-contents__link toc-highlight">🔄 Resume Functionality</a></li></ul></li><li><a href="#performance-optimization" class="table-of-contents__link toc-highlight">Performance Optimization</a><ul><li><a href="#-elasticsearchopensearch-configuration" class="table-of-contents__link toc-highlight">🚀 Elasticsearch/OpenSearch Configuration</a></li></ul></li><li><a href="#comparison-with-restoreindices" class="table-of-contents__link toc-highlight">Comparison with RestoreIndices</a><ul><li><a href="#-purpose--design-philosophy" class="table-of-contents__link toc-highlight">🎯 Purpose &amp; Design Philosophy</a></li><li><a href="#-technical-comparison" class="table-of-contents__link toc-highlight">📊 Technical Comparison</a></li><li><a href="#-when-to-use-each-tool" class="table-of-contents__link toc-highlight">🚀 When to Use Each Tool</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" 
href="/docs/">Introduction</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/quickstart">Quickstart</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://datahub.com/slack" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="footer__link-item">YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/docs/townhalls">Town Halls</a></li><li class="footer__item"><a href="https://datahub.com/resources/?2004611554=dh-stories" target="_blank" rel="noopener noreferrer" class="footer__link-item">Customer Stories<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 
4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://demo.datahub.com/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Demo</a></li><li class="footer__item"><a href="https://feature-requests.datahubproject.io/roadmap" target="_blank" rel="noopener noreferrer" class="footer__link-item">Roadmap<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/docs/contributing">Contributing</a></li><li class="footer__item"><a href="https://github.com/datahub-project/datahub" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://feature-requests.datahubproject.io/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Feature Requests<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2015-2025 DataHub Project Authors.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.76cdbaa9.js"></script>
<script src="/assets/js/main.b7ce4b14.js"></script>
</body>
</html>