mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-04 23:57:03 +00:00
176 lines
109 KiB
HTML
176 lines
109 KiB
HTML
![]() |
<!doctype html>
|
|||
|
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/deploy/aws" data-has-hydrated="false">
|
|||
|
<head>
|
|||
|
<meta charset="UTF-8">
|
|||
|
<meta name="generator" content="Docusaurus v2.4.3">
|
|||
|
<title data-rh="true">Deploying to AWS | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/deploy/aws"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Deploying to AWS | DataHub"><meta data-rh="true" name="description" content="The following is a set of instructions to quickstart DataHub on AWS Elastic Kubernetes Service (EKS). Note, the guide"><meta data-rh="true" property="og:description" content="The following is a set of instructions to quickstart DataHub on AWS Elastic Kubernetes Service (EKS). Note, the guide"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/deploy/aws"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/deploy/aws" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/deploy/aws" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
|
|||
|
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
|
|||
|
|
|||
|
<link rel="preconnect" href="https://www.google-analytics.com">
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
|
|||
|
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script>window.dataLayer=window.dataLayer||[]</script>
|
|||
|
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
|
|||
|
|
|||
|
|
|||
|
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<meta http-equiv="Content-Security-Policy" content="frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*">
|
|||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
|
|||
|
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
|
|||
|
<script src="/scripts/rb2b.js" async defer="defer"></script>
|
|||
|
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
|
|||
|
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
|
|||
|
<script src="/scripts/reo.js"></script>
|
|||
|
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
|
|||
|
<link rel="preload" href="/assets/js/runtime~main.50e13f51.js" as="script">
|
|||
|
<link rel="preload" href="/assets/js/main.edc0853c.js" as="script">
|
|||
|
</head>
|
|||
|
<body class="navigation-with-keyboard">
|
|||
|
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
|
|||
|
|
|||
|
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
|
|||
|
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span> →</span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/deploy/aws">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/deploy/aws">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="https://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" 
rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.cloud-cta {
|
|||
|
color: var(--ifm-menu-color-active);
|
|||
|
font-weight: 600;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
transition: background-image 0.3s ease;
|
|||
|
}
|
|||
|
.cloud-cta:hover {
|
|||
|
color: transparent;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
animation: gradientShift 3s ease infinite;
|
|||
|
}
|
|||
|
@keyframes gradientShift {
|
|||
|
0%, 100% { background-position: 0% 50%; }
|
|||
|
50% { background-position: 100% 50%; }
|
|||
|
}
|
|||
|
</style>
|
|||
|
<div class="cloud-cta">Get Cloud</div>
|
|||
|
</a><a href="https://datahub.com/slack?utm_source=docs&utm_medium=header&utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.slack-logo:hover {
|
|||
|
opacity: 0.8;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" alt="slack" height="20" style="margin: 10px 0 0 0;">
|
|||
|
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category 'What Is DataHub?'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category 'Features'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud Overview</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div 
class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--subli
|
|||
|
assumes that you do not have a kubernetes cluster set up. If you are deploying DataHub to an existing cluster, please
|
|||
|
skip the corresponding sections.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="prerequisites">Prerequisites<a href="#prerequisites" class="hash-link" aria-label="Direct link to Prerequisites" title="Direct link to Prerequisites"></a></h2><p>This guide requires the following tools:</p><ul><li><a href="https://kubernetes.io/docs/tasks/tools/" target="_blank" rel="noopener noreferrer">kubectl</a> to manage kubernetes resources</li><li><a href="https://helm.sh/docs/intro/install/" target="_blank" rel="noopener noreferrer">helm</a> to deploy the resources based on helm charts. Note, we only support Helm 3.</li><li><a href="https://eksctl.io/installation/" target="_blank" rel="noopener noreferrer">eksctl</a> to create and manage clusters on EKS</li><li><a href="https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html" target="_blank" rel="noopener noreferrer">AWS CLI</a> to manage AWS resources</li></ul><p>To use the above tools, you need to set up AWS credentials by following
|
|||
|
this <a href="https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html" target="_blank" rel="noopener noreferrer">guide</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="start-up-a-kubernetes-cluster-on-aws-eks">Start up a kubernetes cluster on AWS EKS<a href="#start-up-a-kubernetes-cluster-on-aws-eks" class="hash-link" aria-label="Direct link to Start up a kubernetes cluster on AWS EKS" title="Direct link to Start up a kubernetes cluster on AWS EKS"></a></h2><p>Let’s follow this <a href="https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html" target="_blank" rel="noopener noreferrer">guide</a> to create a new
|
|||
|
cluster using eksctl. Run the following command with cluster-name set to the cluster name of choice, and region set to
|
|||
|
the AWS region you are operating on.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">eksctl create cluster \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --name &lt;&lt;cluster-name&gt;&gt; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --region &lt;&lt;region&gt;&gt; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --with-oidc \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --nodes=3</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>The command will provision an EKS cluster powered by 3 EC2 m3.large nodes and provision a VPC-based networking layer.</p><p>If you are planning to run the storage layer (MySQL, Elasticsearch, Kafka) as pods in the cluster, you need at least 3
|
|||
|
nodes. If you decide to use managed storage services, you can reduce the number of nodes or use m3.medium nodes to save
|
|||
|
cost. Refer to this <a href="https://eksctl.io/usage/creating-and-managing-clusters/" target="_blank" rel="noopener noreferrer">guide</a> to further customize the cluster
|
|||
|
before provisioning.</p><p>Note, OIDC setup is required for following this guide when setting up the load balancer.</p><p>Run <code>kubectl get nodes</code> to confirm that the cluster has been setup correctly. You should get results like below</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">NAME STATUS ROLES AGE VERSION</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">ip-192-168-49-49.us-west-2.compute.internal Ready <none> 3h v1.18.9-eks-d1db3c</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">ip-192-168-64-56.us-west-2.compute.internal Ready <none> 3h v1.18.9-eks-d1db3c</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">ip-192-168-8-126.us-west-2.compute.internal Ready <none> 3h v1.18.9-eks-d1db3c</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="install-ebs-csi-driver-core-dns-and-vpc-cni-plugin-for-kubernetes">Install EBS CSI driver, Core DNS, and VPC CNI plugin for Kubernetes<a 
href="#install-ebs-csi-driver-core-dns-and-vpc-cni-plugin-for-kubernetes" class="hash-link" aria-label="Direct link to Install EBS CSI driver, Core DNS, and VPC CNI plugin for Kubernetes" title="Direct link to Install EBS CSI driver, Core DNS, and VPC CNI plugin for Kubernetes"></a></h3><p>Once your cluster is running, make sure to install the EBS CSI driver, Core DNS, and VPC CNI plugin <a href="https://docs.aws.amazon.com/eks/latest/userguide/eks-add-ons.html" target="_blank" rel="noopener noreferrer">add-ons</a> for Kubernetes. By default, the Core DNS and VPC CNI plugins are installed. You need to manually install the EBS CSI driver. It should look like this in your console when you are done.</p><p><img loading="lazy" src="https://github.com/user-attachments/assets/5a9a2af0-e804-4896-85bb-dc5834208719" alt="Screenshot 2024-11-15 at 4 42 09 PM" class="img_ev3q"></p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="add-the-amazonebscsidriverpolicy-role-to-the-eks-node-group">Add the AmazonEBSCSIDriverPolicy role to the EKS node group<a href="#add-the-amazonebscsidriverpolicy-role-to-the-eks-node-group" class="hash-link" aria-label="Direct link to Add the AmazonEBSCSIDriverPolicy role to the EKS node group" title="Direct link to Add the AmazonEBSCSIDriverPolicy role to the EKS node group"></a></h3><p>Next, add the AmazonEBSCSIDriverPolicy role to the EKS node group. You can reach the IAM role of the EKS node group by going to the Compute tab in your EKS cluster and clicking on the IAM entry for the EKS node group. Add the AmazonEBSCSIDriverPolicy policy.</p><p><img loading="lazy" src="https://github.com/user-attachments/assets/8971c8d6-8543-408b-9a07-814aacb2532d" alt="Screenshot 2024-11-15 at 4 42 29 PM" class="img_ev3q">
|
|||
|
<img loading="lazy" src="https://github.com/user-attachments/assets/397f9131-5f13-4d9f-a664-9921d9bbf44e" alt="Screenshot 2024-11-15 at 4 42 46 PM" class="img_ev3q"></p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="setup-datahub-using-helm">Setup DataHub using Helm<a href="#setup-datahub-using-helm" class="hash-link" aria-label="Direct link to Setup DataHub using Helm" title="Direct link to Setup DataHub using Helm"></a></h2><p>Once the kubernetes cluster has been set up, you can deploy DataHub and its prerequisites using helm. Please follow the
|
|||
|
steps in this <a href="/docs/deploy/kubernetes">guide</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="expose-endpoints-using-a-load-balancer">Expose endpoints using a load balancer<a href="#expose-endpoints-using-a-load-balancer" class="hash-link" aria-label="Direct link to Expose endpoints using a load balancer" title="Direct link to Expose endpoints using a load balancer"></a></h2><p>Now that all the pods are up and running, you need to expose the datahub-frontend endpoint by setting
|
|||
|
up <a href="https://kubernetes.io/docs/concepts/services-networking/ingress/" target="_blank" rel="noopener noreferrer">ingress</a>. To do this, you need to first set up an
|
|||
|
ingress controller. There are
|
|||
|
many <a href="https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/" target="_blank" rel="noopener noreferrer">ingress controllers</a> to choose
|
|||
|
from, but here, we will follow
|
|||
|
this <a href="https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html" target="_blank" rel="noopener noreferrer">guide</a> to set up the AWS
|
|||
|
Application Load Balancer (ALB) Controller.</p><p>First, if you did not use eksctl to set up the kubernetes cluster, make sure to go through the prerequisites listed
|
|||
|
<a href="https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html" target="_blank" rel="noopener noreferrer">here</a>.</p><p>Download the IAM policy document for allowing the controller to make calls to AWS APIs on your behalf.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">curl -o iam_policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/main/docs/install/iam_policy.json</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Create an IAM policy based on the policy document by running the following.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">aws iam create-policy \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --policy-name AWSLoadBalancerControllerIAMPolicy \</span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain"> --policy-document file://iam_policy.json</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Use eksctl to create a service account that allows us to attach the above policy to kubernetes pods.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">eksctl create iamserviceaccount \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --cluster=<<cluster-name>> \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --namespace=kube-system \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --name=aws-load-balancer-controller \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --attach-policy-arn=arn:aws:iam::<<account-id>>:policy/AWSLoadBalancerControllerIAMPolicy \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --override-existing-serviceaccounts \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --approve</span><br></span></code></pre><div 
class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" c
|
|||
|
return a result like the following.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">NAME READY UP-TO-DATE AVAILABLE AGE</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">aws-load-balancer-controller 2/2 2 2 142m</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Now that the controller has been set up, we can enable ingress by updating the values.yaml (or any other values.yaml
|
|||
|
file used to deploy datahub). Change datahub-frontend values to the following.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">datahub-frontend:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> enabled: true</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> image:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> repository: acryldata/datahub-frontend-react</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> tag: "head"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ingress:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> enabled: true</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> annotations:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> kubernetes.io/ingress.class: alb</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> alb.ingress.kubernetes.io/scheme: internet-facing</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> alb.ingress.kubernetes.io/target-type: instance</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> alb.ingress.kubernetes.io/certificate-arn: <<certificate-arn>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> alb.ingress.kubernetes.io/inbound-cidrs: 0.0.0.0/0</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]'</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> alb.ingress.kubernetes.io/ssl-redirect: '443'</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> hosts:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> - host: <<host-name>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> paths:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> - /*</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Do not use the 'latest' or 'debug' tags for any of the images, as those are not supported and are present only due to legacy reasons. Please use 'head' or version-specific tags, like v0.8.40. For production, we recommend using version-specific tags, not 'head'.</p><p>You need to request a certificate in the AWS Certificate Manager by following this
|
|||
|
<a href="https://docs.aws.amazon.com/acm/latest/userguide/gs-acm-request-public.html" target="_blank" rel="noopener noreferrer">guide</a>, and replace certificate-arn with
|
|||
|
the ARN of the new certificate. You also need to replace host-name with the hostname of choice like
|
|||
|
demo.datahub.com.</p><p>To have the metadata <a href="/docs/authentication/introducing-metadata-service-authentication#configuring-metadata-service-authentication">authentication service</a> enabled and use <a href="/docs/authentication/personal-access-tokens#creating-personal-access-tokens">API tokens</a> from the UI you will need to set the configuration in the values.yaml for the <code>gms</code> and the <code>frontend</code> deployments. This could be done by enabling the <code>metadata_service_authentication</code>:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">datahub:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> metadata_service_authentication:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> enabled: true</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>After updating the yaml file, run the following to apply the updates.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code 
language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">helm upgrade --install datahub datahub/datahub --values values.yaml</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Once the upgrade completes, run <code>kubectl get ingress</code> to verify the ingress setup. You should see a result like the
|
|||
|
following.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">NAME CLASS HOSTS ADDRESS PORTS AGE</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">datahub-datahub-frontend <none> demo.datahub.com k8s-default-datahubd-80b034d83e-904097062.us-west-2.elb.amazonaws.com 80 3h5m</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Note down the elb address in the address column. Add the DNS CNAME record to the host domain pointing the host-name (
|
|||
|
from above) to the elb address. DNS updates generally take a few minutes to an hour. Once that is done, you should be
|
|||
|
able to access datahub-frontend through the host-name.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="use-aws-managed-services-for-the-storage-layer">Use AWS managed services for the storage layer<a href="#use-aws-managed-services-for-the-storage-layer" class="hash-link" aria-label="Direct link to Use AWS managed services for the storage layer" title="Direct link to Use AWS managed services for the storage layer"></a></h2><p>Managing the storage services like MySQL, Elasticsearch, and Kafka as kubernetes pods requires a great deal of
|
|||
|
maintenance workload. To reduce the workload, you can use managed services like AWS <a href="https://aws.amazon.com/rds" target="_blank" rel="noopener noreferrer">RDS</a>,
|
|||
|
<a href="https://aws.amazon.com/elasticsearch-service/" target="_blank" rel="noopener noreferrer">Elasticsearch Service</a>, and <a href="https://aws.amazon.com/msk/" target="_blank" rel="noopener noreferrer">Managed Kafka</a>
|
|||
|
as the storage layer for DataHub. Support for using AWS Neptune as graph DB is coming soon.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="rds">RDS<a href="#rds" class="hash-link" aria-label="Direct link to RDS" title="Direct link to RDS"></a></h3><p>Provision a MySQL database in AWS RDS that shares the VPC with the kubernetes cluster or has VPC peering set up with
|
|||
|
the VPC of the kubernetes cluster. Once the database is provisioned, you should be able to see the following page. Take
|
|||
|
a note of the endpoint marked by the red box.</p><p align="center"><img loading="lazy" width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/aws/aws-rds.png" class="img_ev3q"></p><p>First, add the DB password to kubernetes by running the following.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl delete secret mysql-secrets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl create secret generic mysql-secrets --from-literal=mysql-root-password=<<password>></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Update the sql settings under global in the values.yaml as follows.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> sql:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 
datasource:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> host: "<<rds-endpoint>>:3306"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> hostForMysqlClient: "<<rds-endpoint>>"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> port: "3306"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> url: "jdbc:mysql://<<rds-endpoint>>:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> driver: "com.mysql.jdbc.Driver"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> username: "root"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> password:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> secretRef: mysql-secrets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> secretKey: mysql-root-password</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Run <code>helm upgrade --install datahub datahub/datahub --values values.yaml</code> to apply the changes.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="elasticsear
|
|||
|
cluster or has VPC peering set up with the VPC of the kubernetes cluster. Once the domain is provisioned, you should
|
|||
|
be able to see the following page. Take a note of the endpoint marked by the red box.</p><p align="center"><img loading="lazy" width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/aws/aws-elasticsearch.png" class="img_ev3q"></p><p>Update the elasticsearch settings under global in the values.yaml as follows.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> elasticsearch:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> host: <<elasticsearch-endpoint>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> port: "443"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> useSSL: "true"</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>You can also allow communication via HTTP (without SSL) by using the settings below.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code 
class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> elasticsearch:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> host: <<elasticsearch-endpoint>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> port: "80"</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you have fine-grained access control enabled with basic authentication, first run the following to create a k8s
|
|||
|
secret with the password.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl delete secret elasticsearch-secrets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">kubectl create secret generic elasticsearch-secrets --from-literal=elasticsearch-password=<<password>></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Then use the settings below.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> elasticsearch:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> host: <<elasticsearch-endpoint>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> port: "443"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> useSSL: "true"</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> auth:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> username: <<username>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> password:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> secretRef: elasticsearch-secrets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> secretKey: elasticsearch-password</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you have access control enabled with IAM auth, enable AWS auth signing in Datahub</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> OPENSEARCH_USE_AWS_IAM_AUTH=true</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path>
|
|||
|
service uses OpenDistro version of Elasticsearch, which does not support the "datastream" functionality. As such, we use
|
|||
|
a different way of creating time based indices.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> elasticsearchSetupJob:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> enabled: true</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> image:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> repository: acryldata/datahub-elasticsearch-setup</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> tag: "***"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> extraEnvs:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> - name: USE_AWS_ELASTICSEARCH</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> value: "true"</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Run <code>helm upgrade --install datahub datahub/datahub --values values.yaml</code> to apply the changes.</p><p><strong>Note:</strong>
|
|||
|
If you have a custom Elasticsearch cluster setup and are deploying through docker, you can modify the configurations
|
|||
|
in datahub to point to the specific ES instance -</p><ol><li>If you are using <code>docker quickstart</code> you can modify the hostname and port of the ES instance in docker compose
|
|||
|
quickstart files located <a href="https://github.com/datahub-project/datahub/blob/master/docker/quickstart/" target="_blank" rel="noopener noreferrer">here</a>.<ol><li>Once you have modified the quickstart recipes you can run the quickstart command using a specific docker compose
|
|||
|
file. Sample command for that is<ul><li><code>datahub docker quickstart --quickstart-compose-file docker/quickstart/docker-compose-without-neo4j.quickstart.yml</code></li></ul></li></ol></li><li>If you are not using quickstart recipes, you can modify the environment variables in GMS to point to the ES instance. The
|
|||
|
env files for datahub-gms are located <a href="https://github.com/datahub-project/datahub/blob/master/docker/datahub-gms/env/" target="_blank" rel="noopener noreferrer">here</a>.</li></ol><p>Further, you can find a list of properties supported to work with a custom ES
|
|||
|
instance <a href="https://github.com/datahub-project/datahub/blob/master/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java" target="_blank" rel="noopener noreferrer">here</a>
|
|||
|
and <a href="https://github.com/datahub-project/datahub/blob/master/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java" target="_blank" rel="noopener noreferrer">here</a>
|
|||
|
.</p><p>A mapping between the property name used in the above two files and the name used in docker/env file can be
|
|||
|
found <a href="https://github.com/datahub-project/datahub/blob/master/metadata-service/configuration/src/main/resources/application.yaml" target="_blank" rel="noopener noreferrer">here</a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="managed-streaming-for-apache-kafka-msk">Managed Streaming for Apache Kafka (MSK)<a href="#managed-streaming-for-apache-kafka-msk" class="hash-link" aria-label="Direct link to Managed Streaming for Apache Kafka (MSK)" title="Direct link to Managed Streaming for Apache Kafka (MSK)"></a></h3><p>Provision an MSK cluster that shares the VPC with the kubernetes cluster or has VPC peering set up with the VPC of
|
|||
|
the kubernetes cluster. Once the cluster is provisioned, click on the “View client information” button in the “Cluster
|
|||
|
Summary” section. You should see a page like below. Take a note of the endpoints marked by the red boxes.</p><p align="center"><img loading="lazy" width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/aws/aws-msk.png" class="img_ev3q"></p><p>Update the kafka settings under global in the values.yaml as follows.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">kafka:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> bootstrap:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> server: "<<bootstrap-server endpoint>>"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> zookeeper:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> server: "<<zookeeper endpoint>>"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> schemaregistry:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> url: "http://prerequisites-cp-schema-registry:8081"</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> partitions: 3</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> replicationFactor: 3</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 
19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Note, the number of partitions and replicationFactor should match the number of bootstrap servers. This is by default 3
|
|||
|
for AWS MSK.</p><p>Run <code>helm upgrade --install datahub datahub/datahub --values values.yaml</code> to apply the changes.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="aws-glue-schema-registry">AWS Glue Schema Registry<a href="#aws-glue-schema-registry" class="hash-link" aria-label="Direct link to AWS Glue Schema Registry" title="Direct link to AWS Glue Schema Registry"></a></h3><blockquote><p><strong>WARNING</strong>: AWS Glue Schema Registry DOES NOT have a python SDK. As such, python based libraries like ingestion or datahub-actions (UI ingestion) are not supported when using AWS Glue Schema Registry.</p></blockquote><p>You can use AWS Glue schema registry instead of the kafka schema registry. To do so, first provision an AWS Glue schema
|
|||
|
registry in the "Schema Registry" tab in the AWS Glue console page.</p><p>Once the registry is provisioned, you can change helm chart as follows.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">kafka:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> bootstrap:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> zookeeper:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> schemaregistry:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> type: AWS_GLUE</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> glue:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> region: <<AWS region of registry>></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> registry: <<name of registry>></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Note, it will use the name of the topic as the schema name in the registry.</p><p>Before you update the pods, you need to give the k8s worker nodes the correct permissions to access the schema registry.</p><p>The minimum permissions required looks like this</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "Version": "2012-10-17",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "Statement": [</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "Sid": "VisualEditor0",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "Effect": "Allow",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "Action": [</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "glue:GetRegistry",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "glue:ListRegistries",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "glue:CreateSchema",</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> "glue:UpdateSchema",</span><br></span><span class="t
|
|||
|
Refer to <a href="https://github.com/awslabs/aws-glue-schema-registry/issues/68" target="_blank" rel="noopener noreferrer">this issue</a> for any updates.</p><p>Glue currently doesn't support AWS Signature V4. As such, we cannot use service accounts to give permissions to access
|
|||
|
the schema registry. The workaround is to give the above permission to the EKS worker node's IAM role. Refer
|
|||
|
to <a href="https://github.com/awslabs/aws-glue-schema-registry/issues/69" target="_blank" rel="noopener noreferrer">this issue</a> for any updates.</p><p>Run <code>helm upgrade --install datahub datahub/datahub --values values.yaml</code> to apply the changes.</p><p>Note, you will see the log message "Schema Version Id is null. Trying to register the schema" on every request. This log is
|
|||
|
misleading, so should be ignored. Schemas are cached, so it does not register a new version on every request (aka no
|
|||
|
performance issues). This has been fixed by <a href="https://github.com/awslabs/aws-glue-schema-registry/pull/64" target="_blank" rel="noopener noreferrer">this PR</a> but
|
|||
|
the code has not been released yet. We will update the version once a new release is out.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="iam-policies-for-ui-based-ingestion">IAM policies for UI-based ingestion<a href="#iam-policies-for-ui-based-ingestion" class="hash-link" aria-label="Direct link to IAM policies for UI-based ingestion" title="Direct link to IAM policies for UI-based ingestion"></a></h3><p>This section details how to attach policies to the acryl-datahub-actions pod that powers UI-based ingestion. For some of
|
|||
|
the ingestion recipes, you specify login credentials in the recipe itself, making it easy to set up auth to grab metadata
|
|||
|
from the data source. However, for AWS resources, the recommendation is to use IAM roles and policies to gate requests
|
|||
|
to access metadata on these resources.</p><p>To do this, let's follow
|
|||
|
this <a href="https://docs.aws.amazon.com/eks/latest/userguide/create-service-account-iam-policy-and-role.html" target="_blank" rel="noopener noreferrer">guide</a> to
|
|||
|
associate a kubernetes service account with an IAM role. Then we can attach this IAM role to the acryl-datahub-actions
|
|||
|
pod to let the pod assume the specified role.</p><p>First, you must create an IAM policy with all the permissions needed to run ingestion. This is specific to each
|
|||
|
connector and the set of metadata you are trying to pull. For example, profiling requires more permissions, since it needs
|
|||
|
access to the data, not just the metadata. Let's assume the ARN of that policy
|
|||
|
is <code>arn:aws:iam::<<account-id>>:policy/policy1</code>.</p><p>Then, create a service account with the policy attached. One way to do this is to use <a href="https://eksctl.io/" target="_blank" rel="noopener noreferrer">eksctl</a>. You can run the
|
|||
|
following command to do so.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">eksctl create iamserviceaccount \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --name <<service-account-name>> \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --namespace <<namespace>> \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --cluster <<eks-cluster-name>> \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --attach-policy-arn <<policy-ARN>> \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --approve \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --override-existing-serviceaccounts</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>For example, running the following will create a service account "acryl-datahub-actions" in the datahub namespace of
|
|||
|
datahub EKS cluster with <code>arn:aws:iam::<<account-id>>:policy/policy1</code> attached.</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">eksctl create iamserviceaccount \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --name acryl-datahub-actions \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --namespace datahub \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --cluster datahub \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --attach-policy-arn arn:aws:iam::<<account-id>>:policy/policy1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --approve \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --override-existing-serviceaccounts</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Lastly, in the helm values.yaml, you can add the following to the acryl-datahub-actions to attach the service account to
|
|||
|
the acryl-datahub-actions pod.</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">acryl-datahub-actions</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">enabled</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token boolean important" style="color:rgb(255, 88, 116)">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">serviceAccount</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">name</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> <<service</span><span class="token punctuation" style="color:rgb(199, 146, 234)">-</span><span class="token plain">account</span><span class="token punctuation" style="color:rgb(199, 146, 234)">-</span><span class="token plain">name</span><span class="token punctuation" style="color:rgb(199, 146, 234)">></span><span class="token punctuation" style="color:rgb(199, 146, 234)">></span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">...</span><br></span></code></pre><div 
class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="slackUtm_uoBr"><div class="slackUtm_uoBr"><hr>Need more help? Join the conversation in <a href="https://datahub.com/slack?utm_source=docs&utm_medium=footer&utm_campaign=docs_footer&utm_content=docs/deploy/aws">Slack!</a></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/datahub-project/datahub/blob/master/docs/deploy/aws.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_VsjB"></div></div></footer><div class="feedbackWrapper_mUHF"><div class="feedbackWidget_PX4d"><div class="feedbackButtons_wn3V"><strong>Is this page helpful?</strong><div><button class="feedbackButton_UgQs"><span role="img" aria-label="like" class="anticon anticon-like"><svg viewBox="64 64 896 896" focusable="false" data-icon="like" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 533.7c16.8-22.2 26.1-49.4 26.1-77.7 0-44.9-25.1-87.4-65.5-111.1a67.67 67.67 0 00-34.3-9.3H572.4l6-122.9c1.4-29.7-9.1-57.9-29.5-79.4A106.62 106.62 0 
00471 99.9c-52 0-98 35-111.8 85.1l-85.9 311H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 3
|
|||
|
<script src="/assets/js/runtime~main.50e13f51.js"></script>
|
|||
|
<script src="/assets/js/main.edc0853c.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|