mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-27 19:39:52 +00:00
133 lines
211 KiB
HTML
133 lines
211 KiB
HTML
![]() |
<!doctype html>
|
|||
|
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/generated/ingestion/sources/iceberg" data-has-hydrated="false">
|
|||
|
<head>
|
|||
|
<meta charset="UTF-8">
|
|||
|
<meta name="generator" content="Docusaurus v2.4.3">
|
|||
|
<title data-rh="true">Iceberg | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/generated/ingestion/sources/iceberg"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Iceberg | DataHub"><meta data-rh="true" name="description" content="Testing"><meta data-rh="true" property="og:description" content="Testing"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/generated/ingestion/sources/iceberg"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/generated/ingestion/sources/iceberg" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/generated/ingestion/sources/iceberg" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
|
|||
|
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
|
|||
|
|
|||
|
<link rel="preconnect" href="https://www.google-analytics.com">
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
|
|||
|
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
|
|||
|
<link rel="preconnect" href="https://www.googletagmanager.com">
|
|||
|
<script>window.dataLayer=window.dataLayer||[]</script>
|
|||
|
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
|
|||
|
|
|||
|
|
|||
|
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<meta httpequiv="Content-Security-Policy" content="frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*">
|
|||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
|
|||
|
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
|
|||
|
<script src="/scripts/rb2b.js" async defer="defer"></script>
|
|||
|
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
|
|||
|
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
|
|||
|
<script src="/scripts/reo.js"></script>
|
|||
|
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.d8fe2eb8.css">
|
|||
|
<link rel="preload" href="/assets/js/runtime~main.2d61b452.js" as="script">
|
|||
|
<link rel="preload" href="/assets/js/main.9d79f7e2.js" as="script">
|
|||
|
</head>
|
|||
|
<body class="navigation-with-keyboard">
|
|||
|
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
|
|||
|
|
|||
|
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
|
|||
|
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p>DataHub Secures $35 Million Series B</p><a href="https://datahub.com/news/series-b-announcement/" target="_blank" class="button"><div>Read the announcement<span> →</span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/generated/ingestion/sources/iceberg">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/generated/ingestion/sources/iceberg">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li><li>
|
|||
|
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
|
|||
|
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
|
|||
|
</a>
|
|||
|
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Learn</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/weekly-demo" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Weekly Demo"></div><div class="title_c7DP">Weekly Demo</div></a></div><div><a href="https://datahub.com/use-cases" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-forum.png" alt="Use Cases"></div><div class="title_c7DP">Use Cases</div></a></div><div><a href="httpps://datahub.com/adoption-stories" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Adoption Stories"></div><div class="title_c7DP">Adoption Stories</div></a></div><div><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Blog"></div><div class="title_c7DP">Blog</div></a></div><div><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Youtube"></div><div class="title_c7DP">Youtube</div></a></div></div></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu dropdown__menu_Z8FC"><div class="wrapper_kp81"><div><a href="https://datahub.com/slack/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-join-slack.png" alt="Join Slack"></div><div class="title_c7DP">Join Slack</div></a></div><div><a href="https://datahub.com/events" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-events.png" alt="Events"></div><div class="title_c7DP">Events</div></a></div><div><a href="https://datahub.com/champions/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-champions.png" alt="Champions"></div><div class="title_c7DP">Champions</div></a></div><div><a href="https://datahub.com/share-your-journey/" target="_blank" rel="noopener noreferrer" class="card_BUD7"><div class="icon_BgHd"><img src="/img/icon-share-your-journey.png" alt="Share Your Journey"></div><div class="title_c7DP">Share Your Journey</div></a></div></div></ul></div><a href="https://datahub.com/products/why-datahub-cloud/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.cloud-cta {
|
|||
|
color: var(--ifm-menu-color-active);
|
|||
|
font-weight: 600;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), var(--ifm-menu-color-active));
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
transition: background-image 0.3s ease;
|
|||
|
}
|
|||
|
.cloud-cta:hover {
|
|||
|
color: transparent;
|
|||
|
background: linear-gradient(40deg, var(--ifm-menu-color-active), #ff1493);
|
|||
|
background-size: 200% 100%;
|
|||
|
-webkit-background-clip: text;
|
|||
|
background-clip: text;
|
|||
|
animation: gradientShift 3s ease infinite;
|
|||
|
}
|
|||
|
@keyframes gradientShift {
|
|||
|
0%, 100% { background-position: 0% 50%; }
|
|||
|
50% { background-position: 100% 50%; }
|
|||
|
}
|
|||
|
</style>
|
|||
|
<div class="cloud-cta">Get Cloud</div>
|
|||
|
</a><a href="https://datahub.com/slack?utm_source=docs&utm_medium=header&utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
|
|||
|
<style>
|
|||
|
.slack-logo:hover {
|
|||
|
opacity: 0.8;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" , alt="slack" , height="20px" style="margin: 10px 0 0 0;">
|
|||
|
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category 'What Is DataHub?'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category 'Features'" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud Overview</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/upgrade_core_to_cloud">Upgrading from DataHub Core to Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme
|
|||
|
Typically, Iceberg tables are stored in a distributed file system like S3 or Azure Data Lake Storage (ADLS) and registered in a catalog. There are various catalog
|
|||
|
implementations like Filesystem-based, RDBMS-based or even REST-based catalogs. This Iceberg source plugin relies on the
|
|||
|
<a href="https://py.iceberg.apache.org/" target="_blank" rel="noopener noreferrer">pyiceberg library</a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="cli-based-ingestion">CLI based Ingestion<a href="#cli-based-ingestion" class="hash-link" aria-label="Direct link to CLI based Ingestion" title="Direct link to CLI based Ingestion"></a></h3><h3 class="anchor anchorWithStickyNavbar_LWe7" id="config-details">Config Details<a href="#config-details" class="hash-link" aria-label="Direct link to Config Details" title="Direct link to Config Details"></a></h3><div class="tabs-container tabList__CuJ"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_LNqP tabs__item--active">Options</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_LNqP">Schema</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_Ymn6"><p>Note that a <code>.</code> is used to denote nested fields in the YAML recipe.</p><div class="config-table"><table><thead><tr><th align="left">Field</th><th align="left">Description</th></tr></thead><tbody><tr><td align="left"><div class="path-line"><span class="path-main">catalog</span> <abbr title="Required">✅</abbr></div> <div class="type-name-line"><span class="type-name">map(str,object)</span></div></td><td align="left"></td></tr><tr><td align="left"><div class="path-line"><span class="path-main">group_ownership_property</span></div> <div class="type-name-line"><span class="type-name">string</span></div></td><td align="left">Iceberg table property to look for a <code>CorpGroup</code> owner. Can only hold a single group value. If property has no value, no owner information will be emitted.</td></tr><tr><td align="left"><div class="path-line"><span class="path-main">namespace_pattern</span></div> <div class="type-name-line"><span class="type-name">AllowDenyPattern</span></div></td><td align="left">Regex patterns for namespaces to filter in ingestion. <div class="default-line default-line-with-docs">Default: <span class="default-value">{<!-- -->'<!-- -->allow<!-- -->'<!-- -->: <!-- -->[<!-- -->'<!-- -->.<!-- -->*<!-- -->'<!-- -->]<!-- -->, <!-- -->'<!-- -->deny<!-- -->'<!-- -->: <!-- -->[<!-- -->]<!-- -->, <!-- -->'<!-- -->ignoreCase<!-- -->'<!-- -->: True<!-- -->}</span></div></td></tr><tr><td align="left"><div class="path-line"><span class="path-main">platform_instance</span></div> <div class="type-name-line"><span class="type-name">string</span></div></td><td align="left">The instance of the platform that all assets produced by this recipe belong to. This should be unique within the platform. See <a href="https://docs.datahub.com/docs/platform-instances/" target="_blank" rel="noopener noreferrer">https://docs.datahub.com/docs/platform-instances/</a> for more details.</td></tr><tr><td align="left"><div class="path-line"><span class="path-main">processing_threads</span></div> <div class="type-name-line"><span class="type-name">integer</span></div></td><td align="left">How many threads will be processing tables <div class="default-line default-line-with-docs">Default: <span class="default-value">1</span></div></td></tr><tr><td align="left"><div class="path-line"><span class="path-main">table_pattern</span></div> <div class="type-name-line"><span class="type-name">AllowDenyPattern</span></div></td><td align="left">Regex patterns for tables to filter in ingestion. <div class="default-line default-line-with-docs">Default: <span class="default-value">{<!-- -->'<!-- -->allow<!-- -->'<!-- -->: <!-- -->[<!-- -->'<!-- -->.<!-- -->*<!-- -->'<!-- -->]<!-- -->, <!-- -->'<!-- -->deny<!-- -->'<!-- -->: <!-- -->[<!-- -->]<!-- -->, <!-- -->'<!-- -->ignoreCase<!-- -->'<!-- -->: True<!-- -->}</span></div></td></tr><tr><td align="left"><div class="path-line"><span class="path-main">user_ownership_property</span></div> <div class="type-name-line"><span class="type-name">string</span></div></td><td align="left">Iceb
|
|||
|
library to extract metadata from them. The recipe for the source consists of 2 parts:</p><ol><li><code>catalog</code> part which is passed as-is to the <code>pyiceberg</code> library and configures the connection and its details (i.e. authentication).
|
|||
|
The name of catalog specified in the recipe has no consequence, it is just a formal requirement from the library.
|
|||
|
Only one catalog will be considered for the ingestion.</li><li>The remaining configuration consists of parameters, such as <code>env</code> or <code>stateful_ingestion</code> which are standard
|
|||
|
DataHub's ingestor configuration parameters and are described in the <a href="#config-details">Config Details</a> chapter.</li></ol><p>This chapter showcases several examples of setting up connections to an Iceberg catalog, varying based on the underlying
|
|||
|
implementation. Iceberg is designed to have catalog and warehouse separated, which is reflected in how we configure it.
|
|||
|
It is especially visible when using Iceberg REST Catalog - which can use many blob storages
|
|||
|
(AWS S3, Azure Blob Storage, MinIO) as a warehouse.</p><p>Note that, for advanced users, it is possible to specify a custom catalog client implementation via <code>py-catalog-impl</code>
|
|||
|
configuration option - refer to <code>pyiceberg</code> documentation on details.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="glue-catalog--s3-warehouse">Glue catalog + S3 warehouse<a href="#glue-catalog--s3-warehouse" class="hash-link" aria-label="Direct link to Glue catalog + S3 warehouse" title="Direct link to Glue catalog + S3 warehouse"></a></h3><p>The minimal configuration for connecting to Glue catalog with S3 warehouse:</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">source</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"iceberg"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">config</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">env</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dev</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">my_catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"glue"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.region</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"us-west-2"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">region_name</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"us-west-2"</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Where <code>us-west-2</code> is the region from which you want to ingest. The above configuration will work assuming your pod or e
|
|||
|
you run your datahub CLI is already authenticated to AWS and has proper permissions granted (see below). If you need
|
|||
|
to specify secrets directly, use the following configuration as the template:</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">source</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"iceberg"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">config</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">env</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dev</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">demo</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"glue"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.region</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"us-west-2"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.access-key-id</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"${AWS_ACCESS_KEY_ID}"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.secret-access-key</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"${AWS_SECRET_ACCESS_KEY}"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.session-token</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"${AWS_SESSION_TOKEN}"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">aws_access_key_id</span
|
|||
|
It is possible (but not recommended due to security concerns) to provide those values in plaintext, directly in the recipe.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="glue-and-s3-permissions-required">Glue and S3 permissions required<a href="#glue-and-s3-permissions-required" class="hash-link" aria-label="Direct link to Glue and S3 permissions required" title="Direct link to Glue and S3 permissions required"></a></h4><p>The role used by the ingestor for ingesting metadata from Glue Iceberg Catalog and S3 warehouse is:</p><div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">"Version"</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"2012-10-17"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">"Statement"</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">"Effect"</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"Allow"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">"Action"</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">"glue:GetDatabases"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"glue:GetTables"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"glue:GetTable"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">"Resource"</span><span class="token operator" style="color:rgb(137, 221, 255)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"*"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span clas
|
|||
|
MinIO listens on port <code>9000</code> at <code>minio-host</code>. The <code>uri</code> parameter points at Iceberg REST Catalog (IRC) endpoint (in this case <code>iceberg-catalog:8181</code>).</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">source</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"iceberg"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">config</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">env</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dev</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">demo</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"rest"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">uri</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"http://iceberg-catalog:8181"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.access-key-id</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"${AWS_ACCESS_KEY_ID}"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.secret-access-key</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"${AWS_SECRET_ACCESS_KEY}"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.region</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"eu-east-1"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.endpoint</span><span class="t
|
|||
|
see <a href="https://py.iceberg.apache.org/configuration/#rest-catalog" target="_blank" rel="noopener noreferrer">https://py.iceberg.apache.org/configuration/#rest-catalog</a> for details. Moreover, the assumption here is that the
|
|||
|
environment (i.e. pod) is already authenticated to perform actions against AWS S3.</p><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">source</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"iceberg"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">config</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">env</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dev</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">demo</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"rest"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">uri</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"http://iceberg-catalog-uri"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">token</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"token-value"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">s3.region</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"us-west-2"</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="special-rest-connection-parameters-for-resiliency">Special REST conn
|
|||
|
DataHub, allowing to inject connection resiliency parameters to the REST connection made by the ingestor. <code>connection</code>
|
|||
|
allows for 2 parameters:</p><ul><li><code>timeout</code> is provided as amount of seconds, it needs to be whole number (or <code>null</code> to turn it off)</li><li><code>retry</code> is a complex object representing parameters used to create <a href="https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry" target="_blank" rel="noopener noreferrer">urllib3 Retry object</a>.
|
|||
|
There are many possible parameters, most important would be <code>total</code> (total retries) and <code>backoff_factor</code>. See the linked docs
|
|||
|
for the details.</li></ul><div class="language-yaml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-yaml codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token key atrule">source</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"iceberg"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">config</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">env</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> dev</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">catalog</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">demo</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">type</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"rest"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">uri</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">"http://iceberg-catalog-uri"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">connection</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">retry</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">backoff_factor</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">0.5</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">total</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token key atrule">timeout</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number"
|
|||
|
exceptions appearing when increasing <code>processing_threads</code> configuration parameter, try to increase limit of open
|
|||
|
files (e.g. using <code>ulimit</code> in Linux).</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="datahub-iceberg-rest-catalog">DataHub Iceberg REST Catalog<a href="#datahub-iceberg-rest-catalog" class="hash-link" aria-label="Direct link to DataHub Iceberg REST Catalog" title="Direct link to DataHub Iceberg REST Catalog"></a></h2><p>DataHub also implements the Iceberg REST Catalog. See the <a href="/docs/iceberg-catalog">Iceberg Catalog documentation</a> for more details.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="code-coordinates">Code Coordinates<a href="#code-coordinates" class="hash-link" aria-label="Direct link to Code Coordinates" title="Direct link to Code Coordinates"></a></h3><ul><li>Class Name: <code>datahub.ingestion.source.iceberg.iceberg.IcebergSource</code></li><li>Browse on <a href="https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py" target="_blank" rel="noopener noreferrer">GitHub</a></li></ul><h2>Questions</h2><p>If you've got any questions on configuring ingestion for Iceberg, feel free to ping us on <a href="https://datahub.com/slack" target="_blank" rel="noopener noreferrer">our Slack</a>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="slackUtm_uoBr"><div class="slackUtm_uoBr"><hr>Need more help? Join the conversation in <a href="https://datahub.com/slack?utm_source=docs&utm_medium=footer&utm_campaign=docs_footer&utm_content=docs/generated/ingestion/sources/iceberg">Slack!</a></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/iceberg.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_VsjB"></div></div></footer><div class="feedbackWrapper_mUHF"><div class="feedbackWidget_PX4d"><div class="feedbackButtons_wn3V"><strong>Is this page helpful?</strong><div><button class="feedbackButton_UgQs"><span role="img" aria-label="like" class="anticon anticon-like"><svg viewBox="64 64 896 896" focusable="false" data-icon="like" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 533.7c16.8-22.2 26.1-49.4 26.1-77.7 0-44.9-25.1-87.4-65.5-111.1a67.67 67.67 0 00-34.3-9.3H572.4l6-122.9c1.4-29.7-9.1-57.9-29.5-79.4A106.62 106.62 0 00471 99.9c-52 0-98 35-111.8 85.1l-85.9 311H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h601.3c9.2 0 18.2-1.8 26.5-5.4 47.6-20.3 78.3-66.8 78.3-118.4 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7-.2-12.6-2-25.1-5.6-37.1zM184 852V568h81v284h-81zm636.4-353l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 22.4-13.2 42.6-33.6 51.8H329V564.8l99.5-360.5a44.1 44.1 0 0142.2-32.3c7.6 0 15.1 2.2 21.1 6.7 9.9 7.4 15.2 18.6 14.6 30.5l-9.6 198.4h314.4C829 418.5 840 436.9 840 456c0 16.5-7.2 32.1-19.6 43z"></path></svg></span></button><button class="feedbackButton_UgQs"><span role="img" aria-label="dislike" class="anticon anticon-dislike"><svg viewBox="64 64 896 896" focusable="false" data-icon="dislike" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 490.3c3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-51.6-30.7-98.1-78.3-118.4a66.1 66.1 0 00-26.5-5.4H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h129.3l85.8 310.8C372.9 889 418.9 924 470.9 924c29.7 0 57.4-11.8 77.9-33.4 20.5-21.5 31-49.7 29.5-79.4l-6-122.9h239.9c12.1 0 23.9-3.2 34.3-9.3 40.4-23.5 65.5-66.1 65.5-111 0-28.3-9.3-55.5-26.1-
|
|||
|
<script src="/assets/js/runtime~main.2d61b452.js"></script>
|
|||
|
<script src="/assets/js/main.9d79f7e2.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|