2025-11-20 14:25:26 +00:00

96 lines
110 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-current plugin-docs plugin-id-default docs-doc-id-docs/lineage/airflow" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.3">
<title data-rh="true">Airflow Integration | DataHub</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.datahub.com/docs/lineage/airflow"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Airflow Integration | DataHub"><meta data-rh="true" name="description" content="If you&#x27;re looking to schedule DataHub ingestion using Airflow, see the guide on scheduling ingestion with Airflow."><meta data-rh="true" property="og:description" content="If you&#x27;re looking to schedule DataHub ingestion using Airflow, see the guide on scheduling ingestion with Airflow."><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.datahub.com/docs/lineage/airflow"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/lineage/airflow" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.datahub.com/docs/lineage/airflow" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://RK0UG797F3-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/learn/rss.xml" title="DataHub RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/learn/atom.xml" title="DataHub Atom Feed">
<link rel="preconnect" href="https://www.google-analytics.com">
<link rel="preconnect" href="https://www.googletagmanager.com">
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PKGVLETT4C"></script>
<script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-PKGVLETT4C",{})</script>
<link rel="preconnect" href="https://www.googletagmanager.com">
<script>window.dataLayer=window.dataLayer||[]</script>
<script>!function(e,t,a,n,g){e[n]=e[n]||[],e[n].push({"gtm.start":(new Date).getTime(),event:"gtm.js"});var m=t.getElementsByTagName(a)[0],r=t.createElement(a);r.async=!0,r.src="https://www.googletagmanager.com/gtm.js?id=GTM-5M8T9HNN",m.parentNode.insertBefore(r,m)}(window,document,"script","dataLayer")</script>
<link rel="search" type="application/opensearchdescription+xml" title="DataHub" href="/opensearch.xml">
<meta httpequiv="Content-Security-Policy" content="frame-ancestors &#39;self&#39; https://*.acryl.io https://acryldata.io http://localhost:*">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap">
<script src="https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38" async defer="defer"></script>
<script src="/scripts/rb2b.js" async defer="defer"></script>
<script src="https://app.revenuehero.io/scheduler.min.js"></script>
<script src="https://tag.clearbitscripts.com/v1/pk_2e321cabe30432a5c44c0424781aa35f/tags.js" referrerpolicy="strict-origin-when-cross-origin"></script>
<script src="/scripts/reo.js"></script>
<script id="runllm-widget-script" type="module" src="https://widget.runllm.com" crossorigin="true" runllm-name="DataHub" runllm-assistant-id="81" runllm-position="BOTTOM_RIGHT" runllm-keyboard-shortcut="Mod+j" runllm-preset="docusaurus" runllm-theme-color="#1890FF" runllm-brand-logo="https://docs.datahub.com/img/datahub-logo-color-mark.svg" runllm-community-url="https://datahub.com/slack" runllm-community-type="slack" runllm-disable-ask-a-person="true" async></script><link rel="stylesheet" href="/assets/css/styles.1e47f27f.css">
<link rel="preload" href="/assets/js/runtime~main.7bcf907e.js" as="script">
<link rel="preload" href="/assets/js/main.99352010.js" as="script">
</head>
<body class="navigation-with-keyboard">
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5M8T9HNN" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:transparent;color:#ffffff" role="banner"><div class="content_knG7 announcementBarContent_xLdY"><div class="shimmer-banner"><p><strong>CONTEXT:</strong> The Future of Agentic AI is On Demand</p><a href="https://datahub.com/resources/context/?utm_term=docs" target="_blank" class="button"><div>Watch Now<span></span></div></a></div></div></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a href="https://datahub.com" target="_blank" rel="noopener noreferrer" class="navbar__brand"><div class="navbar__logo"><img src="/img/datahub-logo-color-light-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/datahub-logo-color-dark-horizontal.svg" alt="DataHub Logo" class="themedImage_ToTc themedImage--dark_i4oU"></div></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link versionNavItem_cbn8">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/lineage/airflow">Next</a></li><li><a class="dropdown__link" href="/docs/1.1.0/lineage/airflow">1.1.0</a></li><li><hr class="dropdown-separator" style="margin: 0.4rem;"></li><li><div class="dropdown__link"><b>Archived versions</b></div></li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/features">1.0.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-t9sv4w3gr-acryldata.vercel.app/docs/0.15.0/features">0.15.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-8jkm4uler-acryldata.vercel.app/docs/0.14.1/features">0.14.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app/docs/features">0.14.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-2uuxmgza2-acryldata.vercel.app/docs/features">0.12.1
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-irpoe2osc-acryldata.vercel.app/docs/features">0.11.0
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li><li>
<a class="dropdown__link" href="https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/features">0.10.5
<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
</a>
</li></ul></div></div><div class="navbar__items navbar__items--right"><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs">Docs</a><a class="navbar__item navbar__link" href="/integrations">Integrations</a><a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=header&amp;utm_campaign=docs_header" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
.slack-logo:hover {
opacity: 0.8;
}
</style>
<img class="slack-logo" src="https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg" , alt="slack" , height="20px" style="margin: 10px 0 0 0;">
</a><a href="https://github.com/datahub-project/datahub" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">
<style>
.github-logo:hover {
opacity: 0.8;
}
</style>
<img class="github-logo" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" , alt="slack" , height="20px" style="margin: 10px 0 0 0;">
</a><div class="searchBox_ZlJk"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Getting Started</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/features">What Is DataHub?</a><button aria-label="Toggle the collapsible sidebar category &#x27;What Is DataHub?&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/features">Features</a><button aria-label="Toggle the collapsible sidebar category &#x27;Features&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>DataHub Cloud</div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/managed-datahub-overview">DataHub Cloud vs. DataHub OSS</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/welcome-acryl">Getting Started with DataHub Cloud</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/upgrade_core_to_cloud">Upgrading from DataHub Core to Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication/guides/sso/initialize-oidc">Configure Single Sign-On</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/remote-executor/about">Remote Executor</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/datahub-api/entity-events-api">DataHub API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/slack/saas-slack-app">Slack</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/teams/saas-teams-app">Teams</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/operator-guide/setting-up-events-api-on-aws-eventbridge">Operator Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item saasOnly"><a class="menu__link" href="/docs/managed-datahub/change-proposals">Change Proposals</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/managed-datahub/chrome-extension">Cloud Chrome Extension</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item saasOnly"><a class="menu__link" href="/docs/managed-datahub/subscription-and-notification">Subscriptions &amp; Notifications</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/managed-datahub/release-notes/v_0_3_15">DataHub Cloud Release History</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Integrations</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/metadata-ingestion">Overview</a><button aria-label="Toggle the collapsible sidebar category &#x27;Overview&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/cli-ingestion">Quickstart Guides</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" aria-expanded="true" href="/docs/metadata-ingestion/source_overview">Sources</a><button aria-label="Toggle the collapsible sidebar category &#x27;Sources&#x27;" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/lineage/airflow">Airflow</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/lineage/dagster">Dagster</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/lineage/prefect">Prefect</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/metadata-integration/java/acryl-spark-lineage">Spark</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/metadata-ingestion/integration_docs/great-expectations">Great Expectations</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/metadata-integration/java/datahub-protobuf">Protobuf Schemas</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/abs">ABS Data Lake</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/athena">Athena</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/azure-ad">Azure AD</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/bigquery">BigQuery</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/business-glossary">Business Glossary</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/cassandra">Cassandra</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/clickhouse">ClickHouse</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/cockroachdb">CockroachDB</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/csv-enricher">CSV Enricher</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/databricks">Databricks</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/datahub">DataHub</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/datahubapply">DataHubApply</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/datahubdebug">DataHubDebug</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/datahubgc">DataHubGc</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/dbt">dbt</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/delta-lake">Delta Lake</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/demo-data">Demo Data</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/dremio">Dremio</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/druid">Druid</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/dynamodb">DynamoDB</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/elasticsearch">Elasticsearch</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/excel">Excel</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/feast">Feast</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/file-based-lineage">File Based Lineage</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/fivetran">Fivetran</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/glue">Glue</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/gcs">Google Cloud Storage</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/grafana">Grafana</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/hex">Hex</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/hive">Hive</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/hive-metastore">Hive Metastore</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/iceberg">Iceberg</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/json-schema">JSON Schemas</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/kafka">Kafka</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/kafka-connect">Kafka Connect</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/ldap">LDAP</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/looker">Looker</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mariadb">MariaDB</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/metabase">Metabase</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/metadata-file">Metadata File</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mssql">Microsoft SQL Server</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mlflow">MLflow</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mode">Mode</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mongodb">MongoDB</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/mysql">MySQL</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/neo4j">Neo4j</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/nifi">NiFi</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/okta">Okta</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/openapi">OpenAPI</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/oracle">Oracle</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/postgres">Postgres</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/powerbi">PowerBI</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/powerbi-report-server">PowerBI Report Server</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/preset">Preset</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/presto">Presto</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/pulsar">Pulsar</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/qlik-sense">Qlik Sense</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/redash">Redash</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/redshift">Redshift</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/s3">S3 / Local Files</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/sagemaker">SageMaker</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/salesforce">Salesforce</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/sac">SAP Analytics Cloud</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/hana">SAP HANA</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/sigma">Sigma</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/slack">Slack</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/snaplogic">SnapLogic</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/snowflake">Snowflake</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/sql-queries">SQL Queries</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/sqlalchemy">SQLAlchemy</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/superset">Superset</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/tableau">Tableau</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/teradata">Teradata</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/trino">Trino</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/vertexai">Vertex AI</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/generated/ingestion/sources/vertica">Vertica</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/schedule_docs/intro">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>API & SDKs</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="true" href="/docs/metadata-modeling/metadata-model">Open Source DataHub Metadata Standard</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/metadata-modeling/metadata-model">The Metadata Model</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/what/mxe">Core Metadata Events</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/generated/metamodel/entities/role">Entity Docs</a></div></li></ul></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/what-is-datahub/datahub-concepts">Concepts</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/metadata-standards">Metadata Standards</a><button aria-label="Toggle the collapsible sidebar category &#x27;Metadata Standards&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/api/datahub-apis">APIs and SDKs Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/api/graphql/overview">API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/metadata-ingestion/as-a-library">Python SDK</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/metadata-integration/java/as-a-library">Java SDK</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/cli">DataHub CLI</a><button aria-label="Toggle the collapsible sidebar category &#x27;DataHub CLI&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/act-on-metadata">DataHub Actions</a><button aria-label="Toggle the collapsible sidebar category &#x27;DataHub Actions&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/api/tutorials/datasets">Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Admin</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authentication">Authentication</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/authorization">Authorization</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/how/delete-metadata">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Deployment</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/deployment-guides">Deployment Guides</a><button aria-label="Toggle the collapsible sidebar category &#x27;Deployment Guides&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/deploy/confluent-cloud">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Developers</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/architecture/architecture">Architecture</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/developers">Developing on DataHub</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/advanced/mcp-mcl">Advanced Guides</a></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menuHtmlItem_M9Kj menu__list-item"><div>Community</div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" aria-expanded="false" href="/docs/category/community">Community</a><button aria-label="Toggle the collapsible sidebar category &#x27;Community&#x27;" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/releases">Release History</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item"><a class="breadcrumbs__link" itemprop="item" href="/docs/metadata-ingestion/source_overview"><span itemprop="name">Sources</span></a><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Airflow</span><meta itemprop="position" content="2"></li></ul></nav><span class="theme-doc-version-badge badge badge--secondary">Version: Next</span><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><h1>Airflow Integration</h1><div class="theme-admonition theme-admonition-note alert alert--secondary admonition_LlT9"><div class="admonitionHeading_tbUL"><span class="admonitionIcon_kALy"><svg viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</div><div class="admonitionContent_S0QG"><p>If you&#x27;re looking to schedule DataHub ingestion using Airflow, see the guide on <a href="/docs/metadata-ingestion/schedule_docs/airflow">scheduling ingestion with Airflow</a>.</p></div></div><p>The DataHub Airflow plugin supports:</p><ul><li>Automatic column-level lineage extraction from various operators e.g. SQL operators (including <code>MySqlOperator</code>, <code>PostgresOperator</code>, <code>SnowflakeOperator</code>, <code>BigQueryInsertJobOperator</code>, and more), <code>S3FileTransformOperator</code>, and more.</li><li>Airflow DAG and tasks, including properties, ownership, and tags.</li><li>Task run information, including task successes and failures.</li><li>Manual lineage annotations using <code>inlets</code> and <code>outlets</code> on Airflow operators.</li></ul><p>The plugin requires Airflow 2.7+ and Python 3.9+. If you&#x27;re using Airflow older than 2.7, it&#x27;s possible to use the plugin with older versions of <code>acryl-datahub-airflow-plugin</code>. See the <a href="#compatibility">compatibility section</a> for more details.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="datahub-plugin-setup">DataHub Plugin Setup<a href="#datahub-plugin-setup" class="hash-link" aria-label="Direct link to DataHub Plugin Setup" title="Direct link to DataHub Plugin Setup"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="installation">Installation<a href="#installation" class="hash-link" aria-label="Direct link to Installation" title="Direct link to Installation"></a></h3><p>The plugin requires Airflow 2.7+ and Python 3.9+. If you don&#x27;t meet these requirements, see the <a href="#compatibility">compatibility section</a> for other options.</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip </span><span class="token function" style="color:rgb(130, 170, 255)">install</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;acryl-datahub-airflow-plugin&gt;=1.1.0.4&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="configuration">Configuration<a href="#configuration" class="hash-link" aria-label="Direct link to Configuration" title="Direct link to Configuration"></a></h3><p>Set up a DataHub connection in Airflow, either via command line or the Airflow UI.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="command-line">Command Line<a href="#command-line" class="hash-link" aria-label="Direct link to Command Line" title="Direct link to Command Line"></a></h4><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">airflow connections </span><span class="token function" style="color:rgb(130, 170, 255)">add</span><span class="token plain"> --conn-type </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;datahub-rest&#x27;</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;datahub_rest_default&#x27;</span><span class="token plain"> --conn-host </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;http://datahub-gms:8080&#x27;</span><span class="token plain"> --conn-password </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;&lt;optional datahub auth token&gt;&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you are using DataHub Cloud then please use <code>https://YOUR_PREFIX.acryl.io/gms</code> as the <code>--conn-host</code> parameter.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="airflow-ui">Airflow UI<a href="#airflow-ui" class="hash-link" aria-label="Direct link to Airflow UI" title="Direct link to Airflow UI"></a></h4><p>On the Airflow UI, go to Admin -&gt; Connections and click the &quot;+&quot; symbol to create a new connection. Select &quot;DataHub REST Server&quot; from the dropdown for &quot;Connection Type&quot; and enter the appropriate values.</p><p align="center"><img loading="lazy" width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/airflow/plugin_connection_setup.png" class="img_ev3q"></p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="optional-configurations">Optional Configurations<a href="#optional-configurations" class="hash-link" aria-label="Direct link to Optional Configurations" title="Direct link to Optional Configurations"></a></h4><p>No additional configuration is required to use the plugin. However, there are some optional configuration parameters that can be set in the <code>airflow.cfg</code> file.</p><div class="language-ini codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockTitle_Ktv7">airflow.cfg</div><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-ini codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token section punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token section section-name selector" style="color:rgb(199, 146, 234)">datahub</span><span class="token section punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Optional - additional config here.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token key attr-name" style="color:rgb(255, 203, 107)">enabled</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">=</span><span class="token plain"> </span><span class="token value attr-value">True # default</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><table><thead><tr><th>Name</th><th>Default value</th><th>Description</th></tr></thead><tbody><tr><td>enabled</td><td>true</td><td>If the plugin should be enabled.</td></tr><tr><td>conn_id</td><td>datahub_rest_default</td><td>The name of the datahub rest connection.</td></tr><tr><td>cluster</td><td>prod</td><td>name of the airflow cluster, this is equivalent to the <code>env</code> of the instance</td></tr><tr><td>platform_instance</td><td>None</td><td>The instance of the platform that all assets produced by this plugin belong to. It is optional.</td></tr><tr><td>capture_ownership_info</td><td>true</td><td>Extract DAG ownership.</td></tr><tr><td>capture_ownership_as_group</td><td>false</td><td>When extracting DAG ownership, treat DAG owner as a group rather than a user</td></tr><tr><td>capture_tags_info</td><td>true</td><td>Extract DAG tags.</td></tr><tr><td>capture_executions</td><td>true</td><td>Extract task runs and success/failure statuses. This will show up in DataHub &quot;Runs&quot; tab.</td></tr><tr><td>materialize_iolets</td><td>true</td><td>Create or un-soft-delete all entities referenced in lineage.</td></tr><tr><td>enable_extractors</td><td>true</td><td>Enable automatic lineage extraction.</td></tr><tr><td>disable_openlineage_plugin</td><td>true</td><td>Disable the OpenLineage plugin to avoid duplicative processing.</td></tr><tr><td>log_level</td><td><em>no change</em></td><td>[debug]<!-- --> Set the log level for the plugin.</td></tr><tr><td>debug_emitter</td><td>false</td><td>[debug]<!-- --> If true, the plugin will log the emitted events.</td></tr><tr><td>dag_filter_str</td><td>{ &quot;allow&quot;: <!-- -->[&quot;.*&quot;]<!-- --> }</td><td>AllowDenyPattern value in form of JSON string to filter the DAGs from running.</td></tr><tr><td>enable_datajob_lineage</td><td>true</td><td>If true, the plugin will emit input/output lineage for DataJobs.</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_LWe7" id="automatic-lineage-extraction">Automatic lineage extraction<a href="#automatic-lineage-extraction" class="hash-link" aria-label="Direct link to Automatic lineage extraction" title="Direct link to Automatic lineage extraction"></a></h2><p>To automatically extract lineage information, the plugin builds on top of Airflow&#x27;s built-in <a href="https://openlineage.io/docs/integrations/airflow/default-extractors" target="_blank" rel="noopener noreferrer">OpenLineage extractors</a>.
As such, we support a superset of the default operators that Airflow/OpenLineage supports.</p><p>The SQL-related extractors have been updated to use <a href="/docs/lineage/sql_parsing">DataHub&#x27;s SQL lineage parser</a>, which is more robust than the built-in one and uses DataHub&#x27;s metadata information to generate column-level lineage.</p><p>Supported operators:</p><ul><li><code>SQLExecuteQueryOperator</code>, including any subclasses. Note that in newer versions of Airflow (generally Airflow 2.5+), most SQL operators inherit from this class.</li><li><code>AthenaOperator</code> and <code>AWSAthenaOperator</code></li><li><code>BigQueryOperator</code> and <code>BigQueryExecuteQueryOperator</code></li><li><code>BigQueryInsertJobOperator</code> (incubating)</li><li><code>MySqlOperator</code></li><li><code>PostgresOperator</code></li><li><code>RedshiftSQLOperator</code></li><li><code>SnowflakeOperator</code> and <code>SnowflakeOperatorAsync</code></li><li><code>SqliteOperator</code></li><li><code>TrinoOperator</code></li></ul><p>Known limitations:</p><ul><li>We do not fully support operators that run multiple SQL statements at once. In these cases, we&#x27;ll only capture lineage from the first SQL statement.</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="manual-lineage-annotation">Manual Lineage Annotation<a href="#manual-lineage-annotation" class="hash-link" aria-label="Direct link to Manual Lineage Annotation" title="Direct link to Manual Lineage Annotation"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="using-inlets-and-outlets">Using <code>inlets</code> and <code>outlets</code><a href="#using-inlets-and-outlets" class="hash-link" aria-label="Direct link to using-inlets-and-outlets" title="Direct link to using-inlets-and-outlets"></a></h3><p>You can manually annotate lineage by setting <code>inlets</code> and <code>outlets</code> on your Airflow operators. This is useful if you&#x27;re using an operator that doesn&#x27;t support automatic lineage extraction, or if you want to override the automatic lineage extraction.</p><p>We have a few code samples that demonstrate how to use <code>inlets</code> and <code>outlets</code>:</p><ul><li><a href="https://github.com/datahub-project/datahub/blob/master/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py" target="_blank" rel="noopener noreferrer"><code>lineage_backend_demo.py</code></a></li><li><a href="https://github.com/datahub-project/datahub/blob/master/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py" target="_blank" rel="noopener noreferrer"><code>lineage_backend_taskflow_demo.py</code></a> - uses the <a href="https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html" target="_blank" rel="noopener noreferrer">TaskFlow API</a></li></ul><p>For more information, take a look at the <a href="https://airflow.apache.org/docs/apache-airflow/stable/lineage.html" target="_blank" rel="noopener noreferrer">Airflow lineage docs</a>.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="custom-operators">Custom Operators<a href="#custom-operators" class="hash-link" aria-label="Direct link to Custom Operators" title="Direct link to Custom Operators"></a></h3><p>If you have created a <a href="https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html" target="_blank" rel="noopener noreferrer">custom Airflow operator</a> that inherits from the BaseOperator class,
when overriding the <code>execute</code> function, set inlets and outlets via <code>context[&#x27;ti&#x27;].task.inlets</code> and <code>context[&#x27;ti&#x27;].task.outlets</code>.
The DataHub Airflow plugin will then pick up those inlets and outlets after the task runs.</p><p>You can only set table-level lineage using inlets and outlets. For column-level lineage, you need to write a custom extractor for your custom operator.</p><div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">class</span><span class="token plain"> </span><span class="token class-name" style="color:rgb(255, 203, 107)">DbtOperator</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">BaseOperator</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">execute</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> context</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># do something</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> inlets</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> outlets </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">_get_lineage</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># inlets/outlets are lists of either datahub_airflow_plugin.entities.Dataset or datahub_airflow_plugin.entities.Urn</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> context</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;ti&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">task</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">inlets </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">inlets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> context</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;ti&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">task</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">outlets </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">outlets</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">_get_lineage</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Do some processing to get inlets/outlets</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">return</span><span class="token plain"> inlets</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> outlets</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you override the <code>pre_execute</code> and <code>post_execute</code> function, ensure they include the <code>@prepare_lineage</code> and <code>@apply_lineage</code> decorators respectively. Reference the <a href="https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage" target="_blank" rel="noopener noreferrer">Airflow docs</a> for more details.</p><p>See example implementation of a custom operator using SQL parser to capture table level lineage <a href="https://github.com/datahub-project/datahub/blob/master/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/custom_operator_sql_parsing.py" target="_blank" rel="noopener noreferrer">here</a></p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="custom-extractors">Custom Extractors<a href="#custom-extractors" class="hash-link" aria-label="Direct link to Custom Extractors" title="Direct link to Custom Extractors"></a></h3><p>You can also create a custom extractor to extract lineage from any operator. This is useful if you&#x27;re using a built-in Airflow operator for which we don&#x27;t support automatic lineage extraction.</p><p>See this <a href="https://github.com/datahub-project/datahub/pull/10452" target="_blank" rel="noopener noreferrer">example PR</a> which adds a custom extractor for the <code>BigQueryInsertJobOperator</code> operator.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="cleanup-obsolete-pipelines-and-tasks-from-datahub">Cleanup obsolete pipelines and tasks from Datahub<a href="#cleanup-obsolete-pipelines-and-tasks-from-datahub" class="hash-link" aria-label="Direct link to Cleanup obsolete pipelines and tasks from Datahub" title="Direct link to Cleanup obsolete pipelines and tasks from Datahub"></a></h2><p>There might be a case where the DAGs are removed from the Airflow but the corresponding pipelines and tasks are still there in the Datahub, let&#x27;s call such pipelines ans tasks, <code>obsolete pipelines and tasks</code></p><p>Following are the steps to cleanup them from the datahub:</p><ul><li>create a DAG named <code>Datahub_Cleanup</code>, i.e.</li></ul><div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> datetime </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> datetime</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> airflow </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> DAG</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> airflow</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">operators</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">bash </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> BashOperator</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> datahub_airflow_plugin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">entities </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> Dataset</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> Urn</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">with</span><span class="token plain"> DAG</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;Datahub_Cleanup&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> start_date</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">datetime</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">2024</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> schedule_interval</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> catchup</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token boolean" style="color:rgb(255, 88, 116)">False</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">as</span><span class="token plain"> dag</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> task </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> BashOperator</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> task_id</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;cleanup_obsolete_data&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> dag</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">dag</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> bash_command</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;echo &#x27;cleaning up the obsolete data from datahub&#x27;&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><ul><li>ingest this DAG, and it will remove all the obsolete pipelines and tasks from the Datahub based on the <code>cluster</code> value set in the <code>airflow.cfg</code></li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="get-all-datajobs-associated-with-a-dataflow">Get all dataJobs associated with a dataFlow<a href="#get-all-datajobs-associated-with-a-dataflow" class="hash-link" aria-label="Direct link to Get all dataJobs associated with a dataFlow" title="Direct link to Get all dataJobs associated with a dataFlow"></a></h2><p>If you are looking to find all tasks (aka DataJobs) that belong to a specific pipeline (aka DataFlow), you can use the following GraphQL query:</p><div class="language-graphql codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-graphql codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token keyword" style="font-style:italic">query</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property-query">dataFlow</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token attr-name" style="color:rgb(255, 203, 107)">urn</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;urn:li:dataFlow:(airflow,db_etl,prod)&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">childJobs</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token property-query">relationships</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">input</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">types</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;IsPartOf&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">direction</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token constant" style="color:rgb(130, 170, 255)">INCOMING</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">start</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token attr-name" style="color:rgb(255, 203, 107)">count</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">100</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">total</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token object">relationships</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token object">entity</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">...</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">on</span><span class="token plain"> </span><span class="token class-name" style="color:rgb(255, 203, 107)">DataJob</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token property">urn</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="emit-lineage-directly">Emit Lineage Directly<a href="#emit-lineage-directly" class="hash-link" aria-label="Direct link to Emit Lineage Directly" title="Direct link to Emit Lineage Directly"></a></h2><p>If you can&#x27;t use the plugin or annotate inlets/outlets, you can also emit lineage using the <code>DatahubEmitterOperator</code>.</p><p>Reference <a href="https://github.com/datahub-project/datahub/blob/master/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py" target="_blank" rel="noopener noreferrer"><code>lineage_emission_dag.py</code></a> for a full example.</p><p>In order to use this example, you must first configure the Datahub hook. Like in ingestion, we support a Datahub REST hook and a Kafka-based hook. See the plugin configuration for examples.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="debugging">Debugging<a href="#debugging" class="hash-link" aria-label="Direct link to Debugging" title="Direct link to Debugging"></a></h2><h3 class="anchor anchorWithStickyNavbar_LWe7" id="missing-lineage">Missing lineage<a href="#missing-lineage" class="hash-link" aria-label="Direct link to Missing lineage" title="Direct link to Missing lineage"></a></h3><p>If you&#x27;re not seeing lineage in DataHub, check the following:</p><ul><li>Validate that the plugin is loaded in Airflow. Go to Admin -&gt; Plugins and check that the DataHub plugin is listed.</li><li>It should also print a log line like <code>INFO [datahub_airflow_plugin.datahub_listener] DataHub plugin using DataHubRestEmitter: configured to talk to &lt;datahub_url&gt;</code> during Airflow startup, and the <code>airflow plugins</code> command should list <code>datahub_plugin</code> with a listener enabled.</li><li>If using the plugin&#x27;s automatic lineage, ensure that the <code>enable_extractors</code> config is set to true and that automatic lineage is supported for your operator.</li><li>If using manual lineage annotation, ensure that you&#x27;re using the <code>datahub_airflow_plugin.entities.Dataset</code> or <code>datahub_airflow_plugin.entities.Urn</code> classes for your inlets and outlets.</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="incorrect-urls">Incorrect URLs<a href="#incorrect-urls" class="hash-link" aria-label="Direct link to Incorrect URLs" title="Direct link to Incorrect URLs"></a></h3><p>If your URLs aren&#x27;t being generated correctly (usually they&#x27;ll start with <code>http://localhost:8080</code> instead of the correct hostname), you may need to set the webserver <code>base_url</code> config.</p><div class="language-ini codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockTitle_Ktv7">airflow.cfg</div><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-ini codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token section punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token section section-name selector" style="color:rgb(199, 146, 234)">webserver</span><span class="token section punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token key attr-name" style="color:rgb(255, 203, 107)">base_url</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">=</span><span class="token plain"> </span><span class="token value attr-value">http://airflow.mycorp.example.com</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="typeerror--missing-3-required-positional-arguments">TypeError ... missing 3 required positional arguments<a href="#typeerror--missing-3-required-positional-arguments" class="hash-link" aria-label="Direct link to TypeError ... missing 3 required positional arguments" title="Direct link to TypeError ... missing 3 required positional arguments"></a></h3><p>If you see errors like the following:</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">ERROR - on_task_instance_success</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> missing </span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token plain"> required positional arguments: </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;previous_state&#x27;</span><span class="token plain">, </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;task_instance&#x27;</span><span class="token plain">, and </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;session&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Traceback </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">most recent call last</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain">:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> File </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;/home/airflow/.local/lib/python3.8/site-packages/datahub_airflow_plugin/datahub_listener.py&quot;</span><span class="token plain">, line </span><span class="token number" style="color:rgb(247, 140, 108)">124</span><span class="token plain">, </span><span class="token keyword" style="font-style:italic">in</span><span class="token plain"> wrapper</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> f</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">*args, **kwargs</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">TypeError: on_task_instance_success</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> missing </span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token plain"> required positional arguments: </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;previous_state&#x27;</span><span class="token plain">, </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;task_instance&#x27;</span><span class="token plain">, and </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;session&#x27;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>The solution is to upgrade <code>acryl-datahub-airflow-plugin&gt;=0.12.0.4</code> or upgrade <code>pluggy&gt;=1.2.0</code>. See this <a href="https://github.com/datahub-project/datahub/pull/9365" target="_blank" rel="noopener noreferrer">PR</a> for details.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="scheduler-stalling">Scheduler stalling<a href="#scheduler-stalling" class="hash-link" aria-label="Direct link to Scheduler stalling" title="Direct link to Scheduler stalling"></a></h3><p>For extremely large Airflow deployments with thousands of tasks, you may see issues where the plugin interferes with the performance of the Airflow scheduler. In those cases, you can set the <code>DATAHUB_AIRFLOW_PLUGIN_RUN_IN_THREAD_TIMEOUT=0</code> environment variable. This makes the DataHub plugin run fully in background threads, but can cause us to miss some metadata if the scheduler shuts down soon after processing a task.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="disabling-the-datahub-plugin">Disabling the DataHub Plugin<a href="#disabling-the-datahub-plugin" class="hash-link" aria-label="Direct link to Disabling the DataHub Plugin" title="Direct link to Disabling the DataHub Plugin"></a></h3><p>There are two ways to disable the DataHub Plugin:</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="1-disable-via-configuration">1. Disable via Configuration<a href="#1-disable-via-configuration" class="hash-link" aria-label="Direct link to 1. Disable via Configuration" title="Direct link to 1. Disable via Configuration"></a></h4><p>Set the <code>datahub.enabled</code> configuration property to <code>False</code> in the <code>airflow.cfg</code> file and restart the Airflow environment to reload the configuration and disable the plugin.</p><div class="language-ini codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockTitle_Ktv7">airflow.cfg</div><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-ini codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token section punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token section section-name selector" style="color:rgb(199, 146, 234)">datahub</span><span class="token section punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token key attr-name" style="color:rgb(255, 203, 107)">enabled</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">=</span><span class="token plain"> </span><span class="token value attr-value">False</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="2-disable-via-airflow-variable-kill-switch">2. Disable via Airflow Variable (Kill-Switch)<a href="#2-disable-via-airflow-variable-kill-switch" class="hash-link" aria-label="Direct link to 2. Disable via Airflow Variable (Kill-Switch)" title="Direct link to 2. Disable via Airflow Variable (Kill-Switch)"></a></h4><p>If a restart is not possible and you need a faster way to disable the plugin, you can use the kill-switch. Create and set the <code>datahub_airflow_plugin_disable_listener</code> Airflow variable to <code>true</code>. This ensures that the listener won&#x27;t process anything.</p><h4 class="anchor anchorWithStickyNavbar_LWe7" id="command-line-1">Command Line<a href="#command-line-1" class="hash-link" aria-label="Direct link to Command Line" title="Direct link to Command Line"></a></h4><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#bfc7d5"><span class="token plain">airflow variables </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">set</span><span class="token plain"> datahub_airflow_plugin_disable_listener </span><span class="token boolean" style="color:rgb(255, 88, 116)">true</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h4 class="anchor anchorWithStickyNavbar_LWe7" id="airflow-ui-1">Airflow UI<a href="#airflow-ui-1" class="hash-link" aria-label="Direct link to Airflow UI" title="Direct link to Airflow UI"></a></h4><ol><li>Go to Admin -&gt; Variables.</li><li>Click the &quot;+&quot; symbol to create a new variable.</li><li>Set the key to <code>datahub_airflow_plugin_disable_listener</code> and the value to <code>true</code>.</li></ol><p>This will immediately disable the plugin without requiring a restart.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="compatibility">Compatibility<a href="#compatibility" class="hash-link" aria-label="Direct link to Compatibility" title="Direct link to Compatibility"></a></h2><p>We try to support Airflow releases for ~2 years after their release. This is a best-effort guarantee - it&#x27;s not always possible due to dependency / security issues cropping up in older versions.</p><p>We no longer officially support Airflow &lt;2.7. However, you can use older versions of <code>acryl-datahub-airflow-plugin</code> with older versions of Airflow.
We previously had two implementations of the plugin - v1 and v2. The v2 plugin is now the default, and the v1 plugin has since been removed. The v1 plugin had many limitations, chiefly that it does not support automatic lineage extraction. Docs for the v1 plugin can be accessed in our <a href="https://docs-website-r5eolot5n-acryldata.vercel.app/docs/lineage/airflow#datahub-plugin-v1" target="_blank" rel="noopener noreferrer">docs archive</a>.</p><p>The first two options support Python 3.7+, and latter three require Python 3.8+.</p><ul><li>Airflow 1.10.x, use acryl-datahub-airflow-plugin &lt;= 0.9.1.0 (v1 plugin).</li><li>Airflow 2.0.x, use acryl-datahub-airflow-plugin &lt;= 0.11.0.1 (v1 plugin).</li><li>Airflow 2.2.x, use acryl-datahub-airflow-plugin &lt;= 0.14.1.5 (v2 plugin).</li><li>Airflow 2.3 - 2.4.3, use acryl-datahub-airflow-plugin &lt;= 1.0.0 (v2 plugin).</li><li>Airflow 2.5 and 2.6, use acryl-datahub-airflow-plugin &lt;= 1.1.0.4 (v2 plugin).</li></ul><p>DataHub also previously supported an Airflow <a href="https://airflow.apache.org/docs/apache-airflow/2.2.0/lineage.html#lineage-backend" target="_blank" rel="noopener noreferrer">lineage backend</a> implementation. The lineage backend functionality was pretty limited - it did not support automatic lineage extraction, did not capture task failures, and did not work in AWS MWAA - and so it has been removed from the codebase. The <a href="https://docs-website-1wmaehubl-acryldata.vercel.app/docs/lineage/airflow/#using-datahubs-airflow-lineage-backend-deprecated" target="_blank" rel="noopener noreferrer">documentation for the lineage backend</a> has been archived.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="additional-references">Additional references<a href="#additional-references" class="hash-link" aria-label="Direct link to Additional references" title="Direct link to Additional references"></a></h2><p>Related Datahub videos:</p><ul><li><a href="https://www.youtube.com/watch?v=3wiaqhb8UR0" target="_blank" rel="noopener noreferrer">Airflow Lineage</a></li><li><a href="https://www.youtube.com/watch?v=YpUOqDU5ZYg" target="_blank" rel="noopener noreferrer">Airflow Run History in DataHub</a></li></ul></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="slackUtm_uoBr"><div class="slackUtm_uoBr"><hr>Need more help? Join the conversation in <a href="https://datahub.com/slack?utm_source=docs&amp;utm_medium=footer&amp;utm_campaign=docs_footer&amp;utm_content=docs/lineage/airflow">Slack!</a></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/datahub-project/datahub/blob/master/docs/lineage/airflow.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_VsjB"></div></div></footer><div class="feedbackWrapper_mUHF"><div class="feedbackWidget_PX4d"><div class="feedbackButtons_wn3V"><strong>Is this page helpful?</strong><div><button class="feedbackButton_UgQs"><span role="img" aria-label="like" class="anticon anticon-like"><svg viewBox="64 64 896 896" focusable="false" data-icon="like" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 533.7c16.8-22.2 26.1-49.4 26.1-77.7 0-44.9-25.1-87.4-65.5-111.1a67.67 67.67 0 00-34.3-9.3H572.4l6-122.9c1.4-29.7-9.1-57.9-29.5-79.4A106.62 106.62 0 00471 99.9c-52 0-98 35-111.8 85.1l-85.9 311H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h601.3c9.2 0 18.2-1.8 26.5-5.4 47.6-20.3 78.3-66.8 78.3-118.4 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7 0-12.6-1.8-25-5.4-37 16.8-22.2 26.1-49.4 26.1-77.7-.2-12.6-2-25.1-5.6-37.1zM184 852V568h81v284h-81zm636.4-353l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 16.5-7.2 32.2-19.6 43l-21.9 19 13.9 25.4a56.2 56.2 0 016.9 27.3c0 22.4-13.2 42.6-33.6 51.8H329V564.8l99.5-360.5a44.1 44.1 0 0142.2-32.3c7.6 0 15.1 2.2 21.1 6.7 9.9 7.4 15.2 18.6 14.6 30.5l-9.6 198.4h314.4C829 418.5 840 436.9 840 456c0 16.5-7.2 32.1-19.6 43z"></path></svg></span></button><button class="feedbackButton_UgQs"><span role="img" aria-label="dislike" class="anticon anticon-dislike"><svg viewBox="64 64 896 896" focusable="false" data-icon="dislike" width="1em" height="1em" fill="currentColor" aria-hidden="true"><path d="M885.9 490.3c3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-28.3-9.3-55.5-26.1-77.7 3.6-12 5.4-24.4 5.4-37 0-51.6-30.7-98.1-78.3-118.4a66.1 66.1 0 00-26.5-5.4H144c-17.7 0-32 14.3-32 32v364c0 17.7 14.3 32 32 32h129.3l85.8 310.8C372.9 889 418.9 924 470.9 924c29.7 0 57.4-11.8 77.9-33.4 20.5-21.5 31-49.7 29.5-79.4l-6-122.9h239.9c12.1 0 23.9-3.2 34.3-9.3 40.4-23.5 65.5-66.1 65.5-111 0-28.3-9.3-55.5-26.1-77.7zM184 456V172h81v284h-81zm627.2 160.4H496.8l9.6 198.4c.6 11.9-4.7 23.1-14.6 30.5-6.1 4.5-13.6 6.8-21.1 6.7a44.28 44.28 0 01-42.2-32.3L329 459.2V172h415.4a56.85 56.85 0 0133.6 51.8c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-13.9 25.4 21.9 19a56.76 56.76 0 0119.6 43c0 9.7-2.3 18.9-6.9 27.3l-14 25.5 21.9 19a56.76 56.76 0 0119.6 43c0 19.1-11 37.5-28.8 48.4z"></path></svg></span></button></div></div></div></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/metadata-ingestion/source_overview"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Sources</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/lineage/dagster"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Dagster Integration</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#datahub-plugin-setup" class="table-of-contents__link toc-highlight">DataHub Plugin Setup</a><ul><li><a href="#installation" class="table-of-contents__link toc-highlight">Installation</a></li><li><a href="#configuration" class="table-of-contents__link toc-highlight">Configuration</a></li></ul></li><li><a href="#automatic-lineage-extraction" class="table-of-contents__link toc-highlight">Automatic lineage extraction</a></li><li><a href="#manual-lineage-annotation" class="table-of-contents__link toc-highlight">Manual Lineage Annotation</a><ul><li><a href="#using-inlets-and-outlets" class="table-of-contents__link toc-highlight">Using <code>inlets</code> and <code>outlets</code></a></li><li><a href="#custom-operators" class="table-of-contents__link toc-highlight">Custom Operators</a></li><li><a href="#custom-extractors" class="table-of-contents__link toc-highlight">Custom Extractors</a></li></ul></li><li><a href="#cleanup-obsolete-pipelines-and-tasks-from-datahub" class="table-of-contents__link toc-highlight">Cleanup obsolete pipelines and tasks from Datahub</a></li><li><a href="#get-all-datajobs-associated-with-a-dataflow" class="table-of-contents__link toc-highlight">Get all dataJobs associated with a dataFlow</a></li><li><a href="#emit-lineage-directly" class="table-of-contents__link toc-highlight">Emit Lineage Directly</a></li><li><a href="#debugging" class="table-of-contents__link toc-highlight">Debugging</a><ul><li><a href="#missing-lineage" class="table-of-contents__link toc-highlight">Missing lineage</a></li><li><a href="#incorrect-urls" class="table-of-contents__link toc-highlight">Incorrect URLs</a></li><li><a href="#typeerror--missing-3-required-positional-arguments" class="table-of-contents__link toc-highlight">TypeError ... missing 3 required positional arguments</a></li><li><a href="#scheduler-stalling" class="table-of-contents__link toc-highlight">Scheduler stalling</a></li><li><a href="#disabling-the-datahub-plugin" class="table-of-contents__link toc-highlight">Disabling the DataHub Plugin</a></li></ul></li><li><a href="#compatibility" class="table-of-contents__link toc-highlight">Compatibility</a></li><li><a href="#additional-references" class="table-of-contents__link toc-highlight">Additional references</a></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/">Introduction</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/quickstart">Quickstart</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://datahub.com/slack" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w" target="_blank" rel="noopener noreferrer" class="footer__link-item">YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://medium.com/datahub-project" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/docs/townhalls">Town Halls</a></li><li class="footer__item"><a href="https://datahub.com/resources/?2004611554=dh-stories" target="_blank" rel="noopener noreferrer" class="footer__link-item">Customer Stories<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://demo.datahub.com/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Demo</a></li><li class="footer__item"><a href="https://feature-requests.datahubproject.io/roadmap" target="_blank" rel="noopener noreferrer" class="footer__link-item">Roadmap<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/docs/contributing">Contributing</a></li><li class="footer__item"><a href="https://github.com/datahub-project/datahub" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://feature-requests.datahubproject.io/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Feature Requests<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2015-2025 DataHub Project Authors.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.7bcf907e.js"></script>
<script src="/assets/js/main.99352010.js"></script>
</body>
</html>