datahub/assets/js/add9dc96.0731b8dc.js
2025-08-22 14:09:31 +00:00

1 line
15 KiB
JavaScript

"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[29211],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>y});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t<arguments.length;t++){var n=null!=arguments[t]?arguments[t]:{};t%2?o(Object(n),!0).forEach((function(t){r(e,t,n[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):o(Object(n)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(n,t))}))}return e}function s(e,t){if(null==e)return{};var n,a,r=function(e,t){if(null==e)return{};var n,a,r={},o=Object.keys(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),u=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=u(e.components);return a.createElement(l.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),d=u(n),g=r,y=d["".concat(l,".").concat(g)]||d[g]||p[g]||o;return n?a.createElement(y,i(i({ref:t},c),{},{components:n})):a.createElement(y,i({ref:t},c))}));function y(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=g;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,i[1]=s;for(var u=2;u<o;u++)i[u]=n[u];return a.createElement.apply(null,i)}return a.createElement.apply(null,n)}g.displayName="MDXCreateElement"},99067:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>l,default:()=>y,frontMatter:()=>s,metadata:()=>u,toc:()=>d});n(96540);var a=n(15680);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){return t=null!=t?t:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):function(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}(Object(t)).forEach((function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(t,n))})),e}function i(e,t){if(null==e)return{};var n,a,r=function(e,t){if(null==e)return{};var n,a,r={},o=Object.keys(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}const s={title:"DataHub Cloud Event Source",sidebar_label:"Cloud Event Source",slug:"/actions/sources/datahub-cloud-event-source",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/actions/sources/datahub-cloud-event-source.md"},l="DataHub Cloud Event Source",u={unversionedId:"docs/actions/sources/datahub-cloud-event-source",id:"docs/actions/sources/datahub-cloud-event-source",title:"DataHub Cloud Event Source",description:"Prerequisites",source:"@site/genDocs/docs/actions/sources/datahub-cloud-event-source.md",sourceDirName:"docs/actions/sources",slug:"/actions/sources/datahub-cloud-event-source",permalink:"/docs/actions/sources/datahub-cloud-event-source",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/docs/actions/sources/datahub-cloud-event-source.md",tags:[],version:"current",frontMatter:{title:"DataHub Cloud Event Source",sidebar_label:"Cloud Event Source",slug:"/actions/sources/datahub-cloud-event-source",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/actions/sources/datahub-cloud-event-source.md"},sidebar:"overviewSidebar",previous:{title:"Concepts",permalink:"/docs/actions/concepts"},next:{title:"Kafka Event Source",permalink:"/docs/actions/sources/kafka-event-source"}},c={},d=[{value:"Prerequisites",id:"prerequisites",level:2},{value:"Compatibility",id:"compatibility",level:3},{value:"Privileges",id:"privileges",level:3},{value:"Overview",id:"overview",level:2},{value:"Processing Guarantees",id:"processing-guarantees",level:3},{value:"Supported Events",id:"supported-events",level:2},{value:"Configure the Event Source",id:"configure-the-event-source",level:2},{value:"Quickstart",id:"quickstart",level:3},{value:"Advanced Configurations",id:"advanced-configurations",level:3},{value:"FAQ",id:"faq",level:2}],p={toc:d},g="wrapper";function y(e){var{components:t}=e,n=i(e,["components"]);return(0,a.yg)(g,o(function(e){for(var t=1;t<arguments.length;t++){var n=null!=arguments[t]?arguments[t]:{},a=Object.keys(n);"function"==typeof Object.getOwnPropertySymbols&&(a=a.concat(Object.getOwnPropertySymbols(n).filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable})))),a.forEach((function(t){r(e,t,n[t])}))}return e}({},p,n),{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h1",{id:"datahub-cloud-event-source"},"DataHub Cloud Event Source"),(0,a.yg)("h2",{id:"prerequisites"},"Prerequisites"),(0,a.yg)("h3",{id:"compatibility"},"Compatibility"),(0,a.yg)("p",null,"The ",(0,a.yg)("strong",{parentName:"p"},"DataHub Cloud Event Source")," is only compatible with versions of DataHub Cloud above ",(0,a.yg)("inlineCode",{parentName:"p"},"v0.3.7"),"."),(0,a.yg)("h3",{id:"privileges"},"Privileges"),(0,a.yg)("p",null,"By default, users do not have access to the Events API of DataHub Cloud. In order to access the API, the user or service account\nassociated with the access token used to configure this events source ",(0,a.yg)("em",{parentName:"p"},"must")," have the ",(0,a.yg)("inlineCode",{parentName:"p"},"Get Platform Events")," platform privilege, which\ncan be granted using an ",(0,a.yg)("a",{parentName:"p",href:"/docs/authorization/access-policies-guide/"},"Access Policy"),"."),(0,a.yg)("h2",{id:"overview"},"Overview"),(0,a.yg)("p",null,"The DataHub Cloud Event Source allows you to use DataHub Actions with an instance of DataHub Cloud hosted by ",(0,a.yg)("a",{parentName:"p",href:"https://acryl.io"},"DataHub"),"."),(0,a.yg)("p",null,"Under the hood, the DataHub Cloud Event Source communicates with DataHub Cloud to extract change events in realtime.\nThe state of progress is automatically saved to DataHub Cloud after messages are processed, allowing you to seamlessly pause and restart the consumer, using the provided ",(0,a.yg)("inlineCode",{parentName:"p"},"name")," to uniquely identify the consumer state."),(0,a.yg)("p",null,"On initial startup of a new consumer id, the DataHub event source will automatically begin the ",(0,a.yg)("em",{parentName:"p"},"latest"),' events by default. Afterwards, the message stream processed offsets will be continually saved. However, the source can also optionally be configured to "look back" in time\nby a certain number of days on initial bootstrap using the ',(0,a.yg)("inlineCode",{parentName:"p"},"lookback_days")," parameter. To reset all previously saved offsets for a consumer,\nyou can set ",(0,a.yg)("inlineCode",{parentName:"p"},"reset_offsets")," to ",(0,a.yg)("inlineCode",{parentName:"p"},"True"),"."),(0,a.yg)("h3",{id:"processing-guarantees"},"Processing Guarantees"),(0,a.yg)("p",null,'This event source implements an "ack" function which is invoked if and only if an event is successfully processed\nby the Actions framework, meaning that the event made it through the Transformers and into the Action without\nany errors. Under the hood, the "ack" method synchronously commits DataHub Cloud Consumer Offsets on behalf of the Action. This means that by default, the framework provides ',(0,a.yg)("em",{parentName:"p"},"at-least once")," processing semantics. That is, in the unusual case that a failure occurs when attempting to commit offsets back to Kafka, that event may be replayed on restart of the Action."),(0,a.yg)("p",null,"If you've configured your Action pipeline ",(0,a.yg)("inlineCode",{parentName:"p"},"failure_mode")," to be ",(0,a.yg)("inlineCode",{parentName:"p"},"CONTINUE")," (the default), then events which\nfail to be processed will simply be logged to a ",(0,a.yg)("inlineCode",{parentName:"p"},"failed_events.log")," file for further investigation (dead letter queue). The DataHub Cloud Event Source will continue to make progress against the underlying topics and continue to commit offsets even in the case of failed messages."),(0,a.yg)("p",null,"If you've configured your Action pipeline ",(0,a.yg)("inlineCode",{parentName:"p"},"failure_mode")," to be ",(0,a.yg)("inlineCode",{parentName:"p"},"THROW"),', then events which fail to be processed result in an Action Pipeline error. This in turn terminates the pipeline before committing offsets back to DataHub Cloud. Thus the message will not be marked as "processed" by the Action consumer.'),(0,a.yg)("h2",{id:"supported-events"},"Supported Events"),(0,a.yg)("p",null,"The DataHub Cloud Event Source produces"),(0,a.yg)("ul",null,(0,a.yg)("li",{parentName:"ul"},(0,a.yg)("a",{parentName:"li",href:"/docs/managed-datahub/datahub-api/entity-events-api"},"Entity Change Event V1"))),(0,a.yg)("p",null,"Note that the DataHub Cloud Event Source does ",(0,a.yg)("em",{parentName:"p"},"not")," yet support the full ",(0,a.yg)("a",{parentName:"p",href:"/docs/actions/events/metadata-change-log-event"},"Metadata Change Log V1")," event stream."),(0,a.yg)("h2",{id:"configure-the-event-source"},"Configure the Event Source"),(0,a.yg)("p",null,"Use the following config(s) to get started with the DataHub Cloud Event Source."),(0,a.yg)("h3",{id:"quickstart"},"Quickstart"),(0,a.yg)("p",null,"To start listening for new events from now, you can use the following recipe:"),(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-yml"},'name: "unique-action-name"\ndatahub:\n server: "https://<your-organization>.acryl.io"\n token: "<your-datahub-cloud-token>"\nsource:\n type: "datahub-cloud"\naction:\n # action configs\n')),(0,a.yg)("p",null,"Note that the ",(0,a.yg)("inlineCode",{parentName:"p"},"datahub")," configuration block is ",(0,a.yg)("strong",{parentName:"p"},"required")," to connect to your DataHub Cloud instance."),(0,a.yg)("h3",{id:"advanced-configurations"},"Advanced Configurations"),(0,a.yg)("p",null,"To reset the offsets for the action pipeline and start consuming events from 7 days ago, you can use the following recipe:"),(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-yml"},'name: "unique-action-name"\ndatahub:\n server: "https://<your-organization>.acryl.io"\n token: "<your-datahub-cloud-token>"\nsource:\n type: "datahub-cloud"\n config:\n lookback_days: 7 # Look back 7 days for events\n reset_offsets: true # Ignore stored offsets and start fresh\n kill_after_idle_timeout: true # Enable shutdown after idle period\n idle_timeout_duration_seconds: 60 # Idle timeout set to 60 seconds\n event_processing_time_max_duration_seconds: 45 # Max processing time of 45 seconds per batch\naction:\n # action configs\n')),(0,a.yg)("p",null,"Note that the ",(0,a.yg)("inlineCode",{parentName:"p"},"datahub")," configuration block is ",(0,a.yg)("strong",{parentName:"p"},"required")," to connect to your DataHub Cloud instance."),(0,a.yg)("details",null,(0,a.yg)("summary",null,"View All Configuration Options"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:null},"Field"),(0,a.yg)("th",{parentName:"tr",align:"center"},"Required"),(0,a.yg)("th",{parentName:"tr",align:"center"},"Default"),(0,a.yg)("th",{parentName:"tr",align:null},"Description"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"topic")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},(0,a.yg)("inlineCode",{parentName:"td"},"PlatformEvent_v1")),(0,a.yg)("td",{parentName:"tr",align:null},"The name of the topic from which events will be consumed. Do not change this unless you know what you're doing!")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"lookback_days")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},"None"),(0,a.yg)("td",{parentName:"tr",align:null},"Optional number of days to look back when polling for events.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"reset_offsets")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},(0,a.yg)("inlineCode",{parentName:"td"},"False")),(0,a.yg)("td",{parentName:"tr",align:null},"When set to ",(0,a.yg)("inlineCode",{parentName:"td"},"True"),", the consumer will ignore any stored offsets and start fresh.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"kill_after_idle_timeout")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},(0,a.yg)("inlineCode",{parentName:"td"},"False")),(0,a.yg)("td",{parentName:"tr",align:null},"If ",(0,a.yg)("inlineCode",{parentName:"td"},"True"),", stops the consumer after being idle for the specified timeout duration.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"idle_timeout_duration_seconds")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},(0,a.yg)("inlineCode",{parentName:"td"},"30")),(0,a.yg)("td",{parentName:"tr",align:null},"Duration in seconds after which, if no events are received, the consumer is considered idle.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},(0,a.yg)("inlineCode",{parentName:"td"},"event_processing_time_max_duration_seconds")),(0,a.yg)("td",{parentName:"tr",align:"center"},"\u274c"),(0,a.yg)("td",{parentName:"tr",align:"center"},(0,a.yg)("inlineCode",{parentName:"td"},"30")),(0,a.yg)("td",{parentName:"tr",align:null},"Maximum allowed time in seconds for processing events before timing out."))))),(0,a.yg)("h2",{id:"faq"},"FAQ"),(0,a.yg)("ol",null,(0,a.yg)("li",{parentName:"ol"},"Is there a way to always start processing from the end of the topics on Actions start?")),(0,a.yg)("p",null,"Yes, simply set ",(0,a.yg)("inlineCode",{parentName:"p"},"reset_offsets")," to True for a single run of the action. Remember to disable this for subsequent runs if you don't want to miss any events!"),(0,a.yg)("ol",{start:2},(0,a.yg)("li",{parentName:"ol"},"What happens if I have multiple actions with the same pipeline ",(0,a.yg)("inlineCode",{parentName:"li"},"name")," running? Can I scale out horizontally?")),(0,a.yg)("p",null,"Today, there is undefined behavior deploying multiple actions with the same name using the DataHub Cloud Events Source.\nAll events must be processed by a single running action"))}y.isMDXComponent=!0}}]);