datahub/assets/js/ca362f6c.c2a532d0.js
2025-08-22 14:09:31 +00:00

1 line
9.8 KiB
JavaScript

"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[49121],{7653:(e,t,n)=>{n.d(t,{A:()=>a});const a={icon:{tag:"svg",attrs:{"fill-rule":"evenodd",viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64c247.4 0 448 200.6 448 448S759.4 960 512 960 64 759.4 64 512 264.6 64 512 64zm127.98 274.82h-.04l-.08.06L512 466.75 384.14 338.88c-.04-.05-.06-.06-.08-.06a.12.12 0 00-.07 0c-.03 0-.05.01-.09.05l-45.02 45.02a.2.2 0 00-.05.09.12.12 0 000 .07v.02a.27.27 0 00.06.06L466.75 512 338.88 639.86c-.05.04-.06.06-.06.08a.12.12 0 000 .07c0 .03.01.05.05.09l45.02 45.02a.2.2 0 00.09.05.12.12 0 00.07 0c.02 0 .04-.01.08-.05L512 557.25l127.86 127.87c.04.04.06.05.08.05a.12.12 0 00.07 0c.03 0 .05-.01.09-.05l45.02-45.02a.2.2 0 00.05-.09.12.12 0 000-.07v-.02a.27.27 0 00-.05-.06L557.25 512l127.87-127.86c.04-.04.05-.06.05-.08a.12.12 0 000-.07c0-.03-.01-.05-.05-.09l-45.02-45.02a.2.2 0 00-.09-.05.12.12 0 00-.07 0z"}}]},name:"close-circle",theme:"filled"}},4732:(e,t,n)=>{n.d(t,{A:()=>l});var a=n(89379),i=n(96540),r=n(7653),o=n(89990),s=function(e,t){return i.createElement(o.A,(0,a.A)((0,a.A)({},e),{},{ref:t,icon:r.A}))};const l=i.forwardRef(s)},43655:(e,t,n)=>{n.d(t,{A:()=>h});var a=n(96540),i=n(20053);const r="availabilityCard_P5od",o="managedIcon_AxXO",s="platform_wqXv",l="platformAvailable_Y8lN";var c=n(4732),g=n(89379);const u={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm193.5 301.7l-210.6 292a31.8 31.8 0 01-51.7 0L318.5 484.9c-3.8-5.3 0-12.7 6.5-12.7h46.9c10.2 0 19.9 4.9 25.9 13.3l71.2 98.8 157.2-218c6-8.3 15.6-13.3 25.9-13.3H699c6.5 0 10.3 7.4 6.5 12.7z"}}]},name:"check-circle",theme:"filled"};var d=n(89990),m=function(e,t){return a.createElement(d.A,(0,g.A)((0,g.A)({},e),{},{ref:t,icon:u}))};const p=a.forwardRef(m);const f={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M811.4 418.7C765.6 297.9 648.9 212 512.2 212S258.8 297.8 213 418.6C127.3 441.1 64 519.1 64 612c0 110.5 89.5 200 199.9 200h496.2C870.5 812 960 722.5 960 612c0-92.7-63.1-170.7-148.6-193.3zm36.3 281a123.07 123.07 0 01-87.6 36.3H263.9c-33.1 0-64.2-12.9-87.6-36.3A123.3 123.3 0 01140 612c0-28 9.1-54.3 26.2-76.3a125.7 125.7 0 0166.1-43.7l37.9-9.9 13.9-36.6c8.6-22.8 20.6-44.1 35.7-63.4a245.6 245.6 0 0152.4-49.9c41.1-28.9 89.5-44.2 140-44.2s98.9 15.3 140 44.2c19.9 14 37.5 30.8 52.4 49.9 15.1 19.3 27.1 40.7 35.7 63.4l13.8 36.5 37.8 10c54.3 14.5 92.1 63.8 92.1 120 0 33.1-12.9 64.3-36.3 87.7z"}}]},name:"cloud",theme:"outlined"};var y=function(e,t){return a.createElement(d.A,(0,g.A)((0,g.A)({},e),{},{ref:t,icon:f}))};const b=a.forwardRef(y),h=({saasOnly:e,ossOnly:t})=>a.createElement("div",{className:(0,i.A)(r,"card")},a.createElement("strong",null,"Feature Availability"),a.createElement("div",null,a.createElement("span",{className:(0,i.A)(s,!e&&l)},"Self-Hosted DataHub ",e?a.createElement(c.A,null):a.createElement(p,null))),a.createElement("div",null,a.createElement(b,{className:o}),a.createElement("span",{className:(0,i.A)(s,!t&&l)},"DataHub Cloud ",t?a.createElement(c.A,null):a.createElement(p,null))))},54213:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>m,contentTitle:()=>u,default:()=>b,frontMatter:()=>g,metadata:()=>d,toc:()=>p});n(96540);var a=n(15680),i=n(43655),r=n(53720),o=n(5400);function s(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){return t=null!=t?t:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):function(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}(Object(t)).forEach((function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(t,n))})),e}function c(e,t){if(null==e)return{};var n,a,i=function(e,t){if(null==e)return{};var n,a,i={},r=Object.keys(e);for(a=0;a<r.length;a++)n=r[a],t.indexOf(n)>=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a<r.length;a++)n=r[a],t.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}const g={title:"Profiling ingestions",slug:"/metadata-ingestion/docs/dev_guides/profiling_ingestions",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/docs/dev_guides/profiling_ingestions.md"},u="Profiling ingestions",d={unversionedId:"metadata-ingestion/docs/dev_guides/profiling_ingestions",id:"version-1.1.0/metadata-ingestion/docs/dev_guides/profiling_ingestions",title:"Profiling ingestions",description:"\ud83e\udd1d Version compatibility",source:"@site/versioned_docs/version-1.1.0/metadata-ingestion/docs/dev_guides/profiling_ingestions.md",sourceDirName:"metadata-ingestion/docs/dev_guides",slug:"/metadata-ingestion/docs/dev_guides/profiling_ingestions",permalink:"/docs/1.1.0/metadata-ingestion/docs/dev_guides/profiling_ingestions",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/docs/dev_guides/profiling_ingestions.md",tags:[],version:"1.1.0",frontMatter:{title:"Profiling ingestions",slug:"/metadata-ingestion/docs/dev_guides/profiling_ingestions",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/docs/dev_guides/profiling_ingestions.md"},sidebar:"overviewSidebar",previous:{title:"SQL Profiling",permalink:"/docs/1.1.0/metadata-ingestion/docs/dev_guides/sql_profiles"},next:{title:"The Metadata Model",permalink:"/docs/1.1.0/metadata-modeling/metadata-model"}},m={},p=[{value:"How to use",id:"how-to-use",level:2}],f={toc:p},y="wrapper";function b(e){var{components:t}=e,n=c(e,["components"]);return(0,a.yg)(y,l(function(e){for(var t=1;t<arguments.length;t++){var n=null!=arguments[t]?arguments[t]:{},a=Object.keys(n);"function"==typeof Object.getOwnPropertySymbols&&(a=a.concat(Object.getOwnPropertySymbols(n).filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable})))),a.forEach((function(t){s(e,t,n[t])}))}return e}({},f,n),{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h1",{id:"profiling-ingestions"},"Profiling ingestions"),(0,a.yg)(i.A,{mdxType:"FeatureAvailability"}),(0,a.yg)("p",null,(0,a.yg)("strong",{parentName:"p"},"\ud83e\udd1d Version compatibility")),(0,a.yg)("blockquote",null,(0,a.yg)("p",{parentName:"blockquote"},"DataHub Core (Open Source): ",(0,a.yg)("strong",{parentName:"p"},"0.11.1")," | DataHub Cloud: ",(0,a.yg)("strong",{parentName:"p"},"0.2.12"))),(0,a.yg)("p",null,"This page documents how to perform memory profiles of ingestion runs.\nIt is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources."),(0,a.yg)("h2",{id:"how-to-use"},"How to use"),(0,a.yg)(r.A,{mdxType:"Tabs"},(0,a.yg)(o.A,{value:"ui",label:"UI",default:!0,mdxType:"TabItem"},(0,a.yg)("p",null,"Create an ingestion as specified in the ",(0,a.yg)("a",{parentName:"p",href:"/docs/1.1.0/ui-ingestion"},"Ingestion guide"),"."),(0,a.yg)("p",null,"Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion:"),(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-yaml"},'source: ...\n\nsink: ...\n\nflags:\n generate_memory_profiles: "<path to folder where dumps will be written to>"\n')),(0,a.yg)("p",null,"In the final panel, under the advanced section, add the ",(0,a.yg)("inlineCode",{parentName:"p"},"debug")," datahub package under the ",(0,a.yg)("strong",{parentName:"p"},"Extra DataHub Plugins")," section.\nAs seen below:"),(0,a.yg)("p",{align:"center"},(0,a.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/ingestion-advanced-extra-datahub-plugin.png"})),(0,a.yg)("p",null,"Finally, save and run the ingestion process.")),(0,a.yg)(o.A,{value:"cli",label:"CLI",default:!0,mdxType:"TabItem"},"Install the `debug` plugin for DataHub's CLI wherever the ingestion runs:",(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-bash"},"pip install 'acryl-datahub[debug]'\n")),(0,a.yg)("p",null,"This will install ",(0,a.yg)("a",{parentName:"p",href:"https://github.com/bloomberg/memray"},"memray")," in your python environment."),(0,a.yg)("p",null,"Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion:"),(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-yaml"},'source: ...\n\nsink: ...\n\nflags:\n generate_memory_profiles: "<path to folder where dumps will be written to>"\n')),(0,a.yg)("p",null,"Finally run the ingestion recipe"),(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-bash"},"$ datahub ingest -c recipe.yaml\n")))),(0,a.yg)("p",null,"Once the ingestion run starts a binary file will be created and appended to during the execution of the ingestion."),(0,a.yg)("p",null,"These files follow the pattern ",(0,a.yg)("inlineCode",{parentName:"p"},"file-<ingestion-run-urn>.bin")," for a unique identification.\nOnce the ingestion has finished you can use ",(0,a.yg)("inlineCode",{parentName:"p"},"memray")," to analyze the memory dump in a flamegraph view using:"),(0,a.yg)("p",null,(0,a.yg)("inlineCode",{parentName:"p"},"$ memray flamegraph file-None-file-2023_09_18-21_38_43.bin")),(0,a.yg)("p",null,"This will generate an interactive HTML file for analysis:"),(0,a.yg)("p",{align:"center"},(0,a.yg)("img",{width:"70%",src:"https://github.com/datahub-project/static-assets/blob/main/imgs/metadata-ingestion/memray-example.png?raw=true"})),(0,a.yg)("p",null,(0,a.yg)("inlineCode",{parentName:"p"},"memray")," has an extensive set of features for memory investigation. Take a look at their ",(0,a.yg)("a",{parentName:"p",href:"https://bloomberg.github.io/memray/overview.html"},"documentation")," to see the full feature set."))}b.isMDXComponent=!0}}]);