// Mirror of https://github.com/datahub-project/datahub.git
// Synced 2025-08-31 12:52:13 +00:00
// 1 line · 22 KiB · JavaScript
/**
 * Chunk 28665 of the docs-website bundle (build output, reflowed for readability).
 *
 * Appends one `[chunkIds, modules]` pair to the `self.webpackChunkdocs_website`
 * array, creating the array first when it does not exist yet. `modules` maps a
 * numeric module id to a factory `(module, exports, require)`; the factories are
 * not run here (presumably the webpack runtime invokes them on demand).
 *
 * Modules in this chunk:
 *   7653  - exports `A`: the "close-circle" (filled) icon definition object.
 *   4732  - exports `A`: forwardRef component that renders module 89990's `A`
 *           with the close-circle icon.
 *   43655 - exports `A`: the "Feature Availability" card (`saasOnly` / `ossOnly`).
 *   87024 - the "Data Products" page: front matter, metadata, table of contents
 *           and the default content component.
 *
 * Maintenance notes:
 *   - Every string literal must stay on one physical line (a raw line break
 *     inside a quoted string is a syntax error), and `return` must be followed
 *     by its operand on the same line (otherwise automatic semicolon insertion
 *     turns it into `return;`).
 *   - NOTE(review): the page text contains typos ("proceeed", "this privileges",
 *     "names ... is used"). They are kept byte-for-byte on purpose; fix them in
 *     the markdown source named by `custom_edit_url` (docs/dataproducts.md) and
 *     rebuild rather than patching this generated file.
 *   - NOTE(review): the YAML/JSON samples below have single-space indentation;
 *     it is unclear from this file whether that matches the upstream sample.
 */
"use strict";
(self.webpackChunkdocs_website = self.webpackChunkdocs_website || []).push([
  [28665],
  {
    // Icon definition: filled "close-circle".
    7653: (t, e, a) => {
      a.d(e, { A: () => n });
      const n = {
        icon: {
          tag: "svg",
          attrs: { "fill-rule": "evenodd", viewBox: "64 64 896 896", focusable: "false" },
          children: [
            {
              tag: "path",
              attrs: {
                d: "M512 64c247.4 0 448 200.6 448 448S759.4 960 512 960 64 759.4 64 512 264.6 64 512 64zm127.98 274.82h-.04l-.08.06L512 466.75 384.14 338.88c-.04-.05-.06-.06-.08-.06a.12.12 0 00-.07 0c-.03 0-.05.01-.09.05l-45.02 45.02a.2.2 0 00-.05.09.12.12 0 000 .07v.02a.27.27 0 00.06.06L466.75 512 338.88 639.86c-.05.04-.06.06-.06.08a.12.12 0 000 .07c0 .03.01.05.05.09l45.02 45.02a.2.2 0 00.09.05.12.12 0 00.07 0c.02 0 .04-.01.08-.05L512 557.25l127.86 127.87c.04.04.06.05.08.05a.12.12 0 00.07 0c.03 0 .05-.01.09-.05l45.02-45.02a.2.2 0 00.05-.09.12.12 0 000-.07v-.02a.27.27 0 00-.05-.06L557.25 512l127.87-127.86c.04-.04.05-.06.05-.08a.12.12 0 000-.07c0-.03-.01-.05-.05-.09l-45.02-45.02a.2.2 0 00-.09-.05.12.12 0 00-.07 0z",
              },
            },
          ],
        },
        name: "close-circle",
        theme: "filled",
      };
    },

    // Close-circle icon component: wraps module 89990's `A` with the icon from 7653.
    4732: (t, e, a) => {
      a.d(e, { A: () => d });
      const n = a(89379);
      const r = a(96540);
      const o = a(7653);
      const i = a(89990);
      // Render function handed to forwardRef: (props, ref) -> element.
      const s = function (t, e) {
        return r.createElement(i.A, (0, n.A)((0, n.A)({}, t), {}, { ref: e, icon: o.A }));
      };
      const d = r.forwardRef(s);
    },

    // "Feature Availability" card component plus the two inline icons it uses.
    43655: (t, e, a) => {
      a.d(e, { A: () => b });
      const n = a(96540);
      const r = a(20053);

      // CSS class names used by the card.
      const o = "availabilityCard_P5od";
      const i = "managedIcon_AxXO";
      const s = "platform_wqXv";
      const d = "platformAvailable_Y8lN";

      const l = a(4732);
      const c = a(89379);

      // Icon definition: filled "check-circle".
      const u = {
        icon: {
          tag: "svg",
          attrs: { viewBox: "64 64 896 896", focusable: "false" },
          children: [
            {
              tag: "path",
              attrs: {
                d: "M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm193.5 301.7l-210.6 292a31.8 31.8 0 01-51.7 0L318.5 484.9c-3.8-5.3 0-12.7 6.5-12.7h46.9c10.2 0 19.9 4.9 25.9 13.3l71.2 98.8 157.2-218c6-8.3 15.6-13.3 25.9-13.3H699c6.5 0 10.3 7.4 6.5 12.7z",
              },
            },
          ],
        },
        name: "check-circle",
        theme: "filled",
      };

      const p = a(89990);
      const g = function (t, e) {
        return n.createElement(p.A, (0, c.A)((0, c.A)({}, t), {}, { ref: e, icon: u }));
      };
      // Check-circle icon component.
      const m = n.forwardRef(g);

      // Icon definition: outlined "cloud".
      const h = {
        icon: {
          tag: "svg",
          attrs: { viewBox: "64 64 896 896", focusable: "false" },
          children: [
            {
              tag: "path",
              attrs: {
                d: "M811.4 418.7C765.6 297.9 648.9 212 512.2 212S258.8 297.8 213 418.6C127.3 441.1 64 519.1 64 612c0 110.5 89.5 200 199.9 200h496.2C870.5 812 960 722.5 960 612c0-92.7-63.1-170.7-148.6-193.3zm36.3 281a123.07 123.07 0 01-87.6 36.3H263.9c-33.1 0-64.2-12.9-87.6-36.3A123.3 123.3 0 01140 612c0-28 9.1-54.3 26.2-76.3a125.7 125.7 0 0166.1-43.7l37.9-9.9 13.9-36.6c8.6-22.8 20.6-44.1 35.7-63.4a245.6 245.6 0 0152.4-49.9c41.1-28.9 89.5-44.2 140-44.2s98.9 15.3 140 44.2c19.9 14 37.5 30.8 52.4 49.9 15.1 19.3 27.1 40.7 35.7 63.4l13.8 36.5 37.8 10c54.3 14.5 92.1 63.8 92.1 120 0 33.1-12.9 64.3-36.3 87.7z",
              },
            },
          ],
        },
        name: "cloud",
        theme: "outlined",
      };

      const y = function (t, e) {
        return n.createElement(p.A, (0, c.A)((0, c.A)({}, t), {}, { ref: e, icon: h }));
      };
      // Cloud icon component.
      const f = n.forwardRef(y);

      // The card: a platform row gets the "available" class and a check icon
      // unless the feature is restricted to the other platform, in which case
      // it gets the close icon.
      const b = ({ saasOnly: t, ossOnly: e }) =>
        n.createElement(
          "div",
          { className: (0, r.A)(o, "card") },
          n.createElement("strong", null, "Feature Availability"),
          n.createElement(
            "div",
            null,
            n.createElement(
              "span",
              { className: (0, r.A)(s, !t && d) },
              "Self-Hosted DataHub ",
              t ? n.createElement(l.A, null) : n.createElement(m, null)
            )
          ),
          n.createElement(
            "div",
            null,
            n.createElement(f, { className: i }),
            n.createElement(
              "span",
              { className: (0, r.A)(s, !e && d) },
              "DataHub Cloud ",
              e ? n.createElement(l.A, null) : n.createElement(m, null)
            )
          )
        );
    },

    // The "Data Products" documentation page.
    87024: (t, e, a) => {
      a.r(e);
      a.d(e, {
        assets: () => g,
        contentTitle: () => u,
        default: () => f,
        frontMatter: () => c,
        metadata: () => p,
        toc: () => m,
      });
      a(96540);
      const n = a(15680);
      const r = a(43655);
      const o = a(53720);
      const i = a(5400);

      // Sets t[e] = a; uses defineProperty when the key already exists on t
      // (own or inherited) so the result is a plain data property. Returns t.
      function s(t, e, a) {
        if (e in t) {
          Object.defineProperty(t, e, { value: a, enumerable: true, configurable: true, writable: true });
        } else {
          t[e] = a;
        }
        return t;
      }

      // Copies the own property descriptors of e (treated as {} when null or
      // undefined) onto t and returns t.
      function d(t, e) {
        e = null != e ? e : {};
        if (Object.getOwnPropertyDescriptors) {
          Object.defineProperties(t, Object.getOwnPropertyDescriptors(e));
        } else {
          // Fallback: collect own string keys and own symbols by hand. The
          // collector is called with one argument, so its symbol filter is skipped.
          (function (t, e) {
            var a = Object.keys(t);
            if (Object.getOwnPropertySymbols) {
              var n = Object.getOwnPropertySymbols(t);
              e &&
                (n = n.filter(function (e) {
                  return Object.getOwnPropertyDescriptor(t, e).enumerable;
                }));
              a.push.apply(a, n);
            }
            return a;
          })(Object(e)).forEach(function (a) {
            Object.defineProperty(t, a, Object.getOwnPropertyDescriptor(e, a));
          });
        }
        return t;
      }

      // Returns a shallow copy of t without the keys listed in e: own enumerable
      // string keys first, then own enumerable symbols. Returns {} for null or
      // undefined t.
      function l(t, e) {
        if (null == t) return {};
        var a;
        var n;
        var r = (function (t, e) {
          if (null == t) return {};
          var a;
          var n;
          var r = {};
          var o = Object.keys(t);
          for (n = 0; n < o.length; n++) {
            a = o[n];
            if (e.indexOf(a) >= 0) continue;
            r[a] = t[a];
          }
          return r;
        })(t, e);
        if (Object.getOwnPropertySymbols) {
          var o = Object.getOwnPropertySymbols(t);
          for (n = 0; n < o.length; n++) {
            a = o[n];
            if (e.indexOf(a) >= 0) continue;
            if (Object.prototype.propertyIsEnumerable.call(t, a)) r[a] = t[a];
          }
        }
        return r;
      }

      // Front matter of the page.
      const c = {
        title: "Data Products",
        slug: "/dataproducts",
        custom_edit_url: "https://github.com/datahub-project/datahub/blob/master/docs/dataproducts.md",
      };
      const u = "Data Products";
      // Page metadata (ids, permalink, sidebar, previous/next links).
      const p = {
        unversionedId: "docs/dataproducts",
        id: "docs/dataproducts",
        title: "Data Products",
        description: "\ud83e\udd1d Version compatibility",
        source: "@site/genDocs/docs/dataproducts.md",
        sourceDirName: "docs",
        slug: "/dataproducts",
        permalink: "/docs/dataproducts",
        draft: false,
        editUrl: "https://github.com/datahub-project/datahub/blob/master/docs/dataproducts.md",
        tags: [],
        version: "current",
        frontMatter: {
          title: "Data Products",
          slug: "/dataproducts",
          custom_edit_url: "https://github.com/datahub-project/datahub/blob/master/docs/dataproducts.md",
        },
        sidebar: "overviewSidebar",
        previous: { title: "Data Contracts", permalink: "/docs/managed-datahub/observe/data-contract" },
        next: { title: "Dataset Usage & Query History", permalink: "/docs/features/dataset-usage-and-query-history" },
      };
      const g = {};
      // Table of contents: one entry per h2/h3 heading rendered below.
      const m = [
        { value: "What are Data Products?", id: "what-are-data-products", level: 2 },
        { value: "Why Data Products?", id: "why-data-products", level: 2 },
        { value: "Benefits of Data Products", id: "benefits-of-data-products", level: 2 },
        { value: "How Can You Use Data Products?", id: "how-can-you-use-data-products", level: 2 },
        { value: "Data Products Setup, Prerequisites, and Permissions", id: "data-products-setup-prerequisites-and-permissions", level: 2 },
        { value: "Using Data Products", id: "using-data-products", level: 2 },
        { value: "Creating a Data Product (UI)", id: "creating-a-data-product-ui", level: 3 },
        { value: "Assigning an Asset to a Data Product (UI)", id: "assigning-an-asset-to-a-data-product-ui", level: 3 },
        { value: "Creating a Data Product (YAML + git)", id: "creating-a-data-product-yaml--git", level: 3 },
        { value: "Keeping the YAML file sync-ed with changes in UI", id: "keeping-the-yaml-file-sync-ed-with-changes-in-ui", level: 3 },
        { value: "Advanced cli commands for managing Data Products", id: "advanced-cli-commands-for-managing-data-products", level: 3 },
        { value: "What updates are planned for the Data Products feature?", id: "what-updates-are-planned-for-the-data-products-feature", level: 3 },
        { value: "Related Features", id: "related-features", level: 3 },
      ];
      const h = { toc: m };
      const y = "wrapper";

      // Page content component. `components` is split off the incoming props;
      // the remaining props are merged over { toc } and passed to the wrapper
      // together with `components` and mdxType "MDXLayout".
      function f(t) {
        const { components: e } = t;
        const a = l(t, ["components"]);
        return (0, n.yg)(
          y,
          d(
            // Shallow-merges every argument after the first into the first one
            // (own enumerable string keys and enumerable symbols).
            (function (t) {
              for (var e = 1; e < arguments.length; e++) {
                var a = null != arguments[e] ? arguments[e] : {};
                var n = Object.keys(a);
                "function" == typeof Object.getOwnPropertySymbols &&
                  (n = n.concat(
                    Object.getOwnPropertySymbols(a).filter(function (t) {
                      return Object.getOwnPropertyDescriptor(a, t).enumerable;
                    })
                  ));
                n.forEach(function (e) {
                  s(t, e, a[e]);
                });
              }
              return t;
            })({}, h, a),
            { components: e, mdxType: "MDXLayout" }
          ),
          (0, n.yg)("h1", { id: "data-products" }, "Data Products"),
          (0, n.yg)(r.A, { mdxType: "FeatureAvailability" }),
          (0, n.yg)("p", null, (0, n.yg)("strong", { parentName: "p" }, "\ud83e\udd1d Version compatibility")),
          (0, n.yg)("blockquote", null, (0, n.yg)("p", { parentName: "blockquote" }, "DataHub Core: ", (0, n.yg)("strong", { parentName: "p" }, "0.10.3"), " | DataHub Cloud: ", (0, n.yg)("strong", { parentName: "p" }, "0.2.8"))),
          (0, n.yg)("h2", { id: "what-are-data-products" }, "What are Data Products?"),
          (0, n.yg)("p", null, "Data Products are an innovative way to organize and manage your Data Assets, such as Tables, Topics, Views, Pipelines, Charts, Dashboards, etc., within DataHub. These Data Products belong to a specific Domain and can be easily accessed by various teams or stakeholders within your organization."),
          (0, n.yg)("h2", { id: "why-data-products" }, "Why Data Products?"),
          (0, n.yg)("p", null, "A key concept in data mesh architecture, Data Products are independent units of data managed by a specific domain team. They are responsible for defining, publishing, and maintaining their data assets while ensuring high-quality data that meets the needs of its consumers."),
          (0, n.yg)("h2", { id: "benefits-of-data-products" }, "Benefits of Data Products"),
          (0, n.yg)("p", null, "Data Products help in curating a coherent set of logical entities, simplifying data discovery and governance. By grouping related Data Assets into a Data Product, it allows stakeholders to discover and understand available data easily, supporting data governance efforts by managing and controlling access to Data Products."),
          (0, n.yg)("h2", { id: "how-can-you-use-data-products" }, "How Can You Use Data Products?"),
          (0, n.yg)("p", null, "Data Products can be easily published to the DataHub catalog, allowing other teams to discover and consume them. By doing this, data teams can streamline the process of sharing data, making data-driven decisions faster and more efficient."),
          (0, n.yg)("h2", { id: "data-products-setup-prerequisites-and-permissions" }, "Data Products Setup, Prerequisites, and Permissions"),
          (0, n.yg)("p", null, "What you need to create and add data products:"),
          (0, n.yg)(
            "ul",
            null,
            (0, n.yg)("li", { parentName: "ul" }, (0, n.yg)("strong", { parentName: "li" }, "Manage Data Product"), " metadata privilege for Domains to create/delete Data Products at the entity level. If a user has this privilege for a given Domain, they will be able to create and delete Data Products underneath it."),
            (0, n.yg)("li", { parentName: "ul" }, (0, n.yg)("strong", { parentName: "li" }, "Edit Data Product"), " metadata privilege to add or remove the Data Product for a given entity.")
          ),
          (0, n.yg)("p", null, "You can create this privileges by creating a new ", (0, n.yg)("a", { parentName: "p", href: "/docs/authorization/policies" }, "Metadata Policy"), "."),
          (0, n.yg)("h2", { id: "using-data-products" }, "Using Data Products"),
          (0, n.yg)("p", null, "Data Products can be created using the UI or via a YAML file that is managed using software engineering (GitOps) practices."),
          (0, n.yg)("h3", { id: "creating-a-data-product-ui" }, "Creating a Data Product (UI)"),
          (0, n.yg)("p", null, "To create a Data Product, first navigate to the Domain that will contain this Data Product."),
          (0, n.yg)("p", { align: "center" }, (0, n.yg)("img", { width: "70%", src: "https://raw.githubusercontent.com/datahub-project/static-assets/a84499c124c9123d6831a0e6ad8dd8caf70203a0/imgs/data_products/dataproducts-tab.png" })),
          (0, n.yg)("p", null, "Then navigate to the Data Products tab on the Domain's home page, and click '+ New Data Product'.\nThis will open a new modal where you can configure the settings for your data product. Inside the form, you can choose a name for your Data Product. Most often, this will align with the logical purpose of the Data Product, for example\n'Customer Orders' or 'Revenue Attribution'. You can also add documentation for your product to help other users easily discover it. Don't worry, this can be changed later."),
          (0, n.yg)("p", { align: "center" }, (0, n.yg)("img", { width: "70%", src: "https://raw.githubusercontent.com/datahub-project/static-assets/a84499c124c9123d6831a0e6ad8dd8caf70203a0/imgs/data_products/dataproducts-create.png" })),
          (0, n.yg)("p", null, "Once you've chosen a name and a description, click 'Create' to create the new Data Product. Once you've created the Data Product, you can click on it to continue on to the next step, adding assets to it."),
          (0, n.yg)("h3", { id: "assigning-an-asset-to-a-data-product-ui" }, "Assigning an Asset to a Data Product (UI)"),
          (0, n.yg)("p", null, "You can assign an asset to a Data Product either using the Data Product page as the starting point or the Asset's page as the starting point.\nOn a Data Product page, click the 'Add Assets' button on the top right corner to add assets to the Data Product."),
          (0, n.yg)("p", { align: "center" }, (0, n.yg)("img", { width: "70%", src: "https://raw.githubusercontent.com/datahub-project/static-assets/a84499c124c9123d6831a0e6ad8dd8caf70203a0/imgs/data_products/dataproducts-add-assets.png" })),
          (0, n.yg)("p", null, "On an Asset's profile page, use the right sidebar to locate the Data Product section. Click 'Set Data Product', and then search for the Data Product you'd like to add this asset to. When you're done, click 'Add'."),
          (0, n.yg)("p", { align: "center" }, (0, n.yg)("img", { width: "70%", src: "https://raw.githubusercontent.com/datahub-project/static-assets/a84499c124c9123d6831a0e6ad8dd8caf70203a0/imgs/data_products/dataproducts-set.png" })),
          (0, n.yg)("p", null, "To remove an asset from a Data Product, click the 'x' icon on the Data Product label."),
          (0, n.yg)("blockquote", null, (0, n.yg)("p", { parentName: "blockquote" }, "Notice: Adding or removing an asset from a Data Product requires the ", (0, n.yg)("inlineCode", { parentName: "p" }, "Edit Data Product"), " Metadata Privilege, which can be granted\nby a ", (0, n.yg)("a", { parentName: "p", href: "/docs/authorization/policies" }, "Policy"), ".")),
          (0, n.yg)("h3", { id: "creating-a-data-product-yaml--git" }, "Creating a Data Product (YAML + git)"),
          (0, n.yg)("p", null, "DataHub ships with a YAML-based Data Product spec for defining and managing Data Products as code."),
          (0, n.yg)("p", null, 'Here is an example of a Data Product named "Pet of the Week" which belongs to the ', (0, n.yg)("strong", { parentName: "p" }, "Marketing"), " domain and contains three data assets. The ", (0, n.yg)("strong", { parentName: "p" }, "Spec"), " tab describes the JSON Schema spec for a DataHub data product file."),
          (0, n.yg)(
            o.A,
            { mdxType: "Tabs" },
            (0, n.yg)(
              i.A,
              { value: "sample", label: "Example", default: true, mdxType: "TabItem" },
              (0, n.yg)("pre", null, (0, n.yg)("code", { parentName: "pre", className: "language-yaml" }, "# Inlined from /metadata-ingestion/examples/data_product/dataproduct.yaml\nid: pet_of_the_week\ndomain: Marketing\ndisplay_name: Pet of the Week Campaign\ndescription: |-\n This campaign includes Pet of the Week data.\n\n# List of assets that belong to this Data Product\nassets:\n - urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)\n - urn:li:dashboard:(looker,dashboards.19)\n - urn:li:dataFlow:(airflow,snowflake_load,prod)\n\nowners:\n - id: urn:li:corpuser:jdoe\n type: BUSINESS_OWNER\n - id: urn:li:corpuser:fbar\n type: urn:li:ownershipType:architect # Maps to a custom ownership type\n\n# Tags associated with this Data Product\ntags:\n - urn:li:tag:adoption\n\n# Glossary Terms associated with this Data Product\nterms:\n - urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance\n\ninstitutional_memory:\n elements:\n - title: URL for campaign\n description: |-\n Go here to see the campaign.\n url: https://example.com/pet_of_the_week\n\n# Custom Properties\nproperties:\n lifecycle: production\n sla: 7am every day\n\n")),
              (0, n.yg)(
                "admonition",
                { type: "note" },
                (0, n.yg)("p", { parentName: "admonition" }, "When bare domain names like ", (0, n.yg)("inlineCode", { parentName: "p" }, "Marketing"), " is used, ", (0, n.yg)("inlineCode", { parentName: "p" }, "datahub"), " will first check if a domain like ", (0, n.yg)("inlineCode", { parentName: "p" }, "urn:li:domain:Marketing"), " is provisioned, failing that; it will check for a provisioned domain that has the same name. If we are unable to resolve bare domain names to provisioned domains, then yaml-based ingestion will refuse to proceeed until the domain is provisioned on DataHub."),
                (0, n.yg)("p", { parentName: "admonition" }, "This applies to other fields as well, such as owners, ownership types, tags, and terms.")
              ),
              (0, n.yg)("p", null, "You can also provide fully-qualified domain names (e.g. ", (0, n.yg)("inlineCode", { parentName: "p" }, "urn:li:domain:dcadded3-2b70-4679-8b28-02ac9abc92eb"), ") to ensure that no ingestion-time domain resolution is needed.")
            ),
            (0, n.yg)(
              i.A,
              { value: "schema", label: "Spec", mdxType: "TabItem" },
              (0, n.yg)("pre", null, (0, n.yg)("code", { parentName: "pre", className: "language-json" }, '{\n "title": "DataProduct",\n "description": "This is a DataProduct class which represents a DataProduct\\n\\nArgs:\\n id (str): The id of the Data Product\\n domain (str): The domain that the Data Product belongs to. Either as a name or a fully-qualified urn.\\n owners (Optional[List[str, Ownership]]): A list of owners and their types.\\n institutional_memory (Optional[InstitutionMemory]): A list of institutional memory elements\\n display_name (Optional[str]): The name of the Data Product to display in the UI\\n description (Optional[str]): A documentation string for the Data Product\\n tags (Optional[List[str]]): An array of tags (either bare ids or urns) for the Data Product\\n terms (Optional[List[str]]): An array of terms (either bare ids or urns) for the Data Product\\n assets (List[str]): An array of entity urns that are part of the Data Product",\n "type": "object",\n "properties": {\n "id": {\n "title": "Id",\n "type": "string"\n },\n "domain": {\n "title": "Domain",\n "type": "string"\n },\n "assets": {\n "title": "Assets",\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n "display_name": {\n "title": "Display Name",\n "type": "string"\n },\n "owners": {\n "title": "Owners",\n "type": "array",\n "items": {\n "anyOf": [\n {\n "type": "string"\n },\n {\n "$ref": "#/definitions/Ownership"\n }\n ]\n }\n },\n "institutional_memory": {\n "$ref": "#/definitions/InstitutionMemory"\n },\n "description": {\n "title": "Description",\n "type": "string"\n },\n "tags": {\n "title": "Tags",\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n "terms": {\n "title": "Terms",\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n "properties": {\n "title": "Properties",\n "type": "object",\n "additionalProperties": {\n "type": "string"\n }\n },\n "external_url": {\n "title": "External Url",\n "type": "string"\n }\n },\n "required": [\n "id",\n "domain"\n ],\n "additionalProperties": false,\n "definitions": {\n "Ownership": {\n "title": "Ownership",\n "type": "object",\n "properties": {\n "id": {\n "title": "Id",\n "type": "string"\n },\n "type": {\n "title": "Type",\n "type": "string"\n }\n },\n "required": [\n "id",\n "type"\n ],\n "additionalProperties": false\n },\n "InstitutionMemoryElement": {\n "title": "InstitutionMemoryElement",\n "type": "object",\n "properties": {\n "url": {\n "title": "Url",\n "type": "string"\n },\n "description": {\n "title": "Description",\n "type": "string"\n }\n },\n "required": [\n "url",\n "description"\n ],\n "additionalProperties": false\n },\n "InstitutionMemory": {\n "title": "InstitutionMemory",\n "type": "object",\n "properties": {\n "elements": {\n "title": "Elements",\n "type": "array",\n "items": {\n "$ref": "#/definitions/InstitutionMemoryElement"\n }\n }\n },\n "additionalProperties": false\n }\n }\n}\n\n'))
            )
          ),
          (0, n.yg)("p", null, "To sync this yaml file to DataHub, use the ", (0, n.yg)("inlineCode", { parentName: "p" }, "datahub"), " cli via the ", (0, n.yg)("inlineCode", { parentName: "p" }, "dataproduct"), " group of commands."),
          (0, n.yg)("pre", null, (0, n.yg)("code", { parentName: "pre", className: "language-shell" }, "datahub dataproduct upsert -f user_dataproduct.yaml\n")),
          (0, n.yg)("h3", { id: "keeping-the-yaml-file-sync-ed-with-changes-in-ui" }, "Keeping the YAML file sync-ed with changes in UI"),
          (0, n.yg)("p", null, "The ", (0, n.yg)("inlineCode", { parentName: "p" }, "datahub"), " cli allows you to keep this YAML file synced with changes happening in the UI. All you have to do is run the ", (0, n.yg)("inlineCode", { parentName: "p" }, "datahub dataproduct diff"), " command."),
          (0, n.yg)("p", null, "Here is an example invocation that checks if there is any diff and updates the file in place:"),
          (0, n.yg)("pre", null, (0, n.yg)("code", { parentName: "pre", className: "language-shell" }, "datahub dataproduct diff -f user_dataproduct.yaml --update\n")),
          (0, n.yg)("p", null, "This allows you to manage your data product definition in git while still allowing for edits in the UI. Business Users and Developers can both collaborate on the definition of a data product with ease using this workflow."),
          (0, n.yg)("h3", { id: "advanced-cli-commands-for-managing-data-products" }, "Advanced cli commands for managing Data Products"),
          (0, n.yg)("p", null, "There are many more advanced cli commands for managing Data Products as code. Take a look at the ", (0, n.yg)("a", { parentName: "p", href: "/docs/cli#dataproduct-data-product-entity" }, "Data Products section"), " on the CLI reference guide for more details."),
          (0, n.yg)("h3", { id: "what-updates-are-planned-for-the-data-products-feature" }, "What updates are planned for the Data Products feature?"),
          (0, n.yg)("p", null, "The following features are next on the roadmap for Data Products"),
          (0, n.yg)(
            "ul",
            null,
            (0, n.yg)("li", { parentName: "ul" }, "Support for marking data assets in a Data Product as private versus shareable for other teams to consume"),
            (0, n.yg)("li", { parentName: "ul" }, "Support for declaring data lineage manually to upstream and downstream data products"),
            (0, n.yg)("li", { parentName: "ul" }, "Support for declaring logical schema for Data Products"),
            (0, n.yg)("li", { parentName: "ul" }, "Support for associating data contracts with Data Products"),
            (0, n.yg)("li", { parentName: "ul" }, "Support for semantic versioning of the Data Product entity")
          ),
          (0, n.yg)("h3", { id: "related-features" }, "Related Features"),
          (0, n.yg)(
            "ul",
            null,
            (0, n.yg)("li", { parentName: "ul" }, (0, n.yg)("a", { parentName: "li", href: "/docs/domains" }, "Domains")),
            (0, n.yg)("li", { parentName: "ul" }, (0, n.yg)("a", { parentName: "li", href: "/docs/glossary/business-glossary" }, "Glossary Terms")),
            (0, n.yg)("li", { parentName: "ul" }, (0, n.yg)("a", { parentName: "li", href: "/docs/tags" }, "Tags"))
          )
        );
      }
      f.isMDXComponent = true;
    },
  },
]);