"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[33630],{88432:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>p,default:()=>g,frontMatter:()=>l,metadata:()=>c,toc:()=>m});a(96540);var n=a(15680),s=a(53720),r=a(5400);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){return t=null!=t?t:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):function(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}(Object(t)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(t,a))})),e}function d(e,t){if(null==e)return{};var a,n,s=function(e,t){if(null==e)return{};var a,n,s={},r=Object.keys(e);for(n=0;n=0||(s[a]=e[a]);return s}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(s[a]=e[a])}return s}const l={title:"Dataset",slug:"/api/tutorials/datasets",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/datasets.md"},p="Dataset",c={unversionedId:"docs/api/tutorials/datasets",id:"docs/api/tutorials/datasets",title:"Dataset",description:"Why Would You Use Datasets?",source:"@site/genDocs/docs/api/tutorials/datasets.md",sourceDirName:"docs/api/tutorials",slug:"/api/tutorials/datasets",permalink:"/docs/api/tutorials/datasets",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/datasets.md",tags:[],version:"current",frontMatter:{title:"Dataset",slug:"/api/tutorials/datasets",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/datasets.md"},sidebar:"overviewSidebar",previous:{title:"Developing an Action",permalink:"/docs/actions/guides/developing-an-action"},next:{title:"Deprecation",permalink:"/docs/api/tutorials/deprecation"}},u={},m=[{value:"Why Would You Use Datasets?",id:"why-would-you-use-datasets",level:2},{value:"Goal Of This Guide",id:"goal-of-this-guide",level:3},{value:"Prerequisites",id:"prerequisites",level:2},{value:"Create Dataset",id:"create-dataset",level:2},{value:"Expected Outcomes of Creating Dataset",id:"expected-outcomes-of-creating-dataset",level:3},{value:"Delete Dataset",id:"delete-dataset",level:2},{value:"Expected Outcomes of Deleting Dataset",id:"expected-outcomes-of-deleting-dataset",level:3}],h={toc:m},y="wrapper";function g(e){var{components:t}=e,a=d(e,["components"]);return(0,n.yg)(y,o(function(e){for(var t=1;t b.server("http://localhost:8080").token(token));\n Future response = emitter.emit(mcpw, null);\n System.out.println(response.get().getResponseContent());\n }\n}\n\n'))),(0,n.yg)(r.A,{value:"python",label:"Python",default:!0,mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/dataset_schema.py\nfrom datahub.sdk import DataHubClient, Dataset\n\nclient = DataHubClient.from_env()\n\ndataset = Dataset(\n platform="hive",\n name="realestate_db.sales",\n schema=[\n # tuples of (field name / field path, data type, description)\n (\n "address.zipcode",\n "varchar(50)",\n "This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",\n ),\n ("address.street", "varchar(100)", "Street corresponding to the address"),\n ("last_sold_date", "date", "Date of the last sale date for this property"),\n ],\n)\n\nclient.entities.upsert(dataset)\n\n')))),(0,n.yg)("h3",{id:"expected-outcomes-of-creating-dataset"},"Expected Outcomes of Creating Dataset"),(0,n.yg)("p",null,"You can now see ",(0,n.yg)("inlineCode",{parentName:"p"},"realestate_db.sales")," dataset has been created."),(0,n.yg)("p",{align:"center"},(0,n.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/apis/tutorials/dataset-created.png"})),(0,n.yg)("h2",{id:"delete-dataset"},"Delete Dataset"),(0,n.yg)("p",null,"You may want to delete a dataset if it is no longer needed, contains incorrect or sensitive information, or if it was created for testing purposes and is no longer necessary in production.\nIt is possible to ",(0,n.yg)("a",{parentName:"p",href:"/docs/how/delete-metadata"},"delete entities via CLI"),", but a programmatic approach is necessary for scalability."),(0,n.yg)("p",null,"There are two methods of deletion: soft delete and hard delete.\n",(0,n.yg)("strong",{parentName:"p"},"Soft delete")," sets the Status aspect of the entity to Removed, which hides the entity and all its aspects from being returned by the UI.\n",(0,n.yg)("strong",{parentName:"p"},"Hard delete")," physically deletes all rows for all aspects of the entity."),(0,n.yg)("p",null,"For more information about soft delete and hard delete, please refer to ",(0,n.yg)("a",{parentName:"p",href:"/docs/how/delete-metadata#delete-by-urn"},"Removing Metadata from DataHub"),"."),(0,n.yg)(s.A,{mdxType:"Tabs"},(0,n.yg)(r.A,{value:"graphql",label:"GraphQL",mdxType:"TabItem"},(0,n.yg)("blockquote",null,(0,n.yg)("p",{parentName:"blockquote"},"\ud83d\udeab Hard delete with ",(0,n.yg)("inlineCode",{parentName:"p"},"graphql")," is currently not supported.\nPlease check out ",(0,n.yg)("a",{parentName:"p",href:"/docs/api/datahub-apis#datahub-api-comparison"},"API feature comparison table")," for more information.")),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-json"},'mutation batchUpdateSoftDeleted {\n batchUpdateSoftDeleted(input:\n { urns: ["urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"],\n deleted: true })\n}\n')),(0,n.yg)("p",null,"If you see the following response, the operation was successful:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-json"},'{\n "data": {\n "batchUpdateSoftDeleted": true\n },\n "extensions": {}\n}\n'))),(0,n.yg)(r.A,{value:"curl",label:"Curl",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-shell"},"curl --location --request POST 'http://localhost:8080/api/graphql' \\\n--header 'Authorization: Bearer ' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{ \"query\": \"mutation batchUpdateSoftDeleted { batchUpdateSoftDeleted(input: { deleted: true, urns: [\\\"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\\\"] }) }\", \"variables\":{}}'\n")),(0,n.yg)("p",null,"Expected Response:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-json"},'{ "data": { "batchUpdateSoftDeleted": true }, "extensions": {} }\n'))),(0,n.yg)(r.A,{value:"python",label:"Python",default:!0,mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/delete_dataset.py\nfrom datahub.emitter.mce_builder import make_dataset_urn\nfrom datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph\n\ngraph = DataHubGraph(\n config=DatahubClientConfig(\n server="http://localhost:8080",\n )\n)\n\ndataset_urn = make_dataset_urn(name="fct_users_created", platform="hive")\n\n# Soft-delete the dataset.\ngraph.delete_entity(urn=dataset_urn, hard=False)\n\nprint(f"Deleted dataset {dataset_urn}")\n\n')))),(0,n.yg)("h3",{id:"expected-outcomes-of-deleting-dataset"},"Expected Outcomes of Deleting Dataset"),(0,n.yg)("p",null,"The dataset ",(0,n.yg)("inlineCode",{parentName:"p"},"fct_users_deleted")," has now been deleted, so if you search for a hive dataset named ",(0,n.yg)("inlineCode",{parentName:"p"},"fct_users_delete"),", you will no longer be able to see it."),(0,n.yg)("p",{align:"center"},(0,n.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/apis/tutorials/dataset-deleted.png"})))}g.isMDXComponent=!0}}]);