"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[92637],{23078:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>u,default:()=>h,frontMatter:()=>m,metadata:()=>d,toc:()=>c});a(96540);var r=a(15680),n=a(53720),s=a(5400);function o(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){return t=null!=t?t:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):function(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}(Object(t)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(t,a))})),e}function i(e,t){if(null==e)return{};var a,r,n=function(e,t){if(null==e)return{};var a,r,n={},s=Object.keys(e);for(r=0;r=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}const m={title:"Terms",slug:"/api/tutorials/terms",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/terms.md"},u="Terms",d={unversionedId:"docs/api/tutorials/terms",id:"version-1.1.0/docs/api/tutorials/terms",title:"Terms",description:"Why Would You Use Terms on Datasets?",source:"@site/versioned_docs/version-1.1.0/docs/api/tutorials/terms.md",sourceDirName:"docs/api/tutorials",slug:"/api/tutorials/terms",permalink:"/docs/1.1.0/api/tutorials/terms",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/terms.md",tags:[],version:"1.1.0",frontMatter:{title:"Terms",slug:"/api/tutorials/terms",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/api/tutorials/terms.md"},sidebar:"overviewSidebar",previous:{title:"Tags",permalink:"/docs/1.1.0/api/tutorials/tags"},next:{title:"Emitting Patch Updates to DataHub",permalink:"/docs/1.1.0/advanced/patch"}},p={},c=[{value:"Why Would You Use Terms on Datasets?",id:"why-would-you-use-terms-on-datasets",level:2},{value:"Goal Of This Guide",id:"goal-of-this-guide",level:3},{value:"Prerequisites",id:"prerequisites",level:2},{value:"Create Terms",id:"create-terms",level:2},{value:"Expected Outcome of Creating Terms",id:"expected-outcome-of-creating-terms",level:3},{value:"Read Terms",id:"read-terms",level:2},{value:"Add Terms",id:"add-terms",level:2},{value:"Add Terms to a dataset",id:"add-terms-to-a-dataset",level:3},{value:"Add Terms to a Column of a Dataset",id:"add-terms-to-a-column-of-a-dataset",level:3},{value:"Expected Outcome of Adding Terms",id:"expected-outcome-of-adding-terms",level:3},{value:"Remove Terms",id:"remove-terms",level:2},{value:"Expected Outcome of Removing Terms",id:"expected-outcome-of-removing-terms",level:3}],g={toc:c},y="wrapper";function h(e){var{components:t}=e,a=i(e,["components"]);return(0,r.yg)(y,l(function(e){for(var t=1;t\' \\\n--header \'Content-Type: application/json\' \\\n--data-raw \'{ "query": "mutation createGlossaryTerm { createGlossaryTerm(input: { name: \\"Rate of Return\\", id:\\"rateofreturn\\", description: \\"A rate of return (RoR) is the net gain or loss of an investment over a specified time period.\\" }) }", "variables":{}}\'\n')),(0,r.yg)("p",null,"Expected Response:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{\n "data": { "createGlossaryTerm": "urn:li:glossaryTerm:rateofreturn" },\n "extensions": {}\n}\n'))),(0,r.yg)(s.A,{value:"python",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/create_term.py\nimport logging\n\nfrom datahub.emitter.mce_builder import make_term_urn\nfrom datahub.emitter.mcp import MetadataChangeProposalWrapper\nfrom datahub.emitter.rest_emitter import DatahubRestEmitter\n\n# Imports for metadata model classes\nfrom datahub.metadata.schema_classes import GlossaryTermInfoClass\n\nlog = logging.getLogger(__name__)\nlogging.basicConfig(level=logging.INFO)\n\nterm_urn = make_term_urn("rateofreturn")\nterm_properties_aspect = GlossaryTermInfoClass(\n definition="A rate of return (RoR) is the net gain or loss of an investment over a specified time period.",\n name="Rate of Return",\n termSource="",\n)\n\nevent: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(\n entityUrn=term_urn,\n aspect=term_properties_aspect,\n)\n\n# Create rest emitter\nrest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")\nrest_emitter.emit(event)\nlog.info(f"Created term {term_urn}")\n\n')))),(0,r.yg)("h3",{id:"expected-outcome-of-creating-terms"},"Expected Outcome of Creating Terms"),(0,r.yg)("p",null,"You can now see the new term ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," has been created."),(0,r.yg)("p",{align:"center"},(0,r.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/apis/tutorials/term-created.png"})),(0,r.yg)("p",null,"We can also verify this operation by programmatically searching ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," term after running this code using the ",(0,r.yg)("inlineCode",{parentName:"p"},"datahub")," cli."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-shell"},'datahub get --urn "urn:li:glossaryTerm:rateofreturn" --aspect glossaryTermInfo\n\n{\n "glossaryTermInfo": {\n "definition": "A rate of return (RoR) is the net gain or loss of an investment over a specified time period.",\n "name": "Rate of Return",\n "termSource": "INTERNAL"\n }\n}\n')),(0,r.yg)("h2",{id:"read-terms"},"Read Terms"),(0,r.yg)(n.A,{mdxType:"Tabs"},(0,r.yg)(s.A,{value:"graphql",label:"GraphQL",default:!0,mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'query {\n dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") {\n glossaryTerms {\n terms {\n term {\n urn\n glossaryTermInfo {\n name\n description\n }\n }\n }\n }\n }\n}\n')),(0,r.yg)("p",null,"If you see the following response, the operation was successful:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'{\n "data": {\n "dataset": {\n "glossaryTerms": {\n "terms": [\n {\n "term": {\n "urn": "urn:li:glossaryTerm:CustomerAccount",\n "glossaryTermInfo": {\n "name": "CustomerAccount",\n "description": "account that represents an identified, named collection of balances and cumulative totals used to summarize customer transaction-related activity over a designated period of time"\n }\n }\n }\n ]\n }\n }\n },\n "extensions": {}\n}\n'))),(0,r.yg)(s.A,{value:"curl",label:"Curl",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-shell"},"curl --location --request POST 'http://localhost:8080/api/graphql' \\\n--header 'Authorization: Bearer ' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{ \"query\": \"{dataset(urn: \\\"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\\\") {glossaryTerms {terms {term {urn glossaryTermInfo { name description } } } } } }\", \"variables\":{}}'\n")),(0,r.yg)("p",null,"Expected Response:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{"data":{"dataset":{"glossaryTerms":{"terms":[{"term":{"urn":"urn:li:glossaryTerm:CustomerAccount","glossaryTermInfo":{"name":"CustomerAccount","description":"account that represents an identified, named collection of balances and cumulative totals used to summarize customer transaction-related activity over a designated period of time"}}}]}}},"extensions":{}}```\n'))),(0,r.yg)(s.A,{value:"python",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/dataset_query_terms.py\nfrom datahub.sdk import DataHubClient, DatasetUrn\n\nclient = DataHubClient.from_env()\n\ndataset = client.entities.get(\n DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD")\n)\n\nprint(dataset.terms)\n\n')))),(0,r.yg)("h2",{id:"add-terms"},"Add Terms"),(0,r.yg)("h3",{id:"add-terms-to-a-dataset"},"Add Terms to a dataset"),(0,r.yg)("p",null,"The following code shows you how can add terms to a dataset.\nIn the following code, we add a term ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," to a dataset named ",(0,r.yg)("inlineCode",{parentName:"p"},"fct_users_created"),"."),(0,r.yg)(n.A,{mdxType:"Tabs"},(0,r.yg)(s.A,{value:"graphql",label:"GraphQL",default:!0,mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'mutation addTerms {\n addTerms(\n input: {\n termUrns: ["urn:li:glossaryTerm:rateofreturn"],\n resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",\n }\n )\n}\n')),(0,r.yg)("p",null,"If you see the following response, the operation was successful:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'{\n "data": {\n "addTerms": true\n },\n "extensions": {}\n}\n'))),(0,r.yg)(s.A,{value:"curl",label:"Curl",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-shell"},'curl --location --request POST \'http://localhost:8080/api/graphql\' \\\n--header \'Authorization: Bearer \' \\\n--header \'Content-Type: application/json\' \\\n--data-raw \'{ "query": "mutation addTerm { addTerms(input: { termUrns: [\\"urn:li:glossaryTerm:rateofreturn\\"], resourceUrn: \\"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\\" }) }", "variables":{}}\'\n')),(0,r.yg)("p",null,"Expected Response:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "data": { "addTerms": true }, "extensions": {} }\n'))),(0,r.yg)(s.A,{value:"python",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/dataset_add_term.py\nfrom datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn\n\nclient = DataHubClient.from_env()\n\ndataset = client.entities.get(\n DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD")\n)\ndataset.add_term(GlossaryTermUrn("Classification.HighlyConfidential"))\n\nclient.entities.update(dataset)\n\n')))),(0,r.yg)("h3",{id:"add-terms-to-a-column-of-a-dataset"},"Add Terms to a Column of a Dataset"),(0,r.yg)(n.A,{mdxType:"Tabs"},(0,r.yg)(s.A,{value:"graphql",label:"GraphQL",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'mutation addTerms {\n addTerms(\n input: {\n termUrns: ["urn:li:glossaryTerm:rateofreturn"],\n resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",\n subResourceType:DATASET_FIELD,\n subResource:"user_name"})\n}\n'))),(0,r.yg)(s.A,{value:"curl",label:"Curl",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-shell"},'curl --location --request POST \'http://localhost:8080/api/graphql\' \\\n--header \'Authorization: Bearer \' \\\n--header \'Content-Type: application/json\' \\\n--data-raw \'{ "query": "mutation addTerms { addTerms(input: { termUrns: [\\"urn:li:glossaryTerm:rateofreturn\\"], resourceUrn: \\"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\\", subResourceType: DATASET_FIELD, subResource: \\"user_name\\" }) }", "variables":{}}\'\n')),(0,r.yg)("p",null,"Expected Response:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "data": { "addTerms": true }, "extensions": {} }\n'))),(0,r.yg)(s.A,{value:"python",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/dataset_add_column_term.py\nfrom datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn\n\nclient = DataHubClient.from_env()\n\ndataset = client.entities.get(\n DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD")\n)\n\ndataset["address.zipcode"].add_term(GlossaryTermUrn("Classification.Location"))\n\nclient.entities.update(dataset)\n\n')))),(0,r.yg)("h3",{id:"expected-outcome-of-adding-terms"},"Expected Outcome of Adding Terms"),(0,r.yg)("p",null,"You can now see ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," term has been added to ",(0,r.yg)("inlineCode",{parentName:"p"},"user_name")," column."),(0,r.yg)("p",{align:"center"},(0,r.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/apis/tutorials/term-added.png"})),(0,r.yg)("h2",{id:"remove-terms"},"Remove Terms"),(0,r.yg)("p",null,"The following code remove a term from a dataset.\nAfter running this code, ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," term will be removed from a ",(0,r.yg)("inlineCode",{parentName:"p"},"user_name")," column."),(0,r.yg)(n.A,{mdxType:"Tabs"},(0,r.yg)(s.A,{value:"graphql",label:"GraphQL",default:!0,mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'mutation removeTerm {\n removeTerm(\n input: {\n termUrn: "urn:li:glossaryTerm:rateofreturn",\n resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",\n subResourceType:DATASET_FIELD,\n subResource:"user_name"})\n}\n')),(0,r.yg)("p",null,"Note that you can also remove a term from a dataset if you don't specify ",(0,r.yg)("inlineCode",{parentName:"p"},"subResourceType")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"subResource"),"."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'mutation removeTerm {\n removeTerm(\n input: {\n termUrn: "urn:li:glossaryTerm:rateofreturn",\n resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",\n })\n}\n')),(0,r.yg)("p",null,"Also note that you can remove terms from multiple entities or subresource using ",(0,r.yg)("inlineCode",{parentName:"p"},"batchRemoveTerms"),"."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'mutation batchRemoveTerms {\n batchRemoveTerms(\n input: {\n termUrns: ["urn:li:glossaryTerm:rateofreturn"],\n resources: [\n { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,\n { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]\n }\n )\n}\n'))),(0,r.yg)(s.A,{value:"curl",label:"Curl",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-shell"},'curl --location --request POST \'http://localhost:8080/api/graphql\' \\\n--header \'Authorization: Bearer \' \\\n--header \'Content-Type: application/json\' \\\n--data-raw \'{ "query": "mutation removeTerm { removeTerm(input: { termUrn: \\"urn:li:glossaryTerm:rateofreturn\\", resourceUrn: \\"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)\\" }) }", "variables":{}}\'\n'))),(0,r.yg)(s.A,{value:"python",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-python"},'# Inlined from /metadata-ingestion/examples/library/dataset_remove_term_execute_graphql.py\n# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)\nfrom datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph\n\ngms_endpoint = "http://localhost:8080"\ngraph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))\n\n# Query multiple aspects from entity\nquery = """\nmutation batchRemoveTerms {\n batchRemoveTerms(\n input: {\n termUrns: ["urn:li:glossaryTerm:rateofreturn"],\n resources: [\n { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,\n { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]\n }\n )\n}\n"""\nresult = graph.execute_graphql(query=query)\n\nprint(result)\n\n')))),(0,r.yg)("h3",{id:"expected-outcome-of-removing-terms"},"Expected Outcome of Removing Terms"),(0,r.yg)("p",null,"You can now see ",(0,r.yg)("inlineCode",{parentName:"p"},"Rate of Return")," term has been removed to ",(0,r.yg)("inlineCode",{parentName:"p"},"user_name")," column."),(0,r.yg)("p",{align:"center"},(0,r.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/apis/tutorials/term-removed.png"})))}h.isMDXComponent=!0}}]);