datahub/assets/js/22f0d87d.1eefa3da.js
2025-08-08 21:37:06 +00:00

1 line
35 KiB
JavaScript

/**
 * Webpack chunk 78444 of the `docs_website` bundle: module 59426, the compiled
 * MDX page "Vertex AI" (see `metadata.source` below for the markdown it was
 * built from).
 *
 * The module exports `assets`, `contentTitle`, `default` (the page component
 * `N`), `frontMatter`, `metadata` and `toc`.
 *
 * NOTE(review): this looks like generated build output — text changes made
 * here should presumably also be made in the markdown source, otherwise the
 * next docs build will overwrite them. Confirm before relying on this file.
 */
"use strict";
(self.webpackChunkdocs_website = self.webpackChunkdocs_website || []).push([
  [78444],
  {
    59426: (e, t, a) => {
      a.r(t);
      // Export getters are lazy, so they may reference the constants declared further down.
      a.d(t, {
        assets: () => c,
        contentTitle: () => g,
        default: () => N,
        frontMatter: () => p,
        metadata: () => d,
        toc: () => m,
      });
      a(96540);
      var n = a(15680), // n.yg is called below as an element factory: (type, props, ...children)
        i = a(53720), // i.A is rendered with mdxType "Tabs"
        r = a(5400); // r.A is rendered with mdxType "TabItem"

      /**
       * Sets `e[t] = a` and returns `e`. When the key already exists (own or
       * inherited) it is redefined as an enumerable, configurable, writable
       * data property instead of being assigned.
       */
      function l(e, t, a) {
        if (t in e) {
          Object.defineProperty(e, t, { value: a, enumerable: true, configurable: true, writable: true });
        } else {
          e[t] = a;
        }
        return e;
      }

      /**
       * Copies the own property descriptors of `t` (treated as `{}` when
       * null/undefined) onto `e` and returns `e`.
       */
      function o(e, t) {
        t = null != t ? t : {};
        if (Object.getOwnPropertyDescriptors) {
          Object.defineProperties(e, Object.getOwnPropertyDescriptors(t));
        } else {
          // Fallback: enumerate string keys plus symbols and copy descriptors one by one.
          (function (e, t) {
            var a = Object.keys(e);
            if (Object.getOwnPropertySymbols) {
              var n = Object.getOwnPropertySymbols(e);
              t &&
                (n = n.filter(function (t) {
                  return Object.getOwnPropertyDescriptor(e, t).enumerable;
                }));
              a.push.apply(a, n);
            }
            return a;
          })(Object(t)).forEach(function (a) {
            Object.defineProperty(e, a, Object.getOwnPropertyDescriptor(t, a));
          });
        }
        return e;
      }

      /**
       * Returns a shallow copy of `e` without the keys listed in `t`
       * (enumerable symbol keys included). Returns `{}` when `e` is
       * null/undefined.
       */
      function s(e, t) {
        if (null == e) return {};
        var a,
          n,
          i = (function (e, t) {
            if (null == e) return {};
            var a,
              n,
              i = {},
              r = Object.keys(e);
            for (n = 0; n < r.length; n++) {
              a = r[n];
              t.indexOf(a) >= 0 || (i[a] = e[a]);
            }
            return i;
          })(e, t);
        if (Object.getOwnPropertySymbols) {
          var r = Object.getOwnPropertySymbols(e);
          for (n = 0; n < r.length; n++) {
            a = r[n];
            t.indexOf(a) >= 0 || (Object.prototype.propertyIsEnumerable.call(e, a) && (i[a] = e[a]));
          }
        }
        return i;
      }

      // Exported as `frontMatter`.
      const p = {
          sidebar_position: 72,
          title: "Vertex AI",
          slug: "/generated/ingestion/sources/vertexai",
          custom_edit_url: "https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/vertexai.md",
        },
        // Exported as `contentTitle`.
        g = "Vertex AI",
        // Exported as `metadata`.
        d = {
          unversionedId: "docs/generated/ingestion/sources/vertexai",
          id: "version-1.1.0/docs/generated/ingestion/sources/vertexai",
          title: "Vertex AI",
          description: "Testing",
          source: "@site/versioned_docs/version-1.1.0/docs/generated/ingestion/sources/vertexai.md",
          sourceDirName: "docs/generated/ingestion/sources",
          slug: "/generated/ingestion/sources/vertexai",
          permalink: "/docs/1.1.0/generated/ingestion/sources/vertexai",
          draft: false,
          editUrl: "https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/vertexai.md",
          tags: [],
          version: "1.1.0",
          sidebarPosition: 72,
          frontMatter: {
            sidebar_position: 72,
            title: "Vertex AI",
            slug: "/generated/ingestion/sources/vertexai",
            custom_edit_url: "https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/vertexai.md",
          },
          sidebar: "overviewSidebar",
          previous: { title: "Trino", permalink: "/docs/1.1.0/generated/ingestion/sources/trino" },
          next: { title: "Vertica", permalink: "/docs/1.1.0/generated/ingestion/sources/vertica" },
        },
        // Exported as `assets`.
        c = {},
        // Exported as `toc`: one entry per heading rendered by N, with its anchor id and level.
        m = [
          { value: "Important Capabilities", id: "important-capabilities", level: 3 },
          { value: "Prerequisites", id: "prerequisites", level: 4 },
          { value: "Credentials to access to GCP", id: "credentials-to-access-to-gcp", level: 4 },
          { value: "Permissions", id: "permissions", level: 5 },
          { value: "Create a service account and assign roles", id: "create-a-service-account-and-assign-roles", level: 4 },
          { value: "Integration Details", id: "integration-details", level: 3 },
          { value: "Concept Mapping", id: "concept-mapping", level: 4 },
          { value: "Lineage", id: "lineage", level: 4 },
          { value: "CLI based Ingestion", id: "cli-based-ingestion", level: 3 },
          { value: "Starter Recipe", id: "starter-recipe", level: 3 },
          { value: "Config Details", id: "config-details", level: 3 },
          { value: "Code Coordinates", id: "code-coordinates", level: 3 },
        ],
        y = { toc: m },
        u = "wrapper";

      /**
       * Page component (default export). Renders the "Vertex AI" page inside
       * the `wrapper` element.
       *
       * @param {object} e - Props. `components` is passed to the layout as
       *   `components`; every other prop is merged over `{ toc }` and
       *   forwarded.
       * @returns The element produced by `n.yg` for the whole page.
       */
      function N(e) {
        var { components: t } = e,
          a = s(e, ["components"]);
        // Merge {} <- y <- remaining props (later sources win), then add components/mdxType.
        var layoutProps = o(
          (function (e) {
            for (var t = 1; t < arguments.length; t++) {
              var a = null != arguments[t] ? arguments[t] : {},
                n = Object.keys(a);
              "function" == typeof Object.getOwnPropertySymbols &&
                (n = n.concat(
                  Object.getOwnPropertySymbols(a).filter(function (e) {
                    return Object.getOwnPropertyDescriptor(a, e).enumerable;
                  })
                ));
              n.forEach(function (t) {
                l(e, t, a[t]);
              });
            }
            return e;
          })({}, y, a),
          { components: t, mdxType: "MDXLayout" }
        );
        return (0,n.yg)(u, layoutProps,
          (0,n.yg)("h1",{id:"vertex-ai"},"Vertex AI"),
          (0,n.yg)("p",null,(0,n.yg)("img",{parentName:"p",src:"https://img.shields.io/badge/support%20status-testing-lightgrey",alt:"Testing"})),
          (0,n.yg)("h3",{id:"important-capabilities"},"Important Capabilities"),
          (0,n.yg)("table",null,
            (0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Capability"),(0,n.yg)("th",{parentName:"tr",align:null},"Status"),(0,n.yg)("th",{parentName:"tr",align:null},"Notes"))),
            (0,n.yg)("tbody",{parentName:"table"},
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Descriptions"),(0,n.yg)("td",{parentName:"tr",align:null},"\u2705"),(0,n.yg)("td",{parentName:"tr",align:null},"Extract descriptions for Vertex AI Registered Models and Model Versions")))),
          (0,n.yg)("p",null,"Ingesting metadata from VertexAI requires using the ",(0,n.yg)("strong",{parentName:"p"},"Vertex AI")," module."),
          (0,n.yg)("h4",{id:"prerequisites"},"Prerequisites"),
          (0,n.yg)("p",null,"Please refer to the ",(0,n.yg)("a",{parentName:"p",href:"https://cloud.google.com/vertex-ai/docs"},"Vertex AI documentation")," for basic information on Vertex AI."),
          (0,n.yg)("h4",{id:"credentials-to-access-to-gcp"},"Credentials to access to GCP"),
          (0,n.yg)("p",null,"Please read the section to understand how to set up application default Credentials to ",(0,n.yg)("a",{parentName:"p",href:"https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to"},"GCP docs"),"."),
          (0,n.yg)("h5",{id:"permissions"},"Permissions"),
          (0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Grant the following permissions to the Service Account on every project where you would like to extract metadata from")),
          (0,n.yg)("p",null,"Default GCP Role which contains these permissions ",(0,n.yg)("a",{parentName:"p",href:"https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.viewer"},"roles/aiplatform.viewer")),
          (0,n.yg)("table",null,
            (0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Permission"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"))),
            (0,n.yg)("tbody",{parentName:"table"},
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.models.list")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view and list all ML models in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.models.get")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view details of a specific ML model")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.endpoints.list")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view and list all prediction endpoints in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.endpoints.get")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view details of a specific prediction endpoint")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.trainingPipelines.list")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view and list all training pipelines in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.trainingPipelines.get")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view details of a specific training pipeline")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.customJobs.list")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view and list all custom jobs in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.customJobs.get")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view details of a specific custom job")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.experiments.list")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view and list all experiments in a project")),
              // Permission names all use the "aiplatform." prefix.
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.experiments.get")),(0,n.yg)("td",{parentName:"tr",align:null},"Allows a user to view details of a specific experiment in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.metadataStores.list")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view and list all metadata store in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.metadataStores.get")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view details of a specific metadata store")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.executions.list")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view and list all executions in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.executions.get")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view details of a specific execution")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.datasets.list")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view and list all datasets in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.datasets.get")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view details of a specific dataset")),
              // `.list` pairs with the "list all" description and `.get` with the "details" one, like every row above.
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.pipelineJobs.list")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view and list all pipeline jobs in a project")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("inlineCode",{parentName:"td"},"aiplatform.pipelineJobs.get")),(0,n.yg)("td",{parentName:"tr",align:null},"allows a user to view details of a specific pipeline job")))),
          (0,n.yg)("h4",{id:"create-a-service-account-and-assign-roles"},"Create a service account and assign roles"),
          (0,n.yg)("ol",null,
            (0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"Setup a ServiceAccount as per ",(0,n.yg)("a",{parentName:"p",href:"https://cloud.google.com/iam/docs/creating-managing-service-accounts#iam-service-accounts-create-console"},"GCP docs")," and assign the previously created role to this service account.")),
            (0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"Download a service account JSON keyfile."),(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"Example credential file:")),(0,n.yg)("pre",{parentName:"li"},(0,n.yg)("code",{parentName:"pre",className:"language-json"},'{\n "type": "service_account",\n "project_id": "project-id-1234567",\n "private_key_id": "d0121d0000882411234e11166c6aaa23ed5d74e0",\n "private_key": "-----BEGIN PRIVATE KEY-----\\nMIIyourkey\\n-----END PRIVATE KEY-----",\n "client_email": "test@suppproject-id-1234567.iam.gserviceaccount.com",\n "client_id": "113545814931671546333",\n "auth_uri": "https://accounts.google.com/o/oauth2/auth",\n "token_uri": "https://oauth2.googleapis.com/token",\n "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",\n "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%suppproject-id-1234567.iam.gserviceaccount.com"\n}\n'))),
            (0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"To provide credentials to the source, you can either:"))),
          (0,n.yg)("ul",null,
            (0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"Set an environment variable:"),(0,n.yg)("pre",{parentName:"li"},(0,n.yg)("code",{parentName:"pre",className:"language-sh"},'$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"\n')),(0,n.yg)("p",{parentName:"li"},(0,n.yg)("em",{parentName:"p"},"or"))),
            (0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"Set credential config in your source based on the credential json file. For example:"),(0,n.yg)("pre",{parentName:"li"},(0,n.yg)("code",{parentName:"pre",className:"language-yml"},'credential:\n private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"\n private_key: "-----BEGIN PRIVATE KEY-----\\nMIIyourkey\\n-----END PRIVATE KEY-----\\n"\n client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com"\n client_id: "123456678890"\n')))),
          (0,n.yg)("h3",{id:"integration-details"},"Integration Details"),
          (0,n.yg)("p",null,"Ingestion Job extract Models, Datasets, Training Jobs, Endpoints, Experiment and Experiment Runs in a given project and region on Vertex AI."),
          (0,n.yg)("h4",{id:"concept-mapping"},"Concept Mapping"),
          (0,n.yg)("p",null,"This ingestion source maps the following Vertex AI Concepts to DataHub Concepts:"),
          (0,n.yg)("table",null,
            (0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"center"},"Source Concept"),(0,n.yg)("th",{parentName:"tr",align:"center"},"DataHub Concept"),(0,n.yg)("th",{parentName:"tr",align:"center"},"Notes"))),
            (0,n.yg)("tbody",{parentName:"table"},
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model"},(0,n.yg)("inlineCode",{parentName:"a"},"Model"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/mlmodelgroup/"},(0,n.yg)("inlineCode",{parentName:"a"},"MlModelGroup"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"The name of a Model Group is the same as Model's name. Model serve as containers for multiple versions of the same model in Vertex AI.")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/vertex-ai/docs/model-registry/versioning"},(0,n.yg)("inlineCode",{parentName:"a"},"Model Version"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/mlmodel/"},(0,n.yg)("inlineCode",{parentName:"a"},"MlModel"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"The name of a Model is ",(0,n.yg)("inlineCode",{parentName:"td"},"{model_name}_{model_version}")," (e.g. my_vertexai_model_1) for model registered to Model Registry or Deployed to Endpoint. Each Model Version represents a specific iteration of a model with its own metadata.")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},"Dataset ",(0,n.yg)("br",null),(0,n.yg)("br",null)),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/dataset"},(0,n.yg)("inlineCode",{parentName:"a"},"Dataset"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"A Managed Dataset resource in Vertex AI is mapped to Dataset in DataHub. ",(0,n.yg)("br",null)," Supported types of datasets include (",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.TextDataset"},(0,n.yg)("inlineCode",{parentName:"a"},"Text")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.TabularDataset"},(0,n.yg)("inlineCode",{parentName:"a"},"Tabular")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.ImageDataset"},(0,n.yg)("inlineCode",{parentName:"a"},"Image Dataset")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.VideoDataset"},(0,n.yg)("inlineCode",{parentName:"a"},"Video")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.TimeSeriesDataset"},(0,n.yg)("inlineCode",{parentName:"a"},"TimeSeries")),")")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide"},(0,n.yg)("inlineCode",{parentName:"a"},"Training Job"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/dataprocessinstance/"},(0,n.yg)("inlineCode",{parentName:"a"},"DataProcessInstance"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"A Training Job is mapped as DataProcessInstance in DataHub. ",(0,n.yg)("br",null)," Supported types of training jobs include (",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.AutoMLTextTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"AutoMLTextTrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.AutoMLTabularTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"AutoMLTabularTrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.AutoMLImageTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"AutoMLImageTrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.AutoMLVideoTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"AutoMLVideoTrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.AutoMLForecastingTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"AutoMLForecastingTrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob"},(0,n.yg)("inlineCode",{parentName:"a"},"Custom Job")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"Custom TrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomContainerTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"Custom Container TrainingJob")),", ",(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomPythonPackageTrainingJob"},(0,n.yg)("inlineCode",{parentName:"a"},"Custom Python Packaging Job"))," )")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Experiment"},(0,n.yg)("inlineCode",{parentName:"a"},"Experiment"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/container/"},(0,n.yg)("inlineCode",{parentName:"a"},"Container"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"Experiments organize related runs and serve as logical groupings for model development iterations. Each Experiment is mapped to a Container in DataHub.")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.ExperimentRun"},(0,n.yg)("inlineCode",{parentName:"a"},"Experiment Run"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/dataprocessinstance/"},(0,n.yg)("inlineCode",{parentName:"a"},"DataProcessInstance"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"An Experiment Run represents a single execution of a ML workflow. An Experiment Run tracks ML parameters, metrics, artifacts and metadata")),
              (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Execution"},(0,n.yg)("inlineCode",{parentName:"a"},"Execution"))),(0,n.yg)("td",{parentName:"tr",align:"center"},(0,n.yg)("a",{parentName:"td",href:"/docs/generated/metamodel/entities/dataprocessinstance/"},(0,n.yg)("inlineCode",{parentName:"a"},"DataProcessInstance"))),(0,n.yg)("td",{parentName:"tr",align:"center"},"Metadata Execution resource for Vertex AI. Metadata Execution is started in a experiment run and captures input and output artifacts.")))),
          (0,n.yg)("p",null,"Vertex AI Concept Diagram:"),
          (0,n.yg)("p",{align:"center"},(0,n.yg)("img",{width:"70%",src:"https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/metadata-ingestion/vertexai/concept-mapping.png"})),
          (0,n.yg)("h4",{id:"lineage"},"Lineage"),
          (0,n.yg)("p",null,"Lineage is emitted using Vertex AI API to capture the following relationships:"),
          (0,n.yg)("ul",null,
            (0,n.yg)("li",{parentName:"ul"},"A training job and a model (which training job produce a model)"),
            (0,n.yg)("li",{parentName:"ul"},"A dataset and a training job (which dataset was consumed by a training job to train a model)"),
            (0,n.yg)("li",{parentName:"ul"},"Experiment runs and an experiment"),
            (0,n.yg)("li",{parentName:"ul"},"Metadata execution and an experiment run")),
          (0,n.yg)("h3",{id:"cli-based-ingestion"},"CLI based Ingestion"),
          (0,n.yg)("h3",{id:"starter-recipe"},"Starter Recipe"),
          (0,n.yg)("p",null,"Check out the following recipe to get started with ingestion! See ",(0,n.yg)("a",{parentName:"p",href:"#config-details"},"below")," for full configuration options."),
          (0,n.yg)("p",null,"For general pointers on writing and running a recipe, see our ",(0,n.yg)("a",{parentName:"p",href:"/docs/1.1.0/metadata-ingestion#recipes"},"main recipe guide"),"."),
          // Nested keys are indented so project_id/region (and server) sit under `config`, not beside it.
          (0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-yaml"},'source:\n  type: vertexai\n  config:\n    project_id: "acryl-poc"\n    region: "us-west2"\n# You must either set GOOGLE_APPLICATION_CREDENTIALS or provide credential as shown below\n#    credential:\n#      private_key: \'-----BEGIN PRIVATE KEY-----\\\\nprivate-key\\\\n-----END PRIVATE KEY-----\\\\n\'\n#      private_key_id: "project_key_id"\n#      client_email: "client_email"\n#      client_id: "client_id"\n\nsink:\n  type: "datahub-rest"\n  config:\n    server: "http://localhost:8080"\n\n')),
          (0,n.yg)("h3",{id:"config-details"},"Config Details"),
          (0,n.yg)(i.A,{mdxType:"Tabs"},
            (0,n.yg)(r.A,{value:"options",label:"Options",default:!0,mdxType:"TabItem"},
              (0,n.yg)("p",null,"Note that a ",(0,n.yg)("inlineCode",{parentName:"p"},".")," is used to denote nested fields in the YAML recipe."),
              (0,n.yg)("div",{className:"config-table"},(0,n.yg)("table",null,
                (0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Field"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"))),
                (0,n.yg)("tbody",{parentName:"table"},
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"project_id"),"\xa0",(0,n.yg)("abbr",{title:"Required"},"\u2705"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Project ID in Google Cloud Platform")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"region"),"\xa0",(0,n.yg)("abbr",{title:"Required"},"\u2705"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Region of your project in Google Cloud Platform")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"bucket_uri"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Bucket URI used in your project")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"vertexai_url"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"VertexUI URI ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},(0,n.yg)("a",{parentName:"td",href:"https://console.cloud.google.com/vertex-ai"},"https://console.cloud.google.com/vertex-ai"))))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"env"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"The environment that all assets produced by this connector belong to ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"PROD")))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"credential"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"GCPCredential"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"GCP credential information")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"client_email"),"\xa0",(0,n.yg)("abbr",{title:"Required if credential is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Client email")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"client_id"),"\xa0",(0,n.yg)("abbr",{title:"Required if credential is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Client Id")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"private_key"),"\xa0",(0,n.yg)("abbr",{title:"Required if credential is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"private_key_id"),"\xa0",(0,n.yg)("abbr",{title:"Required if credential is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Private key id")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"auth_provider_x509_cert_url"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Auth provider x509 certificate url ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},(0,n.yg)("a",{parentName:"td",href:"https://www.googleapis.com/oauth2/v1/certs"},"https://www.googleapis.com/oauth2/v1/certs"))))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"auth_uri"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Authentication uri ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},(0,n.yg)("a",{parentName:"td",href:"https://accounts.google.com/o/oauth2/auth"},"https://accounts.google.com/o/oauth2/auth"))))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"client_x509_cert_url"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"If not set it will be default to ",(0,n.yg)("a",{parentName:"td",href:"https://www.googleapis.com/robot/v1/metadata/x509/client_email"},"https://www.googleapis.com/robot/v1/metadata/x509/client_email"))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"project_id"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Project id to set the credentials")),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"token_uri"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Token uri ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},(0,n.yg)("a",{parentName:"td",href:"https://oauth2.googleapis.com/token"},"https://oauth2.googleapis.com/token"))))),
                  (0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"credential."),(0,n.yg)("span",{className:"path-main"},"type"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Authentication type ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"service","_","account")))))))),
            (0,n.yg)(r.A,{value:"schema",label:"Schema",mdxType:"TabItem"},
              (0,n.yg)("p",null,"The ",(0,n.yg)("a",{parentName:"p",href:"https://json-schema.org/"},"JSONSchema")," for this configuration is inlined below."),
              (0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-javascript"},'{\n "title": "VertexAIConfig",\n "description": "Any source that produces dataset urns in a single environment should inherit this class",\n "type": "object",\n "properties": {\n "env": {\n "title": "Env",\n "description": "The environment that all assets produced by this connector belong to",\n "default": "PROD",\n "type": "string"\n },\n "credential": {\n "title": "Credential",\n "description": "GCP credential information",\n "allOf": [\n {\n "$ref": "#/definitions/GCPCredential"\n }\n ]\n },\n "project_id": {\n "title": "Project Id",\n "description": "Project ID in Google Cloud Platform",\n "type": "string"\n },\n "region": {\n "title": "Region",\n "description": "Region of your project in Google Cloud Platform",\n "type": "string"\n },\n "bucket_uri": {\n "title": "Bucket Uri",\n "description": "Bucket URI used in your project",\n "type": "string"\n },\n "vertexai_url": {\n "title": "Vertexai Url",\n "description": "VertexUI URI",\n "default": "https://console.cloud.google.com/vertex-ai",\n "type": "string"\n }\n },\n "required": [\n "project_id",\n "region"\n ],\n "additionalProperties": false,\n "definitions": {\n "GCPCredential": {\n "title": "GCPCredential",\n "type": "object",\n "properties": {\n "project_id": {\n "title": "Project Id",\n "description": "Project id to set the credentials",\n "type": "string"\n },\n "private_key_id": {\n "title": "Private Key Id",\n "description": "Private key id",\n "type": "string"\n },\n "private_key": {\n "title": "Private Key",\n "description": "Private key in a form of \'-----BEGIN PRIVATE KEY-----\\\\nprivate-key\\\\n-----END PRIVATE KEY-----\\\\n\'",\n "type": "string"\n },\n "client_email": {\n "title": "Client Email",\n "description": "Client email",\n "type": "string"\n },\n "client_id": {\n "title": "Client Id",\n "description": "Client Id",\n "type": "string"\n },\n "auth_uri": {\n "title": "Auth Uri",\n "description": "Authentication uri",\n "default": "https://accounts.google.com/o/oauth2/auth",\n "type": "string"\n },\n "token_uri": {\n "title": "Token Uri",\n "description": "Token uri",\n "default": "https://oauth2.googleapis.com/token",\n "type": "string"\n },\n "auth_provider_x509_cert_url": {\n "title": "Auth Provider X509 Cert Url",\n "description": "Auth provider x509 certificate url",\n "default": "https://www.googleapis.com/oauth2/v1/certs",\n "type": "string"\n },\n "type": {\n "title": "Type",\n "description": "Authentication type",\n "default": "service_account",\n "type": "string"\n },\n "client_x509_cert_url": {\n "title": "Client X509 Cert Url",\n "description": "If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email",\n "type": "string"\n }\n },\n "required": [\n "private_key_id",\n "private_key",\n "client_email",\n "client_id"\n ],\n "additionalProperties": false\n }\n }\n}\n')))),
          (0,n.yg)("h3",{id:"code-coordinates"},"Code Coordinates"),
          (0,n.yg)("ul",null,
            (0,n.yg)("li",{parentName:"ul"},"Class Name: ",(0,n.yg)("inlineCode",{parentName:"li"},"datahub.ingestion.source.vertexai.vertexai.VertexAISource")),
            (0,n.yg)("li",{parentName:"ul"},"Browse on ",(0,n.yg)("a",{parentName:"li",href:"https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/vertexai/vertexai.py"},"GitHub"))),
          (0,n.yg)("h2",null,"Questions"),
          (0,n.yg)("p",null,"If you've got any questions on configuring ingestion for Vertex AI, feel free to ping us on ",(0,n.yg)("a",{parentName:"p",href:"https://datahub.com/slack"},"our Slack"),".")
        );
      }
      N.isMDXComponent = true;
    },
  },
]);