mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-21 23:58:06 +00:00
569 lines
11 KiB
Markdown
569 lines
11 KiB
Markdown
# DataHub GMS GraphQL Service
|
|
|
|
DataHub GMS GraphQL Service wraps the Generalized Metadata Store (GMS) Rest.li calls in a GraphQL API.
|
|
|
|
## Pre-requisites
|
|
* You need to have [JDK8](https://www.oracle.com/java/technologies/jdk8-downloads.html)
|
|
installed on your machine to be able to build `Datahub GMS GraphQL Service`.
|
|
|
|
## Build
|
|
To build `Datahub GMS GraphQL Service`, run:
|
|
|
|
```
|
|
./gradlew :datahub-gms-graphql-service:build
|
|
```
|
|
|
|
## Dependencies
|
|
|
|
Before starting `Datahub GMS GraphQL Service`, you need to make sure that [DataHub GMS](../gms/README.md) is up and running.
|
|
|
|
## Start via Docker image
|
|
The quickest way to try out `Datahub GMS GraphQL Service` is to run the [Docker image](../docker/datahub-gms-graphql-service).
|
|
|
|
## Start via command line
|
|
|
|
If you do modify things and want to try it out quickly without building the Docker image, you can also run
|
|
the application directly from the command line after a successful [build](#build):
|
|
```
|
|
./gradlew :datahub-gms-graphql-service:bootRun
|
|
```
|
|
|
|
## API Calls
|
|
|
|
To start using the GraphQL API, we recommend downloading [GraphiQL](https://www.electronjs.org/apps/graphiql).
|
|
|
|
`Endpoint`: http://localhost:8091/graphql
|
|
|
|
## Sample API Calls
|
|
|
|
### Query Dataset
|
|
|
|
Request:
|
|
```
|
|
{
|
|
dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)") {
|
|
urn
|
|
platform
|
|
name
|
|
origin
|
|
description
|
|
platformNativeType
|
|
uri
|
|
tags
|
|
ownership {
|
|
owners {
|
|
owner {
|
|
username
|
|
urn
|
|
info {
|
|
displayName
|
|
email
|
|
fullName
|
|
manager {
|
|
urn
|
|
}
|
|
}
|
|
editableInfo {
|
|
aboutMe
|
|
skills
|
|
}
|
|
}
|
|
type
|
|
source {
|
|
url
|
|
}
|
|
}
|
|
lastModified {
|
|
actor
|
|
}
|
|
}
|
|
created {
|
|
actor
|
|
}
|
|
lastModified {
|
|
actor
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
Sample Response:
|
|
|
|
```
|
|
{
|
|
"data": {
|
|
"dataset": {
|
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)",
|
|
"platform": "urn:li:dataPlatform:foo",
|
|
"name": "bar",
|
|
"origin": "PROD",
|
|
"description": "Sample Dataset",
|
|
"platformNativeType": null,
|
|
"uri": null,
|
|
"tags": [
|
|
"Datahub",
|
|
"Sample"
|
|
],
|
|
"ownership": {
|
|
"owners": [
|
|
{
|
|
"owner": {
|
|
"username": "fbar",
|
|
"urn": "urn:li:corpuser:fbar",
|
|
"info": {
|
|
"displayName": "Foo Bar",
|
|
"email": "fbar@linkedin.com",
|
|
"fullName": "Foo Bar",
|
|
"manager": {
|
|
"urn": "urn:li:corpuser:datahub"
|
|
}
|
|
},
|
|
"editableInfo": {
|
|
"aboutMe": "About Me",
|
|
"skills": [
|
|
"Java",
|
|
"SQL"
|
|
]
|
|
}
|
|
},
|
|
"type": "DATAOWNER",
|
|
"source": null
|
|
}
|
|
],
|
|
"lastModified": {
|
|
"actor": "urn:li:corpuser:fbar"
|
|
}
|
|
},
|
|
"created": {
|
|
"actor": "urn:li:corpuser:fbar"
|
|
},
|
|
"lastModified": {
|
|
"actor": "urn:li:corpuser:fbar"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### Query MLModel
|
|
|
|
Sample Request:
|
|
|
|
```
|
|
{
|
|
mlModel(urn: "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)") {
|
|
urn
|
|
type
|
|
name
|
|
origin
|
|
description
|
|
tags
|
|
ownership {
|
|
owners {
|
|
owner {
|
|
urn
|
|
username
|
|
editableInfo {
|
|
pictureLink
|
|
}
|
|
info {
|
|
firstName
|
|
}
|
|
}
|
|
type
|
|
source {
|
|
type
|
|
url
|
|
}
|
|
}
|
|
}
|
|
properties {
|
|
description
|
|
date
|
|
version
|
|
type
|
|
hyperParameters {
|
|
key
|
|
value {
|
|
...on StringBox {
|
|
stringValue
|
|
}
|
|
... on IntBox {
|
|
intValue
|
|
}
|
|
... on FloatBox {
|
|
floatValue
|
|
}
|
|
... on BooleanBox {
|
|
booleanValue
|
|
}
|
|
}
|
|
}
|
|
mlFeatures
|
|
tags
|
|
}
|
|
status {
|
|
removed
|
|
}
|
|
institutionalMemory {
|
|
elements {
|
|
url
|
|
description
|
|
created {
|
|
actor
|
|
}
|
|
}
|
|
}
|
|
intendedUse {
|
|
primaryUses
|
|
primaryUsers
|
|
outOfScopeUses
|
|
}
|
|
factorPrompts {
|
|
relevantFactors {
|
|
groups
|
|
instrumentation
|
|
environment
|
|
}
|
|
evaluationFactors {
|
|
groups
|
|
instrumentation
|
|
environment
|
|
}
|
|
}
|
|
metrics {
|
|
decisionThreshold
|
|
performanceMeasures
|
|
}
|
|
trainingData {
|
|
dataset
|
|
motivation
|
|
preProcessing
|
|
}
|
|
evaluationData {
|
|
dataset
|
|
motivation
|
|
preProcessing
|
|
}
|
|
quantitativeAnalyses {
|
|
unitaryResults {
|
|
...on StringBox {
|
|
stringValue
|
|
}
|
|
}
|
|
intersectionalResults {
|
|
...on StringBox {
|
|
stringValue
|
|
}
|
|
}
|
|
}
|
|
ethicalConsiderations {
|
|
useCases
|
|
humanLife
|
|
mitigations
|
|
risksAndHarms
|
|
useCases
|
|
data
|
|
}
|
|
caveatsAndRecommendations {
|
|
caveats {
|
|
caveatDescription
|
|
needsFurtherTesting
|
|
groupsNotRepresented
|
|
}
|
|
recommendations
|
|
idealDatasetCharacteristics
|
|
}
|
|
cost {
|
|
costType
|
|
costValue {
|
|
costId
|
|
costCode
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
Sample Response:
|
|
|
|
```
|
|
{
|
|
"data": {
|
|
"mlModel": {
|
|
"urn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)",
|
|
"type": "MLMODEL",
|
|
"name": "scienceModel",
|
|
"origin": "PROD",
|
|
"description": "A sample model for predicting some outcome.",
|
|
"tags": [
|
|
"Sample"
|
|
],
|
|
"ownership": {
|
|
"owners": [
|
|
{
|
|
"owner": {
|
|
"urn": "urn:li:corpuser:jdoe",
|
|
"username": "jdoe",
|
|
"editableInfo": null,
|
|
"info": {
|
|
"firstName": null
|
|
}
|
|
},
|
|
"type": "DATAOWNER",
|
|
"source": null
|
|
},
|
|
{
|
|
"owner": {
|
|
"urn": "urn:li:corpuser:datahub",
|
|
"username": "datahub",
|
|
"editableInfo": {
|
|
"pictureLink": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web/packages/data-portal/public/assets/images/default_avatar.png"
|
|
},
|
|
"info": {
|
|
"firstName": null
|
|
}
|
|
},
|
|
"type": "DATAOWNER",
|
|
"source": null
|
|
}
|
|
]
|
|
},
|
|
"properties": {
|
|
"description": "A sample model for predicting some outcome.",
|
|
"date": null,
|
|
"version": null,
|
|
"type": "Naive Bayes classifier",
|
|
"hyperParameters": null,
|
|
"mlFeatures": null,
|
|
"tags": [
|
|
"Sample"
|
|
]
|
|
},
|
|
"status": {
|
|
"removed": false
|
|
},
|
|
"institutionalMemory": {
|
|
"elements": [
|
|
{
|
|
"url": "https://www.linkedin.com",
|
|
"description": "Sample doc",
|
|
"created": {
|
|
"actor": "urn:li:corpuser:jdoe"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"intendedUse": {
|
|
"primaryUses": [
|
|
"Sample Model",
|
|
"Primary Use"
|
|
],
|
|
"primaryUsers": [
|
|
"ENTERPRISE"
|
|
],
|
|
"outOfScopeUses": [
|
|
"Production Deployment"
|
|
]
|
|
},
|
|
"factorPrompts": null,
|
|
"metrics": {
|
|
"decisionThreshold": [
|
|
"decisionThreshold"
|
|
],
|
|
"performanceMeasures": [
|
|
"performanceMeasures"
|
|
]
|
|
},
|
|
"trainingData": [
|
|
{
|
|
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
|
|
"motivation": "For science!",
|
|
"preProcessing": [
|
|
"Aggregation"
|
|
]
|
|
}
|
|
],
|
|
"evaluationData": [
|
|
{
|
|
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
|
|
"motivation": null,
|
|
"preProcessing": null
|
|
}
|
|
],
|
|
"quantitativeAnalyses": null,
|
|
"ethicalConsiderations": {
|
|
"useCases": [
|
|
"useCases"
|
|
],
|
|
"humanLife": [
|
|
"humanLife"
|
|
],
|
|
"mitigations": [
|
|
"mitigations"
|
|
],
|
|
"risksAndHarms": [
|
|
"risksAndHarms"
|
|
],
|
|
"data": [
|
|
"data"
|
|
]
|
|
},
|
|
"caveatsAndRecommendations": {
|
|
"caveats": null,
|
|
"recommendations": "recommendations",
|
|
"idealDatasetCharacteristics": [
|
|
"idealDatasetCharacteristics"
|
|
]
|
|
},
|
|
"cost": {
|
|
"costType": "ORG_COST_TYPE",
|
|
"costValue": {
|
|
"costId": null,
|
|
"costCode": "costCode"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### Query DataFlow
|
|
|
|
Request:
|
|
|
|
```
|
|
{
|
|
dataFlow(urn: "urn:li:dataFlow:(airflow,flow1,foo)") {
|
|
urn
|
|
type
|
|
orchestrator
|
|
flowId
|
|
info {
|
|
name
|
|
description
|
|
project
|
|
}
|
|
ownership {
|
|
owners {
|
|
owner {
|
|
username
|
|
urn
|
|
info {
|
|
displayName
|
|
email
|
|
fullName
|
|
manager {
|
|
urn
|
|
}
|
|
}
|
|
editableInfo {
|
|
aboutMe
|
|
skills
|
|
}
|
|
}
|
|
type
|
|
source {
|
|
url
|
|
}
|
|
}
|
|
lastModified {
|
|
actor
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
Sample response:
|
|
|
|
```
|
|
{
|
|
"data": {
|
|
"dataFlow": {
|
|
"urn": "urn:li:dataFlow:(airflow,flow1,foo)",
|
|
"type": "DATA_FLOW",
|
|
"orchestrator": "airflow",
|
|
"flowId": "flow1",
|
|
"info": {
|
|
"name": "flow1",
|
|
"description": "My own workflow",
|
|
"project": "X"
|
|
},
|
|
"ownership": {
|
|
"owners": [
|
|
{
|
|
"owner": {
|
|
"username": "test-user",
|
|
"urn": "urn:li:corpuser:test-user",
|
|
"info": null,
|
|
"editableInfo": null
|
|
},
|
|
"type": "DEVELOPER",
|
|
"source": null
|
|
}
|
|
],
|
|
"lastModified": {
|
|
"actor": "urn:li:corpuser:datahub"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### Query DataJob
|
|
|
|
Request:
|
|
|
|
```
|
|
{
|
|
dataJob(urn: "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)") {
|
|
urn
|
|
type
|
|
jobId
|
|
dataFlow {
|
|
urn
|
|
flowId
|
|
}
|
|
inputOutput {
|
|
inputDatasets {
|
|
urn
|
|
name
|
|
}
|
|
outputDatasets {
|
|
urn
|
|
name
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
Sample response:
|
|
|
|
```
|
|
{
|
|
"data": {
|
|
"dataJob": {
|
|
"urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)",
|
|
"type": "DATA_JOB",
|
|
"jobId": "task1",
|
|
"dataFlow": {
|
|
"urn": "urn:li:dataFlow:(airflow,flow1,foo)",
|
|
"flowId": "flow1"
|
|
},
|
|
"inputOutput": {
|
|
"inputDatasets": [
|
|
{
|
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:redis,stuff,PROD)",
|
|
"name": "stuff"
|
|
}
|
|
],
|
|
"outputDatasets": []
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|