Remove GMS GraphQL Service (#3605)

This commit is contained in:
Arun Vasudevan 2021-11-22 17:53:20 -06:00 committed by GitHub
parent e6b4343adb
commit fde42e01e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 2 additions and 805 deletions

View File

@ -1,571 +0,0 @@
# DataHub GMS GraphQL Service
> **Disclaimer**: DataHub's standalone GraphQL Service is now deprecated. The GraphQL API is now served from the [Metadata Service](../metadata-service/README.md) directly.
> To explore the GraphQL Query & Mutation types, visit `<your-datahub-url>/api/graphiql`.
DataHub GMS GraphQL Service wraps the Generalized Metadata Store (GMS) Rest.li calls in a GraphQL API.
## Pre-requisites
* You need to have [JDK8](https://www.oracle.com/java/technologies/jdk8-downloads.html)
installed on your machine to be able to build `Datahub GMS GraphQL Service`.
## Build
To build `Datahub GMS GraphQL Service`
```
./gradlew :datahub-gms-graphql-service:build
```
## Dependencies
Before starting `Datahub GMS GraphQL Service`, you need to make sure that [DataHub GMS](../metadata-service/README.md) is up and running.
## Start via Docker image
The quickest way to try out `Datahub GMS Graphql Service` is to run the [Docker image](../docker/datahub-gms-graphql-service).
## Start via command line
If you do modify things and want to try it out quickly without building the Docker image, you can also run
the application directly from command line after a successful [build](#build):
```
./gradlew :datahub-gms-graphql-service:bootRun
```
## API Calls
In order to start using the GraphQL API, we recommend that you download [GraphiQL](https://www.electronjs.org/apps/graphiql).
`Endpoint`: http://localhost:8091/graphql
## Sample API Calls
### Query Dataset
Request:
```
{
dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)") {
urn
platform
name
origin
description
platformNativeType
uri
tags
ownership {
owners {
owner {
username
urn
info {
displayName
email
fullName
manager {
urn
}
}
editableInfo {
aboutMe
skills
}
}
type
source {
url
}
}
lastModified {
actor
}
}
created {
actor
}
lastModified {
actor
}
}
}
```
Sample Response:
```
{
"data": {
"dataset": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)",
"platform": "urn:li:dataPlatform:foo",
"name": "bar",
"origin": "PROD",
"description": "Sample Dataset",
"platformNativeType": null,
"uri": null,
"tags": [
"Datahub",
"Sample"
],
"ownership": {
"owners": [
{
"owner": {
"username": "fbar",
"urn": "urn:li:corpuser:fbar",
"info": {
"displayName": "Foo Bar",
"email": "fbar@linkedin.com",
"fullName": "Foo Bar",
"manager": {
"urn": "urn:li:corpuser:datahub"
}
},
"editableInfo": {
"aboutMe": "About Me",
"skills": [
"Java",
"SQL"
]
}
},
"type": "DATAOWNER",
"source": null
}
],
"lastModified": {
"actor": "urn:li:corpuser:fbar"
}
},
"created": {
"actor": "urn:li:corpuser:fbar"
},
"lastModified": {
"actor": "urn:li:corpuser:fbar"
}
}
}
}
```
### Query MLModel
Sample Request:
```
{
mlModel(urn: "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)") {
urn
type
name
origin
description
tags
ownership {
owners {
owner {
urn
username
editableInfo {
pictureLink
}
info {
firstName
}
}
type
source {
type
url
}
}
}
properties {
description
date
version
type
hyperParameters {
key
value {
...on StringBox {
stringValue
}
... on IntBox {
intValue
}
... on FloatBox {
floatValue
}
... on BooleanBox {
booleanValue
}
}
}
mlFeatures
tags
}
status {
removed
}
institutionalMemory {
elements {
url
description
created {
actor
}
}
}
intendedUse {
primaryUses
primaryUsers
outOfScopeUses
}
factorPrompts {
relevantFactors {
groups
instrumentation
environment
}
evaluationFactors {
groups
instrumentation
environment
}
}
metrics {
decisionThreshold
performanceMeasures
}
trainingData {
dataset
motivation
preProcessing
}
evaluationData {
dataset
motivation
preProcessing
}
quantitativeAnalyses {
unitaryResults {
...on StringBox {
stringValue
}
}
intersectionalResults {
...on StringBox {
stringValue
}
}
}
ethicalConsiderations {
useCases
humanLife
mitigations
risksAndHarms
useCases
data
}
caveatsAndRecommendations {
caveats {
caveatDescription
needsFurtherTesting
groupsNotRepresented
}
recommendations
idealDatasetCharacteristics
}
cost {
costType
costValue {
costId
costCode
}
}
}
}
```
Sample Response:
```
{
"data": {
"mlModel": {
"urn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)",
"type": "MLMODEL",
"name": "scienceModel",
"origin": "PROD",
"description": "A sample model for predicting some outcome.",
"tags": [
"Sample"
],
"ownership": {
"owners": [
{
"owner": {
"urn": "urn:li:corpuser:jdoe",
"username": "jdoe",
"editableInfo": null,
"info": {
"firstName": null
}
},
"type": "DATAOWNER",
"source": null
},
{
"owner": {
"urn": "urn:li:corpuser:datahub",
"username": "datahub",
"editableInfo": {
"pictureLink": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/default_avatar.png"
},
"info": {
"firstName": null
}
},
"type": "DATAOWNER",
"source": null
}
]
},
"properties": {
"description": "A sample model for predicting some outcome.",
"date": null,
"version": null,
"type": "Naive Bayes classifier",
"hyperParameters": null,
"mlFeatures": null,
"tags": [
"Sample"
]
},
"status": {
"removed": false
},
"institutionalMemory": {
"elements": [
{
"url": "https://www.linkedin.com",
"description": "Sample doc",
"created": {
"actor": "urn:li:corpuser:jdoe"
}
}
]
},
"intendedUse": {
"primaryUses": [
"Sample Model",
"Primary Use"
],
"primaryUsers": [
"ENTERPRISE"
],
"outOfScopeUses": [
"Production Deployment"
]
},
"factorPrompts": null,
"metrics": {
"decisionThreshold": [
"decisionThreshold"
],
"performanceMeasures": [
"performanceMeasures"
]
},
"trainingData": [
{
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
"motivation": "For science!",
"preProcessing": [
"Aggregation"
]
}
],
"evaluationData": [
{
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
"motivation": null,
"preProcessing": null
}
],
"quantitativeAnalyses": null,
"ethicalConsiderations": {
"useCases": [
"useCases"
],
"humanLife": [
"humanLife"
],
"mitigations": [
"mitigations"
],
"risksAndHarms": [
"risksAndHarms"
],
"data": [
"data"
]
},
"caveatsAndRecommendations": {
"caveats": null,
"recommendations": "recommendations",
"idealDatasetCharacteristics": [
"idealDatasetCharacteristics"
]
},
"cost": {
"costType": "ORG_COST_TYPE",
"costValue": {
"costId": null,
"costCode": "costCode"
}
}
}
}
}
```
### Query DataFlow
Request:
```
{
dataFlow(urn: "urn:li:dataFlow:(airflow,flow1,foo)") {
urn
type
orchestrator
flowId
info {
name
description
project
}
ownership {
owners {
owner {
username
urn
info {
displayName
email
fullName
manager {
urn
}
}
editableInfo {
aboutMe
skills
}
}
type
source {
url
}
}
lastModified {
actor
}
}
}
}
```
Sample response:
```
{
"data": {
"dataFlow": {
"urn": "urn:li:dataFlow:(airflow,flow1,foo)",
"type": "DATA_FLOW",
"orchestrator": "airflow",
"flowId": "flow1",
"info": {
"name": "flow1",
"description": "My own workflow",
"project": "X"
},
"ownership": {
"owners": [
{
"owner": {
"username": "test-user",
"urn": "urn:li:corpuser:test-user",
"info": null,
"editableInfo": null
},
"type": "DEVELOPER",
"source": null
}
],
"lastModified": {
"actor": "urn:li:corpuser:datahub"
}
}
}
}
}
```
### Query DataJob
Request:
```
{
dataJob(urn: "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)") {
urn
type
jobId
dataFlow {
urn
flowId
}
inputOutput {
inputDatasets {
urn
name
}
outputDatasets {
urn
name
}
}
}
}
```
Sample response:
```
{
"data": {
"dataJob": {
"urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)",
"type": "DATA_JOB",
"jobId": "task1",
"dataFlow": {
"urn": "urn:li:dataFlow:(airflow,flow1,foo)",
"flowId": "flow1"
},
"inputOutput": {
"inputDatasets": [
{
"urn": "urn:li:dataset:(urn:li:dataPlatform:redis,stuff,PROD)",
"name": "stuff"
}
],
"outputDatasets": []
}
}
}
}
```

View File

@ -1,24 +0,0 @@
// Build script for the standalone DataHub GMS GraphQL service, packaged with the Spring Boot plugin.
plugins {
id 'org.springframework.boot'
id 'java'
}
dependencies {
// Project module this service builds on.
compile project(':datahub-graphql-core')
compile externalDependency.springBootStarterWeb
// graphql-java Spring WebMVC starter and the GraphiQL starter, pinned to explicit versions.
compile 'com.graphql-java:graphql-java-spring-boot-starter-webmvc:2.0'
compile 'com.graphql-java:graphiql-spring-boot-starter:5.0.2'
compile externalDependency.graphqlJava
compile externalDependency.antlr4Runtime
compile externalDependency.antlr4
// Lombok is used at compile time only (annotation processing).
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
}
configurations {
// Exclude the neo4j module from the compile configuration.
compile.exclude group: 'org.neo4j', module: 'neo4j'
}
bootJar {
// Main class recorded for the executable jar.
mainClassName = 'com.linkedin.datahub.graphql.service.GmsGraphQLApplication'
}

View File

@ -1,20 +0,0 @@
package com.linkedin.datahub.graphql.context;
import com.datahub.metadata.authorization.Authorizer;
import org.springframework.stereotype.Component;
import com.linkedin.datahub.graphql.QueryContext;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * {@link QueryContext} implementation that carries the authentication flag, the actor and the
 * {@link Authorizer} for a query. The all-args constructor and the accessors are generated by
 * Lombok ({@code @AllArgsConstructor}, {@code @Data}); constructor arguments follow field order.
 */
@Data
@AllArgsConstructor
@Component
public class SpringQueryContext implements QueryContext {
// Whether the caller is treated as authenticated.
boolean isAuthenticated;
// Identifier of the actor on whose behalf the query runs.
String actor;
// Authorizer exposed through this context.
Authorizer authorizer;
}

View File

@ -1,25 +0,0 @@
package com.linkedin.datahub.graphql.service;
import com.datahub.metadata.authorization.AuthorizationRequest;
import com.datahub.metadata.authorization.AuthorizationResult;
import com.datahub.metadata.authorization.Authorizer;
/**
 * An {@link Authorizer} that grants every authorization request it receives.
 *
 * <p>Exists to preserve backwards compatibility with the deprecated DataHub Standalone GraphQL
 * service.
 */
public class AllowAllAuthorizer implements Authorizer {

  /** Creates the authorizer; there is no state to initialise. */
  public AllowAllAuthorizer() {
  }

  /**
   * Always answers with an {@code ALLOW} result.
   *
   * @param request the authorization request; it is not inspected
   * @return a result of type {@link AuthorizationResult.Type#ALLOW} whose other two constructor
   *     arguments are {@code null}
   */
  @Override
  public AuthorizationResult authorize(AuthorizationRequest request) {
    final AuthorizationResult.Type decision = AuthorizationResult.Type.ALLOW;
    return new AuthorizationResult(null, null, decision);
  }

  /**
   * @return {@code AuthorizationMode.ALLOW_ALL}
   */
  @Override
  public AuthorizationMode mode() {
    return AuthorizationMode.ALLOW_ALL;
  }
}

View File

@ -1,14 +0,0 @@
package com.linkedin.datahub.graphql.service;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.data.elasticsearch.ElasticsearchAutoConfiguration;
import org.springframework.boot.autoconfigure.elasticsearch.rest.RestClientAutoConfiguration;
/**
 * Spring Boot entry point of the GMS GraphQL service. The Elasticsearch and REST client
 * auto-configurations are excluded from the application context.
 */
@SuppressWarnings("checkstyle:HideUtilityClassConstructor")
@SpringBootApplication(exclude = {ElasticsearchAutoConfiguration.class, RestClientAutoConfiguration.class})
public class GmsGraphQLApplication {

  /**
   * Starts the Spring application using this class as its primary source.
   *
   * @param args command-line arguments, passed through to Spring unchanged
   */
  public static void main(String[] args) {
    final Class<?> primarySource = GmsGraphQLApplication.class;
    SpringApplication.run(primarySource, args);
  }
}

View File

@ -1,34 +0,0 @@
package com.linkedin.datahub.graphql.service;
import javax.annotation.PostConstruct;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;
import com.linkedin.datahub.graphql.GmsGraphQLEngine;
import com.linkedin.datahub.graphql.GraphQLEngine;
import graphql.GraphQL;
/**
 * Spring component that builds a {@link GraphQLEngine} once after construction and publishes both
 * the engine and its {@link GraphQL} instance as beans.
 */
@Component
public class GmsGraphQLProvider {

  private GraphQLEngine engine;
  private GraphQL engineGraphQL;

  /** Builds the engine and caches the {@link GraphQL} instance it provides. */
  @PostConstruct
  public void init() {
    engine = new GmsGraphQLEngine().builder().build();
    engineGraphQL = engine.getGraphQL();
  }

  /**
   * @return the {@link GraphQL} instance obtained from the engine in {@link #init()}
   */
  @Bean
  public GraphQL graphQL() {
    return engineGraphQL;
  }

  /**
   * @return the engine built in {@link #init()}
   */
  @Bean
  public GraphQLEngine graphQLEngine() {
    return engine;
  }
}

View File

@ -1,35 +0,0 @@
package com.linkedin.datahub.graphql.service;
import java.util.concurrent.CompletableFuture;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Component;
import org.springframework.web.context.request.WebRequest;
import com.linkedin.datahub.graphql.GraphQLEngine;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.context.SpringQueryContext;
import graphql.ExecutionResult;
import graphql.spring.web.servlet.GraphQLInvocation;
import graphql.spring.web.servlet.GraphQLInvocationData;
/**
 * Primary {@link GraphQLInvocation} bean: runs each incoming GraphQL request against the injected
 * {@link GraphQLEngine} with a context that is authenticated as {@link #APPNAME} and authorizes
 * everything.
 */
@Component
@Primary
public class QueryGraphQLInvocation implements GraphQLInvocation {

  public static final String APPNAME = "GmsGraphQLApp";

  @Autowired
  GraphQLEngine graphQLEngine;

  /**
   * Executes the request asynchronously.
   *
   * @param invocationData query text and variables of the request
   * @param webRequest the originating web request; not used
   * @return a future completing with the engine's execution result
   */
  @Override
  public CompletableFuture<ExecutionResult> invoke(GraphQLInvocationData invocationData, WebRequest webRequest) {
    // A fresh context per call: always authenticated, acting as APPNAME, allow-all authorizer.
    final QueryContext context = new SpringQueryContext(true, APPNAME, new AllowAllAuthorizer());
    return CompletableFuture.supplyAsync(() -> runQuery(invocationData, context));
  }

  /** Hands the query and its variables to the engine under the given context. */
  private ExecutionResult runQuery(GraphQLInvocationData invocationData, QueryContext context) {
    return graphQLEngine.execute(
        invocationData.getQuery(),
        invocationData.getVariables(),
        context);
  }
}

View File

@ -1,37 +0,0 @@
# Defining environment. APP_ENV selects which "<env>-install" stage the final image is based on.
ARG APP_ENV=prod

# Common runtime base: JRE plus dockerize.
FROM adoptopenjdk/openjdk8:alpine-jre as base
ENV DOCKERIZE_VERSION v0.6.1
RUN apk --no-cache add curl tar \
    && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

# Workaround alpine issue with /lib64 not being in the ld library path
# https://gitlab.alpinelinux.org/alpine/aports/-/issues/10140
ENV LD_LIBRARY_PATH=/lib64
# Add glibc compat layer into alpine linux, needed by java-snappy if kafka topics are compressed with snappy
# --no-cache keeps the apk package index out of the image layer, matching the apk call above.
RUN apk add --no-cache libc6-compat

# Build stage: compiles the service jar from the full source tree.
FROM openjdk:8 as prod-build
COPY . datahub-src
RUN cd datahub-src && ./gradlew :datahub-gms-graphql-service:build
RUN cd datahub-src && cp datahub-gms-graphql-service/build/libs/datahub-gms-graphql-service.jar ../datahub-gms-graphql-service.jar

# Production install stage: copies the built jar and the start script onto the runtime base.
FROM base as prod-install
COPY --from=prod-build /datahub-gms-graphql-service.jar /datahub/datahub-gms-graphql-service/bin/
COPY --from=prod-build /datahub-src/docker/datahub-gms-graphql-service/start.sh /datahub/datahub-gms-graphql-service/scripts/
RUN chmod +x /datahub/datahub-gms-graphql-service/scripts/start.sh

FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134

# Final stage: runs the start script as the unprivileged "datahub" user.
FROM ${APP_ENV}-install as final
RUN addgroup -S datahub && adduser -S datahub -G datahub
USER datahub
EXPOSE 8091
CMD /datahub/datahub-gms-graphql-service/scripts/start.sh

View File

@ -1,17 +0,0 @@
# Datahub GMS GraphQL Service Docker Image
## Run
To start `Datahub GMS GraphQL Service` along with all the dependencies
```
docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.dev.yml up datahub-gms-graphql-service
```
To start `Datahub GMS GraphQL Service` only without any dependencies
```
docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.dev.yml up --no-deps datahub-gms-graphql-service
```
For more details, refer to the [Docker README](../README.md).

View File

@ -1,2 +0,0 @@
DATAHUB_GMS_HOST=datahub-gms
DATAHUB_GMS_PORT=8080

View File

@ -1,6 +0,0 @@
#!/bin/sh
# Waits (up to 240s) for GMS to accept TCP connections, then launches the service jar.
# `exec` replaces this shell with dockerize, so signals sent to the script's process reach
# dockerize directly instead of stopping at an intermediate shell.
# The wait address is quoted so the expanded host/port always form a single argument.
exec dockerize \
  -wait "tcp://${DATAHUB_GMS_HOST}:${DATAHUB_GMS_PORT}" \
  -timeout 240s \
  java -jar /datahub/datahub-gms-graphql-service/bin/datahub-gms-graphql-service.jar

View File

@ -1,14 +0,0 @@
# Compose definition for the datahub-gms-graphql-service container.
version: '3.8'
services:
datahub-gms-graphql-service:
# Built from the parent directory using the service's Dockerfile.
build:
context: ../
dockerfile: docker/datahub-gms-graphql-service/Dockerfile
# Image tag comes from DATAHUB_VERSION and falls back to "head" when it is unset or empty.
image: linkedin/datahub-gms-graphql-service:${DATAHUB_VERSION:-head}
env_file: datahub-gms-graphql-service/env/docker.env
hostname: datahub-gms-graphql-service
container_name: datahub-gms-graphql-service
# Publishes container port 8091 on host port 8091.
ports:
- "8091:8091"
# Started after the datahub-gms service.
depends_on:
- datahub-gms

View File

@ -196,7 +196,6 @@ module.exports = {
"datahub-frontend/README",
"datahub-graphql-core/README",
"metadata-service/README",
"datahub-gms-graphql-service/README",
// "metadata-jobs/README",
"metadata-jobs/mae-consumer-job/README",
"metadata-jobs/mce-consumer-job/README",
@ -227,7 +226,6 @@ module.exports = {
"docs/deploy/confluent-cloud",
// Purposely not including the following:
// - "docker/datahub-frontend/README",
// - "docker/datahub-gms-graphql-service/README",
// - "docker/datahub-gms/README",
// - "docker/datahub-mae-consumer/README",
// - "docker/datahub-mce-consumer/README",

View File

@ -4,7 +4,7 @@ The figure below shows the high-level system diagram for DataHub's Serving Tier.
![datahub-serving](../imgs/datahub-serving.png)
The primary service is called [gms](../../metadata-service) and exposes a REST API for performing CRUD operations on metadata. The metadata service also exposes search and graph query API-s to support secondary-index style queries, full-text search queries as well as relationship queries like lineage. In addition, the [datahub-frontend](../../datahub-frontend) and [datahub-gms-graphql](../../datahub-gms-graphql-service) services expose a GraphQL API on top of the metadata graph.
The primary service is called [gms](../../metadata-service) and exposes a REST API and a GraphQL API for performing CRUD operations on metadata. The metadata service also exposes search and graph query API-s to support secondary-index style queries, full-text search queries as well as relationship queries like lineage. In addition, the [datahub-frontend](../../datahub-frontend) service exposes a GraphQL API on top of the metadata graph.
## DataHub Serving Tier Components

View File

@ -4,7 +4,7 @@
To deploy a new instance of DataHub, perform the following steps.
1. Install [docker](https://docs.docker.com/install/) and [docker-compose](https://docs.docker.com/compose/install/) (if
1. Install [docker](https://docs.docker.com/install/), [jq](https://stedolan.github.io/jq/download/) and [docker-compose](https://docs.docker.com/compose/install/) (if
using Linux). Make sure to allocate enough hardware resources for Docker engine. Tested & confirmed config: 2 CPUs,
8GB RAM, 2GB Swap area, and 10GB disk space.

View File

@ -1,6 +1,5 @@
include 'datahub-frontend'
include 'datahub-graphql-core'
include 'datahub-gms-graphql-service'
include 'datahub-web-react'
include 'metadata-service:auth'
include 'metadata-service:restli-api'