Revert "feat(graph): Make Dgraph a proper Neo4j alternative (#3578)" (#3740)

Gabe Lyons 2021-12-14 10:49:03 -08:00 committed by GitHub
parent 89f6c47d51
commit 3fd3313544
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 450 additions and 1124 deletions

View File

@ -34,7 +34,6 @@ Dependencies:
* [Elasticsearch](elasticsearch-setup)
* [MySQL](mysql)
* [(Optional) Neo4j](neo4j)
* [(Optional) Dgraph](dgraph)
### Ingesting demo data.

View File

@ -1,45 +0,0 @@
DATASET_ENABLE_SCSI=false
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
JAVA_OPTS=-Xms1g -Xmx1g
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment
# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=true
# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
# ELASTICSEARCH_SSL_KEYSTORE_FILE=
# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
# To use simple username/password authentication to Elasticsearch over HTTPS
# set ELASTICSEARCH_USE_SSL=true and uncomment:
# ELASTICSEARCH_USERNAME=
# ELASTICSEARCH_PASSWORD=
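As an example, enabling TLS with basic auth against Elasticsearch means uncommenting a consistent subset of the variables above; the values below are placeholders, not defaults from this file:

```shell
# Hypothetical example values; substitute your own Elasticsearch endpoint credentials.
ELASTICSEARCH_USE_SSL=true
ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
ELASTICSEARCH_USERNAME=elastic
ELASTICSEARCH_PASSWORD=changeme
```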

View File

@ -2,15 +2,13 @@ DATASET_ENABLE_SCSI=false
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=dgraph
DGRAPH_HOST=dgraph
DGRAPH_SECURITY=plain
GRAPH_SERVICE_IMPL=elasticsearch
JAVA_OPTS=-Xms1g -Xmx1g
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml

View File

@ -2,14 +2,18 @@ DATASET_ENABLE_SCSI=false
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=elasticsearch
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
JAVA_OPTS=-Xms1g -Xmx1g
GRAPH_SERVICE_IMPL=neo4j
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true

View File

@ -1,8 +1,8 @@
#!/bin/bash
set -x
# Add default URI (http) scheme to NEO4J_HOST if missing
if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then
NEO4J_HOST="http://$NEO4J_HOST"
# Add default URI (http) scheme if needed
if ! echo $NEO4J_HOST | grep -q "://" ; then
NEO4J_HOST="http://$NEO4J_HOST"
fi
if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
ELASTICSEARCH_AUTH_HEADER="Accept: */*"
fi
# Add elasticsearch protocol
if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
ELASTICSEARCH_PROTOCOL=https
else
@ -32,23 +31,9 @@ if [[ $SKIP_KAFKA_CHECK != true ]]; then
WAIT_FOR_KAFKA=" -wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') "
fi
# Add dependency to graph service if needed
WAIT_FOR_GRAPH_SERVICE=""
if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
if [[ -z "$NEO4J_HOST" ]]; then
echo "GRAPH_SERVICE_IMPL set to neo4j but no NEO4J_HOST set"
exit 1
fi
WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST "
elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then
if [[ -z "$DGRAPH_HOST" ]]; then
echo "GRAPH_SERVICE_IMPL set to dgraph but no DGRAPH_HOST set"
exit 1
fi
if [[ -n "$DGRAPH_HOST" && $DGRAPH_HOST != *":"* ]] ; then
DGRAPH_HOST="$DGRAPH_HOST:9080"
fi
WAIT_FOR_GRAPH_SERVICE=" -wait tcp://$DGRAPH_HOST "
WAIT_FOR_NEO4J=""
if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
fi
OTEL_AGENT=""
@ -64,7 +49,7 @@ fi
COMMON="
$WAIT_FOR_EBEAN \
$WAIT_FOR_KAFKA \
$WAIT_FOR_GRAPH_SERVICE \
$WAIT_FOR_NEO4J \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS \
$OTEL_AGENT \
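Both forms of the scheme defaulting appear in this diff; the reverted pattern-match avoids a grep subprocess and is a no-op when NEO4J_HOST is unset, while the restored grep form prepends a scheme even to an empty value. A side-by-side sketch of the two idioms:

```shell
# Pattern-match form (removed by this revert): no subprocess; skipped when NEO4J_HOST is unset.
if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]]; then
  NEO4J_HOST="http://$NEO4J_HOST"
fi

# grep form (restored by this revert): spawns grep; turns an unset NEO4J_HOST into "http://".
if ! echo "$NEO4J_HOST" | grep -q "://"; then
  NEO4J_HOST="http://$NEO4J_HOST"
fi
```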

View File

@ -1,32 +0,0 @@
MAE_CONSUMER_ENABLED=true
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
GMS_HOST=datahub-gms
GMS_PORT=8080
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
# Uncomment to configure topic names
# Make sure these names are consistent across the whole deployment
# KAFKA_TOPIC_NAME=MetadataAuditEvent_v4
# DATAHUB_USAGE_EVENT_NAME=DataHubUsageEvent_v1
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=
# ELASTICSEARCH_SSL_PROTOCOL=
# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
# ELASTICSEARCH_SSL_KEYSTORE_FILE=
# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=

View File

@ -3,11 +3,9 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=dgraph
DGRAPH_HOST=dgraph
DGRAPH_SECURITY=plain
GMS_HOST=datahub-gms
GMS_PORT=8080
GRAPH_SERVICE_IMPL=elasticsearch
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
# Uncomment to disable persistence of client-side analytics events

View File

@ -3,9 +3,13 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=elasticsearch
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
GMS_HOST=datahub-gms
GMS_PORT=8080
GRAPH_SERVICE_IMPL=neo4j
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
# Uncomment to disable persistence of client-side analytics events

View File

@ -1,7 +1,7 @@
#!/bin/bash
# Add default URI (http) scheme to NEO4J_HOST if missing
if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then
# Add default URI (http) scheme if needed
if ! echo $NEO4J_HOST | grep -q "://" ; then
NEO4J_HOST="http://$NEO4J_HOST"
fi
@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
ELASTICSEARCH_AUTH_HEADER="Accept: */*"
fi
# Add elasticsearch protocol
if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
ELASTICSEARCH_PROTOCOL=https
else
@ -32,12 +31,9 @@ if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then
WAIT_FOR_ELASTICSEARCH=" -wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT -wait-http-header \"$ELASTICSEARCH_AUTH_HEADER\""
fi
# Add dependency to graph service if needed
WAIT_FOR_GRAPH_SERVICE=""
if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST "
elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then
WAIT_FOR_GRAPH_SERVICE=" -wait $DGRAPH_HOST "
WAIT_FOR_NEO4J=""
if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
fi
OTEL_AGENT=""
@ -53,7 +49,6 @@ fi
COMMON="
$WAIT_FOR_KAFKA \
$WAIT_FOR_NEO4J \
$WAIT_FOR_GRAPH_SERVICE \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar
"

View File

@ -25,7 +25,7 @@ To run the `datahub-upgrade` container, some environment variables must be provi
where the running DataHub containers reside.
The required configurations are detailed below. By default, these configs are provided for local docker-compose deployments of
DataHub within `docker/datahub-upgrade/env/docker-with-neo4j.env`. They assume that there is a Docker network called datahub_network
DataHub within `docker/datahub-upgrade/env/docker.env`. They assume that there is a Docker network called datahub_network
where the DataHub containers can be found.
These are also the variables used when the provided `datahub-upgrade.sh` script is executed. To run the upgrade CLI for non-local deployments,
@ -93,7 +93,7 @@ For example, to run the migration named "NoCodeDataMigration", you would do exec
OR
```shell
docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration
docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration
```
In addition to the required `-u` argument, each upgrade may require specific arguments. You can provide arguments to individual
@ -109,5 +109,5 @@ To specify these, you can use a combination of `-a` arguments and of the form *a
OR
```shell
docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000
docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000
```

View File

@ -2,4 +2,4 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
IMAGE=acryldata/datahub-upgrade:head
cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker-with-neo4j.env --network="datahub_network" ${IMAGE} "$@"
cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker.env --network="datahub_network" ${IMAGE} "$@"

View File

@ -10,11 +10,7 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
GRAPH_SERVICE_IMPL=elasticsearch
DATAHUB_GMS_HOST=datahub-gms
DATAHUB_GMS_PORT=8080

View File

@ -10,7 +10,11 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=elasticsearch
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
DATAHUB_GMS_HOST=datahub-gms
DATAHUB_GMS_PORT=8080

View File

@ -1,17 +0,0 @@
#!/bin/bash
# Launches dev instances of DataHub images. See documentation for more details.
# YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR && \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
-f docker-compose-with-neo4j.yml \
-f docker-compose-with-neo4j.override.yml \
-f docker-compose.dev.yml \
pull \
&& \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-f docker-compose-with-neo4j.yml \
-f docker-compose-with-neo4j.override.yml \
-f docker-compose.dev.yml \
up --build $@

View File

@ -23,13 +23,13 @@ fi
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR && \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
-f docker-compose-with-dgraph.yml \
-f docker-compose-with-dgraph.override.yml \
-f docker-compose-without-neo4j.yml \
-f docker-compose-without-neo4j.override.yml \
-f docker-compose.dev.yml \
$CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE pull \
&& \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-f docker-compose-with-dgraph.yml \
-f docker-compose-with-dgraph.override.yml \
-f docker-compose-without-neo4j.yml \
-f docker-compose-without-neo4j.override.yml \
-f docker-compose.dev.yml \
$CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE up --build $@

View File

@ -1,11 +0,0 @@
# Dgraph
DataHub can use Dgraph as the graph database in the backend to serve graph queries.
An alternative to Dgraph for that purpose is [Neo4j](../neo4j).
The [Dgraph image](https://hub.docker.com/r/dgraph/dgraph) found in Docker Hub is used without any modification.
## Dgraph UI Ratel
You can use the cloud-hosted Dgraph UI [Ratel](https://play.dgraph.io/?latest#) to connect to your Dgraph cluster,
run queries, and visualize your graph data. Point the UI at [http://localhost:8082](http://localhost:8082).
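Besides Ratel, the same endpoint can be exercised directly over Dgraph's DQL HTTP API; a minimal connectivity check, assuming the dgraph container from this setup is running:

```shell
# Counts all typed nodes; an empty graph returns a count of 0 rather than an error.
curl -s -H 'Content-Type: application/dql' \
  -d '{ q(func: has(dgraph.type)) { count(uid) } }' \
  http://localhost:8082/query
```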

View File

@ -1 +0,0 @@
DGRAPH_SECURITY=plain

View File

@ -1,36 +0,0 @@
# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
---
version: '3.8'
services:
mysql:
container_name: mysql
hostname: mysql
image: mysql:5.7
env_file: mysql/env/docker.env
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
ports:
- "3306:3306"
volumes:
- ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
build:
context: ../
dockerfile: docker/mysql-setup/Dockerfile
image: acryldata/datahub-mysql-setup:head
env_file: mysql-setup/env/docker.env
hostname: mysql-setup
container_name: mysql-setup
depends_on:
- mysql
datahub-gms:
env_file: datahub-gms/env/docker-with-dgraph.env
depends_on:
- mysql
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
volumes:
mysqldata:

View File

@ -1,126 +0,0 @@
# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
# Please see the README.md for instructions as to how to use and customize.
# NOTE: This file cannot be built as-is! No Dockerfiles are set. See the README.md in this directory.
---
version: '3.8'
services:
zookeeper:
image: confluentinc/cp-zookeeper:5.4.0
env_file: zookeeper/env/docker.env
hostname: zookeeper
container_name: zookeeper
ports:
- "2181:2181"
volumes:
- zkdata:/var/opt/zookeeper
broker:
image: confluentinc/cp-kafka:5.4.0
env_file: broker/env/docker.env
hostname: broker
container_name: broker
depends_on:
- zookeeper
ports:
- "29092:29092"
- "9092:9092"
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
context: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry
schema-registry:
image: confluentinc/cp-schema-registry:5.4.0
env_file: schema-registry/env/docker.env
hostname: schema-registry
container_name: schema-registry
depends_on:
- zookeeper
- broker
ports:
- "8081:8081"
elasticsearch:
image: elasticsearch:7.9.3
env_file: elasticsearch/env/docker.env
container_name: elasticsearch
hostname: elasticsearch
ports:
- "9200:9200"
volumes:
- esdata:/usr/share/elasticsearch/data
healthcheck:
test: ["CMD-SHELL", "curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1"]
start_period: 2m
retries: 4
dgraph:
image: dgraph/standalone:v21.12.0
env_file: dgraph/env/docker.env
hostname: dgraph
container_name: dgraph
ports:
- "8082:8080"
- "9080:9080"
volumes:
- dgraphdata:/dgraph
# This "container" is a workaround to pre-create search indices
elasticsearch-setup:
build:
context: ../
dockerfile: docker/elasticsearch-setup/Dockerfile
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
env_file: elasticsearch-setup/env/docker.env
hostname: elasticsearch-setup
container_name: elasticsearch-setup
depends_on:
- elasticsearch
datahub-gms:
build:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
env_file: datahub-gms/env/docker-with-dgraph.env
hostname: datahub-gms
container_name: datahub-gms
ports:
- "8080:8080"
depends_on:
- elasticsearch-setup
- kafka-setup
- mysql
- dgraph
datahub-frontend-react:
build:
context: ../
dockerfile: docker/datahub-frontend/Dockerfile
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
env_file: datahub-frontend/env/docker.env
hostname: datahub-frontend-react
container_name: datahub-frontend-react
ports:
- "9002:9002"
depends_on:
- datahub-gms
networks:
default:
name: datahub_network
volumes:
dgraphdata:
esdata:
zkdata:

View File

@ -1,4 +1,3 @@
# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
---
version: '3.8'
services:
@ -26,7 +25,7 @@ services:
- mysql
datahub-gms:
env_file: datahub-gms/env/docker-with-neo4j.env
env_file: datahub-gms/env/docker-without-neo4j.env
depends_on:
- mysql
volumes:

View File

@ -64,17 +64,6 @@ services:
start_period: 2m
retries: 4
neo4j:
image: neo4j:4.0.6
env_file: neo4j/env/docker.env
hostname: neo4j
container_name: neo4j
ports:
- "7474:7474"
- "7687:7687"
volumes:
- neo4jdata:/data
# This "container" is a workaround to pre-create search indices
elasticsearch-setup:
build:
@ -92,7 +81,7 @@ services:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
env_file: datahub-gms/env/docker-with-neo4j.env
env_file: datahub-gms/env/docker-without-neo4j.env
hostname: datahub-gms
container_name: datahub-gms
ports:
@ -101,7 +90,6 @@ services:
- elasticsearch-setup
- kafka-setup
- mysql
- neo4j
datahub-frontend-react:
build:
@ -122,5 +110,4 @@ networks:
volumes:
esdata:
neo4jdata:
zkdata:

View File

@ -11,7 +11,7 @@ services:
context: ../
dockerfile: docker/datahub-mae-consumer/Dockerfile
image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
env_file: datahub-mae-consumer/env/docker-with-neo4j.env
env_file: datahub-mae-consumer/env/docker-without-neo4j.env
hostname: datahub-mae-consumer
container_name: datahub-mae-consumer
ports:
@ -19,7 +19,6 @@ services:
depends_on:
- kafka-setup
- elasticsearch-setup
- neo4j
datahub-mce-consumer:
build:
@ -34,4 +33,3 @@ services:
depends_on:
- kafka-setup
- datahub-gms

View File

@ -19,6 +19,7 @@ services:
depends_on:
- kafka-setup
- elasticsearch-setup
- neo4j
datahub-mce-consumer:
build:

View File

@ -1,3 +1,4 @@
# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
---
version: '3.8'
services:

View File

@ -27,31 +27,6 @@ services:
- "29092:29092"
- "9092:9092"
kafka-rest-proxy:
image: confluentinc/cp-kafka-rest:5.4.0
env_file: kafka-rest-proxy/env/docker.env
hostname: kafka-rest-proxy
container_name: kafka-rest-proxy
ports:
- "8082:8082"
depends_on:
- zookeeper
- broker
- schema-registry
kafka-topics-ui:
image: landoop/kafka-topics-ui:0.9.4
env_file: kafka-topics-ui/env/docker.env
hostname: kafka-topics-ui
container_name: kafka-topics-ui
ports:
- "18000:8000"
depends_on:
- zookeeper
- broker
- schema-registry
- kafka-rest-proxy
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
@ -75,16 +50,6 @@ services:
ports:
- "8081:8081"
schema-registry-ui:
image: landoop/schema-registry-ui:latest
env_file: schema-registry-ui/env/docker.env
container_name: schema-registry-ui
hostname: schema-registry-ui
ports:
- "8000:8000"
depends_on:
- schema-registry
elasticsearch:
image: elasticsearch:7.9.3
env_file: elasticsearch/env/docker.env
@ -99,6 +64,17 @@ services:
start_period: 2m
retries: 4
neo4j:
image: neo4j:4.0.6
env_file: neo4j/env/docker.env
hostname: neo4j
container_name: neo4j
ports:
- "7474:7474"
- "7687:7687"
volumes:
- neo4jdata:/data
# This "container" is a workaround to pre-create search indices
elasticsearch-setup:
build:
@ -116,7 +92,6 @@ services:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
env_file: datahub-gms/env/docker.env
hostname: datahub-gms
container_name: datahub-gms
ports:
@ -125,6 +100,7 @@ services:
- elasticsearch-setup
- kafka-setup
- mysql
- neo4j
datahub-frontend-react:
build:
@ -145,4 +121,5 @@ networks:
volumes:
esdata:
neo4jdata:
zkdata:

View File

@ -1,9 +1,8 @@
# Neo4j
DataHub can use Neo4j as the graph database in the backend to serve graph queries.
An alternative to Neo4j for that purpose is [Dgraph](../dgraph).
The [official Neo4j image](https://hub.docker.com/_/neo4j) found in Docker Hub is used without any modification.
DataHub uses Neo4j as the graph database in the backend to serve graph queries.
The [official Neo4j image](https://hub.docker.com/_/neo4j) found on Docker Hub is used without
any modification.
## Neo4j Browser
To debug and run Cypher queries against your Neo4j instance, you can open up `Neo4j Browser`, which is running at

View File

@ -3,9 +3,9 @@
# Quickstarts an Ember-serving variant of DataHub by pulling all images from dockerhub and then running the containers locally.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
export DATAHUB_VERSION=${DATAHUB_VERSION:-head}
cd $DIR && docker-compose -f docker-compose-with-neo4j.yml -f docker-compose-with-neo4j.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \
-f docker-compose-with-neo4j.yml \
-f docker-compose-with-neo4j.override.yml \
cd $DIR && docker-compose -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \
-f docker-compose.yml \
-f docker-compose.override.yml \
-f docker-compose.ember.yml \
up \
--scale datahub-frontend-react=0

View File

@ -29,17 +29,13 @@ if docker volume ls | grep -c -q datahub_neo4jdata
then
echo "Datahub Neo4j volume found, starting with neo4j as graph service"
cd $DIR && docker-compose pull && docker-compose -p datahub up
elif docker volume ls | grep -c -q datahub_dgraphdata
then
echo "Datahub Dgraph volume found, starting with dgraph as graph service"
cd $DIR && docker-compose pull && docker-compose -p datahub up
else
echo "No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service"
echo "No Datahub Neo4j volume found, starting with elasticsearch as graph service"
cd $DIR && \
docker-compose \
-f quickstart/docker-compose.quickstart.yml \
-f quickstart/docker-compose-without-neo4j.quickstart.yml \
$MONITORING_COMPOSE $CONSUMERS_COMPOSE pull && \
docker-compose -p datahub \
-f quickstart/docker-compose.quickstart.yml \
-f quickstart/docker-compose-without-neo4j.quickstart.yml \
$MONITORING_COMPOSE $CONSUMERS_COMPOSE up $@
fi
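To see which of these volumes is present before the script chooses a graph service, a quick check (not part of the script itself) is:

```shell
# Lists any datahub_* volumes; the script keys off datahub_neo4jdata and datahub_dgraphdata.
docker volume ls --filter name=datahub_ --format '{{.Name}}'
```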

View File

@ -1,12 +0,0 @@
# Quickstart
These Docker YAML files are used by the [Docker quickstart script](../quickstart.sh) and
the [DataHub CLI quickstart](../../docs/quickstart.md) command.
## Developer Notes
The [DataHub CLI quickstart](../../docs/quickstart.md) command fetches these YAML files from DataHub's GitHub master.
This means files referenced by earlier releases of the DataHub CLI must not be deleted from this directory, in order
to preserve backward compatibility.
Otherwise, earlier releases of the DataHub CLI will stop working.
See GitHub issue [linkedin/datahub#3266](https://github.com/linkedin/datahub/issues/3266) for more details.

View File

@ -1,177 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=dgraph
- DGRAPH_HOST=dgraph
- DGRAPH_SECURITY=plain
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
dgraph:
container_name: dgraph
environment:
- DGRAPH_SECURITY=plain
hostname: dgraph
image: dgraph/standalone:v21.12.0
ports:
- 8082:8080
- 9080:9080
volumes:
- dgraphdata:/dgraph
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
dgraphdata: null
esdata: null
mysqldata: null
zkdata: null
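These generated quickstart files are meant to be passed straight to docker-compose; a minimal launch sketch, using the path referenced elsewhere in this commit:

```shell
docker-compose -p datahub -f docker/quickstart/docker-compose-with-dgraph.quickstart.yml up
```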

View File

@ -1,163 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
zkdata: null

View File

@ -1,181 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=neo4j
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j:4.0.6
ports:
- 7474:7474
- 7687:7687
volumes:
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
neo4jdata: null
zkdata: null

View File

@ -1 +0,0 @@
docker-compose-with-elasticsearch.quickstart.yml

View File

@ -0,0 +1,163 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
zkdata: null

View File

@ -1 +0,0 @@
docker-compose-with-elasticsearch.quickstart.yml

View File

@ -0,0 +1,181 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j:4.0.6
ports:
- 7474:7474
- 7687:7687
volumes:
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
neo4jdata: null
zkdata: null

View File

@ -1,7 +1,7 @@
#!/bin/bash
# this script checks whether docker-compose$flavour.quickstart.yml is up to date for these 'flavours':
FLAVOURS=("-with-elasticsearch" "-with-neo4j" "-with-dgraph" ".monitoring")
FLAVOURS=("" "-without-neo4j" ".monitoring")
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd "$DIR"
@ -12,10 +12,8 @@ python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp-with-elasticsearch.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml temp-with-neo4j.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml temp-with-dgraph.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
for flavour in "${FLAVOURS[@]}"

View File

@ -9,9 +9,6 @@ python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose-with-elasticsearch.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml docker-compose-with-neo4j.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml docker-compose-with-dgraph.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.quickstart.monitoring.yml

View File

@ -23,15 +23,14 @@ from datahub.telemetry import telemetry
logger = logging.getLogger(__name__)
NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-with-neo4j.quickstart.yml"
)
DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-with-dgraph.quickstart.yml"
"docker/quickstart/docker-compose.quickstart.yml"
)
ELASTIC_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml"
"docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
)
M1_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
)
M1_QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose-m1.quickstart.yml"
BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
@ -39,9 +38,6 @@ GITHUB_BASE_URL = "https://raw.githubusercontent.com/linkedin/datahub/master"
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
)
GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
f"{GITHUB_BASE_URL}/{DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
)
GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL = (
f"{GITHUB_BASE_URL}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
)
@ -94,23 +90,22 @@ def is_m1() -> bool:
return False
def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
if graph_service_override is not None:
if graph_service_override == "elasticsearch":
click.echo("Starting with elasticsearch due to graph-service-impl param\n")
elif graph_service_override == "neo4j":
return False
if graph_service_override == "neo4j":
click.echo("Starting with neo4j due to graph-service-impl param\n")
elif graph_service_override == "dgraph":
click.echo("Starting with dgraph due to graph-service-impl param\n")
return True
else:
click.secho(
graph_service_override
+ " is not a valid graph service option. Choose either `neo4j`, `dgraph` "
"or `elasticsearch`\n",
+ " is not a valid graph service option. Choose either `neo4j` or "
"`elasticsearch`\n",
fg="red",
)
raise ValueError(f"invalid graph service option: {graph_service_override}")
return graph_service_override
with get_client_with_error() as (client, error):
if error:
click.secho(
@ -121,26 +116,17 @@ def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
click.echo(
"Datahub Neo4j volume found, starting with neo4j as graph service.\n"
"If you want to run using elasticsearch or dgraph, run `datahub docker nuke` and re-ingest your data.\n"
"If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
)
return "neo4j"
if len(client.volumes.list(filters={"name": "datahub_dgraphdata"})) > 0:
click.echo(
"Datahub Dgraph volume found, starting with dgraph as graph service.\n"
"If you want to run using elasticsearch or neo4j, run `datahub docker nuke` and re-ingest your data.\n"
)
return "dgraph"
return True
click.echo(
"No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service.\n"
"No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
"To use neo4j as a graph backend, run \n"
"`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-neo4j.quickstart.yml`\n"
"To use dgraph as a graph backend, run \n"
"`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-dgraph.quickstart.yml`\n"
"from the root of the datahub repo\n"
"`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`"
"\nfrom the root of the datahub repo\n"
)
return "elasticsearch"
return False
@docker.command()
@ -207,27 +193,19 @@ def quickstart(
quickstart_compose_file
) # convert to list from tuple
if not quickstart_compose_file:
graph_service_impl = which_graph_service_to_use(graph_service_impl)
if graph_service_impl == "neo4j":
if running_on_m1:
click.secho(
"Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
fg="red",
)
github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
else:
github_file = GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
elif graph_service_impl == "dgraph":
github_file = GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL
elif graph_service_impl == "elasticsearch":
if not running_on_m1:
github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
else:
github_file = GITHUB_M1_QUICKSTART_COMPOSE_URL
else:
raise ValueError(
f"Unsupported graph service implementation: {graph_service_impl}"
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
if should_use_neo4j and running_on_m1:
click.secho(
"Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
fg="red",
)
github_file = (
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
if should_use_neo4j and not running_on_m1
else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
if not running_on_m1
else GITHUB_M1_QUICKSTART_COMPOSE_URL
)
with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
path = pathlib.Path(tmp_file.name)

View File

@ -382,15 +382,6 @@ public class DgraphGraphService implements GraphService {
return new RelatedEntitiesResult(offset, 0, 0, Collections.emptyList());
}
// TODO: Remove once gms client code uses null instead of "" for any type
// https://github.com/linkedin/datahub/issues/3143
if (sourceType != null && sourceType.isEmpty()) {
sourceType = null;
}
if (destinationType != null && destinationType.isEmpty()) {
destinationType = null;
}
String query = getQueryForRelatedEntities(
sourceType, sourceEntityFilter,
destinationType, destinationEntityFilter,
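The removed guard normalized empty type strings to null before the query was built; restated as a helper, the dropped logic was equivalent to:

```java
// Sketch of the removed normalization (see linkedin/datahub#3143): callers pass ""
// where null is meant, and the query builder expects null for "any type".
private static String emptyToNull(String type) {
    return (type != null && type.isEmpty()) ? null : type;
}
```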

View File

@ -63,7 +63,7 @@ public class DgraphContainer extends GenericContainer<DgraphContainer> {
WaitStrategy waitForLeader = new LogMessageWaitStrategy()
.withRegEx(".* Got Zero leader: .*\n");
WaitStrategy waitForCluster = new LogMessageWaitStrategy()
.withRegEx(".* Server is ready.*");
.withRegEx(".* Server is ready\n");
WaitStrategy waitForHttp = new HttpWaitStrategy()
.forPort(HTTP_PORT)
.forStatusCodeMatching(response -> response == HTTP_OK);
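These three strategies are typically combined so the container only reports ready once all of them pass; a sketch using testcontainers' WaitAllStrategy (how the surrounding class actually wires them up is an assumption here):

```java
// Hypothetical combination; the class may apply the strategies differently.
WaitStrategy ready = new WaitAllStrategy()
        .withStrategy(waitForLeader)
        .withStrategy(waitForCluster)
        .withStrategy(waitForHttp);
```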

View File

@ -1,6 +1,5 @@
package com.linkedin.metadata.graph;
import com.linkedin.metadata.query.filter.RelationshipFilter;
import com.linkedin.metadata.query.filter.RelationshipDirection;
import io.dgraph.DgraphClient;
import io.dgraph.DgraphGrpc;
@ -13,7 +12,6 @@ import io.grpc.ManagedChannelBuilder;
import io.grpc.MethodDescriptor;
import lombok.extern.slf4j.Slf4j;
import org.testcontainers.containers.output.Slf4jLogConsumer;
import org.testng.SkipException;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeMethod;
@ -26,7 +24,6 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@ -52,7 +49,7 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
@BeforeTest
public void setup() {
_container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.12.0"))
_container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.03.0"))
.withTmpFs(Collections.singletonMap("/dgraph", "rw,noexec,nosuid,size=1g"))
.withStartupTimeout(Duration.ofMinutes(1))
.withStartupAttempts(3);
@ -108,34 +105,6 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
@Override
protected void syncAfterWrite() { }
@Override
@SuppressWarnings("MalformedDataProvider")
@Test(dataProvider = "FindRelatedEntitiesSourceTypeTests")
public void testFindRelatedEntitiesSourceType(String datasetType,
List<String> relationshipTypes,
RelationshipFilter relationships,
List<RelatedEntity> expectedRelatedEntities) throws Exception {
if (datasetType != null && datasetType.isEmpty()) {
// https://github.com/linkedin/datahub/issues/3143
throw new SkipException("Code using GraphService uses \"\" instead of null");
}
super.testFindRelatedEntitiesSourceType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
@Override
@SuppressWarnings("MalformedDataProvider")
@Test(dataProvider = "FindRelatedEntitiesDestinationTypeTests")
public void testFindRelatedEntitiesDestinationType(String datasetType,
List<String> relationshipTypes,
RelationshipFilter relationships,
List<RelatedEntity> expectedRelatedEntities) throws Exception {
if (datasetType != null && datasetType.isEmpty()) {
// https://github.com/linkedin/datahub/issues/3143
throw new SkipException("Code using GraphService uses \"\" instead of null");
}
super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
@Test
public void testGetSchema() {
DgraphSchema schema = DgraphGraphService.getSchema("{\n"

View File

@ -1,56 +0,0 @@
package com.linkedin.gms.factory.common;
import io.dgraph.DgraphClient;
import io.dgraph.DgraphGrpc;
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.Arrays;
@Slf4j
@Configuration
public class DgraphClientFactory {
@Value("${DGRAPH_HOST:localhost}")
private String[] hosts;
@Value("${DGRAPH_GRPC_PORT:9080}")
private int port;
@Value("${DGRAPH_SECURITY:plain}")
private String security;
@Bean(name = "dgraphClient")
protected DgraphClient createInstance() {
DgraphGrpc.DgraphStub[] stubs = Arrays.stream(hosts)
.map(this::getChannelForHost)
.map(DgraphGrpc::newStub)
.toArray(DgraphGrpc.DgraphStub[]::new);
return new DgraphClient(stubs);
}
private ManagedChannel getChannelForHost(String host) {
log.info("Connecting to host " + host);
if (host.contains(":")) {
return getChannelForBuilder(ManagedChannelBuilder.forTarget(host));
} else {
return getChannelForBuilder(ManagedChannelBuilder.forAddress(host, port));
}
}
private ManagedChannel getChannelForBuilder(ManagedChannelBuilder<?> builder) {
if (security.equalsIgnoreCase("plain")) {
builder.usePlaintext();
} else if (security.equalsIgnoreCase("tls")) {
builder.useTransportSecurity();
} else {
throw new IllegalArgumentException("Unsupported channel security mode");
}
return builder.build();
}
}
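Once built, the bean would be used through dgraph4j's transaction API; a hypothetical read-only usage (query string is illustrative only):

```java
// Hypothetical usage of the dgraphClient bean (imports: io.dgraph.DgraphProto).
DgraphProto.Response res = dgraphClient.newReadOnlyTransaction()
        .query("{ q(func: has(dgraph.type)) { count(uid) } }");
System.out.println(res.getJson().toStringUtf8());
```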

View File

@ -1,26 +0,0 @@
package com.linkedin.gms.factory.common;
import com.linkedin.metadata.graph.DgraphGraphService;
import io.dgraph.DgraphClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import javax.annotation.Nonnull;
@Configuration
@Import({DgraphClientFactory.class})
public class DgraphGraphServiceFactory {
@Autowired
@Qualifier("dgraphClient")
private DgraphClient dgraphClient;
@Bean(name = "dgraphGraphService")
@Nonnull
protected DgraphGraphService getInstance() {
return new DgraphGraphService(dgraphClient);
}
}

View File

@ -1,7 +1,6 @@
package com.linkedin.gms.factory.common;
import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
import com.linkedin.metadata.graph.DgraphGraphService;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.graph.Neo4jGraphService;
import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
@ -19,7 +18,7 @@ import org.springframework.context.annotation.PropertySource;
@Configuration
@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
@Import({Neo4jGraphServiceFactory.class, DgraphGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
@Import({Neo4jGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
public class GraphServiceFactory {
@Autowired
@Qualifier("elasticSearchGraphService")
@ -29,28 +28,22 @@ public class GraphServiceFactory {
@Qualifier("neo4jGraphService")
private Neo4jGraphService _neo4jGraphService;
@Autowired
@Qualifier("dgraphGraphService")
private DgraphGraphService _dgraphGraphService;
@Value("${graphService.type}")
private String graphServiceImpl;
@Nonnull
@DependsOn({"neo4jGraphService", "dgraphGraphService", "elasticSearchGraphService"})
@DependsOn({"neo4jGraphService", "elasticSearchGraphService"})
@Bean(name = "graphService")
@Primary
protected GraphService createInstance() {
if (graphServiceImpl.equalsIgnoreCase("neo4j")) {
return _neo4jGraphService;
} else if (graphServiceImpl.equalsIgnoreCase("dgraph")) {
return _dgraphGraphService;
} else if (graphServiceImpl.equalsIgnoreCase("elasticsearch")) {
return _elasticSearchGraphService;
} else {
throw new RuntimeException(
"Error: Failed to initialize graph service. Graph Service provided: " + graphServiceImpl
+ ". Valid options: [neo4j, dgraph, elasticsearch].");
+ ". Valid options: [neo4j, elasticsearch].");
}
}
}
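The `graphService.type` value read here is resolved from application.yml; a sketch of the corresponding entry (the GRAPH_SERVICE_IMPL indirection is an assumption based on the env files in this commit, not shown in this diff):

```yaml
# Hypothetical application.yml fragment backing @Value("${graphService.type}").
graphService:
  type: ${GRAPH_SERVICE_IMPL:elasticsearch}
```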