Revert "feat(graph): Make Dgraph a proper Neo4j alternative (#3578)" (#3740)

Gabe Lyons 2021-12-14 10:49:03 -08:00 committed by GitHub
parent 89f6c47d51
commit 3fd3313544
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 450 additions and 1124 deletions

View File

@@ -34,7 +34,6 @@ Dependencies:
 * [Elasticsearch](elasticsearch-setup)
 * [MySQL](mysql)
 * [(Optional) Neo4j](neo4j)
-* [(Optional) Dgraph](dgraph)
 ### Ingesting demo data.

View File

@@ -1,45 +0,0 @@
DATASET_ENABLE_SCSI=false
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
JAVA_OPTS=-Xms1g -Xmx1g
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment
# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=true
# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
# ELASTICSEARCH_SSL_KEYSTORE_FILE=
# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
# To use simple username/password authentication to Elasticsearch over HTTPS
# set ELASTICSEARCH_USE_SSL=true and uncomment:
# ELASTICSEARCH_USERNAME=
# ELASTICSEARCH_PASSWORD=

View File

@@ -2,15 +2,13 @@ DATASET_ENABLE_SCSI=false
 EBEAN_DATASOURCE_USERNAME=datahub
 EBEAN_DATASOURCE_PASSWORD=datahub
 EBEAN_DATASOURCE_HOST=mysql:3306
-EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 KAFKA_BOOTSTRAP_SERVER=broker:29092
 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=dgraph
-DGRAPH_HOST=dgraph
-DGRAPH_SECURITY=plain
+GRAPH_SERVICE_IMPL=elasticsearch
 JAVA_OPTS=-Xms1g -Xmx1g
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml

View File

@@ -2,14 +2,18 @@ DATASET_ENABLE_SCSI=false
 EBEAN_DATASOURCE_USERNAME=datahub
 EBEAN_DATASOURCE_PASSWORD=datahub
 EBEAN_DATASOURCE_HOST=mysql:3306
-EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 KAFKA_BOOTSTRAP_SERVER=broker:29092
 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=elasticsearch
+NEO4J_HOST=http://neo4j:7474
+NEO4J_URI=bolt://neo4j
+NEO4J_USERNAME=neo4j
+NEO4J_PASSWORD=datahub
 JAVA_OPTS=-Xms1g -Xmx1g
+GRAPH_SERVICE_IMPL=neo4j
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
 MAE_CONSUMER_ENABLED=true
 MCE_CONSUMER_ENABLED=true

View File

@@ -1,8 +1,8 @@
 #!/bin/bash
 set -x
-# Add default URI (http) scheme to NEO4J_HOST if missing
-if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then
+# Add default URI (http) scheme if needed
+if ! echo $NEO4J_HOST | grep -q "://" ; then
   NEO4J_HOST="http://$NEO4J_HOST"
 fi
 if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
@@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
   ELASTICSEARCH_AUTH_HEADER="Accept: */*"
 fi
-# Add elasticsearch protocol
 if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
   ELASTICSEARCH_PROTOCOL=https
 else
@@ -32,23 +31,9 @@ if [[ $SKIP_KAFKA_CHECK != true ]]; then
   WAIT_FOR_KAFKA=" -wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') "
 fi
-# Add dependency to graph service if needed
-WAIT_FOR_GRAPH_SERVICE=""
-if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
-  if [[ -z "$NEO4J_HOST" ]]; then
-    echo "GRAPH_SERVICE_IMPL set to neo4j but no NEO4J_HOST set"
-    exit 1
-  fi
-  WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST "
-elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then
-  if [[ -z "$DGRAPH_HOST" ]]; then
-    echo "GRAPH_SERVICE_IMPL set to dgraph but no DGRAPH_HOST set"
-    exit 1
-  fi
-  if [[ -n "$DGRAPH_HOST" && $DGRAPH_HOST != *":"* ]] ; then
-    DGRAPH_HOST="$DGRAPH_HOST:9080"
-  fi
-  WAIT_FOR_GRAPH_SERVICE=" -wait tcp://$DGRAPH_HOST "
+WAIT_FOR_NEO4J=""
+if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
+  WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
 fi
 OTEL_AGENT=""
@@ -64,7 +49,7 @@ fi
 COMMON="
     $WAIT_FOR_EBEAN \
     $WAIT_FOR_KAFKA \
-    $WAIT_FOR_GRAPH_SERVICE \
+    $WAIT_FOR_NEO4J \
     -timeout 240s \
     java $JAVA_OPTS $JMX_OPTS \
     $OTEL_AGENT \
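As context for the hunk above: the reverted script assembled `-wait` flags for whichever graph backend was configured, defaulting Dgraph's gRPC port to 9080 when none was given; that a dockerize-style waiter consumes these flags before the `java` launch is an inference from the `-wait`/`-timeout` pattern, not stated in the diff. A minimal sketch of the reverted logic in isolation:

```shell
# Sketch of the reverted graph-service wait logic, extracted from the hunk above.
GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
WAIT_FOR_GRAPH_SERVICE=""
if [[ $GRAPH_SERVICE_IMPL == neo4j ]]; then
  WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST "
elif [[ $GRAPH_SERVICE_IMPL == dgraph ]]; then
  # Append Dgraph's default gRPC port when the host carries no explicit port.
  [[ $DGRAPH_HOST != *":"* ]] && DGRAPH_HOST="$DGRAPH_HOST:9080"
  WAIT_FOR_GRAPH_SERVICE=" -wait tcp://$DGRAPH_HOST "
fi
echo "graph wait flags:$WAIT_FOR_GRAPH_SERVICE"
```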

View File

@@ -1,32 +0,0 @@
MAE_CONSUMER_ENABLED=true
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
GMS_HOST=datahub-gms
GMS_PORT=8080
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
# Uncomment to configure topic names
# Make sure these names are consistent across the whole deployment
# KAFKA_TOPIC_NAME=MetadataAuditEvent_v4
# DATAHUB_USAGE_EVENT_NAME=DataHubUsageEvent_v1
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=
# ELASTICSEARCH_SSL_PROTOCOL=
# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
# ELASTICSEARCH_SSL_KEYSTORE_FILE=
# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=

View File

@@ -3,11 +3,9 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092
 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=dgraph
-DGRAPH_HOST=dgraph
-DGRAPH_SECURITY=plain
 GMS_HOST=datahub-gms
 GMS_PORT=8080
+GRAPH_SERVICE_IMPL=elasticsearch
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
 # Uncomment to disable persistence of client-side analytics events

View File

@@ -3,9 +3,13 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092
 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=elasticsearch
+NEO4J_HOST=http://neo4j:7474
+NEO4J_URI=bolt://neo4j
+NEO4J_USERNAME=neo4j
+NEO4J_PASSWORD=datahub
 GMS_HOST=datahub-gms
 GMS_PORT=8080
+GRAPH_SERVICE_IMPL=neo4j
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
 # Uncomment to disable persistence of client-side analytics events

View File

@@ -1,7 +1,7 @@
 #!/bin/bash
-# Add default URI (http) scheme to NEO4J_HOST if missing
-if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then
+# Add default URI (http) scheme if needed
+if ! echo $NEO4J_HOST | grep -q "://" ; then
   NEO4J_HOST="http://$NEO4J_HOST"
 fi
@@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
   ELASTICSEARCH_AUTH_HEADER="Accept: */*"
 fi
-# Add elasticsearch protocol
 if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
   ELASTICSEARCH_PROTOCOL=https
 else
@@ -32,12 +31,9 @@ if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then
   WAIT_FOR_ELASTICSEARCH=" -wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT -wait-http-header \"$ELASTICSEARCH_AUTH_HEADER\""
 fi
-# Add dependency to graph service if needed
-WAIT_FOR_GRAPH_SERVICE=""
-if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
-  WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST "
-elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then
-  WAIT_FOR_GRAPH_SERVICE=" -wait $DGRAPH_HOST "
+WAIT_FOR_NEO4J=""
+if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then
+  WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
 fi
 OTEL_AGENT=""
@@ -53,7 +49,6 @@ fi
 COMMON="
     $WAIT_FOR_KAFKA \
     $WAIT_FOR_NEO4J \
-    $WAIT_FOR_GRAPH_SERVICE \
     -timeout 240s \
     java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar
 "

View File

@@ -25,7 +25,7 @@ To run the `datahub-upgrade` container, some environment variables must be provided
 where the running DataHub containers reside.
 Below details the required configurations. By default, these configs are provided for local docker-compose deployments of
-DataHub within `docker/datahub-upgrade/env/docker-with-neo4j.env`. They assume that there is a Docker network called datahub_network
+DataHub within `docker/datahub-upgrade/env/docker.env`. They assume that there is a Docker network called datahub_network
 where the DataHub containers can be found.
 These are also the variables used when the provided `datahub-upgrade.sh` script is executed. To run the upgrade CLI for non-local deployments,
@@ -93,7 +93,7 @@ For example, to run the migration named "NoCodeDataMigration", you would do exec
 OR
 ```aidl
-docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration
+docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration
 ```
 In addition to the required `-u` argument, each upgrade may require specific arguments. You can provide arguments to individual
@@ -109,5 +109,5 @@ To specify these, you can use a combination of `-a` arguments and of the form *a
 OR
 ```aidl
-docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000
+docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000
 ```
``` ```

View File

@@ -2,4 +2,4 @@
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 IMAGE=acryldata/datahub-upgrade:head
-cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker-with-neo4j.env --network="datahub_network" ${IMAGE} "$@"
+cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker.env --network="datahub_network" ${IMAGE} "$@"
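Combined with the README hunk above, the restored wrapper now reads `env/docker.env`; a typical invocation looks like the following (arguments taken from the README examples, assuming the script is run from `docker/datahub-upgrade`):

```shell
# Runs the NoCodeDataMigration upgrade through the wrapper script.
./datahub-upgrade.sh -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000
```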

View File

@@ -10,11 +10,7 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=neo4j
-NEO4J_HOST=http://neo4j:7474
-NEO4J_URI=bolt://neo4j
-NEO4J_USERNAME=neo4j
-NEO4J_PASSWORD=datahub
+GRAPH_SERVICE_IMPL=elasticsearch
 DATAHUB_GMS_HOST=datahub-gms
 DATAHUB_GMS_PORT=8080

View File

@@ -10,7 +10,11 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
 ELASTICSEARCH_HOST=elasticsearch
 ELASTICSEARCH_PORT=9200
-GRAPH_SERVICE_IMPL=elasticsearch
+GRAPH_SERVICE_IMPL=neo4j
+NEO4J_HOST=http://neo4j:7474
+NEO4J_URI=bolt://neo4j
+NEO4J_USERNAME=neo4j
+NEO4J_PASSWORD=datahub
 DATAHUB_GMS_HOST=datahub-gms
 DATAHUB_GMS_PORT=8080

View File

@@ -1,17 +0,0 @@
#!/bin/bash
# Launches dev instances of DataHub images. See documentation for more details.
# YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR && \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
-f docker-compose-with-neo4j.yml \
-f docker-compose-with-neo4j.override.yml \
-f docker-compose.dev.yml \
pull \
&& \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-f docker-compose-with-neo4j.yml \
-f docker-compose-with-neo4j.override.yml \
-f docker-compose.dev.yml \
up --build $@

View File

@@ -23,13 +23,13 @@ fi
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd $DIR && \
 COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
-  -f docker-compose-with-dgraph.yml \
-  -f docker-compose-with-dgraph.override.yml \
+  -f docker-compose-without-neo4j.yml \
+  -f docker-compose-without-neo4j.override.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE pull \
 && \
 COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-  -f docker-compose-with-dgraph.yml \
-  -f docker-compose-with-dgraph.override.yml \
+  -f docker-compose-without-neo4j.yml \
+  -f docker-compose-without-neo4j.override.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE up --build $@

View File

@@ -1,11 +0,0 @@
# Dgraph
DataHub can use Dgraph as the graph database in the backend to serve graph queries.
An alternative to Dgraph for that purpose is [Neo4j](../neo4j).
The [Dgraph image](https://hub.docker.com/r/dgraph/dgraph) found in Docker Hub is used without any modification.
## Dgraph UI Ratel
You can use the cloud hosted Dgraph UI [Ratel](https://play.dgraph.io/?latest#) to connect to your Dgraph cluster,
run queries and visualize your graph data. Point the UI to [http://localhost:8082](http://localhost:8082).
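The removed README leaves enablement implicit; in the env files deleted elsewhere in this commit, Dgraph was selected through GMS environment variables like these (values copied from the removed `docker-with-dgraph.env` files):

```shell
GRAPH_SERVICE_IMPL=dgraph
DGRAPH_HOST=dgraph       # the start script appends the default gRPC port :9080 if none is set
DGRAPH_SECURITY=plain
```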

View File

@@ -1 +0,0 @@
DGRAPH_SECURITY=plain

View File

@@ -1,36 +0,0 @@
# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
---
version: '3.8'
services:
mysql:
container_name: mysql
hostname: mysql
image: mysql:5.7
env_file: mysql/env/docker.env
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
ports:
- "3306:3306"
volumes:
- ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
build:
context: ../
dockerfile: docker/mysql-setup/Dockerfile
image: acryldata/datahub-mysql-setup:head
env_file: mysql-setup/env/docker.env
hostname: mysql-setup
container_name: mysql-setup
depends_on:
- mysql
datahub-gms:
env_file: datahub-gms/env/docker-with-dgraph.env
depends_on:
- mysql
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
volumes:
mysqldata:

View File

@@ -1,126 +0,0 @@
# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
# Please see the README.md for instructions as to how to use and customize.
# NOTE: This file cannot build! No dockerfiles are set. See the README.md in this directory.
---
version: '3.8'
services:
zookeeper:
image: confluentinc/cp-zookeeper:5.4.0
env_file: zookeeper/env/docker.env
hostname: zookeeper
container_name: zookeeper
ports:
- "2181:2181"
volumes:
- zkdata:/var/opt/zookeeper
broker:
image: confluentinc/cp-kafka:5.4.0
env_file: broker/env/docker.env
hostname: broker
container_name: broker
depends_on:
- zookeeper
ports:
- "29092:29092"
- "9092:9092"
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
context: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry
schema-registry:
image: confluentinc/cp-schema-registry:5.4.0
env_file: schema-registry/env/docker.env
hostname: schema-registry
container_name: schema-registry
depends_on:
- zookeeper
- broker
ports:
- "8081:8081"
elasticsearch:
image: elasticsearch:7.9.3
env_file: elasticsearch/env/docker.env
container_name: elasticsearch
hostname: elasticsearch
ports:
- "9200:9200"
volumes:
- esdata:/usr/share/elasticsearch/data
healthcheck:
test: ["CMD-SHELL", "curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1"]
start_period: 2m
retries: 4
dgraph:
image: dgraph/standalone:v21.12.0
env_file: dgraph/env/docker.env
hostname: dgraph
container_name: dgraph
ports:
- "8082:8080"
- "9080:9080"
volumes:
- dgraphdata:/dgraph
# This "container" is a workaround to pre-create search indices
elasticsearch-setup:
build:
context: ../
dockerfile: docker/elasticsearch-setup/Dockerfile
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
env_file: elasticsearch-setup/env/docker.env
hostname: elasticsearch-setup
container_name: elasticsearch-setup
depends_on:
- elasticsearch
datahub-gms:
build:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
env_file: datahub-gms/env/docker-with-dgraph.env
hostname: datahub-gms
container_name: datahub-gms
ports:
- "8080:8080"
depends_on:
- elasticsearch-setup
- kafka-setup
- mysql
- dgraph
datahub-frontend-react:
build:
context: ../
dockerfile: docker/datahub-frontend/Dockerfile
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
env_file: datahub-frontend/env/docker.env
hostname: datahub-frontend-react
container_name: datahub-frontend-react
ports:
- "9002:9002"
depends_on:
- datahub-gms
networks:
default:
name: datahub_network
volumes:
dgraphdata:
esdata:
zkdata:

View File

@@ -1,4 +1,3 @@
-# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
 ---
 version: '3.8'
 services:
@@ -26,7 +25,7 @@ services:
       - mysql
   datahub-gms:
-    env_file: datahub-gms/env/docker-with-neo4j.env
+    env_file: datahub-gms/env/docker-without-neo4j.env
     depends_on:
       - mysql
     volumes:

View File

@@ -64,17 +64,6 @@ services:
       start_period: 2m
       retries: 4
-  neo4j:
-    image: neo4j:4.0.6
-    env_file: neo4j/env/docker.env
-    hostname: neo4j
-    container_name: neo4j
-    ports:
-      - "7474:7474"
-      - "7687:7687"
-    volumes:
-      - neo4jdata:/data
   # This "container" is a workaround to pre-create search indices
   elasticsearch-setup:
     build:
@@ -92,7 +81,7 @@ services:
       context: ../
       dockerfile: docker/datahub-gms/Dockerfile
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
-    env_file: datahub-gms/env/docker-with-neo4j.env
+    env_file: datahub-gms/env/docker-without-neo4j.env
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -101,7 +90,6 @@ services:
       - elasticsearch-setup
       - kafka-setup
       - mysql
-      - neo4j
   datahub-frontend-react:
     build:
@@ -122,5 +110,4 @@ networks:
 volumes:
   esdata:
-  neo4jdata:
   zkdata:

View File

@@ -11,7 +11,7 @@ services:
       context: ../
       dockerfile: docker/datahub-mae-consumer/Dockerfile
     image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
-    env_file: datahub-mae-consumer/env/docker-with-neo4j.env
+    env_file: datahub-mae-consumer/env/docker-without-neo4j.env
     hostname: datahub-mae-consumer
     container_name: datahub-mae-consumer
     ports:
@@ -19,7 +19,6 @@ services:
     depends_on:
       - kafka-setup
       - elasticsearch-setup
-      - neo4j
   datahub-mce-consumer:
     build:
@@ -34,4 +33,3 @@ services:
     depends_on:
       - kafka-setup
       - datahub-gms

View File

@@ -19,6 +19,7 @@ services:
     depends_on:
       - kafka-setup
       - elasticsearch-setup
+      - neo4j
   datahub-mce-consumer:
     build:

View File

@@ -1,3 +1,4 @@
+# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml).
 ---
 version: '3.8'
 services:

View File

@@ -27,31 +27,6 @@ services:
       - "29092:29092"
       - "9092:9092"
-  kafka-rest-proxy:
-    image: confluentinc/cp-kafka-rest:5.4.0
-    env_file: kafka-rest-proxy/env/docker.env
-    hostname: kafka-rest-proxy
-    container_name: kafka-rest-proxy
-    ports:
-      - "8082:8082"
-    depends_on:
-      - zookeeper
-      - broker
-      - schema-registry
-  kafka-topics-ui:
-    image: landoop/kafka-topics-ui:0.9.4
-    env_file: kafka-topics-ui/env/docker.env
-    hostname: kafka-topics-ui
-    container_name: kafka-topics-ui
-    ports:
-      - "18000:8000"
-    depends_on:
-      - zookeeper
-      - broker
-      - schema-registry
-      - kafka-rest-proxy
   # This "container" is a workaround to pre-create topics
   kafka-setup:
     build:
@@ -75,16 +50,6 @@ services:
     ports:
       - "8081:8081"
-  schema-registry-ui:
-    image: landoop/schema-registry-ui:latest
-    env_file: schema-registry-ui/env/docker.env
-    container_name: schema-registry-ui
-    hostname: schema-registry-ui
-    ports:
-      - "8000:8000"
-    depends_on:
-      - schema-registry
   elasticsearch:
     image: elasticsearch:7.9.3
     env_file: elasticsearch/env/docker.env
@@ -99,6 +64,17 @@ services:
       start_period: 2m
       retries: 4
+  neo4j:
+    image: neo4j:4.0.6
+    env_file: neo4j/env/docker.env
+    hostname: neo4j
+    container_name: neo4j
+    ports:
+      - "7474:7474"
+      - "7687:7687"
+    volumes:
+      - neo4jdata:/data
   # This "container" is a workaround to pre-create search indices
   elasticsearch-setup:
     build:
@@ -116,7 +92,6 @@ services:
       context: ../
       dockerfile: docker/datahub-gms/Dockerfile
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
-    env_file: datahub-gms/env/docker.env
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -125,6 +100,7 @@ services:
       - elasticsearch-setup
       - kafka-setup
       - mysql
+      - neo4j
   datahub-frontend-react:
     build:
@@ -145,4 +121,5 @@ networks:
 volumes:
   esdata:
+  neo4jdata:
   zkdata:

View File

@@ -1,9 +1,8 @@
 # Neo4j
-DataHub can use Neo4j as the graph database in the backend to serve graph queries.
-An alternative to Neo4j for that purpose is [Dgraph](../dgraph).
-The [official Neo4j image](https://hub.docker.com/_/neo4j) found in Docker Hub is used without any modification.
+DataHub uses Neo4j as graph db in the backend to serve graph queries.
+[Official Neo4j image](https://hub.docker.com/_/neo4j) found in Docker Hub is used without
+any modification.
 ## Neo4j Browser
 To be able to debug and run Cypher queries against your Neo4j image, you can open up `Neo4j Browser` which is running at
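For comparison with the removed Dgraph setup, Neo4j is selected through the GMS environment; the values below are the ones reinstated by this commit's env files:

```shell
GRAPH_SERVICE_IMPL=neo4j
NEO4J_HOST=http://neo4j:7474   # HTTP endpoint; Neo4j Browser runs here too
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
```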

View File

@@ -3,9 +3,9 @@
 # Quickstarts an Ember-serving variant of DataHub by pulling all images from dockerhub and then running the containers locally.
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 export DATAHUB_VERSION=${DATAHUB_VERSION:-head}
-cd $DIR && docker-compose -f docker-compose-with-neo4j.yml -f docker-compose-with-neo4j.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \
-  -f docker-compose-with-neo4j.yml \
-  -f docker-compose-with-neo4j.override.yml \
+cd $DIR && docker-compose -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \
+  -f docker-compose.yml \
+  -f docker-compose.override.yml \
   -f docker-compose.ember.yml \
   up \
   --scale datahub-frontend-react=0

View File

@@ -29,17 +29,13 @@ if docker volume ls | grep -c -q datahub_neo4jdata
 then
   echo "Datahub Neo4j volume found, starting with neo4j as graph service"
   cd $DIR && docker-compose pull && docker-compose -p datahub up
-elif docker volume ls | grep -c -q datahub_dgraphdata
-then
-  echo "Datahub Dgraph volume found, starting with dgraph as graph service"
-  cd $DIR && docker-compose pull && docker-compose -p datahub up
 else
-  echo "No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service"
+  echo "No Datahub Neo4j volume found, starting with elasticsearch as graph service"
   cd $DIR && \
   docker-compose \
-    -f quickstart/docker-compose.quickstart.yml \
+    -f quickstart/docker-compose-without-neo4j.quickstart.yml \
     $MONITORING_COMPOSE $CONSUMERS_COMPOSE pull && \
   docker-compose -p datahub \
-    -f quickstart/docker-compose.quickstart.yml \
+    -f quickstart/docker-compose-without-neo4j.quickstart.yml \
     $MONITORING_COMPOSE $CONSUMERS_COMPOSE up $@
 fi
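The restored script picks the graph backend from existing Docker volumes; the same check can be run standalone (volume name taken from the script above):

```shell
# Which backend would quickstart.sh choose right now?
docker volume ls | grep -q datahub_neo4jdata && echo neo4j || echo elasticsearch
```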

View File

@@ -1,12 +0,0 @@
# Quickstart
These Docker YAML files are used by the [Docker quickstart script](../quickstart.sh) and
the [DataHub CLI quickstart](../../docs/quickstart.md) command.
## Developer Notes
The [DataHub CLI quickstart](../../docs/quickstart.md) command fetches these YAML files from DataHub's GitHub master.
This means, files referenced by earlier releases of DataHub CLI must not be deleted from this directory in order
to preserve backward compatibility.
Otherwise, earlier releases of the DataHub CLI will stop working.
See GitHub issue [linkedin/datahub#3266](https://github.com/linkedin/datahub/issues/3266) for more details.
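Because the CLI fetches these files from GitHub master, an older CLI release effectively performs the equivalent of the following (URL pattern from `docker.py`'s `GITHUB_BASE_URL` shown later in this diff; the exact filename depends on the release, so this is illustrative only):

```shell
curl -fsSL \
  https://raw.githubusercontent.com/linkedin/datahub/master/docker/quickstart/docker-compose.quickstart.yml \
  -o /tmp/docker-compose.quickstart.yml
```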

View File

@@ -1,177 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=dgraph
- DGRAPH_HOST=dgraph
- DGRAPH_SECURITY=plain
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
dgraph:
container_name: dgraph
environment:
- DGRAPH_SECURITY=plain
hostname: dgraph
image: dgraph/standalone:v21.12.0
ports:
- 8082:8080
- 9080:9080
volumes:
- dgraphdata:/dgraph
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
dgraphdata: null
esdata: null
mysqldata: null
zkdata: null

View File

@@ -1,163 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
zkdata: null

View File

@@ -1,181 +0,0 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=neo4j
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j:4.0.6
ports:
- 7474:7474
- 7687:7687
volumes:
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
neo4jdata: null
zkdata: null

View File

@@ -1 +0,0 @@
docker-compose-with-elasticsearch.quickstart.yml

View File

@@ -0,0 +1,163 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
zkdata: null

View File

@@ -1 +0,0 @@
docker-compose-with-elasticsearch.quickstart.yml

View File

@@ -0,0 +1,181 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
hostname: broker
image: confluentinc/cp-kafka:5.4.0
ports:
- 29092:29092
- 9092:9092
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
ports:
- 9002:9002
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- DATASET_ENABLE_SCSI=false
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
ports:
- 8080:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- 9200:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- 3306:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:head
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j:4.0.6
ports:
- 7474:7474
- 7687:7687
volumes:
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
ports:
- 8081:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
ports:
- 2181:2181
volumes:
- zkdata:/var/opt/zookeeper
version: '2.3'
volumes:
esdata: null
mysqldata: null
neo4jdata: null
zkdata: null

View File

@@ -1,7 +1,7 @@
 #!/bin/bash
 # this script checks if docker-compose$flavour.quickstart.yml is up to date for these 'flavours':
-FLAVOURS=("-with-elasticsearch" "-with-neo4j" "-with-dgraph" ".monitoring")
+FLAVOURS=("" "-without-neo4j" ".monitoring")
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd "$DIR"
@@ -12,10 +12,8 @@ python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
-python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp-with-elasticsearch.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml temp-with-neo4j.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml temp-with-dgraph.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
 for flavour in "${FLAVOURS[@]}"

View File

@@ -9,9 +9,6 @@ python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
-python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose-with-elasticsearch.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml docker-compose-with-neo4j.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml docker-compose-with-dgraph.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
-python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.quickstart.monitoring.yml

View File

@@ -23,15 +23,14 @@ from datahub.telemetry import telemetry
 logger = logging.getLogger(__name__)
 
 NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-neo4j.quickstart.yml"
-)
-DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-dgraph.quickstart.yml"
+    "docker/quickstart/docker-compose.quickstart.yml"
 )
 ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml"
-)
-M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
+    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
 )
+M1_QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose-m1.quickstart.yml"
 
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
@@ -39,9 +38,6 @@ GITHUB_BASE_URL = "https://raw.githubusercontent.com/linkedin/datahub/master"
 GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
-GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
-    f"{GITHUB_BASE_URL}/{DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-)
 GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
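These constants feed the quickstart command further down, which downloads the chosen compose file to a temporary path. A minimal sketch of that fetch, assuming the requests library; the real CLI may differ in details:

import pathlib
import tempfile

import requests

def fetch_compose_file(url: str) -> pathlib.Path:
    # Download one of the GITHUB_*_QUICKSTART_COMPOSE_URL files to a temp YAML file.
    response = requests.get(url)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
        tmp_file.write(response.content)
    return pathlib.Path(tmp_file.name)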
@@ -94,23 +90,22 @@ def is_m1() -> bool:
     return False
 
-def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
+def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
     if graph_service_override is not None:
         if graph_service_override == "elasticsearch":
             click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-        elif graph_service_override == "neo4j":
+            return False
+        if graph_service_override == "neo4j":
             click.echo("Starting with neo4j due to graph-service-impl param\n")
-        elif graph_service_override == "dgraph":
-            click.echo("Starting with dgraph due to graph-service-impl param\n")
+            return True
         else:
             click.secho(
                 graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j`, `dgraph` "
-                "or `elasticsearch`\n",
+                + " is not a valid graph service option. Choose either `neo4j` or "
+                "`elasticsearch`\n",
                 fg="red",
             )
             raise ValueError(f"invalid graph service option: {graph_service_override}")
-        return graph_service_override
 
     with get_client_with_error() as (client, error):
         if error:
             click.secho(
@@ -121,26 +116,17 @@ def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
         if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
             click.echo(
                 "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elasticsearch or dgraph, run `datahub docker nuke` and re-ingest your data.\n"
+                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
             )
-            return "neo4j"
-
-        if len(client.volumes.list(filters={"name": "datahub_dgraphdata"})) > 0:
-            click.echo(
-                "Datahub Dgraph volume found, starting with dgraph as graph service.\n"
-                "If you want to run using elasticsearch or neo4j, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return "dgraph"
+            return True
 
         click.echo(
-            "No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service.\n"
+            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
             "To use neo4j as a graph backend, run \n"
-            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-neo4j.quickstart.yml`\n"
-            "To use dgraph as a graph backend, run \n"
-            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-dgraph.quickstart.yml`\n"
-            "from the root of the datahub repo\n"
+            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`"
+            "\nfrom the root of the datahub repo\n"
         )
-        return "elasticsearch"
+        return False
 
 @docker.command()
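The volume probe above is the heart of the backend auto-detection. Stripped of the CLI messaging, it reduces to the following sketch against the Docker SDK (docker-py), with the volume name taken directly from this diff:

import docker

def neo4j_volume_exists() -> bool:
    # A pre-existing datahub_neo4jdata volume means a prior quickstart ran with neo4j.
    client = docker.from_env()
    return len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0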
@@ -207,27 +193,19 @@
         quickstart_compose_file
     )  # convert to list from tuple
     if not quickstart_compose_file:
-        graph_service_impl = which_graph_service_to_use(graph_service_impl)
-        if graph_service_impl == "neo4j":
-            if running_on_m1:
-                click.secho(
-                    "Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
-                    fg="red",
-                )
-                github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
-            else:
-                github_file = GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
-        elif graph_service_impl == "dgraph":
-            github_file = GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL
-        elif graph_service_impl == "elasticsearch":
-            if not running_on_m1:
-                github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
-            else:
-                github_file = GITHUB_M1_QUICKSTART_COMPOSE_URL
-        else:
-            raise ValueError(
-                f"Unsupported graph service implementation: {graph_service_impl}"
-            )
+        should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
+        if should_use_neo4j and running_on_m1:
+            click.secho(
+                "Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
+                fg="red",
+            )
+        github_file = (
+            GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
+            if should_use_neo4j and not running_on_m1
+            else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
+            if not running_on_m1
+            else GITHUB_M1_QUICKSTART_COMPOSE_URL
+        )
     with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
         path = pathlib.Path(tmp_file.name)
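The chained conditional expression introduced here reads right to left; an equivalent if/elif form, shown purely as a readability sketch with the URLs passed in as parameters:

def pick_compose_url(should_use_neo4j: bool, running_on_m1: bool,
                     neo4j_url: str, elastic_url: str, m1_url: str) -> str:
    # Same decision table as the nested conditional expression above.
    if should_use_neo4j and not running_on_m1:
        return neo4j_url
    if not running_on_m1:
        return elastic_url
    return m1_url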

View File

@@ -382,15 +382,6 @@ public class DgraphGraphService implements GraphService {
             return new RelatedEntitiesResult(offset, 0, 0, Collections.emptyList());
         }
 
-        // TODO: Remove once gms client code uses null instead of "" for any type
-        // https://github.com/linkedin/datahub/issues/3143
-        if (sourceType != null && sourceType.isEmpty()) {
-            sourceType = null;
-        }
-        if (destinationType != null && destinationType.isEmpty()) {
-            destinationType = null;
-        }
-
         String query = getQueryForRelatedEntities(
                 sourceType, sourceEntityFilter,
                 destinationType, destinationEntityFilter,
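The removed guard existed because some GraphService callers pass "" rather than null to mean "any type" (linkedin/datahub#3143). The same normalization as an illustrative Python sketch:

from typing import Optional

def normalize_type(entity_type: Optional[str]) -> Optional[str]:
    # Map the empty string to None so "" and null both mean "any type".
    return entity_type or None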

View File

@@ -63,7 +63,7 @@ public class DgraphContainer extends GenericContainer<DgraphContainer> {
         WaitStrategy waitForLeader = new LogMessageWaitStrategy()
                 .withRegEx(".* Got Zero leader: .*\n");
         WaitStrategy waitForCluster = new LogMessageWaitStrategy()
-                .withRegEx(".* Server is ready.*");
+                .withRegEx(".* Server is ready\n");
         WaitStrategy waitForHttp = new HttpWaitStrategy()
                 .forPort(HTTP_PORT)
                 .forStatusCodeMatching(response -> response == HTTP_OK);
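For reference, waiting on the same readiness log line from Python with testcontainers-python would look roughly like this; the image tag and timeout are assumptions, not taken from this commit:

from testcontainers.core.container import DockerContainer
from testcontainers.core.waiting_utils import wait_for_logs

container = DockerContainer("dgraph/standalone:v21.03.0").with_exposed_ports(8080, 9080)
container.start()
# Block until the alpha node logs readiness, mirroring the "Server is ready" regex above.
wait_for_logs(container, r".* Server is ready", timeout=60)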

View File

@@ -1,6 +1,5 @@
 package com.linkedin.metadata.graph;
 
-import com.linkedin.metadata.query.filter.RelationshipFilter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import io.dgraph.DgraphClient;
 import io.dgraph.DgraphGrpc;
@@ -13,7 +12,6 @@ import io.grpc.ManagedChannelBuilder;
 import io.grpc.MethodDescriptor;
 import lombok.extern.slf4j.Slf4j;
 import org.testcontainers.containers.output.Slf4jLogConsumer;
-import org.testng.SkipException;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.AfterTest;
 import org.testng.annotations.BeforeMethod;
@@ -26,7 +24,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
@@ -52,7 +49,7 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
     @BeforeTest
     public void setup() {
-        _container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.12.0"))
+        _container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.03.0"))
                 .withTmpFs(Collections.singletonMap("/dgraph", "rw,noexec,nosuid,size=1g"))
                 .withStartupTimeout(Duration.ofMinutes(1))
                 .withStartupAttempts(3);
@@ -108,34 +105,6 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
     @Override
     protected void syncAfterWrite() { }
 
-    @Override
-    @SuppressWarnings("MalformedDataProvider")
-    @Test(dataProvider = "FindRelatedEntitiesSourceTypeTests")
-    public void testFindRelatedEntitiesSourceType(String datasetType,
-                                                  List<String> relationshipTypes,
-                                                  RelationshipFilter relationships,
-                                                  List<RelatedEntity> expectedRelatedEntities) throws Exception {
-        if (datasetType != null && datasetType.isEmpty()) {
-            // https://github.com/linkedin/datahub/issues/3143
-            throw new SkipException("Code using GraphService uses \"\" instead of null");
-        }
-        super.testFindRelatedEntitiesSourceType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
-    }
-
-    @Override
-    @SuppressWarnings("MalformedDataProvider")
-    @Test(dataProvider = "FindRelatedEntitiesDestinationTypeTests")
-    public void testFindRelatedEntitiesDestinationType(String datasetType,
-                                                       List<String> relationshipTypes,
-                                                       RelationshipFilter relationships,
-                                                       List<RelatedEntity> expectedRelatedEntities) throws Exception {
-        if (datasetType != null && datasetType.isEmpty()) {
-            // https://github.com/linkedin/datahub/issues/3143
-            throw new SkipException("Code using GraphService uses \"\" instead of null");
-        }
-        super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
-    }
-
     @Test
     public void testGetSchema() {
         DgraphSchema schema = DgraphGraphService.getSchema("{\n"

View File

@@ -1,56 +0,0 @@
-package com.linkedin.gms.factory.common;
-
-import io.dgraph.DgraphClient;
-import io.dgraph.DgraphGrpc;
-import io.grpc.ManagedChannel;
-import io.grpc.ManagedChannelBuilder;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-import java.util.Arrays;
-
-@Slf4j
-@Configuration
-public class DgraphClientFactory {
-
-  @Value("${DGRAPH_HOST:localhost}")
-  private String[] hosts;
-
-  @Value("${DGRAPH_GRPC_PORT:9080}")
-  private int port;
-
-  @Value("${DGRAPH_SECURITY:plain}")
-  private String security;
-
-  @Bean(name = "dgraphClient")
-  protected DgraphClient createInstance() {
-    DgraphGrpc.DgraphStub[] stubs = Arrays.stream(hosts)
-        .map(this::getChannelForHost)
-        .map(DgraphGrpc::newStub)
-        .toArray(DgraphGrpc.DgraphStub[]::new);
-    return new DgraphClient(stubs);
-  }
-
-  private ManagedChannel getChannelForHost(String host) {
-    log.info("Connecting to host " + host);
-    if (host.contains(":")) {
-      return getChannelForBuilder(ManagedChannelBuilder.forTarget(host));
-    } else {
-      return getChannelForBuilder(ManagedChannelBuilder.forAddress(host, port));
-    }
-  }
-
-  private ManagedChannel getChannelForBuilder(ManagedChannelBuilder<?> builder) {
-    if (security.equalsIgnoreCase("plain")) {
-      builder.usePlaintext();
-    } else if (security.equalsIgnoreCase("tls")) {
-      builder.useTransportSecurity();
-    } else {
-      throw new IllegalArgumentException("Unsupported channel security mode");
-    }
-    return builder.build();
-  }
-}
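For anyone who still needs a Dgraph client after this factory's removal, the equivalent wiring with the official pydgraph package is short; the host and port mirror the DGRAPH_HOST and DGRAPH_GRPC_PORT defaults above, and everything else is an assumption:

import pydgraph

# One gRPC stub per alpha host, combined into a single client, as the Java factory did.
stub = pydgraph.DgraphClientStub("localhost:9080")
client = pydgraph.DgraphClient(stub)
# The factory's DGRAPH_SECURITY switch (plain vs tls) would map to channel
# credentials supplied when constructing the stub.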

View File

@@ -1,26 +0,0 @@
-package com.linkedin.gms.factory.common;
-
-import com.linkedin.metadata.graph.DgraphGraphService;
-import io.dgraph.DgraphClient;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-import org.springframework.context.annotation.Import;
-
-import javax.annotation.Nonnull;
-
-@Configuration
-@Import({DgraphClientFactory.class})
-public class DgraphGraphServiceFactory {
-
-  @Autowired
-  @Qualifier("dgraphClient")
-  private DgraphClient dgraphClient;
-
-  @Bean(name = "dgraphGraphService")
-  @Nonnull
-  protected DgraphGraphService getInstance() {
-    return new DgraphGraphService(dgraphClient);
-  }
-}

View File

@@ -1,7 +1,6 @@
 package com.linkedin.gms.factory.common;
 
 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
-import com.linkedin.metadata.graph.DgraphGraphService;
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.graph.Neo4jGraphService;
 import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
@@ -19,7 +18,7 @@ import org.springframework.context.annotation.PropertySource;
 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({Neo4jGraphServiceFactory.class, DgraphGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
+@Import({Neo4jGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
 public class GraphServiceFactory {
   @Autowired
   @Qualifier("elasticSearchGraphService")
@@ -29,28 +28,22 @@ public class GraphServiceFactory {
   @Qualifier("neo4jGraphService")
   private Neo4jGraphService _neo4jGraphService;
 
-  @Autowired
-  @Qualifier("dgraphGraphService")
-  private DgraphGraphService _dgraphGraphService;
-
   @Value("${graphService.type}")
   private String graphServiceImpl;
 
   @Nonnull
-  @DependsOn({"neo4jGraphService", "dgraphGraphService", "elasticSearchGraphService"})
+  @DependsOn({"neo4jGraphService", "elasticSearchGraphService"})
   @Bean(name = "graphService")
   @Primary
   protected GraphService createInstance() {
    if (graphServiceImpl.equalsIgnoreCase("neo4j")) {
      return _neo4jGraphService;
-    } else if (graphServiceImpl.equalsIgnoreCase("dgraph")) {
-      return _dgraphGraphService;
    } else if (graphServiceImpl.equalsIgnoreCase("elasticsearch")) {
      return _elasticSearchGraphService;
    } else {
      throw new RuntimeException(
          "Error: Failed to initialize graph service. Graph Service provided: " + graphServiceImpl
-              + ". Valid options: [neo4j, dgraph, elasticsearch].");
+              + ". Valid options: [neo4j, elasticsearch].");
    }
   }
 }
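With dgraph gone, the factory is back to a two-way, case-insensitive dispatch on graphService.type. The same logic as a sketch, with plain objects standing in for the Spring beans:

def create_graph_service(impl: str, neo4j_service: object, elastic_service: object) -> object:
    # Case-insensitive lookup mirroring createInstance() above.
    services = {"neo4j": neo4j_service, "elasticsearch": elastic_service}
    try:
        return services[impl.lower()]
    except KeyError:
        raise RuntimeError(
            f"Error: Failed to initialize graph service. Graph Service provided: {impl}"
            ". Valid options: [neo4j, elasticsearch]."
        )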