diff --git a/docker/README.md b/docker/README.md index 4622098856..730300d75f 100644 --- a/docker/README.md +++ b/docker/README.md @@ -34,7 +34,6 @@ Dependencies: * [Elasticsearch](elasticsearch-setup) * [MySQL](mysql) * [(Optional) Neo4j](neo4j) -* [(Optional) Dgraph](dgraph) ### Ingesting demo data. diff --git a/docker/datahub-gms/env/docker-with-neo4j.env b/docker/datahub-gms/env/docker-with-neo4j.env deleted file mode 100644 index 5b0bd4ff7d..0000000000 --- a/docker/datahub-gms/env/docker-with-neo4j.env +++ /dev/null @@ -1,45 +0,0 @@ -DATASET_ENABLE_SCSI=false -EBEAN_DATASOURCE_USERNAME=datahub -EBEAN_DATASOURCE_PASSWORD=datahub -EBEAN_DATASOURCE_HOST=mysql:3306 -EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 -EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver -KAFKA_BOOTSTRAP_SERVER=broker:29092 -KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 -ELASTICSEARCH_HOST=elasticsearch -ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=neo4j -NEO4J_HOST=http://neo4j:7474 -NEO4J_URI=bolt://neo4j -NEO4J_USERNAME=neo4j -NEO4J_PASSWORD=datahub -JAVA_OPTS=-Xms1g -Xmx1g -ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - -MAE_CONSUMER_ENABLED=true -MCE_CONSUMER_ENABLED=true - -# Uncomment to disable persistence of client-side analytics events -# DATAHUB_ANALYTICS_ENABLED=false - -# Uncomment to configure kafka topic names -# Make sure these names are consistent across the whole deployment -# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 -# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4 -# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4 - -# Uncomment and set these to support SSL connection to Elasticsearch -# ELASTICSEARCH_USE_SSL=true -# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2 -# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL= -# ELASTICSEARCH_SSL_TRUSTSTORE_FILE= -# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE= -# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD= -# ELASTICSEARCH_SSL_KEYSTORE_FILE= -# ELASTICSEARCH_SSL_KEYSTORE_TYPE= -# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD= - -# To use simple username/password authentication to Elasticsearch over HTTPS -# set ELASTICSEARCH_USE_SSL=true and uncomment: -# ELASTICSEARCH_USERNAME= -# ELASTICSEARCH_PASSWORD= diff --git a/docker/datahub-gms/env/docker-with-dgraph.env b/docker/datahub-gms/env/docker-without-neo4j.env similarity index 93% rename from docker/datahub-gms/env/docker-with-dgraph.env rename to docker/datahub-gms/env/docker-without-neo4j.env index 88b8711435..e3d8e09905 100644 --- a/docker/datahub-gms/env/docker-with-dgraph.env +++ b/docker/datahub-gms/env/docker-without-neo4j.env @@ -2,15 +2,13 @@ DATASET_ENABLE_SCSI=false EBEAN_DATASOURCE_USERNAME=datahub EBEAN_DATASOURCE_PASSWORD=datahub EBEAN_DATASOURCE_HOST=mysql:3306 -EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 +EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=dgraph -DGRAPH_HOST=dgraph -DGRAPH_SECURITY=plain +GRAPH_SERVICE_IMPL=elasticsearch JAVA_OPTS=-Xms1g -Xmx1g ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml diff --git 
a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index f5a680cf4e..d6bddc9a27 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -2,14 +2,18 @@ DATASET_ENABLE_SCSI=false EBEAN_DATASOURCE_USERNAME=datahub EBEAN_DATASOURCE_PASSWORD=datahub EBEAN_DATASOURCE_HOST=mysql:3306 -EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 +EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=elasticsearch +NEO4J_HOST=http://neo4j:7474 +NEO4J_URI=bolt://neo4j +NEO4J_USERNAME=neo4j +NEO4J_PASSWORD=datahub JAVA_OPTS=-Xms1g -Xmx1g +GRAPH_SERVICE_IMPL=neo4j ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml MAE_CONSUMER_ENABLED=true MCE_CONSUMER_ENABLED=true diff --git a/docker/datahub-gms/start.sh b/docker/datahub-gms/start.sh index 10b4265630..6281a07f38 100755 --- a/docker/datahub-gms/start.sh +++ b/docker/datahub-gms/start.sh @@ -1,8 +1,8 @@ #!/bin/bash set -x -# Add default URI (http) scheme to NEO4J_HOST if missing -if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then - NEO4J_HOST="http://$NEO4J_HOST" +# Add default URI (http) scheme if needed +if ! echo $NEO4J_HOST | grep -q "://" ; then + NEO4J_HOST="http://$NEO4J_HOST" fi if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then @@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then ELASTICSEARCH_AUTH_HEADER="Accept: */*" fi -# Add elasticsearch protocol if [[ $ELASTICSEARCH_USE_SSL == true ]]; then ELASTICSEARCH_PROTOCOL=https else @@ -32,23 +31,9 @@ if [[ $SKIP_KAFKA_CHECK != true ]]; then WAIT_FOR_KAFKA=" -wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') " fi -# Add dependency to graph service if needed -WAIT_FOR_GRAPH_SERVICE="" -if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then - if [[ -z "$NEO4J_HOST" ]]; then - echo "GRAPH_SERVICE_IMPL set to neo4j but no NEO4J_HOST set" - exit 1 - fi - WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST " -elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then - if [[ -z "$DGRAPH_HOST" ]]; then - echo "GRAPH_SERVICE_IMPL set to dgraph but no DGRAPH_HOST set" - exit 1 - fi - if [[ -n "$DGRAPH_HOST" && $DGRAPH_HOST != *":"* ]] ; then - DGRAPH_HOST="$DGRAPH_HOST:9080" - fi - WAIT_FOR_GRAPH_SERVICE=" -wait tcp://$DGRAPH_HOST " +WAIT_FOR_NEO4J="" +if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then + WAIT_FOR_NEO4J=" -wait $NEO4J_HOST " fi OTEL_AGENT="" @@ -64,7 +49,7 @@ fi COMMON=" $WAIT_FOR_EBEAN \ $WAIT_FOR_KAFKA \ - $WAIT_FOR_GRAPH_SERVICE \ + $WAIT_FOR_NEO4J \ -timeout 240s \ java $JAVA_OPTS $JMX_OPTS \ $OTEL_AGENT \ diff --git a/docker/datahub-mae-consumer/env/docker-with-neo4j.env b/docker/datahub-mae-consumer/env/docker-with-neo4j.env deleted file mode 100644 index b221ee41aa..0000000000 --- a/docker/datahub-mae-consumer/env/docker-with-neo4j.env +++ /dev/null @@ -1,32 +0,0 @@ -MAE_CONSUMER_ENABLED=true -KAFKA_BOOTSTRAP_SERVER=broker:29092 -KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 -ELASTICSEARCH_HOST=elasticsearch -ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=neo4j 
-NEO4J_HOST=http://neo4j:7474 -NEO4J_URI=bolt://neo4j -NEO4J_USERNAME=neo4j -NEO4J_PASSWORD=datahub -GMS_HOST=datahub-gms -GMS_PORT=8080 -ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml - -# Uncomment to disable persistence of client-side analytics events -# DATAHUB_ANALYTICS_ENABLED=false - -# Uncomment to configure topic names -# Make sure these names are consistent across the whole deployment -# KAFKA_TOPIC_NAME=MetadataAuditEvent_v4 -# DATAHUB_USAGE_EVENT_NAME=DataHubUsageEvent_v1 - -# Uncomment and set these to support SSL connection to Elasticsearch -# ELASTICSEARCH_USE_SSL= -# ELASTICSEARCH_SSL_PROTOCOL= -# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL= -# ELASTICSEARCH_SSL_TRUSTSTORE_FILE= -# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE= -# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD= -# ELASTICSEARCH_SSL_KEYSTORE_FILE= -# ELASTICSEARCH_SSL_KEYSTORE_TYPE= -# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD= diff --git a/docker/datahub-mae-consumer/env/docker-with-dgraph.env b/docker/datahub-mae-consumer/env/docker-without-neo4j.env similarity index 93% rename from docker/datahub-mae-consumer/env/docker-with-dgraph.env rename to docker/datahub-mae-consumer/env/docker-without-neo4j.env index 7009c6b3b3..ea4f703fce 100644 --- a/docker/datahub-mae-consumer/env/docker-with-dgraph.env +++ b/docker/datahub-mae-consumer/env/docker-without-neo4j.env @@ -3,11 +3,9 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=dgraph -DGRAPH_HOST=dgraph -DGRAPH_SECURITY=plain GMS_HOST=datahub-gms GMS_PORT=8080 +GRAPH_SERVICE_IMPL=elasticsearch ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml # Uncomment to disable persistence of client-side analytics events diff --git a/docker/datahub-mae-consumer/env/docker.env b/docker/datahub-mae-consumer/env/docker.env index e264c52aee..c68f5adfa5 100644 --- a/docker/datahub-mae-consumer/env/docker.env +++ b/docker/datahub-mae-consumer/env/docker.env @@ -3,9 +3,13 @@ KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=elasticsearch +NEO4J_HOST=http://neo4j:7474 +NEO4J_URI=bolt://neo4j +NEO4J_USERNAME=neo4j +NEO4J_PASSWORD=datahub GMS_HOST=datahub-gms GMS_PORT=8080 +GRAPH_SERVICE_IMPL=neo4j ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml # Uncomment to disable persistence of client-side analytics events diff --git a/docker/datahub-mae-consumer/start.sh b/docker/datahub-mae-consumer/start.sh index b70b2887c8..6c54b1b7ad 100755 --- a/docker/datahub-mae-consumer/start.sh +++ b/docker/datahub-mae-consumer/start.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Add default URI (http) scheme to NEO4J_HOST if missing -if [[ -n "$NEO4J_HOST" && $NEO4J_HOST != *"://"* ]] ; then +# Add default URI (http) scheme if needed +if ! 
echo $NEO4J_HOST | grep -q "://" ; then NEO4J_HOST="http://$NEO4J_HOST" fi @@ -15,7 +15,6 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then ELASTICSEARCH_AUTH_HEADER="Accept: */*" fi -# Add elasticsearch protocol if [[ $ELASTICSEARCH_USE_SSL == true ]]; then ELASTICSEARCH_PROTOCOL=https else @@ -32,12 +31,9 @@ if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then WAIT_FOR_ELASTICSEARCH=" -wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT -wait-http-header \"$ELASTICSEARCH_AUTH_HEADER\"" fi -# Add dependency to graph service if needed -WAIT_FOR_GRAPH_SERVICE="" -if [[ $GRAPH_SERVICE_IMPL == neo4j ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then - WAIT_FOR_GRAPH_SERVICE=" -wait $NEO4J_HOST " -elif [[ $GRAPH_SERVICE_IMPL == dgraph ]] && [[ $SKIP_DGRAPH_CHECK != true ]]; then - WAIT_FOR_GRAPH_SERVICE=" -wait $DGRAPH_HOST " +WAIT_FOR_NEO4J="" +if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then + WAIT_FOR_NEO4J=" -wait $NEO4J_HOST " fi OTEL_AGENT="" @@ -53,7 +49,6 @@ fi COMMON=" $WAIT_FOR_KAFKA \ $WAIT_FOR_NEO4J \ - $WAIT_FOR_GRAPH_SERVICE \ -timeout 240s \ java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar " diff --git a/docker/datahub-upgrade/README.md b/docker/datahub-upgrade/README.md index 2e040dd581..c5a4371fef 100644 --- a/docker/datahub-upgrade/README.md +++ b/docker/datahub-upgrade/README.md @@ -25,7 +25,7 @@ To run the `datahub-upgrade` container, some environment variables must be provi where the running DataHub containers reside. Below details the required configurations. By default, these configs are provided for local docker-compose deployments of -DataHub within `docker/datahub-upgrade/env/docker-with-neo4j.env`. They assume that there is a Docker network called datahub_network +DataHub within `docker/datahub-upgrade/env/docker.env`. They assume that there is a Docker network called datahub_network where the DataHub containers can be found. These are also the variables used when the provided `datahub-upgrade.sh` script is executed. To run the upgrade CLI for non-local deployments, @@ -93,7 +93,7 @@ For example, to run the migration named "NoCodeDataMigration", you would do exec OR ```aidl -docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration +docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration ``` In addition to the required `-u` argument, each upgrade may require specific arguments. 
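The upgrade container reads the same connection settings as `datahub-gms`, so for non-local deployments the env file can be replaced with explicit `-e` flags. A minimal sketch, assuming the defaults from `docker/datahub-upgrade/env/docker.env`; every hostname, port, and credential below is a placeholder to substitute for your own deployment:

```shell
# Sketch of a non-local upgrade run: image tag, -u argument, and variable
# names come from the env files in this diff; hosts and ports are
# placeholders.
docker pull acryldata/datahub-upgrade:head

docker run --network="datahub_network" \
  -e EBEAN_DATASOURCE_USERNAME="datahub" \
  -e EBEAN_DATASOURCE_PASSWORD="datahub" \
  -e EBEAN_DATASOURCE_HOST="mysql:3306" \
  -e KAFKA_BOOTSTRAP_SERVER="broker:29092" \
  -e KAFKA_SCHEMAREGISTRY_URL="http://schema-registry:8081" \
  -e ELASTICSEARCH_HOST="elasticsearch" \
  -e ELASTICSEARCH_PORT="9200" \
  -e GRAPH_SERVICE_IMPL="elasticsearch" \
  acryldata/datahub-upgrade:head -u NoCodeDataMigration
```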
You can provide arguments to individual @@ -109,5 +109,5 @@ To specify these, you can use a combination of `-a` arguments and of the form *a OR ```aidl -docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker-with-neo4j.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000 +docker pull acryldata/datahub-upgrade:head && docker run --env-file env/docker.env acryldata/datahub-upgrade:head -u NoCodeDataMigration -a batchSize=500 -a batchDelayMs=1000 ``` \ No newline at end of file diff --git a/docker/datahub-upgrade/datahub-upgrade.sh b/docker/datahub-upgrade/datahub-upgrade.sh index 5bf73db4cb..d7acfd0da3 100755 --- a/docker/datahub-upgrade/datahub-upgrade.sh +++ b/docker/datahub-upgrade/datahub-upgrade.sh @@ -2,4 +2,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" IMAGE=acryldata/datahub-upgrade:head -cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker-with-neo4j.env --network="datahub_network" ${IMAGE} "$@" +cd $DIR && docker pull ${IMAGE} && docker run --env-file ./env/docker.env --network="datahub_network" ${IMAGE} "$@" diff --git a/docker/datahub-upgrade/env/docker-with-neo4j.env b/docker/datahub-upgrade/env/docker-without-neo4j.env similarity index 88% rename from docker/datahub-upgrade/env/docker-with-neo4j.env rename to docker/datahub-upgrade/env/docker-without-neo4j.env index 9821239e26..64aa305569 100644 --- a/docker/datahub-upgrade/env/docker-with-neo4j.env +++ b/docker/datahub-upgrade/env/docker-without-neo4j.env @@ -10,11 +10,7 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=neo4j -NEO4J_HOST=http://neo4j:7474 -NEO4J_URI=bolt://neo4j -NEO4J_USERNAME=neo4j -NEO4J_PASSWORD=datahub +GRAPH_SERVICE_IMPL=elasticsearch DATAHUB_GMS_HOST=datahub-gms DATAHUB_GMS_PORT=8080 diff --git a/docker/datahub-upgrade/env/docker.env b/docker/datahub-upgrade/env/docker.env index 64aa305569..9821239e26 100644 --- a/docker/datahub-upgrade/env/docker.env +++ b/docker/datahub-upgrade/env/docker.env @@ -10,7 +10,11 @@ KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 -GRAPH_SERVICE_IMPL=elasticsearch +GRAPH_SERVICE_IMPL=neo4j +NEO4J_HOST=http://neo4j:7474 +NEO4J_URI=bolt://neo4j +NEO4J_USERNAME=neo4j +NEO4J_PASSWORD=datahub DATAHUB_GMS_HOST=datahub-gms DATAHUB_GMS_PORT=8080 diff --git a/docker/dev-with-neo4j.sh b/docker/dev-with-neo4j.sh deleted file mode 100755 index 6c39738cd6..0000000000 --- a/docker/dev-with-neo4j.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# Launches dev instances of DataHub images. See documentation for more details. -# YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS. 
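Both dev scripts follow the same pattern: layer the base compose file, its override, and `docker-compose.dev.yml`, pull, then build and start with BuildKit enabled. A condensed sketch of the surviving `dev-without-neo4j.sh` invocation, with the pull step omitted:

```shell
# Compose-file layering used by the dev scripts: later -f files override
# matching keys in earlier ones, so the dev file (source mounts, debug
# settings) is applied last. Names mirror dev-without-neo4j.sh in this diff.
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
  -f docker-compose-without-neo4j.yml \
  -f docker-compose-without-neo4j.override.yml \
  -f docker-compose.dev.yml \
  up --build
```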
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd $DIR && \ - COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \ - -f docker-compose-with-neo4j.yml \ - -f docker-compose-with-neo4j.override.yml \ - -f docker-compose.dev.yml \ - pull \ -&& \ - COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \ - -f docker-compose-with-neo4j.yml \ - -f docker-compose-with-neo4j.override.yml \ - -f docker-compose.dev.yml \ - up --build $@ diff --git a/docker/dev-with-dgraph.sh b/docker/dev-without-neo4j.sh similarity index 86% rename from docker/dev-with-dgraph.sh rename to docker/dev-without-neo4j.sh index f5cc1571ff..0d8e0661fb 100755 --- a/docker/dev-with-dgraph.sh +++ b/docker/dev-without-neo4j.sh @@ -23,13 +23,13 @@ fi DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd $DIR && \ COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \ - -f docker-compose-with-dgraph.yml \ - -f docker-compose-with-dgraph.override.yml \ + -f docker-compose-without-neo4j.yml \ + -f docker-compose-without-neo4j.override.yml \ -f docker-compose.dev.yml \ $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE pull \ && \ COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \ - -f docker-compose-with-dgraph.yml \ - -f docker-compose-with-dgraph.override.yml \ + -f docker-compose-without-neo4j.yml \ + -f docker-compose-without-neo4j.override.yml \ -f docker-compose.dev.yml \ $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE up --build $@ diff --git a/docker/dgraph/README.md b/docker/dgraph/README.md deleted file mode 100644 index baaee83cef..0000000000 --- a/docker/dgraph/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Dgraph - -DataHub can use Dgraph as the graph database in the backend to serve graph queries. -An alternative to Dgraph for that purpose is [Neo4j](../neo4j). - -The [Dgraph image](https://hub.docker.com/r/dgraph/dgraph) found in Docker Hub is used without any modification. - -## Dgraph UI Ratel - -You can use the cloud hosted Dgraph UI [Ratel](https://play.dgraph.io/?latest#) to connect to your Dgraph cluster, -run queries and visualize your graph data. Point the UI to [http://localhost:8082](http://localhost:8082). diff --git a/docker/dgraph/env/docker.env b/docker/dgraph/env/docker.env deleted file mode 100644 index c04f638a07..0000000000 --- a/docker/dgraph/env/docker.env +++ /dev/null @@ -1 +0,0 @@ -DGRAPH_SECURITY=plain diff --git a/docker/docker-compose-with-dgraph.override.yml b/docker/docker-compose-with-dgraph.override.yml deleted file mode 100644 index 0afa4aaa9d..0000000000 --- a/docker/docker-compose-with-dgraph.override.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml). 
---- -version: '3.8' -services: - mysql: - container_name: mysql - hostname: mysql - image: mysql:5.7 - env_file: mysql/env/docker.env - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin - ports: - - "3306:3306" - volumes: - - ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql - - mysql-setup: - build: - context: ../ - dockerfile: docker/mysql-setup/Dockerfile - image: acryldata/datahub-mysql-setup:head - env_file: mysql-setup/env/docker.env - hostname: mysql-setup - container_name: mysql-setup - depends_on: - - mysql - - datahub-gms: - env_file: datahub-gms/env/docker-with-dgraph.env - depends_on: - - mysql - volumes: - - ${HOME}/.datahub/plugins/:/etc/datahub/plugins - -volumes: - mysqldata: diff --git a/docker/docker-compose-with-dgraph.yml b/docker/docker-compose-with-dgraph.yml deleted file mode 100644 index 236dcf0cc8..0000000000 --- a/docker/docker-compose-with-dgraph.yml +++ /dev/null @@ -1,126 +0,0 @@ -# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host. - -# Please see the README.md for instructions as to how to use and customize. - -# NOTE: This file will cannot build! No dockerfiles are set. See the README.md in this directory. ---- -version: '3.8' -services: - zookeeper: - image: confluentinc/cp-zookeeper:5.4.0 - env_file: zookeeper/env/docker.env - hostname: zookeeper - container_name: zookeeper - ports: - - "2181:2181" - volumes: - - zkdata:/var/opt/zookeeper - - broker: - image: confluentinc/cp-kafka:5.4.0 - env_file: broker/env/docker.env - hostname: broker - container_name: broker - depends_on: - - zookeeper - ports: - - "29092:29092" - - "9092:9092" - - # This "container" is a workaround to pre-create topics - kafka-setup: - build: - context: kafka-setup - image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head} - env_file: kafka-setup/env/docker.env - hostname: kafka-setup - container_name: kafka-setup - depends_on: - - broker - - schema-registry - - schema-registry: - image: confluentinc/cp-schema-registry:5.4.0 - env_file: schema-registry/env/docker.env - hostname: schema-registry - container_name: schema-registry - depends_on: - - zookeeper - - broker - ports: - - "8081:8081" - - elasticsearch: - image: elasticsearch:7.9.3 - env_file: elasticsearch/env/docker.env - container_name: elasticsearch - hostname: elasticsearch - ports: - - "9200:9200" - volumes: - - esdata:/usr/share/elasticsearch/data - healthcheck: - test: ["CMD-SHELL", "curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1"] - start_period: 2m - retries: 4 - - dgraph: - image: dgraph/standalone:v21.12.0 - env_file: dgraph/env/docker.env - hostname: dgraph - container_name: dgraph - ports: - - "8082:8080" - - "9080:9080" - volumes: - - dgraphdata:/dgraph - - # This "container" is a workaround to pre-create search indices - elasticsearch-setup: - build: - context: ../ - dockerfile: docker/elasticsearch-setup/Dockerfile - image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head} - env_file: elasticsearch-setup/env/docker.env - hostname: elasticsearch-setup - container_name: elasticsearch-setup - depends_on: - - elasticsearch - - datahub-gms: - build: - context: ../ - dockerfile: docker/datahub-gms/Dockerfile - image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - env_file: datahub-gms/env/docker-with-dgraph.env - hostname: datahub-gms - container_name: datahub-gms - ports: - - "8080:8080" - depends_on: - - elasticsearch-setup - - 
kafka-setup - - mysql - - dgraph - - datahub-frontend-react: - build: - context: ../ - dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} - env_file: datahub-frontend/env/docker.env - hostname: datahub-frontend-react - container_name: datahub-frontend-react - ports: - - "9002:9002" - depends_on: - - datahub-gms - -networks: - default: - name: datahub_network - -volumes: - dgraphdata: - esdata: - zkdata: diff --git a/docker/docker-compose-with-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml similarity index 83% rename from docker/docker-compose-with-neo4j.override.yml rename to docker/docker-compose-without-neo4j.override.yml index a4eec79f2d..5b5df8c1fb 100644 --- a/docker/docker-compose-with-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -1,4 +1,3 @@ -# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml). --- version: '3.8' services: @@ -26,7 +25,7 @@ services: - mysql datahub-gms: - env_file: datahub-gms/env/docker-with-neo4j.env + env_file: datahub-gms/env/docker-without-neo4j.env depends_on: - mysql volumes: diff --git a/docker/docker-compose-with-neo4j.yml b/docker/docker-compose-without-neo4j.yml similarity index 91% rename from docker/docker-compose-with-neo4j.yml rename to docker/docker-compose-without-neo4j.yml index fa50690296..6fc0e6b5e0 100644 --- a/docker/docker-compose-with-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -64,17 +64,6 @@ services: start_period: 2m retries: 4 - neo4j: - image: neo4j:4.0.6 - env_file: neo4j/env/docker.env - hostname: neo4j - container_name: neo4j - ports: - - "7474:7474" - - "7687:7687" - volumes: - - neo4jdata:/data - # This "container" is a workaround to pre-create search indices elasticsearch-setup: build: @@ -92,7 +81,7 @@ services: context: ../ dockerfile: docker/datahub-gms/Dockerfile image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - env_file: datahub-gms/env/docker-with-neo4j.env + env_file: datahub-gms/env/docker-without-neo4j.env hostname: datahub-gms container_name: datahub-gms ports: @@ -101,7 +90,6 @@ services: - elasticsearch-setup - kafka-setup - mysql - - neo4j datahub-frontend-react: build: @@ -122,5 +110,4 @@ networks: volumes: esdata: - neo4jdata: zkdata: diff --git a/docker/docker-compose.consumers-with-neo4j.yml b/docker/docker-compose.consumers-without-neo4j.yml similarity index 92% rename from docker/docker-compose.consumers-with-neo4j.yml rename to docker/docker-compose.consumers-without-neo4j.yml index 8f295c769d..0a6c6b7089 100644 --- a/docker/docker-compose.consumers-with-neo4j.yml +++ b/docker/docker-compose.consumers-without-neo4j.yml @@ -11,7 +11,7 @@ services: context: ../ dockerfile: docker/datahub-mae-consumer/Dockerfile image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head} - env_file: datahub-mae-consumer/env/docker-with-neo4j.env + env_file: datahub-mae-consumer/env/docker-without-neo4j.env hostname: datahub-mae-consumer container_name: datahub-mae-consumer ports: @@ -19,7 +19,6 @@ services: depends_on: - kafka-setup - elasticsearch-setup - - neo4j datahub-mce-consumer: build: @@ -34,4 +33,3 @@ services: depends_on: - kafka-setup - datahub-gms - diff --git a/docker/docker-compose.consumers.yml b/docker/docker-compose.consumers.yml index cf094f74dc..3eb8580a61 100644 --- a/docker/docker-compose.consumers.yml +++ b/docker/docker-compose.consumers.yml @@ -19,6 +19,7 @@ services: depends_on: - kafka-setup - elasticsearch-setup + - 
neo4j datahub-mce-consumer: build: diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 81d1ecbdbb..1e8231e205 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -1,3 +1,4 @@ +# Default override to use MySQL as a backing store for datahub-gms (same as docker-compose.mysql.yml). --- version: '3.8' services: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 8a792f34c4..68b312e5aa 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -27,31 +27,6 @@ services: - "29092:29092" - "9092:9092" - kafka-rest-proxy: - image: confluentinc/cp-kafka-rest:5.4.0 - env_file: kafka-rest-proxy/env/docker.env - hostname: kafka-rest-proxy - container_name: kafka-rest-proxy - ports: - - "8082:8082" - depends_on: - - zookeeper - - broker - - schema-registry - - kafka-topics-ui: - image: landoop/kafka-topics-ui:0.9.4 - env_file: kafka-topics-ui/env/docker.env - hostname: kafka-topics-ui - container_name: kafka-topics-ui - ports: - - "18000:8000" - depends_on: - - zookeeper - - broker - - schema-registry - - kafka-rest-proxy - # This "container" is a workaround to pre-create topics kafka-setup: build: @@ -75,16 +50,6 @@ services: ports: - "8081:8081" - schema-registry-ui: - image: landoop/schema-registry-ui:latest - env_file: schema-registry-ui/env/docker.env - container_name: schema-registry-ui - hostname: schema-registry-ui - ports: - - "8000:8000" - depends_on: - - schema-registry - elasticsearch: image: elasticsearch:7.9.3 env_file: elasticsearch/env/docker.env @@ -99,6 +64,17 @@ services: start_period: 2m retries: 4 + neo4j: + image: neo4j:4.0.6 + env_file: neo4j/env/docker.env + hostname: neo4j + container_name: neo4j + ports: + - "7474:7474" + - "7687:7687" + volumes: + - neo4jdata:/data + # This "container" is a workaround to pre-create search indices elasticsearch-setup: build: @@ -116,7 +92,6 @@ services: context: ../ dockerfile: docker/datahub-gms/Dockerfile image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - env_file: datahub-gms/env/docker.env hostname: datahub-gms container_name: datahub-gms ports: @@ -125,6 +100,7 @@ services: - elasticsearch-setup - kafka-setup - mysql + - neo4j datahub-frontend-react: build: @@ -145,4 +121,5 @@ networks: volumes: esdata: + neo4jdata: zkdata: diff --git a/docker/neo4j/README.md b/docker/neo4j/README.md index 26bbaba485..b0b9f486d9 100644 --- a/docker/neo4j/README.md +++ b/docker/neo4j/README.md @@ -1,9 +1,8 @@ # Neo4j -DataHub can use Neo4j as the graph database in the backend to serve graph queries. -An alternative to Neo4j for that purpose is [Dgraph](../dgraph). - -The [official Neo4j image](https://hub.docker.com/_/neo4j) found in Docker Hub is used without any modification. +DataHub uses Neo4j as graph db in the backend to serve graph queries. +[Official Neo4j image](https://hub.docker.com/_/neo4j) found in Docker Hub is used without +any modification. ## Neo4j Browser To be able to debug and run Cypher queries against your Neo4j image, you can open up `Neo4j Browser` which is running at diff --git a/docker/quickstart-ember.sh b/docker/quickstart-ember.sh index e3bd1116a7..df4ff0106a 100755 --- a/docker/quickstart-ember.sh +++ b/docker/quickstart-ember.sh @@ -3,9 +3,9 @@ # Quickstarts an Ember-serving variant of DataHub by pulling all images from dockerhub and then running the containers locally. 
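With `neo4j` back in the default `docker-compose.yml` (see the hunk above), the Browser mentioned in the Neo4j README is not the only debugging route. A sketch for checking the graph backend from a shell, assuming the quickstart defaults (container named `neo4j`, `neo4j`/`datahub` credentials, ports 7474 and 7687 as in the compose and env files in this diff):

```shell
# Verify the bundled Neo4j container is serving before debugging GMS graph
# queries.
curl -sS --fail http://localhost:7474/ && echo "Neo4j HTTP endpoint is up"

# cypher-shell ships in the official image, so queries can also be run
# without the Browser, e.g. counting graph nodes:
docker exec neo4j cypher-shell -u neo4j -p datahub "MATCH (n) RETURN count(n);"
```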
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" export DATAHUB_VERSION=${DATAHUB_VERSION:-head} -cd $DIR && docker-compose -f docker-compose-with-neo4j.yml -f docker-compose-with-neo4j.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \ - -f docker-compose-with-neo4j.yml \ - -f docker-compose-with-neo4j.override.yml \ +cd $DIR && docker-compose -f docker-compose.yml -f docker-compose.override.yml -f docker-compose.ember.yml pull && docker-compose -p datahub \ + -f docker-compose.yml \ + -f docker-compose.override.yml \ -f docker-compose.ember.yml \ up \ --scale datahub-frontend-react=0 diff --git a/docker/quickstart.sh b/docker/quickstart.sh index 5d99e3f507..6bb42da6fb 100755 --- a/docker/quickstart.sh +++ b/docker/quickstart.sh @@ -29,17 +29,13 @@ if docker volume ls | grep -c -q datahub_neo4jdata then echo "Datahub Neo4j volume found, starting with neo4j as graph service" cd $DIR && docker-compose pull && docker-compose -p datahub up -elif docker volume ls | grep -c -q datahub_dgraphdata -then - echo "Datahub Dgraph volume found, starting with dgraph as graph service" - cd $DIR && docker-compose pull && docker-compose -p datahub up else - echo "No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service" + echo "No Datahub Neo4j volume found, starting with elasticsearch as graph service" cd $DIR && \ docker-compose \ - -f quickstart/docker-compose.quickstart.yml \ + -f quickstart/docker-compose-without-neo4j.quickstart.yml \ $MONITORING_COMPOSE $CONSUMERS_COMPOSE pull && \ docker-compose -p datahub \ - -f quickstart/docker-compose.quickstart.yml \ + -f quickstart/docker-compose-without-neo4j.quickstart.yml \ $MONITORING_COMPOSE $CONSUMERS_COMPOSE up $@ fi diff --git a/docker/quickstart/README.md b/docker/quickstart/README.md deleted file mode 100644 index b6c970bc7f..0000000000 --- a/docker/quickstart/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Quickstart - -These Docker YAML files are used by the [Docker quickstart script](../quickstart.sh) and -the [DataHub CLI quickstart](../../docs/quickstart.md) command. - -## Developer Notes -The [DataHub CLI quickstart](../../docs/quickstart.md) command fetches these YAML files from DataHub's GitHub master. -This means, files referenced by earlier releases of DataHub CLI must not be deleted from this directory in order -to preserve backward compatibility. -Otherwise, earlier releases of the DataHub CLI will stop working. - -See GitHub issue [linkedin/datahub#3266](https://github.com/linkedin/datahub/issues/3266) for more details. 
\ No newline at end of file diff --git a/docker/quickstart/docker-compose-with-dgraph.quickstart.yml b/docker/quickstart/docker-compose-with-dgraph.quickstart.yml deleted file mode 100644 index 139916f6d4..0000000000 --- a/docker/quickstart/docker-compose-with-dgraph.quickstart.yml +++ /dev/null @@ -1,177 +0,0 @@ -networks: - default: - name: datahub_network -services: - broker: - container_name: broker - depends_on: - - zookeeper - environment: - - KAFKA_BROKER_ID=1 - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 - - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - hostname: broker - image: confluentinc/cp-kafka:5.4.0 - ports: - - 29092:29092 - - 9092:9092 - datahub-frontend-react: - container_name: datahub-frontend-react - depends_on: - - datahub-gms - environment: - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - DATAHUB_SECRET=YouKnowNothing - - DATAHUB_APP_VERSION=1.0 - - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB - - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf - -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml - -Dlogback.debug=false -Dpidfile.path=/dev/null - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 - - ELASTIC_CLIENT_HOST=elasticsearch - - ELASTIC_CLIENT_PORT=9200 - hostname: datahub-frontend-react - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} - ports: - - 9002:9002 - datahub-gms: - container_name: datahub-gms - depends_on: - - mysql - environment: - - DATASET_ENABLE_SCSI=false - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - GRAPH_SERVICE_IMPL=dgraph - - DGRAPH_HOST=dgraph - - DGRAPH_SECURITY=plain - - JAVA_OPTS=-Xms1g -Xmx1g - - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true - hostname: datahub-gms - image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - ports: - - 8080:8080 - volumes: - - ${HOME}/.datahub/plugins/:/etc/datahub/plugins - dgraph: - container_name: dgraph - environment: - - DGRAPH_SECURITY=plain - hostname: dgraph - image: dgraph/standalone:v21.12.0 - ports: - - 8082:8080 - - 9080:9080 - volumes: - - dgraphdata:/dgraph - elasticsearch: - container_name: elasticsearch - environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m - healthcheck: - retries: 4 - start_period: 2m - test: - - CMD-SHELL - - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' - || exit 1 - hostname: elasticsearch - image: elasticsearch:7.9.3 - mem_limit: 1g - ports: - - 9200:9200 - volumes: - - esdata:/usr/share/elasticsearch/data - elasticsearch-setup: - container_name: elasticsearch-setup - depends_on: - - elasticsearch - 
environment: - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_PROTOCOL=http - hostname: elasticsearch-setup - image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head} - mysql: - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin - container_name: mysql - environment: - - MYSQL_DATABASE=datahub - - MYSQL_USER=datahub - - MYSQL_PASSWORD=datahub - - MYSQL_ROOT_PASSWORD=datahub - hostname: mysql - image: mysql:5.7 - ports: - - 3306:3306 - volumes: - - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql - mysql-setup: - container_name: mysql-setup - depends_on: - - mysql - environment: - - MYSQL_HOST=mysql - - MYSQL_PORT=3306 - - MYSQL_USERNAME=datahub - - MYSQL_PASSWORD=datahub - - DATAHUB_DB_NAME=datahub - hostname: mysql-setup - image: acryldata/datahub-mysql-setup:head - schema-registry: - container_name: schema-registry - depends_on: - - zookeeper - - broker - environment: - - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 - hostname: schema-registry - image: confluentinc/cp-schema-registry:5.4.0 - ports: - - 8081:8081 - zookeeper: - container_name: zookeeper - environment: - - ZOOKEEPER_CLIENT_PORT=2181 - - ZOOKEEPER_TICK_TIME=2000 - hostname: zookeeper - image: confluentinc/cp-zookeeper:5.4.0 - ports: - - 2181:2181 - volumes: - - zkdata:/var/opt/zookeeper -version: '2.3' -volumes: - dgraphdata: null - esdata: null - mysqldata: null - zkdata: null diff --git a/docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml b/docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml deleted file mode 100644 index 55c8c8a872..0000000000 --- a/docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml +++ /dev/null @@ -1,163 +0,0 @@ -networks: - default: - name: datahub_network -services: - broker: - container_name: broker - depends_on: - - zookeeper - environment: - - KAFKA_BROKER_ID=1 - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 - - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - hostname: broker - image: confluentinc/cp-kafka:5.4.0 - ports: - - 29092:29092 - - 9092:9092 - datahub-frontend-react: - container_name: datahub-frontend-react - depends_on: - - datahub-gms - environment: - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - DATAHUB_SECRET=YouKnowNothing - - DATAHUB_APP_VERSION=1.0 - - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB - - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf - -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml - -Dlogback.debug=false -Dpidfile.path=/dev/null - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 - - ELASTIC_CLIENT_HOST=elasticsearch - - ELASTIC_CLIENT_PORT=9200 - hostname: datahub-frontend-react - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} - ports: - - 9002:9002 - datahub-gms: - 
container_name: datahub-gms - depends_on: - - mysql - environment: - - DATASET_ENABLE_SCSI=false - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - GRAPH_SERVICE_IMPL=elasticsearch - - JAVA_OPTS=-Xms1g -Xmx1g - - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true - hostname: datahub-gms - image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - ports: - - 8080:8080 - volumes: - - ${HOME}/.datahub/plugins/:/etc/datahub/plugins - elasticsearch: - container_name: elasticsearch - environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m - healthcheck: - retries: 4 - start_period: 2m - test: - - CMD-SHELL - - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' - || exit 1 - hostname: elasticsearch - image: elasticsearch:7.9.3 - mem_limit: 1g - ports: - - 9200:9200 - volumes: - - esdata:/usr/share/elasticsearch/data - elasticsearch-setup: - container_name: elasticsearch-setup - depends_on: - - elasticsearch - environment: - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_PROTOCOL=http - hostname: elasticsearch-setup - image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head} - mysql: - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin - container_name: mysql - environment: - - MYSQL_DATABASE=datahub - - MYSQL_USER=datahub - - MYSQL_PASSWORD=datahub - - MYSQL_ROOT_PASSWORD=datahub - hostname: mysql - image: mysql:5.7 - ports: - - 3306:3306 - volumes: - - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql - mysql-setup: - container_name: mysql-setup - depends_on: - - mysql - environment: - - MYSQL_HOST=mysql - - MYSQL_PORT=3306 - - MYSQL_USERNAME=datahub - - MYSQL_PASSWORD=datahub - - DATAHUB_DB_NAME=datahub - hostname: mysql-setup - image: acryldata/datahub-mysql-setup:head - schema-registry: - container_name: schema-registry - depends_on: - - zookeeper - - broker - environment: - - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 - hostname: schema-registry - image: confluentinc/cp-schema-registry:5.4.0 - ports: - - 8081:8081 - zookeeper: - container_name: zookeeper - environment: - - ZOOKEEPER_CLIENT_PORT=2181 - - ZOOKEEPER_TICK_TIME=2000 - hostname: zookeeper - image: confluentinc/cp-zookeeper:5.4.0 - ports: - - 2181:2181 - volumes: - - zkdata:/var/opt/zookeeper -version: '2.3' -volumes: - esdata: null - mysqldata: null - zkdata: null diff --git a/docker/quickstart/docker-compose-with-neo4j.quickstart.yml b/docker/quickstart/docker-compose-with-neo4j.quickstart.yml deleted file mode 100644 index 823113bd2b..0000000000 --- a/docker/quickstart/docker-compose-with-neo4j.quickstart.yml +++ 
/dev/null @@ -1,181 +0,0 @@ -networks: - default: - name: datahub_network -services: - broker: - container_name: broker - depends_on: - - zookeeper - environment: - - KAFKA_BROKER_ID=1 - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 - - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - hostname: broker - image: confluentinc/cp-kafka:5.4.0 - ports: - - 29092:29092 - - 9092:9092 - datahub-frontend-react: - container_name: datahub-frontend-react - depends_on: - - datahub-gms - environment: - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - DATAHUB_SECRET=YouKnowNothing - - DATAHUB_APP_VERSION=1.0 - - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB - - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf - -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml - -Dlogback.debug=false -Dpidfile.path=/dev/null - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 - - ELASTIC_CLIENT_HOST=elasticsearch - - ELASTIC_CLIENT_PORT=9200 - hostname: datahub-frontend-react - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} - ports: - - 9002:9002 - datahub-gms: - container_name: datahub-gms - depends_on: - - mysql - environment: - - DATASET_ENABLE_SCSI=false - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - GRAPH_SERVICE_IMPL=neo4j - - NEO4J_HOST=http://neo4j:7474 - - NEO4J_URI=bolt://neo4j - - NEO4J_USERNAME=neo4j - - NEO4J_PASSWORD=datahub - - JAVA_OPTS=-Xms1g -Xmx1g - - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true - hostname: datahub-gms - image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} - ports: - - 8080:8080 - volumes: - - ${HOME}/.datahub/plugins:/etc/datahub/plugins - elasticsearch: - container_name: elasticsearch - environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m - healthcheck: - retries: 4 - start_period: 2m - test: - - CMD-SHELL - - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' - || exit 1 - hostname: elasticsearch - image: elasticsearch:7.9.3 - mem_limit: 1g - ports: - - 9200:9200 - volumes: - - esdata:/usr/share/elasticsearch/data - elasticsearch-setup: - container_name: elasticsearch-setup - depends_on: - - elasticsearch - environment: - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_PROTOCOL=http - hostname: elasticsearch-setup - image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: 
linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head} - mysql: - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin - container_name: mysql - environment: - - MYSQL_DATABASE=datahub - - MYSQL_USER=datahub - - MYSQL_PASSWORD=datahub - - MYSQL_ROOT_PASSWORD=datahub - hostname: mysql - image: mysql:5.7 - ports: - - 3306:3306 - volumes: - - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql - mysql-setup: - container_name: mysql-setup - depends_on: - - mysql - environment: - - MYSQL_HOST=mysql - - MYSQL_PORT=3306 - - MYSQL_USERNAME=datahub - - MYSQL_PASSWORD=datahub - - DATAHUB_DB_NAME=datahub - hostname: mysql-setup - image: acryldata/datahub-mysql-setup:head - neo4j: - container_name: neo4j - environment: - - NEO4J_AUTH=neo4j/datahub - - NEO4J_dbms_default__database=graph.db - - NEO4J_dbms_allow__upgrade=true - hostname: neo4j - image: neo4j:4.0.6 - ports: - - 7474:7474 - - 7687:7687 - volumes: - - neo4jdata:/data - schema-registry: - container_name: schema-registry - depends_on: - - zookeeper - - broker - environment: - - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 - hostname: schema-registry - image: confluentinc/cp-schema-registry:5.4.0 - ports: - - 8081:8081 - zookeeper: - container_name: zookeeper - environment: - - ZOOKEEPER_CLIENT_PORT=2181 - - ZOOKEEPER_TICK_TIME=2000 - hostname: zookeeper - image: confluentinc/cp-zookeeper:5.4.0 - ports: - - 2181:2181 - volumes: - - zkdata:/var/opt/zookeeper -version: '2.3' -volumes: - esdata: null - mysqldata: null - neo4jdata: null - zkdata: null diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml similarity index 100% rename from docker/quickstart/docker-compose-m1.quickstart.yml rename to docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml deleted file mode 120000 index 016be10f77..0000000000 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ /dev/null @@ -1 +0,0 @@ -docker-compose-with-elasticsearch.quickstart.yml \ No newline at end of file diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml new file mode 100644 index 0000000000..7a70b77bc6 --- /dev/null +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -0,0 +1,163 @@ +networks: + default: + name: datahub_network +services: + broker: + container_name: broker + depends_on: + - zookeeper + environment: + - KAFKA_BROKER_ID=1 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 + - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m + hostname: broker + image: confluentinc/cp-kafka:5.4.0 + ports: + - 29092:29092 + - 9092:9092 + datahub-frontend-react: + container_name: datahub-frontend-react + depends_on: + - datahub-gms + environment: + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - DATAHUB_SECRET=YouKnowNothing + - DATAHUB_APP_VERSION=1.0 + - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB + - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf + 
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml + -Dlogback.debug=false -Dpidfile.path=/dev/null + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 + - ELASTIC_CLIENT_HOST=elasticsearch + - ELASTIC_CLIENT_PORT=9200 + hostname: datahub-frontend-react + image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + ports: + - 9002:9002 + datahub-gms: + container_name: datahub-gms + depends_on: + - mysql + environment: + - DATASET_ENABLE_SCSI=false + - EBEAN_DATASOURCE_USERNAME=datahub + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_HOST=mysql:3306 + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 + - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - GRAPH_SERVICE_IMPL=elasticsearch + - JAVA_OPTS=-Xms1g -Xmx1g + - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml + - MAE_CONSUMER_ENABLED=true + - MCE_CONSUMER_ENABLED=true + hostname: datahub-gms + image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} + ports: + - 8080:8080 + volumes: + - ${HOME}/.datahub/plugins:/etc/datahub/plugins + elasticsearch: + container_name: elasticsearch + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms256m -Xmx256m + healthcheck: + retries: 4 + start_period: 2m + test: + - CMD-SHELL + - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' + || exit 1 + hostname: elasticsearch + image: elasticsearch:7.9.3 + mem_limit: 1g + ports: + - 9200:9200 + volumes: + - esdata:/usr/share/elasticsearch/data + elasticsearch-setup: + container_name: elasticsearch-setup + depends_on: + - elasticsearch + environment: + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - ELASTICSEARCH_PROTOCOL=http + hostname: elasticsearch-setup + image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head} + kafka-setup: + container_name: kafka-setup + depends_on: + - broker + - schema-registry + environment: + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + hostname: kafka-setup + image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head} + mysql: + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin + container_name: mysql + environment: + - MYSQL_DATABASE=datahub + - MYSQL_USER=datahub + - MYSQL_PASSWORD=datahub + - MYSQL_ROOT_PASSWORD=datahub + hostname: mysql + image: mysql:5.7 + ports: + - 3306:3306 + volumes: + - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql + - mysqldata:/var/lib/mysql + mysql-setup: + container_name: mysql-setup + depends_on: + - mysql + environment: + - MYSQL_HOST=mysql + - MYSQL_PORT=3306 + - MYSQL_USERNAME=datahub + - MYSQL_PASSWORD=datahub + - DATAHUB_DB_NAME=datahub + hostname: mysql-setup + image: acryldata/datahub-mysql-setup:head + schema-registry: + container_name: schema-registry + depends_on: + - zookeeper + - broker + environment: + - SCHEMA_REGISTRY_HOST_NAME=schemaregistry + - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 + hostname: schema-registry + image: confluentinc/cp-schema-registry:5.4.0 + ports: + - 8081:8081 + zookeeper: + container_name: zookeeper + environment: + - ZOOKEEPER_CLIENT_PORT=2181 + - 
ZOOKEEPER_TICK_TIME=2000
+    hostname: zookeeper
+    image: confluentinc/cp-zookeeper:5.4.0
+    ports:
+    - 2181:2181
+    volumes:
+    - zkdata:/var/opt/zookeeper
+version: '2.3'
+volumes:
+  esdata: null
+  mysqldata: null
+  zkdata: null
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
deleted file mode 120000
index 016be10f77..0000000000
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ /dev/null
@@ -1 +0,0 @@
-docker-compose-with-elasticsearch.quickstart.yml
\ No newline at end of file
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
new file mode 100644
index 0000000000..1232e63a69
--- /dev/null
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -0,0 +1,181 @@
+networks:
+  default:
+    name: datahub_network
+services:
+  broker:
+    container_name: broker
+    depends_on:
+    - zookeeper
+    environment:
+    - KAFKA_BROKER_ID=1
+    - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+    - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+    - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
+    - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
+    - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
+    - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
+    hostname: broker
+    image: confluentinc/cp-kafka:5.4.0
+    ports:
+    - 29092:29092
+    - 9092:9092
+  datahub-frontend-react:
+    container_name: datahub-frontend-react
+    depends_on:
+    - datahub-gms
+    environment:
+    - DATAHUB_GMS_HOST=datahub-gms
+    - DATAHUB_GMS_PORT=8080
+    - DATAHUB_SECRET=YouKnowNothing
+    - DATAHUB_APP_VERSION=1.0
+    - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
+    - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
+      -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
+      -Dlogback.debug=false -Dpidfile.path=/dev/null
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
+    - ELASTIC_CLIENT_HOST=elasticsearch
+    - ELASTIC_CLIENT_PORT=9200
+    hostname: datahub-frontend-react
+    image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
+    ports:
+    - 9002:9002
+  datahub-gms:
+    container_name: datahub-gms
+    depends_on:
+    - mysql
+    environment:
+    - DATASET_ENABLE_SCSI=false
+    - EBEAN_DATASOURCE_USERNAME=datahub
+    - EBEAN_DATASOURCE_PASSWORD=datahub
+    - EBEAN_DATASOURCE_HOST=mysql:3306
+    - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
+    - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+    - ELASTICSEARCH_HOST=elasticsearch
+    - ELASTICSEARCH_PORT=9200
+    - NEO4J_HOST=http://neo4j:7474
+    - NEO4J_URI=bolt://neo4j
+    - NEO4J_USERNAME=neo4j
+    - NEO4J_PASSWORD=datahub
+    - JAVA_OPTS=-Xms1g -Xmx1g
+    - GRAPH_SERVICE_IMPL=neo4j
+    - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
+    - MAE_CONSUMER_ENABLED=true
+    - MCE_CONSUMER_ENABLED=true
+    hostname: datahub-gms
+    image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
+    ports:
+    - 8080:8080
+    volumes:
+    - ${HOME}/.datahub/plugins/:/etc/datahub/plugins
+  elasticsearch:
+    container_name: elasticsearch
+    environment:
+    - discovery.type=single-node
+    - xpack.security.enabled=false
+    - ES_JAVA_OPTS=-Xms256m -Xmx256m
+    healthcheck:
+      retries: 4
+      start_period: 2m
+      test:
+      - CMD-SHELL
+      - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
+        || exit 1
+    hostname: elasticsearch
+    image: elasticsearch:7.9.3
+    mem_limit: 1g
+    ports:
+    - 9200:9200
+    volumes:
+    - esdata:/usr/share/elasticsearch/data
+  elasticsearch-setup:
+    container_name: elasticsearch-setup
+    depends_on:
+    - elasticsearch
+    environment:
+    - ELASTICSEARCH_HOST=elasticsearch
+    - ELASTICSEARCH_PORT=9200
+    - ELASTICSEARCH_PROTOCOL=http
+    hostname: elasticsearch-setup
+    image: linkedin/datahub-elasticsearch-setup:${DATAHUB_VERSION:-head}
+  kafka-setup:
+    container_name: kafka-setup
+    depends_on:
+    - broker
+    - schema-registry
+    environment:
+    - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    hostname: kafka-setup
+    image: linkedin/datahub-kafka-setup:${DATAHUB_VERSION:-head}
+  mysql:
+    command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
+    container_name: mysql
+    environment:
+    - MYSQL_DATABASE=datahub
+    - MYSQL_USER=datahub
+    - MYSQL_PASSWORD=datahub
+    - MYSQL_ROOT_PASSWORD=datahub
+    hostname: mysql
+    image: mysql:5.7
+    ports:
+    - 3306:3306
+    volumes:
+    - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
+    - mysqldata:/var/lib/mysql
+  mysql-setup:
+    container_name: mysql-setup
+    depends_on:
+    - mysql
+    environment:
+    - MYSQL_HOST=mysql
+    - MYSQL_PORT=3306
+    - MYSQL_USERNAME=datahub
+    - MYSQL_PASSWORD=datahub
+    - DATAHUB_DB_NAME=datahub
+    hostname: mysql-setup
+    image: acryldata/datahub-mysql-setup:head
+  neo4j:
+    container_name: neo4j
+    environment:
+    - NEO4J_AUTH=neo4j/datahub
+    - NEO4J_dbms_default__database=graph.db
+    - NEO4J_dbms_allow__upgrade=true
+    hostname: neo4j
+    image: neo4j:4.0.6
+    ports:
+    - 7474:7474
+    - 7687:7687
+    volumes:
+    - neo4jdata:/data
+  schema-registry:
+    container_name: schema-registry
+    depends_on:
+    - zookeeper
+    - broker
+    environment:
+    - SCHEMA_REGISTRY_HOST_NAME=schemaregistry
+    - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
+    hostname: schema-registry
+    image: confluentinc/cp-schema-registry:5.4.0
+    ports:
+    - 8081:8081
+  zookeeper:
+    container_name: zookeeper
+    environment:
+    - ZOOKEEPER_CLIENT_PORT=2181
+    - ZOOKEEPER_TICK_TIME=2000
+    hostname: zookeeper
+    image: confluentinc/cp-zookeeper:5.4.0
+    ports:
+    - 2181:2181
+    volumes:
+    - zkdata:/var/opt/zookeeper
+version: '2.3'
+volumes:
+  esdata: null
+  mysqldata: null
+  neo4jdata: null
+  zkdata: null
diff --git a/docker/quickstart/generate_and_compare.sh b/docker/quickstart/generate_and_compare.sh
index 1d48204fc6..6939eae7c2 100755
--- a/docker/quickstart/generate_and_compare.sh
+++ b/docker/quickstart/generate_and_compare.sh
@@ -1,7 +1,7 @@
 #!/bin/bash

 # this scripts checks if docker-compose$flavour.quickstart.yml is up to date for these 'flavours':
-FLAVOURS=("-with-elasticsearch" "-with-neo4j" "-with-dgraph" ".monitoring")
+FLAVOURS=("" "-without-neo4j" ".monitoring")

 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd "$DIR"
@@ -12,10 +12,8 @@ python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt

-python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp-with-elasticsearch.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml temp-with-neo4j.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml temp-with-dgraph.quickstart.yml
-
+python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml

 for flavour in "${FLAVOURS[@]}"
diff --git a/docker/quickstart/generate_docker_quickstart.sh b/docker/quickstart/generate_docker_quickstart.sh
index 81c2e43659..d8427a67b5 100755
--- a/docker/quickstart/generate_docker_quickstart.sh
+++ b/docker/quickstart/generate_docker_quickstart.sh
@@ -9,9 +9,6 @@ python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt

-python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose-with-elasticsearch.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-neo4j.yml ../docker-compose-with-neo4j.override.yml docker-compose-with-neo4j.quickstart.yml
-python generate_docker_quickstart.py ../docker-compose-with-dgraph.yml ../docker-compose-with-dgraph.override.yml docker-compose-with-dgraph.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
-python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.quickstart.monitoring.yml
-
diff --git a/metadata-ingestion/src/datahub/cli/docker.py b/metadata-ingestion/src/datahub/cli/docker.py
index 3855998c5b..c94f50dd2e 100644
--- a/metadata-ingestion/src/datahub/cli/docker.py
+++ b/metadata-ingestion/src/datahub/cli/docker.py
@@ -23,15 +23,14 @@ from datahub.telemetry import telemetry
 logger = logging.getLogger(__name__)

 NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-neo4j.quickstart.yml"
-)
-DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-dgraph.quickstart.yml"
+    "docker/quickstart/docker-compose.quickstart.yml"
 )
 ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-with-elasticsearch.quickstart.yml"
+    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
+)
+M1_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
 )
-M1_QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose-m1.quickstart.yml"

 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"

@@ -39,9 +38,6 @@ GITHUB_BASE_URL = "https://raw.githubusercontent.com/linkedin/datahub/master"
 GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
-GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
-    f"{GITHUB_BASE_URL}/{DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-)
 GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
@@ -94,23 +90,22 @@ def is_m1() -> bool:
         return False


-def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
+def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
     if graph_service_override is not None:
         if graph_service_override == "elasticsearch":
             click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-        elif graph_service_override == "neo4j":
+            return False
+        if graph_service_override == "neo4j":
             click.echo("Starting with neo4j due to graph-service-impl param\n")
-        elif graph_service_override == "dgraph":
-            click.echo("Starting with dgraph due to graph-service-impl param\n")
+            return True
         else:
             click.secho(
                 graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j`, `dgraph` "
-                "or `elasticsearch`\n",
+                + " is not a valid graph service option. Choose either `neo4j` or "
+                "`elasticsearch`\n",
                 fg="red",
             )
             raise ValueError(f"invalid graph service option: {graph_service_override}")
-        return graph_service_override
     with get_client_with_error() as (client, error):
         if error:
             click.secho(
@@ -121,26 +116,17 @@ def which_graph_service_to_use(graph_service_override: Optional[str]) -> str:
         if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
             click.echo(
                 "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elasticsearch or dgraph, run `datahub docker nuke` and re-ingest your data.\n"
+                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
             )
-            return "neo4j"
-
-        if len(client.volumes.list(filters={"name": "datahub_dgraphdata"})) > 0:
-            click.echo(
-                "Datahub Dgraph volume found, starting with dgraph as graph service.\n"
-                "If you want to run using elasticsearch or neo4j, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return "dgraph"
+            return True

         click.echo(
-            "No Datahub Neo4j or Dgraph volume found, starting with elasticsearch as graph service.\n"
+            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
             "To use neo4j as a graph backend, run \n"
-            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-neo4j.quickstart.yml`\n"
-            "To use dgraph as a graph backend, run \n"
-            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose-with-dgraph.quickstart.yml`\n"
-            "from the root of the datahub repo\n"
+            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`"
+            "\nfrom the root of the datahub repo\n"
         )
-        return "elasticsearch"
+        return False


 @docker.command()
@@ -207,27 +193,19 @@ def quickstart(
         quickstart_compose_file
     )  # convert to list from tuple
     if not quickstart_compose_file:
-        graph_service_impl = which_graph_service_to_use(graph_service_impl)
-        if graph_service_impl == "neo4j":
-            if running_on_m1:
-                click.secho(
-                    "Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
-                    fg="red",
-                )
-                github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
-            else:
-                github_file = GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
-        elif graph_service_impl == "dgraph":
-            github_file = GITHUB_DGRAPH_AND_ELASTIC_QUICKSTART_COMPOSE_URL
-        elif graph_service_impl == "elasticsearch":
-            if not running_on_m1:
-                github_file = GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
-            else:
-                github_file = GITHUB_M1_QUICKSTART_COMPOSE_URL
-        else:
-            raise ValueError(
-                f"Unsupported graph service implementation: {graph_service_impl}"
+        should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
+        if should_use_neo4j and running_on_m1:
+            click.secho(
+                "Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
+                fg="red",
             )
+        github_file = (
+            GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
+            if should_use_neo4j and not running_on_m1
+            else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
+            if not running_on_m1
+            else GITHUB_M1_QUICKSTART_COMPOSE_URL
+        )

     with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
         path = pathlib.Path(tmp_file.name)
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/DgraphGraphService.java
index 504ca53727..6767229515 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/DgraphGraphService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/DgraphGraphService.java
@@ -382,15 +382,6 @@ public class DgraphGraphService implements GraphService {
             return new RelatedEntitiesResult(offset, 0, 0, Collections.emptyList());
         }

-        // TODO: Remove once gms client code uses null instead of "" for any type
-        // https://github.com/linkedin/datahub/issues/3143
-        if (sourceType != null && sourceType.isEmpty()) {
-            sourceType = null;
-        }
-        if (destinationType != null && destinationType.isEmpty()) {
-            destinationType = null;
-        }
-
         String query = getQueryForRelatedEntities(
                 sourceType, sourceEntityFilter,
                 destinationType, destinationEntityFilter,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphContainer.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphContainer.java
index 1bfd5945a6..6847b9bb93 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphContainer.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphContainer.java
@@ -63,7 +63,7 @@ public class DgraphContainer extends GenericContainer<DgraphContainer> {
         WaitStrategy waitForLeader = new LogMessageWaitStrategy()
                 .withRegEx(".* Got Zero leader: .*\n");
         WaitStrategy waitForCluster = new LogMessageWaitStrategy()
-                .withRegEx(".* Server is ready.*");
+                .withRegEx(".* Server is ready\n");
         WaitStrategy waitForHttp = new HttpWaitStrategy()
                 .forPort(HTTP_PORT)
                 .forStatusCodeMatching(response -> response == HTTP_OK);
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
index 6031ae74d9..249a3b31b1 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/DgraphGraphServiceTest.java
@@ -1,6 +1,5 @@
 package com.linkedin.metadata.graph;

-import com.linkedin.metadata.query.filter.RelationshipFilter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import io.dgraph.DgraphClient;
 import io.dgraph.DgraphGrpc;
@@ -13,7 +12,6 @@ import io.grpc.ManagedChannelBuilder;
 import io.grpc.MethodDescriptor;
 import lombok.extern.slf4j.Slf4j;
 import org.testcontainers.containers.output.Slf4jLogConsumer;
-import org.testng.SkipException;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.AfterTest;
 import org.testng.annotations.BeforeMethod;
@@ -26,7 +24,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;

@@ -52,7 +49,7 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
     @BeforeTest
     public void setup() {
-        _container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.12.0"))
+        _container = new DgraphContainer(DgraphContainer.DEFAULT_IMAGE_NAME.withTag("v21.03.0"))
                 .withTmpFs(Collections.singletonMap("/dgraph", "rw,noexec,nosuid,size=1g"))
                 .withStartupTimeout(Duration.ofMinutes(1))
                 .withStartupAttempts(3);

@@ -108,34 +105,6 @@ public class DgraphGraphServiceTest extends GraphServiceTestBase {
     @Override
     protected void syncAfterWrite() { }

-    @Override
-    @SuppressWarnings("MalformedDataProvider")
-    @Test(dataProvider = "FindRelatedEntitiesSourceTypeTests")
-    public void testFindRelatedEntitiesSourceType(String datasetType,
-                                                  List<String> relationshipTypes,
-                                                  RelationshipFilter relationships,
-                                                  List<RelatedEntity> expectedRelatedEntities) throws Exception {
-        if (datasetType != null && datasetType.isEmpty()) {
-            // https://github.com/linkedin/datahub/issues/3143
-            throw new SkipException("Code using GraphService uses \"\" instead of null");
-        }
-        super.testFindRelatedEntitiesSourceType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
-    }
-
-    @Override
-    @SuppressWarnings("MalformedDataProvider")
-    @Test(dataProvider = "FindRelatedEntitiesDestinationTypeTests")
-    public void testFindRelatedEntitiesDestinationType(String datasetType,
-                                                       List<String> relationshipTypes,
-                                                       RelationshipFilter relationships,
-                                                       List<RelatedEntity> expectedRelatedEntities) throws Exception {
-        if (datasetType != null && datasetType.isEmpty()) {
-            // https://github.com/linkedin/datahub/issues/3143
-            throw new SkipException("Code using GraphService uses \"\" instead of null");
-        }
-        super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
-    }
-
     @Test
     public void testGetSchema() {
         DgraphSchema schema = DgraphGraphService.getSchema("{\n"
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphClientFactory.java
deleted file mode 100644
index 738285de45..0000000000
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphClientFactory.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package com.linkedin.gms.factory.common;
-
-import io.dgraph.DgraphClient;
-import io.dgraph.DgraphGrpc;
-import io.grpc.ManagedChannel;
-import io.grpc.ManagedChannelBuilder;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-import java.util.Arrays;
-
-@Slf4j
-@Configuration
-public class DgraphClientFactory {
-  @Value("${DGRAPH_HOST:localhost}")
-  private String[] hosts;
-
-  @Value("${DGRAPH_GRPC_PORT:9080}")
-  private int port;
-
-  @Value("${DGRAPH_SECURITY:plain}")
-  private String security;
-
-  @Bean(name = "dgraphClient")
-  protected DgraphClient createInstance() {
-    DgraphGrpc.DgraphStub[] stubs = Arrays.stream(hosts)
-        .map(this::getChannelForHost)
-        .map(DgraphGrpc::newStub)
-        .toArray(DgraphGrpc.DgraphStub[]::new);
-
-    return new DgraphClient(stubs);
-  }
-
-  private ManagedChannel getChannelForHost(String host) {
-    log.info("Connecting to host " + host);
-    if (host.contains(":")) {
-      return getChannelForBuilder(ManagedChannelBuilder.forTarget(host));
-    } else {
-      return getChannelForBuilder(ManagedChannelBuilder.forAddress(host, port));
-    }
-  }
-
-  private ManagedChannel getChannelForBuilder(ManagedChannelBuilder<?> builder) {
-    if (security.equalsIgnoreCase("plain")) {
-      builder.usePlaintext();
-    } else if (security.equalsIgnoreCase("tls")) {
-      builder.useTransportSecurity();
-    } else {
-      throw new IllegalArgumentException("Unsupported channel security mode");
-    }
-
-    return builder.build();
-  }
-}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphGraphServiceFactory.java
deleted file mode 100644
index 7983491fb6..0000000000
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/DgraphGraphServiceFactory.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package com.linkedin.gms.factory.common;
-
-import com.linkedin.metadata.graph.DgraphGraphService;
-import io.dgraph.DgraphClient;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-import org.springframework.context.annotation.Import;
-
-import javax.annotation.Nonnull;
-
-
-@Configuration
-@Import({DgraphClientFactory.class})
-public class DgraphGraphServiceFactory {
-  @Autowired
-  @Qualifier("dgraphClient")
-  private DgraphClient dgraphClient;
-
-  @Bean(name = "dgraphGraphService")
-  @Nonnull
-  protected DgraphGraphService getInstance() {
-    return new DgraphGraphService(dgraphClient);
-  }
-}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java
index 008e03d280..1fdfcccf6f 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java
@@ -1,7 +1,6 @@
 package com.linkedin.gms.factory.common;

 import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
-import com.linkedin.metadata.graph.DgraphGraphService;
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.graph.Neo4jGraphService;
 import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
@@ -19,7 +18,7 @@ import org.springframework.context.annotation.PropertySource;

 @Configuration
 @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
-@Import({Neo4jGraphServiceFactory.class, DgraphGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
+@Import({Neo4jGraphServiceFactory.class, ElasticSearchGraphServiceFactory.class})
 public class GraphServiceFactory {
   @Autowired
   @Qualifier("elasticSearchGraphService")
@@ -29,28 +28,22 @@ public class GraphServiceFactory {
   @Qualifier("neo4jGraphService")
   private Neo4jGraphService _neo4jGraphService;

-  @Autowired
-  @Qualifier("dgraphGraphService")
-  private DgraphGraphService _dgraphGraphService;
-
   @Value("${graphService.type}")
   private String graphServiceImpl;

   @Nonnull
-  @DependsOn({"neo4jGraphService", "dgraphGraphService", "elasticSearchGraphService"})
+  @DependsOn({"neo4jGraphService", "elasticSearchGraphService"})
   @Bean(name = "graphService")
   @Primary
   protected GraphService createInstance() {
     if (graphServiceImpl.equalsIgnoreCase("neo4j")) {
       return _neo4jGraphService;
-    } else if (graphServiceImpl.equalsIgnoreCase("dgraph")) {
-      return _dgraphGraphService;
     } else if (graphServiceImpl.equalsIgnoreCase("elasticsearch")) {
       return _elasticSearchGraphService;
     } else {
       throw new RuntimeException(
           "Error: Failed to initialize graph service. Graph Service provided: " + graphServiceImpl
-              + ". Valid options: [neo4j, dgraph, elasticsearch].");
+              + ". Valid options: [neo4j, elasticsearch].");
     }
   }
 }
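Usage after this change, as a minimal sketch. The quickstart CLI now decides between exactly two compose flavours; the `--graph-service-impl` flag name is an assumption here (the click option declaration backing the `graph_service_impl` parameter is outside this diff), while the accepted values and fallback behaviour follow `should_use_neo4j_for_graph_service` above:

    # Force a graph backend explicitly; any value other than `neo4j` or
    # `elasticsearch` now fails with ValueError (dgraph is no longer accepted).
    datahub docker quickstart --graph-service-impl neo4j
    datahub docker quickstart --graph-service-impl elasticsearch

    # With no override, the CLI keeps neo4j only if a `datahub_neo4jdata`
    # volume already exists; otherwise it falls back to elasticsearch
    # (and always to elasticsearch on M1).
    datahub docker quickstart

    # Regenerate and verify the remaining quickstart flavours
    # ("", "-without-neo4j", ".monitoring"):
    cd docker/quickstart
    ./generate_docker_quickstart.sh
    ./generate_and_compare.sh

Server-side, GraphServiceFactory resolves `graphService.type` from application.yml to one of the two remaining beans, so deployments that previously set it to `dgraph` must switch to `neo4j` or `elasticsearch` before upgrading.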