# datahub/docker/docker-compose-with-cassandra.yml

# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
# Please see the README.md for instructions on how to use and customize it.
# NOTE: Only services that declare a `build:` section (context `../`) can be built from this file; the remaining services run prebuilt images. See the README.md in this directory.
---
services:
  # DataHub frontend, published on host port 9002. Starts only after
  # datahub-gms reports healthy.
  datahub-frontend-react:
    hostname: datahub-frontend-react
    image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head}
    ports:
      # Port mappings are quoted so YAML can never read HOST:CONTAINER as a
      # base-60 number.
      - '9002:9002'
    build:
      context: ../
      dockerfile: docker/datahub-frontend/Dockerfile
    env_file: datahub-frontend/env/docker.env
    depends_on:
      datahub-gms:
        condition: service_healthy
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins

  # DataHub actions container (reachable as `actions` on the network).
  # Starts only after datahub-gms reports healthy.
  datahub-actions:
    hostname: actions
    image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${DATAHUB_VERSION:-head}-slim
    build:
      context: ../
      dockerfile: docker/datahub-actions/Dockerfile
    env_file: datahub-actions/env/docker.env
    environment:
      # Both values default to an empty string when unset on the host.
      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
    depends_on:
      datahub-gms:
        condition: service_healthy

  # DataHub GMS, published on host port 8080 and configured from the
  # Cassandra-specific env file. Starts after datahub-upgrade has exited
  # successfully.
  datahub-gms:
    hostname: datahub-gms
    image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head}
    ports:
      - '8080:8080'
    build:
      context: ../
      dockerfile: docker/datahub-gms/Dockerfile
    env_file: ./datahub-gms/env/docker.cassandra.env
    environment:
      - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
    healthcheck:
      test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
      start_period: 20s
      interval: 1s
      retries: 20
      timeout: 5s
    depends_on:
      datahub-upgrade:
        condition: service_completed_successfully
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins

  # One-shot job that runs `-u SystemUpdate` once both setup jobs have
  # completed and the backing services are healthy.
  datahub-upgrade:
    hostname: datahub-upgrade
    image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
    command:
      - -u
      - SystemUpdate
    build:
      context: ../
      dockerfile: docker/datahub-upgrade/Dockerfile
    # NOTE(review): this env file is named "without-neo4j", yet the service
    # waits for neo4j below - confirm which of the two is intended.
    env_file: datahub-upgrade/env/docker-without-neo4j.env
    depends_on:
      cassandra-setup:
        condition: service_completed_successfully
      elasticsearch-setup:
        condition: service_completed_successfully
      neo4j:
        condition: service_healthy
      broker:
        condition: service_healthy
      schema-registry:
        condition: service_healthy

  # One-shot job that feeds ./cassandra/init.cql to the `cassandra` host via
  # cqlsh once Cassandra is healthy.
  cassandra-setup:
    hostname: cassandra-setup
    image: cassandra:3.11
    command: /bin/bash -c "cqlsh cassandra -f /init.cql"
    depends_on:
      cassandra:
        condition: service_healthy
    volumes:
      - ./cassandra/init.cql:/init.cql
    labels:
      # Quoted so the label value is explicitly the string "true".
      datahub_setup_job: 'true'

  # This "container" is a workaround to pre-create search indices
  elasticsearch-setup:
    hostname: elasticsearch-setup
    image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
    build:
      context: ../
      dockerfile: docker/elasticsearch-setup/Dockerfile
    env_file: elasticsearch-setup/env/docker.env
    environment:
      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
    depends_on:
      elasticsearch:
        condition: service_healthy
    labels:
      datahub_setup_job: 'true'

  # Cassandra 3.11 node, published on host port 9042, with data kept in the
  # `cassandradata` volume.
  cassandra:
    hostname: cassandra
    image: cassandra:3.11
    ports:
      - '9042:9042'
    healthcheck:
      # The statement is quoted because the string form of `test` is run
      # through a shell: unquoted, `-e` would receive only `describe` and
      # `keyspaces` would be passed to cqlsh as a separate positional argument.
      test: cqlsh -u cassandra -p cassandra -e 'describe keyspaces'
      interval: 15s
      timeout: 10s
      retries: 10
    volumes:
      - cassandradata:/var/lib/cassandra

  # Single-node Elasticsearch, published on host port 9200, with data kept in
  # the `esdata` volume.
  elasticsearch:
    hostname: elasticsearch
    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
    ports:
      - '9200:9200'
    env_file: elasticsearch/env/docker.env
    environment:
      - discovery.type=single-node
      # The whole KEY=VALUE entry comes from XPACK_SECURITY_ENABLED when that
      # variable is set; otherwise X-Pack security is disabled.
      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
    healthcheck:
      # The URL is quoted because the string form of `test` is run through a
      # shell: an unquoted `&` would background curl and turn `timeout=0s`
      # into a variable assignment, making the probe succeed unconditionally.
      test: "curl -sS --fail 'http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s'"
      # Now that the probe really checks cluster health, allow the node time
      # to boot; failures inside the start period do not count as retries.
      start_period: 60s
      interval: 1s
      retries: 5
      timeout: 5s
    volumes:
      - esdata:/usr/share/elasticsearch/data

  # Neo4j 4.0.6, published on host ports 7474 and 7687, with data kept in the
  # `neo4jdata` volume.
  neo4j:
    hostname: neo4j
    image: neo4j:4.0.6
    ports:
      - '7474:7474'
      - '7687:7687'
    env_file: neo4j/env/docker.env
    healthcheck:
      test: wget http://neo4j:7474
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    volumes:
      - neo4jdata:/data

  # Confluent Schema Registry. The host port is configurable through
  # DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT (default 8081). Starts after the
  # broker is healthy.
  schema-registry:
    hostname: schema-registry
    image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - '${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081'
    env_file: schema-registry/env/docker.env
    healthcheck:
      test: nc -z schema-registry 8081
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      broker:
        condition: service_healthy

  # Confluent Kafka broker, published on host ports 29092 and 9092. Starts
  # after ZooKeeper is healthy.
  broker:
    hostname: broker
    image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - '29092:29092'
      - '9092:9092'
    env_file: broker/env/docker.env
    healthcheck:
      test: nc -z broker 9092
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      zookeeper:
        condition: service_healthy
    volumes:
      - broker:/var/lib/kafka/data/

  # Confluent ZooKeeper, published on host port 2181.
  zookeeper:
    hostname: zookeeper
    image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - '2181:2181'
    env_file: zookeeper/env/docker.env
    healthcheck:
      test: echo srvr | nc zookeeper 2181
      start_period: 45s
      interval: 5s
      retries: 5
      timeout: 5s
    volumes:
      # See https://stackoverflow.com/a/61008432 for why we need two volumes.
      # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
      - zkdata:/var/lib/zookeeper/data
      - zklogs:/var/lib/zookeeper/log
# Give the project's default network a fixed name instead of a generated one.
networks:
  default:
    name: datahub_network
# Named volumes mounted by the services in this file. Each uses the default
# driver settings.
volumes:
  # Mounted by cassandra at /var/lib/cassandra.
  cassandradata:
  # Mounted by elasticsearch at /usr/share/elasticsearch/data.
  esdata:
  # Mounted by neo4j at /data.
  neo4jdata:
  # Mounted by broker at /var/lib/kafka/data/.
  broker:
  # Mounted by zookeeper at /var/lib/zookeeper/data and /var/lib/zookeeper/log.
  zkdata:
  zklogs: