# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
# Please see the README.md for instructions as to how to use and customize.
# NOTE: Build contexts/dockerfiles are set below for local builds; see the README.md in this directory for details.
---
services:
  datahub-frontend-react:
    hostname: datahub-frontend-react
    image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head}
    ports:
      # Quoted: port mappings are digit-and-colon scalars; quoting avoids YAML 1.1 sexagesimal parsing.
      - "9002:9002"
    build:
      context: ../
      dockerfile: docker/datahub-frontend/Dockerfile
    env_file: datahub-frontend/env/docker.env
    depends_on:
      datahub-gms:
        condition: service_healthy
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins

  datahub-actions:
    hostname: actions
    image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${DATAHUB_VERSION:-head}-slim
    build:
      context: ../
      dockerfile: docker/datahub-actions/Dockerfile
    env_file: datahub-actions/env/docker.env
    environment:
      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
    depends_on:
      datahub-gms:
        condition: service_healthy

  datahub-gms:
    hostname: datahub-gms
    image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head}
    ports:
      - "8080:8080"
    build:
      context: ../
      dockerfile: docker/datahub-gms/Dockerfile
    # Path style normalized (no leading "./") to match every other env_file in this file.
    env_file: datahub-gms/env/docker.cassandra.env
    environment:
      - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
    healthcheck:
      test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
      start_period: 20s
      interval: 1s
      retries: 20
      timeout: 5s
    depends_on:
      datahub-upgrade:
        condition: service_completed_successfully
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins

  datahub-upgrade:
    hostname: datahub-upgrade
    image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
    command:
      - -u
      - SystemUpdate
    build:
      context: ../
      dockerfile: docker/datahub-upgrade/Dockerfile
    env_file: datahub-upgrade/env/docker-without-neo4j.env
    depends_on:
      cassandra-setup:
        condition: service_completed_successfully
      elasticsearch-setup:
        condition: service_completed_successfully
      neo4j:
        condition: service_healthy
      broker:
        condition: service_healthy
      schema-registry:
        condition: service_healthy

  cassandra-setup:
    hostname: cassandra-setup
    image: cassandra:3.11
    command: /bin/bash -c "cqlsh cassandra -f /init.cql"
    depends_on:
      cassandra:
        condition: service_healthy
    volumes:
      - ./cassandra/init.cql:/init.cql
    labels:
      # Compose label values must be strings; unquoted `true` is a YAML boolean.
      datahub_setup_job: "true"

  # This "container" is a workaround to pre-create search indices
  elasticsearch-setup:
    hostname: elasticsearch-setup
    image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
    build:
      context: ../
      dockerfile: docker/elasticsearch-setup/Dockerfile
    env_file: elasticsearch-setup/env/docker.env
    environment:
      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
    depends_on:
      elasticsearch:
        condition: service_healthy
    labels:
      datahub_setup_job: "true"

  cassandra:
    hostname: cassandra
    image: cassandra:3.11
    ports:
      - "9042:9042"
    healthcheck:
      # The CQL statement must be shell-quoted: without quotes, -e receives only
      # "describe" and "keyspaces" becomes a stray positional argument.
      test: cqlsh -u cassandra -p cassandra -e 'describe keyspaces'
      interval: 15s
      timeout: 10s
      retries: 10
    volumes:
      - cassandradata:/var/lib/cassandra

  elasticsearch:
    hostname: elasticsearch
    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
    ports:
      - "9200:9200"
    env_file: elasticsearch/env/docker.env
    environment:
      - discovery.type=single-node
      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
    healthcheck:
      # URL is shell-quoted: the healthcheck runs via CMD-SHELL, and an unquoted `&`
      # backgrounds curl while `timeout=0s` becomes a shell assignment that always
      # succeeds, making the check pass regardless of cluster health.
      test: curl -sS --fail 'http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    volumes:
      - esdata:/usr/share/elasticsearch/data

  neo4j:
    hostname: neo4j
    image: neo4j:4.0.6
    ports:
      - "7474:7474"
      - "7687:7687"
    env_file: neo4j/env/docker.env
    healthcheck:
      test: wget http://neo4j:7474
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    volumes:
      - neo4jdata:/data

  schema-registry:
    hostname: schema-registry
    image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - "${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081"
    env_file: schema-registry/env/docker.env
    healthcheck:
      test: nc -z schema-registry 8081
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      broker:
        condition: service_healthy

  broker:
    hostname: broker
    image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - "29092:29092"
      - "9092:9092"
    env_file: broker/env/docker.env
    healthcheck:
      test: nc -z broker 9092
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      zookeeper:
        condition: service_healthy
    volumes:
      - broker:/var/lib/kafka/data/

  zookeeper:
    hostname: zookeeper
    image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - "2181:2181"
    env_file: zookeeper/env/docker.env
    healthcheck:
      test: echo srvr | nc zookeeper 2181
      start_period: 45s
      interval: 5s
      retries: 5
      timeout: 5s
    volumes:
      # See https://stackoverflow.com/a/61008432 for why we need two volumes.
      # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
      - zkdata:/var/lib/zookeeper/data
      - zklogs:/var/lib/zookeeper/log

networks:
  default:
    name: datahub_network

volumes:
  cassandradata:
  esdata:
  neo4jdata:
  broker:
  zkdata:
  zklogs: