# datahub/docker/docker-compose-without-neo4j.yml
# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
# Please see the README.md for instructions on how to use and customize it.
# NOTE: This file will not build on its own! See the README.md in this directory for build instructions.
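# For example, a typical way to bring the stack up from this directory is:
#   docker compose -f docker-compose-without-neo4j.yml up -d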
---
services:
  datahub-frontend-react:
    hostname: datahub-frontend-react
    image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head}
    ports:
      - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
    build:
      context: ../
      dockerfile: docker/datahub-frontend/Dockerfile
    env_file: datahub-frontend/env/docker.env
    depends_on:
      datahub-gms:
        condition: service_healthy
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
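  # With the default mapping above, the DataHub UI is served from the host at http://localhost:9002.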
  datahub-actions:
    hostname: actions
    image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${DATAHUB_VERSION:-head}-slim
    build:
      context: ../
      dockerfile: docker/datahub-actions/Dockerfile
    env_file: datahub-actions/env/docker.env
    environment:
      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
    depends_on:
      datahub-gms:
        condition: service_healthy
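  # ACTIONS_EXTRA_PACKAGES and ACTIONS_CONFIG above are optional pass-throughs (empty by default),
  # typically used to install extra pip packages into the actions container at startup and to point
  # it at a custom actions configuration file, respectively.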
  datahub-gms:
    hostname: datahub-gms
    image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head}
    ports:
      - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
    build:
      context: ../
      dockerfile: docker/datahub-gms/Dockerfile
    env_file: datahub-gms/env/docker-without-neo4j.env
    environment:
      - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
      - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
    healthcheck:
      test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
      start_period: 90s
      interval: 1s
      retries: 3
      timeout: 5s
    depends_on:
      datahub-upgrade:
        condition: service_completed_successfully
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
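  # Example: with the default port mapping above, the same health endpoint can be probed from the
  # host with: curl -sS http://localhost:8080/health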
  datahub-upgrade:
    hostname: datahub-upgrade
    image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
    command:
      - -u
      - SystemUpdate
    build:
      context: ../
      dockerfile: docker/datahub-upgrade/Dockerfile
    env_file: datahub-upgrade/env/docker-without-neo4j.env
    depends_on:
      elasticsearch-setup:
        condition: service_completed_successfully
      kafka-setup:
        condition: service_completed_successfully
    labels:
      datahub_setup_job: true
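  # The datahub_setup_job label marks the run-to-completion setup containers; for example, they can
  # be listed after a run with: docker ps -a --filter label=datahub_setup_job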
  # This "container" is a workaround to pre-create search indices
  elasticsearch-setup:
    hostname: elasticsearch-setup
    image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
    build:
      context: ../
      dockerfile: docker/elasticsearch-setup/Dockerfile
    env_file: elasticsearch-setup/env/docker.env
    environment:
      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
    depends_on:
      elasticsearch:
        condition: service_healthy
    labels:
      datahub_setup_job: true
  kafka-setup:
    hostname: kafka-setup
    image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
    build:
      dockerfile: ./docker/kafka-setup/Dockerfile
      context: ../
    env_file: kafka-setup/env/docker.env
    depends_on:
      broker:
        condition: service_healthy
      schema-registry:
        condition: service_healthy
    labels:
      datahub_setup_job: true
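  # Run-to-completion job that pre-creates the Kafka topics DataHub expects before GMS starts.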
  elasticsearch:
    hostname: elasticsearch
    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
    ports:
      - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
    env_file: elasticsearch/env/docker.env
    environment:
      - discovery.type=single-node
      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
    deploy:
      resources:
        limits:
          memory: 1G
    healthcheck:
      # The URL is quoted so the '&' in the query string is not interpreted by the shell.
      test: curl -sS --fail "http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s"
      start_period: 20s
      interval: 1s
      retries: 3
      timeout: 5s
    volumes:
      - esdata:/usr/share/elasticsearch/data
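  # Note: the double dollar sign ($$) in the healthcheck test strings here and in broker/zookeeper
  # below is Compose's escape for a literal $, so those variables are expanded by the shell inside
  # the container at runtime rather than substituted by Compose at parse time.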
  schema-registry:
    hostname: schema-registry
    image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-8.0.0}
    ports:
      - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
    env_file: schema-registry/env/docker.env
    healthcheck:
      test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081}
      start_period: 60s
      interval: 1s
      retries: 3
      timeout: 5s
    depends_on:
      broker:
        condition: service_healthy
  broker:
    hostname: broker
    image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-8.0.0}
    ports:
      - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
    env_file: broker/env/docker.env
    healthcheck:
      test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092}
      start_period: 60s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      zookeeper:
        condition: service_healthy
    volumes:
      - broker:/var/lib/kafka/data/
  zookeeper:
    hostname: zookeeper
    image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-8.0.0}
    ports:
      - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
    env_file: zookeeper/env/docker.env
    healthcheck:
      test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181}
      start_period: 30s
      interval: 5s
      retries: 3
      timeout: 5s
    volumes:
      # See https://stackoverflow.com/a/61008432 for why we need two volumes.
      # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
      - zkdata:/var/lib/zookeeper/data
      - zklogs:/var/lib/zookeeper/log
networks:
  default:
    name: datahub_network
volumes:
  esdata:
  broker:
  zkdata:
  zklogs:
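# Example only: the single-dollar ${VAR:-default} references above are resolved by Docker Compose
# at parse time, so a .env file next to this compose file can pin versions and remap ports without
# editing it, e.g.:
#   DATAHUB_VERSION=v1.0.0              # hypothetical pinned tag
#   DATAHUB_MAPPED_FRONTEND_PORT=9003   # serve the UI on host port 9003 instead of 9002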