# Docker compose file covering DataHub's default configuration, which is to run all containers on a single host.
#
# Please see the README.md for instructions as to how to use and customize.
#
# NOTE: The build sections below use the repository root (../) as their context; see the README.md in this directory.
---
services:
  datahub-frontend-react:
    hostname: datahub-frontend-react
    image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head}
    ports:
      - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
    build:
      context: ../
      dockerfile: docker/datahub-frontend/Dockerfile
    env_file: datahub-frontend/env/docker.env
    depends_on:
      datahub-gms:
        condition: service_healthy
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
  datahub-actions:
    hostname: actions
    image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${DATAHUB_VERSION:-head}-slim
    build:
      context: ../
      dockerfile: docker/datahub-actions/Dockerfile
    env_file: datahub-actions/env/docker.env
    environment:
      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
    depends_on:
      datahub-gms:
        condition: service_healthy
  datahub-gms:
    hostname: datahub-gms
    image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head}
    environment:
      - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
      - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
    ports:
      - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
    build:
      context: ../
      dockerfile: docker/datahub-gms/Dockerfile
    healthcheck:
      # $$ defers expansion to the container shell, matching the other healthchecks.
      test: curl -sS --fail http://datahub-gms:$${DATAHUB_GMS_PORT:-8080}/health
      start_period: 90s
      interval: 1s
      retries: 3
      timeout: 5s
    depends_on:
      datahub-upgrade:
        condition: service_completed_successfully
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
  datahub-upgrade:
    hostname: datahub-upgrade
    image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
    command:
      - -u
      - SystemUpdate
    build:
      context: ../
      dockerfile: docker/datahub-upgrade/Dockerfile
    # This compose file runs neo4j (datahub-upgrade waits on it below), so use the
    # standard env file rather than the without-neo4j variant.
    env_file: datahub-upgrade/env/docker.env
    labels:
      datahub_setup_job: true
    depends_on:
      mysql-setup:
        condition: service_completed_successfully
      elasticsearch-setup:
        condition: service_completed_successfully
      kafka-setup:
        condition: service_completed_successfully
      neo4j:
        condition: service_healthy
  # This "container" is a workaround to pre-create search indices
  elasticsearch-setup:
    hostname: elasticsearch-setup
    image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
    build:
      context: ../
      dockerfile: docker/elasticsearch-setup/Dockerfile
    env_file: elasticsearch-setup/env/docker.env
    environment:
      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
    depends_on:
      elasticsearch:
        condition: service_healthy
    labels:
      datahub_setup_job: true
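  # NOTE: datahub-upgrade above waits on mysql-setup, which was missing from this
  # file. The two services below are a sketch restored along the lines of the
  # upstream DataHub compose files; the image tags, env-file paths, and the
  # MYSQL_USER/MYSQL_PASSWORD variables in the healthcheck are assumptions and
  # may need adjusting for your checkout.
  mysql:
    hostname: mysql
    image: mariadb:10.5.8
    command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
    ports:
      - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
    env_file: mysql/env/docker.env
    healthcheck:
      # Assumes the env file sets the standard MYSQL_USER/MYSQL_PASSWORD variables.
      test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD
      start_period: 20s
      interval: 1s
      retries: 3
      timeout: 5s
    volumes:
      - mysqldata:/var/lib/mysql
  mysql-setup:
    hostname: mysql-setup
    image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:${DATAHUB_VERSION:-head}
    build:
      context: ../
      dockerfile: docker/mysql-setup/Dockerfile
    env_file: mysql-setup/env/docker.env
    depends_on:
      mysql:
        condition: service_healthy
    labels:
      datahub_setup_job: true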
  # This "container" is a workaround to pre-create topics.
  # This is not required in most cases, kept here for backwards compatibility
  # with older clients that explicitly wait for this container.
  kafka-setup:
    hostname: kafka-setup
    image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
    build:
      dockerfile: ./docker/kafka-setup/Dockerfile
      context: ../
    env_file: kafka-setup/env/docker.env
    depends_on:
      broker:
        condition: service_healthy
      schema-registry:
        condition: service_healthy
    labels:
      datahub_setup_job: true
  elasticsearch:
    hostname: elasticsearch
    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
    ports:
      - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
    env_file: elasticsearch/env/docker.env
    environment:
      - discovery.type=single-node
      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
    deploy:
      resources:
        limits:
          memory: 1G
    healthcheck:
      # The URL must be quoted: an unquoted `&` would background curl and make
      # the healthcheck pass unconditionally.
      test: curl -sS --fail "http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s"
      start_period: 20s
      interval: 1s
      retries: 3
      timeout: 5s
    volumes:
      - esdata:/usr/share/elasticsearch/data
  neo4j:
    hostname: neo4j
    image: neo4j:4.4.9-community
    ports:
      - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
      - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
    env_file: neo4j/env/docker.env
    healthcheck:
      test: wget http://neo4j:$${DATAHUB_NEO4J_HTTP_PORT:-7474}
      start_period: 5s
      interval: 1s
      retries: 5
      timeout: 5s
    volumes:
      - neo4jdata:/data
  schema-registry:
    hostname: schema-registry
    image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
    env_file: schema-registry/env/docker.env
    healthcheck:
      # $$ defers expansion to the container shell, matching the other healthchecks.
      test: nc -z schema-registry $${DATAHUB_SCHEMA_REGISTRY_PORT:-8081}
      start_period: 60s
      interval: 1s
      retries: 3
      timeout: 5s
    depends_on:
      broker:
        condition: service_healthy
  broker:
    hostname: broker
    image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
    env_file: broker/env/docker.env
    healthcheck:
      test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092}
      start_period: 60s
      interval: 1s
      retries: 5
      timeout: 5s
    depends_on:
      zookeeper:
        condition: service_healthy
    volumes:
      - broker:/var/lib/kafka/data/
  zookeeper:
    hostname: zookeeper
    image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
    env_file: zookeeper/env/docker.env
    healthcheck:
      test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181}
      start_period: 10s
      interval: 5s
      retries: 3
      timeout: 5s
    volumes:
      # See https://stackoverflow.com/a/61008432 for why we need two volumes.
      # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
      - zkdata:/var/lib/zookeeper/data
      - zklogs:/var/lib/zookeeper/log
networks:
  default:
    name: datahub_network
volumes:
  esdata:
  neo4jdata:
  broker:
  zkdata:
  zklogs:
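  # Volume backing the mysql service restored above.
  mysqldata:

# --- Usage sketch (not part of the original file; assumes Docker Compose v2) ---
# Every image tag and host port above can be overridden through the
# ${VAR:-default} interpolation, for example:
#
#   DATAHUB_VERSION=head DATAHUB_MAPPED_GMS_PORT=18080 docker compose -p datahub up -d
#
# Services labelled `datahub_setup_job: true` are one-shot jobs: `docker compose
# -p datahub ps -a` shows them as exited once they finish, at which point the
# `service_completed_successfully` conditions let datahub-upgrade and then
# datahub-gms start.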