feat(quickstart): Remove kafka-setup as a hard deployment requirement (#7073)

This commit is contained in:
Pedro Silva 2023-01-24 16:12:57 +00:00 committed by GitHub
parent db968497cc
commit bef59b0928
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 706 additions and 489 deletions

View File

@@ -73,6 +73,10 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: "3.7"
- name: Download YQ
uses: chrisdickinson/setup-yq@v1.0.1
with:
yq-version: v4.28.2
- name: Quickstart Compose Validation
run: ./docker/quickstart/generate_and_compare.sh

View File

@@ -2,7 +2,7 @@ MCE_CONSUMER_ENABLED=true
EBEAN_DATASOURCE_USERNAME=datahub
EBEAN_DATASOURCE_PASSWORD=datahub
EBEAN_DATASOURCE_HOST=mysql:3306
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
@@ -15,6 +15,10 @@ JAVA_OPTS=-Xms1g -Xmx1g
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
ENTITY_SERVICE_ENABLE_RETENTION=true
MAE_CONSUMER_ENABLED=false
PE_CONSUMER_ENABLED=false
UI_INGESTION_ENABLED=false
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment

View File

@@ -7,7 +7,7 @@
version: '3.8'
services:
zookeeper:
image: confluentinc/cp-zookeeper:5.4.0
image: confluentinc/cp-zookeeper:7.2.2
env_file: zookeeper/env/docker.env
hostname: zookeeper
container_name: zookeeper
@@ -17,7 +17,7 @@ services:
- zkdata:/var/lib/zookeeper
broker:
image: confluentinc/cp-kafka:5.4.0
image: confluentinc/cp-kafka:7.2.2
env_file: broker/env/docker.env
hostname: broker
container_name: broker
@@ -29,25 +29,12 @@ services:
volumes:
- broker:/var/lib/kafka/data/
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
context: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry
schema-registry:
image: confluentinc/cp-schema-registry:5.4.0
image: confluentinc/cp-schema-registry:7.2.2
env_file: schema-registry/env/docker.env
hostname: schema-registry
container_name: schema-registry
depends_on:
- zookeeper
- broker
ports:
- "8081:8081"

View File

@@ -1,9 +1,3 @@
services:
broker:
image: kymeric/cp-kafka:latest
mysql:
image: mariadb:10.5.8
schema-registry:
image: eugenetea/schema-registry-arm64:latest
zookeeper:
image: kymeric/cp-zookeeper:latest

View File

@@ -6,7 +6,7 @@ services:
hostname: mysql
image: mysql:5.7
env_file: mysql/env/docker.env
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:

View File

@@ -7,7 +7,7 @@
version: '3.8'
services:
zookeeper:
image: confluentinc/cp-zookeeper:5.4.0
image: confluentinc/cp-zookeeper:7.2.2
env_file: zookeeper/env/docker.env
hostname: zookeeper
container_name: zookeeper
@@ -17,7 +17,7 @@ services:
- zkdata:/var/lib/zookeeper
broker:
image: confluentinc/cp-kafka:5.4.0
image: confluentinc/cp-kafka:7.2.2
env_file: broker/env/docker.env
hostname: broker
container_name: broker
@@ -26,25 +26,12 @@ services:
ports:
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
context: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry
schema-registry:
image: confluentinc/cp-schema-registry:5.4.0
image: confluentinc/cp-schema-registry:7.2.2
env_file: schema-registry/env/docker.env
hostname: schema-registry
container_name: schema-registry
depends_on:
- zookeeper
- broker
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081

View File

@@ -25,5 +25,8 @@ services:
env_file: datahub-mce-consumer/env/docker.env
hostname: datahub-mce-consumer
container_name: datahub-mce-consumer
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
ports:
- "9090:9090"

View File

@@ -27,5 +27,15 @@ services:
env_file: datahub-mce-consumer/env/docker.env
hostname: datahub-mce-consumer
container_name: datahub-mce-consumer
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- GRAPH_SERVICE_IMPL=neo4j
ports:
- "9090:9090"
depends_on:
- neo4j

View File

@@ -24,17 +24,6 @@ services:
- ./elasticsearch-setup/create-indices.sh:/create-indices.sh
- ../metadata-service/restli-servlet-impl/src/main/resources/index/:/index
kafka-setup:
image: linkedin/datahub-kafka-setup:debug
build:
context: ../
dockerfile: ./docker/kafka-setup/Dockerfile
args:
APP_ENV: dev
depends_on:
- broker
- schema-registry
datahub-gms:
image: linkedin/datahub-gms:debug
build:

View File

@@ -0,0 +1,15 @@
# Service definitions for Kafka Setup container.
version: '3.8'
services:
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
context: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry

View File

@@ -1,11 +1,5 @@
services:
broker:
image: kymeric/cp-kafka:latest
mysql:
image: mariadb:10.5.8
schema-registry:
image: eugenetea/schema-registry-arm64:latest
zookeeper:
image: kymeric/cp-zookeeper:latest
neo4j:
image: neo4j/neo4j-arm64-experimental:4.0.6-arm64

View File

@@ -7,7 +7,7 @@ services:
hostname: mysql
image: mysql:5.7
env_file: mysql/env/docker.env
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:

View File

@@ -3,7 +3,7 @@
version: '3.8'
services:
kafka-rest-proxy:
image: confluentinc/cp-kafka-rest:5.4.0
image: confluentinc/cp-kafka-rest:7.2.2
env_file: kafka-rest-proxy/env/docker.env
hostname: kafka-rest-proxy
container_name: kafka-rest-proxy

View File

@@ -7,7 +7,7 @@
version: '3.8'
services:
zookeeper:
image: confluentinc/cp-zookeeper:5.4.0
image: confluentinc/cp-zookeeper:7.2.2
env_file: zookeeper/env/docker.env
hostname: zookeeper
container_name: zookeeper
@@ -17,7 +17,7 @@ services:
- zkdata:/var/lib/zookeeper
broker:
image: confluentinc/cp-kafka:5.4.0
image: confluentinc/cp-kafka:7.2.2
env_file: broker/env/docker.env
hostname: broker
container_name: broker
@@ -28,26 +28,12 @@ services:
volumes:
- broker:/var/lib/kafka/data/
# This "container" is a workaround to pre-create topics
kafka-setup:
build:
dockerfile: ./docker/kafka-setup/Dockerfile
context: ../
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
env_file: kafka-setup/env/docker.env
hostname: kafka-setup
container_name: kafka-setup
depends_on:
- broker
- schema-registry
schema-registry:
image: confluentinc/cp-schema-registry:5.4.0
image: confluentinc/cp-schema-registry:7.2.2
env_file: schema-registry/env/docker.env
hostname: schema-registry
container_name: schema-registry
depends_on:
- zookeeper
- broker
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081

View File

@@ -0,0 +1,204 @@
networks:
default:
name: datahub_network
services:
broker:
container_name: broker
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
hostname: broker
image: confluentinc/cp-kafka:7.2.2
ports:
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
volumes:
- broker:/var/lib/kafka/data/
datahub-actions:
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
restart: on-failure:5
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-gms:
container_name: datahub-gms
depends_on:
- neo4j
- mysql
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
hostname: datahub-gms
image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
volumes:
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mariadb:10.5.8
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head}
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j/neo4j-arm64-experimental:4.0.6-arm64
ports:
- ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
- ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
volumes:
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
hostname: schema-registry
image: confluentinc/cp-schema-registry:7.2.2
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:7.2.2
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- zkdata:/var/lib/zookeeper
version: '2.3'
volumes:
broker: null
esdata: null
mysqldata: null
neo4jdata: null
zkdata: null

View File

@@ -21,28 +21,18 @@ services:
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
datahub-actions:
depends_on:
- datahub-gms
- datahub-gms
environment:
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
- DATAHUB_ACTIONS_SLACK_ENABLED
- DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_SLACK_BOT_TOKEN
- DATAHUB_ACTIONS_SLACK_SIGNING_SECRET
- DATAHUB_ACTIONS_SLACK_CHANNEL
- DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_ACTIONS_TEAMS_ENABLED
- DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL
- DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
restart: on-failure:5
@@ -68,31 +58,33 @@ services:
ports:
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
environment:
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=elasticsearch
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- EBEAN_DATASOURCE_USERNAME=datahub
- MCE_CONSUMER_ENABLED=true
- EBEAN_DATASOURCE_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- UI_INGESTION_ENABLED=true
- ENTITY_SERVICE_ENABLE_RETENTION=true
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- GRAPH_SERVICE_IMPL=elasticsearch
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- ELASTICSEARCH_PORT=9200
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- EBEAN_DATASOURCE_HOST=mysql:3306
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- PE_CONSUMER_ENABLED=true
hostname: datahub-gms
@@ -131,16 +123,6 @@ services:
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
container_name: mysql
@@ -151,7 +133,6 @@ services:
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mariadb:10.5.8
# image: mysql:8
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:
@@ -172,13 +153,13 @@ services:
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=PLAINTEXT://broker:29092
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
hostname: schema-registry
image: confluentinc/cp-schema-registry:7.2.0
image: confluentinc/cp-schema-registry:7.2.2
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
zookeeper:

View File

@@ -5,195 +5,172 @@ services:
broker:
container_name: broker
depends_on:
- zookeeper
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
hostname: broker
image: confluentinc/cp-kafka:5.4.0
image: confluentinc/cp-kafka:7.2.2
ports:
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
datahub-actions:
depends_on:
- datahub-gms
- datahub-gms
environment:
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
- DATAHUB_ACTIONS_SLACK_ENABLED
- DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_SLACK_BOT_TOKEN
- DATAHUB_ACTIONS_SLACK_SIGNING_SECRET
- DATAHUB_ACTIONS_SLACK_CHANNEL
- DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_ACTIONS_TEAMS_ENABLED
- DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL
- DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
restart: on-failure:5
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
- mysql
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- UI_INGESTION_ENABLED=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- GRAPH_SERVICE_IMPL=elasticsearch
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- MCE_CONSUMER_ENABLED=true
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- JAVA_OPTS=-Xms1g -Xmx1g
- EBEAN_DATASOURCE_HOST=mysql:3306
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- ENTITY_SERVICE_ENABLE_RETENTION=true
- EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_HOST=elasticsearch
- PE_CONSUMER_ENABLED=true
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
hostname: datahub-gms
image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
volumes:
- esdata:/usr/share/elasticsearch/data
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head}
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
image: confluentinc/cp-schema-registry:7.2.2
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
image: confluentinc/cp-zookeeper:7.2.2
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- zkdata:/var/lib/zookeeper
version: '2.3'
- zkdata:/var/lib/zookeeper
version: "2.3"
volumes:
esdata: null
mysqldata: null

View File

@@ -24,16 +24,16 @@ services:
datahub-mce-consumer:
container_name: datahub-mce-consumer
environment:
- MCE_CONSUMER_ENABLED=true
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- MCE_CONSUMER_ENABLED=true
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -29,27 +29,29 @@ services:
- 9091:9091
datahub-mce-consumer:
container_name: datahub-mce-consumer
depends_on:
- neo4j
environment:
- MCE_CONSUMER_ENABLED=true
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- GRAPH_SERVICE_IMPL=neo4j
- MCE_CONSUMER_ENABLED=true
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=neo4j
- JAVA_OPTS=-Xms1g -Xmx1g
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing

View File

@ -0,0 +1,12 @@
services:
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
version: '2.3'

View File

@ -1,47 +1,47 @@
services:
datahub-frontend-react:
environment:
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-gms
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-gms
ports:
- '4318'
- "4318"
datahub-gms:
environment:
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-gms
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-gms
ports:
- '4318'
- "4318"
grafana:
depends_on:
- prometheus
- prometheus
image: grafana/grafana:9.1.4
ports:
- 3001:3000
- 3001:3000
volumes:
- grafana-storage:/var/lib/grafana
- ../monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
- ../monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
- grafana-storage:/var/lib/grafana
- ../monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
- ../monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
jaeger-all-in-one:
image: jaegertracing/all-in-one:latest
ports:
- 16686:16686
- '14268'
- '14250'
- 16686:16686
- "14268"
- "14250"
prometheus:
container_name: prometheus
image: prom/prometheus:latest
ports:
- 9089:9090
- 9089:9090
volumes:
- ../monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml
version: '2.3'
- ../monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml
version: "2.3"
volumes:
grafana-storage: null

View File

@ -5,216 +5,194 @@ services:
broker:
container_name: broker
depends_on:
- zookeeper
- zookeeper
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
hostname: broker
image: confluentinc/cp-kafka:5.4.0
image: confluentinc/cp-kafka:7.2.2
ports:
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
volumes:
- broker:/var/lib/kafka/data/
- broker:/var/lib/kafka/data/
datahub-actions:
depends_on:
- datahub-gms
- datahub-gms
environment:
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
- DATAHUB_ACTIONS_SLACK_ENABLED
- DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_SLACK_BOT_TOKEN
- DATAHUB_ACTIONS_SLACK_SIGNING_SECRET
- DATAHUB_ACTIONS_SLACK_CHANNEL
- DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_ACTIONS_TEAMS_ENABLED
- DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL
- DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL
- DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
restart: on-failure:5
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
- datahub-gms
- datahub-gms
environment:
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
-Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=YouKnowNothing
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=elasticsearch
- ELASTIC_CLIENT_PORT=9200
hostname: datahub-frontend-react
image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-gms:
container_name: datahub-gms
depends_on:
- mysql
- mysql
- neo4j
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_HOST=mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- JAVA_OPTS=-Xms1g -Xmx1g
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
hostname: datahub-gms
image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
ports:
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
volumes:
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
elasticsearch:
container_name: elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
test:
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
|| exit 1
- CMD-SHELL
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1
hostname: elasticsearch
image: elasticsearch:7.9.3
mem_limit: 1g
ports:
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
volumes:
- esdata:/usr/share/elasticsearch/data
- esdata:/usr/share/elasticsearch/data
elasticsearch-setup:
container_name: elasticsearch-setup
depends_on:
- elasticsearch
- elasticsearch
environment:
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
hostname: elasticsearch-setup
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
kafka-setup:
container_name: kafka-setup
depends_on:
- broker
- schema-registry
environment:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- KAFKA_BOOTSTRAP_SERVER=broker:29092
hostname: kafka-setup
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
mysql:
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
container_name: mysql
environment:
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
- MYSQL_DATABASE=datahub
- MYSQL_USER=datahub
- MYSQL_PASSWORD=datahub
- MYSQL_ROOT_PASSWORD=datahub
hostname: mysql
image: mysql:5.7
ports:
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
volumes:
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
- mysqldata:/var/lib/mysql
mysql-setup:
container_name: mysql-setup
depends_on:
- mysql
- mysql
environment:
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
- MYSQL_HOST=mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=datahub
- MYSQL_PASSWORD=datahub
- DATAHUB_DB_NAME=datahub
hostname: mysql-setup
image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head}
neo4j:
container_name: neo4j
environment:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
hostname: neo4j
image: neo4j:4.4.9-community
ports:
- ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
- ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
- ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
- ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
volumes:
- neo4jdata:/data
- neo4jdata:/data
schema-registry:
container_name: schema-registry
depends_on:
- zookeeper
- broker
- broker
environment:
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
hostname: schema-registry
image: confluentinc/cp-schema-registry:5.4.0
image: confluentinc/cp-schema-registry:7.2.2
ports:
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
zookeeper:
container_name: zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
hostname: zookeeper
image: confluentinc/cp-zookeeper:5.4.0
image: confluentinc/cp-zookeeper:7.2.2
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- zkdata:/var/lib/zookeeper
version: '2.3'
- zkdata:/var/lib/zookeeper
version: "2.3"
volumes:
broker: null
esdata: null

View File

@ -14,13 +14,16 @@ source venv/bin/activate
pip install -r requirements.txt
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml ../docker-compose.m1.yml temp-m1.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml ../docker-compose-without-neo4j.m1.yml temp-without-neo4j-m1.quickstart.yml
python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.consumers.yml temp.consumers.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml temp.consumers-without-neo4j.quickstart.yml
for flavour in "${FLAVOURS[@]}"
do
if cmp docker-compose$flavour.quickstart.yml temp$flavour.quickstart.yml; then
# Compare the committed and freshly generated files after normalising style
# and mapping-key order. yq must print to stdout here: with -i/--inplace it
# rewrites the file and emits nothing, so cmp would compare two empty streams
# and always report the files as identical.
if cmp <(yq -P 'sort_keys(..)' "docker-compose$flavour.quickstart.yml") <(yq -P 'sort_keys(..)' "temp$flavour.quickstart.yml"); then
echo "docker-compose$flavour.quickstart.yml is up to date."
else
echo "docker-compose$flavour.quickstart.yml is out of date."

View File

@ -1,5 +1,4 @@
import os
from collections import OrderedDict
from collections.abc import Mapping
import click
@ -27,10 +26,14 @@ def dict_merge(dct, merge_dct):
for k, v in merge_dct.items():
if k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], Mapping):
dict_merge(dct[k], merge_dct[k])
elif k in dct and isinstance(dct[k], list):
a = set(dct[k])
b = set(merge_dct[k])
if a != b:
dct[k] = list(a.union(b))
else:
dct[k] = merge_dct[k]
def modify_docker_config(base_path, docker_yaml_config):
# 0. Filter out services to be omitted.
for key in list(docker_yaml_config["services"]):
@ -80,7 +83,7 @@ def modify_docker_config(base_path, docker_yaml_config):
elif volumes[i].startswith("./"):
volumes[i] = "." + volumes[i]
# 9. Set docker compose version to 2.
# 10. Set docker compose version to 2.
# We need at least this version, since we use features like start_period for
# healthchecks and shell-like variable interpolation.
docker_yaml_config["version"] = "2.3"
@ -113,6 +116,9 @@ def generate(compose_files, output_file) -> None:
for modified_file in modified_files:
dict_merge(merged_docker_config, modified_file)
# Dedup env vars, last wins
dedup_env_vars(merged_docker_config)
# Write output file
output_dir = os.path.dirname(output_file)
if len(output_dir) and not os.path.exists(output_dir):
@ -127,5 +133,25 @@ def generate(compose_files, output_file) -> None:
print(f"Successfully generated {output_file}.")
def dedup_env_vars(merged_docker_config):
    """Remove duplicate environment variables from every service, in place.

    Only list-form ``environment`` sections are processed. Each entry is
    either ``NAME=value`` or a bare ``NAME`` (no ``=``). Entries are keyed by
    the text before the FIRST ``=`` so that values which themselves contain
    ``=`` (JVM ``-Dkey=value`` flags, JDBC URLs with query strings) are still
    recognised as the same variable. The first entry seen for a name is kept,
    later entries for that name are dropped, and the relative order of the
    surviving entries is preserved.

    NOTE(review): the call site describes this step as "last wins", but the
    first occurrence is what has always been kept here; that behaviour is
    retained. Confirm which precedence is intended.

    Args:
        merged_docker_config: Parsed compose document containing a
            ``services`` mapping. It is modified in place.

    Returns:
        None.
    """
    for service_config in merged_docker_config['services'].values():
        # A service may be declared with an empty body.
        if not service_config:
            continue

        environment = service_config.get('environment')
        # Mapping-form environments cannot hold duplicate names, and a
        # missing/empty section has nothing to deduplicate.
        if not isinstance(environment, list):
            continue

        seen_names = set()
        deduplicated = []
        for item in environment:
            # partition() splits on the first '='; a bare NAME entry yields
            # the whole string as its name and is kept rather than dropped.
            name = item.partition('=')[0]
            if name in seen_names:
                continue
            seen_names.add(name)
            deduplicated.append(item)

        service_config['environment'] = deduplicated
if __name__ == "__main__":
generate()

View File

@ -11,6 +11,9 @@ source venv/bin/activate
pip install -r requirements.txt
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml ../docker-compose.m1.yml docker-compose-m1.quickstart.yml
python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml ../docker-compose-without-neo4j.m1.yml docker-compose-without-neo4j-m1.quickstart.yml
python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.consumers.yml docker-compose.consumers.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml docker-compose.consumers-without-neo4j.quickstart.yml
python generate_docker_quickstart.py ../docker-compose.kafka-setup.yml docker-compose.kafka-setup.quickstart.yml

View File

@ -1,9 +1,11 @@
SCHEMA_REGISTRY_HOST_NAME=schemaregistry
SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
# Uncomment to customize the Schema Registry kafka store connection
# SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
# SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
# Uncomment to customize the Schema Registry kafka store connection
# ZOOKEEPER_SASL_ENABLED=false
# KAFKA_OPTS=-Xms1g -Xmx1g
# SCHEMA_REGISTRY_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false
# Uncomment to use schema registry < v5.4.0
# SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181

View File

@ -9,7 +9,6 @@ REQUIRED_CONTAINERS = [
"elasticsearch",
"datahub-gms",
"datahub-frontend-react",
"kafka-setup",
"schema-registry",
"broker",
"zookeeper",

View File

@ -43,7 +43,10 @@ NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
ELASTIC_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
)
M1_QUICKSTART_COMPOSE_FILE = (
NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-m1.quickstart.yml"
)
ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
)
CONSUMERS_QUICKSTART_COMPOSE_FILE = (
@ -52,14 +55,21 @@ CONSUMERS_QUICKSTART_COMPOSE_FILE = (
ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
)
KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
"docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
)
NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
f"{DOCKER_COMPOSE_BASE}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
)
ELASTIC_QUICKSTART_COMPOSE_URL = (
f"{DOCKER_COMPOSE_BASE}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
)
M1_QUICKSTART_COMPOSE_URL = f"{DOCKER_COMPOSE_BASE}/{M1_QUICKSTART_COMPOSE_FILE}"
NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_URL = (
f"{DOCKER_COMPOSE_BASE}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
)
ELASTIC_M1_QUICKSTART_COMPOSE_URL = (
f"{DOCKER_COMPOSE_BASE}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
)
class Architectures(Enum):
@ -166,7 +176,7 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) ->
click.echo(
"No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
"To use neo4j as a graph backend, run \n"
"`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`"
"`datahub docker quickstart --graph-service-impl neo4j`"
"\nfrom the root of the datahub repo\n"
)
return False
@ -581,6 +591,13 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
default=False,
help="Launches MAE & MCE consumers as stand alone docker containers",
)
@click.option(
"--kafka-setup",
required=False,
is_flag=True,
default=False,
help="Launches Kafka setup job as part of the compose deployment",
)
@click.option(
"--arch",
required=False,
@ -608,6 +625,7 @@ def quickstart(
restore_indices: bool,
no_restore_indices: bool,
standalone_consumers: bool,
kafka_setup: bool,
arch: Optional[str],
) -> None:
"""Start an instance of DataHub locally using docker-compose.
@ -650,70 +668,21 @@ def quickstart(
auth_resources_folder = Path(DATAHUB_ROOT_FOLDER) / "plugins/auth/resources"
os.makedirs(auth_resources_folder, exist_ok=True)
default_quickstart_compose_file = _get_default_quickstart_compose_file()
quickstart_compose_file_name = _get_default_quickstart_compose_file()
if stop:
_attempt_stop(quickstart_compose_file)
return
elif not quickstart_compose_file:
# download appropriate quickstart file
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
if should_use_neo4j and is_arch_m1(quickstart_arch):
click.secho(
"Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
fg="red",
)
github_file = (
NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
if should_use_neo4j and not is_arch_m1(quickstart_arch)
else ELASTIC_QUICKSTART_COMPOSE_URL
if not is_arch_m1(quickstart_arch)
else M1_QUICKSTART_COMPOSE_URL
print("compose file name", quickstart_compose_file_name)
download_compose_files(
quickstart_compose_file_name,
quickstart_compose_file,
graph_service_impl,
kafka_setup,
quickstart_arch,
standalone_consumers,
)
# also allow local files
request_session = requests.Session()
request_session.mount("file://", FileAdapter())
with open(
default_quickstart_compose_file, "wb"
) if default_quickstart_compose_file else tempfile.NamedTemporaryFile(
suffix=".yml", delete=False
) as tmp_file:
path = pathlib.Path(tmp_file.name)
quickstart_compose_file.append(path)
click.echo(f"Fetching docker-compose file {github_file} from GitHub")
# Download the quickstart docker-compose file from GitHub.
quickstart_download_response = request_session.get(github_file)
quickstart_download_response.raise_for_status()
tmp_file.write(quickstart_download_response.content)
logger.debug(f"Copied to {path}")
if standalone_consumers:
consumer_github_file = (
f"{DOCKER_COMPOSE_BASE}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
if should_use_neo4j
else f"{DOCKER_COMPOSE_BASE}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
)
default_consumer_compose_file = (
Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
)
with open(
default_consumer_compose_file, "wb"
) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
suffix=".yml", delete=False
) as tmp_file:
path = pathlib.Path(tmp_file.name)
quickstart_compose_file.append(path)
click.echo(
f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
)
# Download the quickstart docker-compose file from GitHub.
quickstart_download_response = request_session.get(consumer_github_file)
quickstart_download_response.raise_for_status()
tmp_file.write(quickstart_download_response.content)
logger.debug(f"Copied to {path}")
# set version
_set_environment_variables(
version=version,
@ -833,6 +802,94 @@ def quickstart(
)
def _fetch_compose_file(
    request_session, github_file, target_file, compose_file_list, description
):
    """Download a single compose file and record where it was stored.

    Args:
        request_session: Session used to fetch ``github_file``.
        github_file: URL of the compose file to download.
        target_file: Destination path. When falsy, a named temporary ``.yml``
            file (not deleted on close) is used instead.
        compose_file_list: List that receives the local path of the file. The
            path is appended before the download is attempted.
        description: Label used in the progress message, e.g.
            ``"consumer docker-compose"``.

    Raises:
        Whatever ``raise_for_status()`` raises when the download fails.
    """
    with open(
        target_file, "wb"
    ) if target_file else tempfile.NamedTemporaryFile(
        suffix=".yml", delete=False
    ) as tmp_file:
        path = pathlib.Path(tmp_file.name)
        compose_file_list.append(path)
        click.echo(f"Fetching {description} file {github_file} from GitHub")
        # Download the compose file from GitHub (or a local file:// URL).
        download_response = request_session.get(github_file)
        download_response.raise_for_status()
        tmp_file.write(download_response.content)
        logger.debug(f"Copied to {path}")


def download_compose_files(
    quickstart_compose_file_name,
    quickstart_compose_file_list,
    graph_service_impl,
    kafka_setup,
    quickstart_arch,
    standalone_consumers,
):
    """Download the compose files needed for the requested quickstart setup.

    The main quickstart file is chosen from the graph backend (neo4j or
    elasticsearch-only) and the target architecture (M1 or not). Optional
    overlays are fetched for stand-alone consumers and for the kafka-setup
    job. Every downloaded file's local path is appended to
    ``quickstart_compose_file_list`` in download order.

    Args:
        quickstart_compose_file_name: Destination of the main compose file;
            when falsy a temporary file is used.
        quickstart_compose_file_list: List extended in place with the local
            paths of all downloaded compose files.
        graph_service_impl: Graph service override passed to
            ``should_use_neo4j_for_graph_service``.
        kafka_setup: When true, also download the kafka-setup overlay.
        quickstart_arch: Architecture checked with ``is_arch_m1``.
        standalone_consumers: When true, also download the consumers overlay.

    Raises:
        Whatever ``raise_for_status()`` raises when a download fails.
    """
    # Pick the main quickstart file for this graph backend and architecture.
    should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
    if should_use_neo4j:
        github_file = (
            NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
            if not is_arch_m1(quickstart_arch)
            else NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_URL
        )
    else:
        github_file = (
            ELASTIC_QUICKSTART_COMPOSE_URL
            if not is_arch_m1(quickstart_arch)
            else ELASTIC_M1_QUICKSTART_COMPOSE_URL
        )

    # also allow local files
    request_session = requests.Session()
    request_session.mount("file://", FileAdapter())

    _fetch_compose_file(
        request_session,
        github_file,
        quickstart_compose_file_name,
        quickstart_compose_file_list,
        "docker-compose",
    )

    if standalone_consumers:
        consumer_github_file = (
            f"{DOCKER_COMPOSE_BASE}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
            if should_use_neo4j
            else f"{DOCKER_COMPOSE_BASE}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
        )
        default_consumer_compose_file = (
            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
        )
        _fetch_compose_file(
            request_session,
            consumer_github_file,
            default_consumer_compose_file,
            quickstart_compose_file_list,
            "consumer docker-compose",
        )

    if kafka_setup:
        kafka_setup_github_file = (
            f"{DOCKER_COMPOSE_BASE}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
        )
        # Must not share a path with the consumers overlay: writing both to
        # the same file would overwrite the consumers definition and list the
        # same path twice when both options are enabled.
        default_kafka_setup_compose_file = (
            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
        )
        _fetch_compose_file(
            request_session,
            kafka_setup_github_file,
            default_kafka_setup_compose_file,
            quickstart_compose_file_list,
            "kafka-setup docker-compose",
        )
def valid_restore_options(
restore: bool, restore_indices: bool, no_restore_indices: bool
) -> bool: