build(docker): refactor docker build scripts (#1687)

* build(docker): refactor docker build scripts

- add "build" option to docker-compose files to simplify rebuilding of images
- create "start.sh" script so it's easier to override "command" in the quickstart's docker-compose file
- use dockerize to wait for requisite services to start up
- add a dedicated Dockerfile for kafka-setup

This fixes https://github.com/linkedin/datahub/issues/1549 & https://github.com/linkedin/datahub/issues/1550
Author: Mars Lan (committed by GitHub)
Date: 2020-06-08 13:37:14 -07:00
Parent: 94ffb300a9
Commit: 4f221f9a12
20 changed files with 103 additions and 91 deletions
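
The pattern applied throughout is a compose service that declares both an `image` tag and a `build` section, so `docker-compose up --build` rebuilds the image from source while a plain `docker-compose up` reuses the tagged image. A minimal sketch of the idea (`example` is a placeholder service name, not a file from this commit):

```yaml
version: '3.5'
services:
  example:
    # Tag to run; taken from the local store (or Docker Hub) when no rebuild is requested.
    image: linkedin/example:${DATAHUB_VERSION:-latest}
    # Consulted only by `docker-compose up --build` (or `docker-compose build`) to rebuild that tag.
    build:
      context: ../../                       # repo root, so the Dockerfile can COPY the source tree
      dockerfile: docker/example/Dockerfile
```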

docker/elasticsearch-setup/Dockerfile

@@ -1,17 +1,12 @@
-FROM openjdk:8
-
-MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-
-RUN apt-get update && apt-get install -y wget && apt-get install -y curl
+# This "container" is a workaround to pre-create search indices
+FROM jwilder/dockerize:0.6.1
+
+RUN apk add --no-cache curl
 
 COPY corpuser-index-config.json dataset-index-config.json /
 
-ENV DOCKERIZE_VERSION v0.6.1
-RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-  && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-  && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
-
-CMD dockerize -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
-  -timeout 120s; \
-  curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json; \
+CMD dockerize \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -timeout 120s \
+  curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json && \
   curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
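
Once this container has run, index creation can be spot-checked with Elasticsearch's standard `_cat` API; a verification step for reference, not part of this commit:

```sh
# Both corpuserinfodocument and datasetdocument should be listed.
curl http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_cat/indices
```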

docker/frontend/Dockerfile

@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-
 RUN apt-get update && apt-get install -y wget \
   && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
   && dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install

docker/frontend/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub Frontend Service](../../datahub-frontend) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-frontend -f docker/frontend/Dockerfile .
+cd docker/frontend && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
-
-## Run container
-```
-cd docker/frontend && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 
 #### External Port

docker/frontend/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/frontend/Dockerfile
     hostname: datahub-frontend
     container_name: datahub-frontend
     ports:

docker/gms/Dockerfile

@@ -7,9 +7,13 @@ RUN cd /datahub-src && ./gradlew :gms:war:build \
 FROM openjdk:8-jre-alpine
 
 ENV DOCKERIZE_VERSION v0.6.1
 RUN apk --no-cache add curl tar \
-  && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner-9.4.20.v20190813.jar \
+  && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \
   && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /gms.war .
+COPY docker/gms/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 8080
+
+CMD /start.sh

docker/gms/README.md

@@ -4,18 +4,14 @@
 Refer to [DataHub GMS Service](../../gms) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
-```
-docker image build -t linkedin/datahub-gms -f docker/gms/Dockerfile .
-```
-This command will build and deploy the image in your local store.
-
-## Run container
+## Build & Run
 ```
-cd docker/gms && docker-compose pull && docker-compose up
+cd docker/gms && docker-compose up --build
 ```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the local docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 
 #### External Port
@@ -66,6 +62,7 @@ The value of `ELASTICSEARCH_HOST` variable should be set to the host name of the
 ```
 environment:
+  - NEO4J_HOST=neo4j:7474
   - NEO4J_URI=bolt://neo4j
   - NEO4J_USERNAME=neo4j
   - NEO4J_PASSWORD=datahub

docker/gms/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-gms:
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/gms/Dockerfile
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -17,10 +20,10 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
      - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
-    command: "sh -c 'java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
 networks:
   default:

docker/gms/start.sh (new file)

@@ -0,0 +1,9 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$EBEAN_DATASOURCE_HOST \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar jetty-runner.jar gms.war
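
Under the quickstart's settings, these waits cover the same endpoints the old inline `command` polled. A sketch of the expansion, assuming `EBEAN_DATASOURCE_HOST=mysql:3306`, `KAFKA_BOOTSTRAP_SERVER=broker:29092`, `ELASTICSEARCH_HOST=elasticsearch`, `ELASTICSEARCH_PORT=9200`, and `NEO4J_HOST=neo4j:7474` as in the compose files:

```sh
#!/bin/sh
# Illustrative expansion only; real values come from each compose file's environment.
dockerize \
  -wait tcp://mysql:3306 \
  -wait tcp://broker:29092 \
  -wait http://elasticsearch:9200 \
  -wait http://neo4j:7474 \
  -timeout 240s \
  java -jar jetty-runner.jar gms.war
```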

docker/kafka/Dockerfile (new file)

@@ -0,0 +1,8 @@
+# This "container" is a workaround to pre-create topics
+FROM confluentinc/cp-kafka:5.4.0
+
+CMD echo Waiting for Kafka to be ready... && \
+  cub kafka-ready -b $KAFKA_BOOTSTRAP_SERVER 1 60 && \
+  kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
+  kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
+  kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent
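
The compose files pass in the broker and ZooKeeper addresses, so the image can also be exercised standalone. A sketch, assuming a broker and ZooKeeper reachable under the same names the compose files use:

```sh
# Build the one-shot topic-creation image from docker/kafka and run it to completion.
docker build -t kafka-setup docker/kafka
docker run --rm \
  -e KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 \
  -e KAFKA_BOOTSTRAP_SERVER=broker:29092 \
  kafka-setup
```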

docker/kafka/README.md

@@ -10,7 +10,7 @@ Below command will start all Kafka related containers.
 cd docker/kafka && docker-compose pull && docker-compose up
 ```
 As part of `docker-compose`, we also initialize a container called `kafka-setup` to create `MetadataAuditEvent` and
-`MetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
+`MetadataChangeEvent` & `FailedMetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
 
 There is also a container which provides visual schema registry interface which you can register/unregister schemas.
 You can connect to `schema-registry-ui` on your web browser to monitor Kafka Schema Registry via below link:

docker/kafka/docker-compose.yml

@@ -30,22 +30,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: .
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-      cub kafka-ready -b broker:29092 1 60 && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   kafka-rest-proxy:
     image: confluentinc/cp-kafka-rest:5.4.0

docker/mae-consumer/Dockerfile

@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build \
   && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar \
@@ -13,7 +11,9 @@ RUN apk --no-cache add curl tar \
   && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /mae-consumer-job.jar /mae-consumer-job.jar
+COPY docker/mae-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9091
-ENTRYPOINT ["java", "-jar", "mae-consumer-job.jar"]
+CMD /start.sh

docker/mae-consumer/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub MAE Consumer Job](../../metadata-jobs/mae-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mae-consumer -f docker/mae-consumer/Dockerfile .
+cd docker/mae-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
-
-## Run container
-```
-cd docker/mae-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the docker image and start a container based on the image.
+
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration

docker/mae-consumer/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mae-consumer:
     image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mae-consumer/Dockerfile
     hostname: datahub-mae-consumer
     container_name: datahub-mae-consumer
     ports:
@@ -12,6 +15,7 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub

docker/mae-consumer/start.sh (new file)

@@ -0,0 +1,8 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar mae-consumer-job.jar

docker/mce-consumer/Dockerfile

@@ -1,16 +1,19 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build \
   && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar \
   && cd .. && rm -rf datahub-src
 
 FROM openjdk:8-jre-alpine
+
+ENV DOCKERIZE_VERSION v0.6.1
+RUN apk --no-cache add curl tar \
+  && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+
 COPY --from=builder /mce-consumer-job.jar /mce-consumer-job.jar
+COPY docker/mce-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9090
-ENTRYPOINT ["java", "-jar", "mce-consumer-job.jar"]
+CMD /start.sh

docker/mce-consumer/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub MCE Consumer Job](../../metadata-jobs/mce-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mce-consumer -f docker/mce-consumer/Dockerfile .
+cd docker/mce-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
-
-## Run container
-```
-cd docker/mce-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the docker image and start a container based on the image.
+
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration

docker/mce-consumer/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mce-consumer/Dockerfile
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:

docker/mce-consumer/start.sh (new file)

@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# -wait tcp://GMS_HOST:$GMS_PORT \
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -timeout 240s \
+  java -jar mce-consumer-job.jar

docker/quickstart/docker-compose.yml

@@ -80,22 +80,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: ../kafka
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-      cub kafka-ready -b broker:29092 1 60 && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-      kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   schema-registry:
     image: confluentinc/cp-schema-registry:5.4.0
@@ -191,18 +185,15 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
-      - elasticsearch
-      - broker
+      - elasticsearch-setup
+      - kafka-setup
       - mysql
-      - schema-registry
       - neo4j
-    command: "sh -c 'dockerize -wait tcp://mysql:3306 -wait tcp://broker:29092 -wait http://elasticsearch:9200 \
-      -timeout 240s \
-      java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
@@ -230,16 +221,16 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
       - kafka-setup
-      - elasticsearch
+      - elasticsearch-setup
       - neo4j
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-      echo kafka-setup done! && \
-      dockerize -wait http://neo4j:7474 -timeout 240s && java -jar mae-consumer-job.jar'"
+      echo kafka-setup done! && /start.sh'"
 
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
@@ -256,7 +247,7 @@ services:
       - kafka-setup
       - datahub-gms
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-      echo kafka-setup done! && java -jar mce-consumer-job.jar'"
+      echo kafka-setup done! && /start.sh'"
 
 networks:
   default:
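
The `while ping -c1 kafka-setup` loops retained above lean on compose's embedded DNS: the one-shot `kafka-setup` container resolves while it is running and stops resolving once it exits, so the loop amounts to waiting for topic creation to finish before invoking the new `start.sh`. Unrolled from its quoting, the idiom is:

```sh
# Block until the kafka-setup container exits and its name stops resolving.
while ping -c1 kafka-setup &>/dev/null; do
  echo waiting for kafka-setup... && sleep 1
done
echo kafka-setup done! && /start.sh
```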