build(docker): refactor docker build scripts (#1687)

* build(docker): refactor docker build scripts

- add "build" option to docker-compose files to simplify rebuilding of images
- create "start.sh" script so it's easier to override "command" in the quickstart's docker-compose file
- use dockerize to wait for requisite services to start up (see the sketch below)
- add a dedicated Dockerfile for kafka-setup

This fixes https://github.com/linkedin/datahub/issues/1549 & https://github.com/linkedin/datahub/issues/1550
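
All three new start.sh scripts follow the same dockerize wait-then-exec pattern; a minimal sketch of that pattern, assuming dockerize v0.6.1 (the hosts, port, and app.jar here are illustrative, not taken from this commit):

    #!/bin/sh
    # dockerize blocks until every -wait target accepts connections and
    # exits non-zero if -timeout elapses first; the trailing command runs
    # only after all waits succeed.
    dockerize \
      -wait tcp://mysql:3306 \
      -wait http://elasticsearch:9200 \
      -timeout 240s \
      java -jar app.jar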
Mars Lan authored on 2020-06-08 13:37:14 -07:00; committed by GitHub
parent 94ffb300a9
commit 4f221f9a12
20 changed files with 103 additions and 91 deletions

docker/elasticsearch-setup/Dockerfile

@@ -1,17 +1,12 @@
-FROM openjdk:8
+# This "container" is a workaround to pre-create search indices
+FROM jwilder/dockerize:0.6.1
 MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-RUN apt-get update && apt-get install -y wget && apt-get install -y curl
+RUN apk add --no-cache curl
 COPY corpuser-index-config.json dataset-index-config.json /
-ENV DOCKERIZE_VERSION v0.6.1
-RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
-CMD dockerize -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
-    -timeout 120s; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
+CMD dockerize \
+    -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+    -timeout 120s \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json && \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json

docker/frontend/Dockerfile

@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 RUN apt-get update && apt-get install -y wget \
     && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
     && dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install

docker/frontend/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub Frontend Service](../../datahub-frontend) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.

-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-frontend -f docker/frontend/Dockerfile .
+cd docker/frontend && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.

-## Run container
-```
-cd docker/frontend && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using an existing image, run the same command without the `--build` flag.

 ### Container configuration

 #### External Port

docker/frontend/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/frontend/Dockerfile
     hostname: datahub-frontend
     container_name: datahub-frontend
     ports:

docker/gms/Dockerfile

@@ -7,9 +7,13 @@ RUN cd /datahub-src && ./gradlew :gms:war:build \
 FROM openjdk:8-jre-alpine
 ENV DOCKERIZE_VERSION v0.6.1
 RUN apk --no-cache add curl tar \
-    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner-9.4.20.v20190813.jar \
+    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \
     && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 COPY --from=builder /gms.war .
+COPY docker/gms/start.sh /start.sh
+RUN chmod +x /start.sh
 EXPOSE 8080
+CMD /start.sh

docker/gms/README.md

@@ -4,18 +4,14 @@
 Refer to [DataHub GMS Service](../../gms) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.

-## Build
-```
-docker image build -t linkedin/datahub-gms -f docker/gms/Dockerfile .
-```
-This command will build and deploy the image in your local store.
-
-## Run container
+## Build & Run
 ```
-cd docker/gms && docker-compose pull && docker-compose up
+cd docker/gms && docker-compose up --build
 ```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the local docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.

 ### Container configuration

 #### External Port
@@ -66,6 +62,7 @@ The value of `ELASTICSEARCH_HOST` variable should be set to the host name of the
 ```
 environment:
   - NEO4J_HOST=neo4j:7474
+  - NEO4J_URI=bolt://neo4j
   - NEO4J_USERNAME=neo4j
   - NEO4J_PASSWORD=datahub

docker/gms/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-gms:
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/gms/Dockerfile
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -17,10 +20,10 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
      - ELASTICSEARCH_PORT=9200
       - NEO4J_HOST=neo4j:7474
+      - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
-    command: "sh -c 'java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"

 networks:
   default:

docker/gms/start.sh Normal file

@@ -0,0 +1,9 @@
+#!/bin/sh
+dockerize \
+  -wait tcp://$EBEAN_DATASOURCE_HOST \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar jetty-runner.jar gms.war

docker/kafka/Dockerfile Normal file

@@ -0,0 +1,8 @@
+# This "container" is a workaround to pre-create topics
+FROM confluentinc/cp-kafka:5.4.0
+CMD echo Waiting for Kafka to be ready... && \
+    cub kafka-ready -b $KAFKA_BOOTSTRAP_SERVER 1 60 && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent

docker/kafka/README.md

@@ -10,7 +10,7 @@ Below command will start all Kafka related containers.
 cd docker/kafka && docker-compose pull && docker-compose up
 ```
 As part of `docker-compose`, we also initialize a container called `kafka-setup` to create `MetadataAuditEvent` and
-`MetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
+`MetadataChangeEvent` & `FailedMetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
 There is also a container which provides visual schema registry interface which you can register/unregister schemas.
 You can connect to `schema-registry-ui` on your web browser to monitor Kafka Schema Registry via below link:
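
Once `kafka-setup` exits cleanly, the pre-created topics can be spot-checked from the broker container. A hypothetical check, assuming the `broker` and `zookeeper` service names used in these compose files:

    docker exec broker kafka-topics --list --zookeeper zookeeper:2181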

docker/kafka/docker-compose.yml

@@ -30,22 +30,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: .
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-                       cub kafka-ready -b broker:29092 1 60 && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092

   kafka-rest-proxy:
     image: confluentinc/cp-kafka-rest:5.4.0

docker/mae-consumer/Dockerfile

@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build \
     && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar \
@@ -13,7 +11,9 @@ RUN apk --no-cache add curl tar \
     && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 COPY --from=builder /mae-consumer-job.jar /mae-consumer-job.jar
+COPY docker/mae-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 EXPOSE 9091
-ENTRYPOINT ["java", "-jar", "mae-consumer-job.jar"]
+CMD /start.sh

docker/mae-consumer/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub MAE Consumer Job](../../metadata-jobs/mae-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.

-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mae-consumer -f docker/mae-consumer/Dockerfile .
+cd docker/mae-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.

-## Run container
-```
-cd docker/mae-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.

 ### Container configuration

docker/mae-consumer/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mae-consumer:
     image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mae-consumer/Dockerfile
     hostname: datahub-mae-consumer
     container_name: datahub-mae-consumer
     ports:
@@ -12,6 +15,7 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
       - NEO4J_HOST=neo4j:7474
+      - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub

docker/mae-consumer/start.sh Normal file

@@ -0,0 +1,8 @@
+#!/bin/sh
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar mae-consumer-job.jar

docker/mce-consumer/Dockerfile

@@ -1,16 +1,19 @@
 FROM openjdk:8 as builder
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build \
     && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar \
     && cd .. && rm -rf datahub-src

 FROM openjdk:8-jre-alpine
+ENV DOCKERIZE_VERSION v0.6.1
+RUN apk --no-cache add curl tar \
+    && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 COPY --from=builder /mce-consumer-job.jar /mce-consumer-job.jar
+COPY docker/mce-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 EXPOSE 9090
-ENTRYPOINT ["java", "-jar", "mce-consumer-job.jar"]
+CMD /start.sh

docker/mce-consumer/README.md

@@ -4,18 +4,13 @@
 Refer to [DataHub MCE Consumer Job](../../metadata-jobs/mce-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.

-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mce-consumer -f docker/mce-consumer/Dockerfile .
+cd docker/mce-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.

-## Run container
-```
-cd docker/mce-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.

 ### Container configuration

docker/mce-consumer/docker-compose.yml

@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mce-consumer/Dockerfile
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:

docker/mce-consumer/start.sh Normal file

@@ -0,0 +1,7 @@
+#!/bin/sh
+# -wait tcp://GMS_HOST:$GMS_PORT \
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -timeout 240s \
+  java -jar mce-consumer-job.jar

docker/quickstart/docker-compose.yml

@@ -80,22 +80,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: ../kafka
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-                       cub kafka-ready -b broker:29092 1 60 && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092

   schema-registry:
     image: confluentinc/cp-schema-registry:5.4.0
@@ -191,18 +185,15 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
       - NEO4J_HOST=neo4j:7474
+      - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
-      - elasticsearch
-      - broker
+      - elasticsearch-setup
+      - kafka-setup
       - mysql
-      - schema-registry
       - neo4j
-    command: "sh -c 'dockerize -wait tcp://mysql:3306 -wait tcp://broker:29092 -wait http://elasticsearch:9200 \
-              -timeout 240s \
-              java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"

   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
@@ -230,23 +221,23 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
       - NEO4J_HOST=neo4j:7474
+      - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
       - kafka-setup
-      - elasticsearch
+      - elasticsearch-setup
       - neo4j
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-              echo kafka-setup done! && \
-              dockerize -wait http://neo4j:7474 -timeout 240s && java -jar mae-consumer-job.jar'"
+              echo kafka-setup done! && /start.sh'"

   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:
       - "9090:9090"
     environment:
       - KAFKA_BOOTSTRAP_SERVER=broker:29092
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
@@ -256,7 +247,7 @@ services:
       - kafka-setup
       - datahub-gms
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-              echo kafka-setup done! && java -jar mce-consumer-job.jar'"
+              echo kafka-setup done! && /start.sh'"

 networks:
   default:
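
With the build: sections added above, a local rebuild of any image is a single compose invocation from the matching docker/<service> directory, as the updated READMEs describe. For example:

    # Rebuild the GMS image and start a container from it; drop --build
    # to start from a previously built (or pulled) image instead.
    cd docker/gms && docker-compose up --build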