build(docker): refactor docker build scripts (#1687)

- add "build" option to docker-compose files to simplify rebuilding of images
- create "start.sh" script so it's easier to override "command" in the quickstart's docker-compose file
- use dockerize to wait for requisite services to start up
- add a dedicated Dockerfile for kafka-setup

This fixes https://github.com/linkedin/datahub/issues/1549 & https://github.com/linkedin/datahub/issues/1550

parent 94ffb300a9
commit 4f221f9a12
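In practice the new workflow looks like this; a minimal sketch assuming you are in the repository root, with `docker/gms` as just one example of the service directories changed below:

```sh
# Rebuild the image from local sources, then start the container:
cd docker/gms && docker-compose up --build

# Reuse the previously built image (no rebuild):
docker-compose up

# Or fall back to a published image; DATAHUB_VERSION defaults to "latest"
# via ${DATAHUB_VERSION:-latest} in the compose files:
DATAHUB_VERSION=latest docker-compose pull && docker-compose up
```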
docker/elasticsearch-setup/Dockerfile
@@ -1,17 +1,12 @@
-FROM openjdk:8
+# This "container" is a workaround to pre-create search indices
+FROM jwilder/dockerize:0.6.1
 
-MAINTAINER Kerem Sahin <ksahin@linkedin.com>
-
-RUN apt-get update && apt-get install -y wget && apt-get install -y curl
+RUN apk add --no-cache curl
 
 COPY corpuser-index-config.json dataset-index-config.json /
 
-ENV DOCKERIZE_VERSION v0.6.1
-RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
-
-CMD dockerize -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
-    -timeout 120s; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
+CMD dockerize \
+    -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+    -timeout 120s \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json && \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
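For reference, dockerize's `-wait` flag blocks until each listed endpoint accepts connections and only then runs the trailing command; a sketch with example values, not part of the diff:

```sh
# Wait up to 120s for Elasticsearch to answer HTTP, then run the command.
# dockerize exits non-zero on timeout, so the index-creation curls above
# never run against a cluster that isn't up yet.
dockerize -wait http://elasticsearch:9200 -timeout 120s echo "elasticsearch is up"
```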
docker/frontend/Dockerfile
@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 RUN apt-get update && apt-get install -y wget \
     && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
     && dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install
docker/frontend/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub Frontend Service](../../datahub-frontend) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-frontend -f docker/frontend/Dockerfile .
+cd docker/frontend && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
-
-## Run container
-```
-cd docker/frontend && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 #### External Port
docker/frontend/docker-compose.yml
@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/frontend/Dockerfile
     hostname: datahub-frontend
     container_name: datahub-frontend
     ports:
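The `build:` block gives `docker-compose` everything it needs to rebuild the image itself; a quick sketch of how it is typically exercised (commands only, not part of the diff):

```sh
cd docker/frontend
docker-compose build        # build linkedin/datahub-frontend from ../../ with the named Dockerfile
docker-compose up --build   # same build, then start the container
docker-compose config       # sanity-check the resolved file, including the build context
```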
docker/gms/Dockerfile
@@ -7,9 +7,13 @@ RUN cd /datahub-src && ./gradlew :gms:war:build \
 FROM openjdk:8-jre-alpine
+ENV DOCKERIZE_VERSION v0.6.1
 RUN apk --no-cache add curl tar \
-    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner-9.4.20.v20190813.jar
+    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \
+    && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /gms.war .
+COPY docker/gms/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 8080
 
+CMD /start.sh
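Renaming the downloaded jar to the unversioned `jetty-runner.jar` means the launch command no longer embeds a Jetty version; a minimal illustration:

```sh
# Before: every launcher had to track the exact version string.
java -jar jetty-runner-9.4.20.v20190813.jar gms.war
# After: only the Dockerfile's download URL mentions the version.
java -jar jetty-runner.jar gms.war
```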
docker/gms/README.md
@@ -4,18 +4,14 @@
 Refer to [DataHub GMS Service](../../gms) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
-```
-docker image build -t linkedin/datahub-gms -f docker/gms/Dockerfile .
-```
-This command will build and deploy the image in your local store.
-
-## Run container
+## Build & Run
 ```
-cd docker/gms && docker-compose pull && docker-compose up
+cd docker/gms && docker-compose up --build
 ```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the local docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 #### External Port
@@ -66,6 +62,7 @@ The value of `ELASTICSEARCH_HOST` variable should be set to the host name of the
 
 ```
 environment:
+  - NEO4J_HOST=neo4j:7474
   - NEO4J_URI=bolt://neo4j
   - NEO4J_USERNAME=neo4j
  - NEO4J_PASSWORD=datahub
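These values can be overridden per run without editing the file; a sketch using compose's one-off run syntax (the hostname value is an example, the service name comes from the compose file below):

```sh
# Point GMS at a different Neo4j for a single run:
docker-compose run -e NEO4J_HOST=my-neo4j:7474 datahub-gms
```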
docker/gms/docker-compose.yml
@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-gms:
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/gms/Dockerfile
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -17,10 +20,10 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
-    command: "sh -c 'java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
 networks:
   default:
docker/gms/start.sh (new file)
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$EBEAN_DATASOURCE_HOST \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar jetty-runner.jar gms.war
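The script only needs the endpoint variables set; a local smoke-test sketch using the default values from the quickstart compose file:

```sh
# All four dependencies must answer before jetty-runner starts; note that the
# tcp:// waits need host:port inside the variable itself.
EBEAN_DATASOURCE_HOST=mysql:3306 \
KAFKA_BOOTSTRAP_SERVER=broker:29092 \
ELASTICSEARCH_HOST=elasticsearch ELASTICSEARCH_PORT=9200 \
NEO4J_HOST=neo4j:7474 \
sh /start.sh
```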
docker/kafka/Dockerfile (new file)
@@ -0,0 +1,8 @@
+# This "container" is a workaround to pre-create topics
+FROM confluentinc/cp-kafka:5.4.0
+
+CMD echo Waiting for Kafka to be ready... && \
+    cub kafka-ready -b $KAFKA_BOOTSTRAP_SERVER 1 60 && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent
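Once `kafka-setup` has run and exited, the topics can be verified from any container that has the Kafka CLI; a sketch (the `broker` service name comes from the compose file, the rest is an assumption):

```sh
# List the three pre-created topics via ZooKeeper, mirroring the Dockerfile's flags:
docker-compose exec broker kafka-topics --list --zookeeper zookeeper:2181 \
  | grep -E 'MetadataAuditEvent|MetadataChangeEvent|FailedMetadataChangeEvent'
```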
docker/kafka/README.md
@@ -10,7 +10,7 @@ Below command will start all Kafka related containers.
 cd docker/kafka && docker-compose pull && docker-compose up
 ```
 As part of `docker-compose`, we also initialize a container called `kafka-setup` to create `MetadataAuditEvent` and
-`MetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
+`MetadataChangeEvent` & `FailedMetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
 
 There is also a container which provides visual schema registry interface which you can register/unregister schemas.
 You can connect to `schema-registry-ui` on your web browser to monitor Kafka Schema Registry via below link:
docker/kafka/docker-compose.yml
@@ -30,22 +30,16 @@ services:
 
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: .
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-               cub kafka-ready -b broker:29092 1 60 && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   kafka-rest-proxy:
     image: confluentinc/cp-kafka-rest:5.4.0
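Because the broker and ZooKeeper endpoints now come in through the environment rather than a hardcoded `command`, the same image can be pointed at any cluster; a hand-run sketch (the network name is a guess at compose's conventional default, everything else is from the file):

```sh
# Build the setup image once and run it against the compose network:
IMAGE=$(docker build -q docker/kafka)
docker run --rm --network kafka_default \
  -e KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 \
  -e KAFKA_BOOTSTRAP_SERVER=broker:29092 \
  "$IMAGE"
```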
docker/mae-consumer/Dockerfile
@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build \
     && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar \
@@ -13,7 +11,9 @@ RUN apk --no-cache add curl tar \
     && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /mae-consumer-job.jar /mae-consumer-job.jar
+COPY docker/mae-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9091
 
-ENTRYPOINT ["java", "-jar", "mae-consumer-job.jar"]
+CMD /start.sh
docker/mae-consumer/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub MAE Consumer Job](../../metadata-jobs/mae-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mae-consumer -f docker/mae-consumer/Dockerfile .
+cd docker/mae-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.
 
-## Run container
-```
-cd docker/mae-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration
docker/mae-consumer/docker-compose.yml
@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mae-consumer:
     image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mae-consumer/Dockerfile
     hostname: datahub-mae-consumer
     container_name: datahub-mae-consumer
     ports:
@@ -12,6 +15,7 @@ services:
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
docker/mae-consumer/start.sh (new file)
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar mae-consumer-job.jar
docker/mce-consumer/Dockerfile
@@ -1,16 +1,19 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build \
     && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar \
     && cd .. && rm -rf datahub-src
 
 FROM openjdk:8-jre-alpine
+ENV DOCKERIZE_VERSION v0.6.1
+RUN apk --no-cache add curl tar \
+    && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /mce-consumer-job.jar /mce-consumer-job.jar
+COPY docker/mce-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9090
 
-ENTRYPOINT ["java", "-jar", "mce-consumer-job.jar"]
+CMD /start.sh
docker/mce-consumer/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub MCE Consumer Job](../../metadata-jobs/mce-consumer-job) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mce-consumer -f docker/mce-consumer/Dockerfile .
+cd docker/mce-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.
 
-## Run container
-```
-cd docker/mce-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration
docker/mce-consumer/docker-compose.yml
@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mce-consumer/Dockerfile
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:
docker/mce-consumer/start.sh (new file)
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# -wait tcp://GMS_HOST:$GMS_PORT \
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -timeout 240s \
+  java -jar mce-consumer-job.jar
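The commented-out line sketches a future wait on GMS; if enabled it would presumably need a `$` in front of `GMS_HOST` and read roughly as below (an assumption, not part of the commit):

```sh
# Hypothetical: also gate mce-consumer on GMS being reachable.
dockerize \
  -wait tcp://$GMS_HOST:$GMS_PORT \
  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
  -timeout 240s \
  java -jar mce-consumer-job.jar
```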
docker/quickstart/docker-compose.yml
@@ -80,22 +80,16 @@ services:
 
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: ../kafka
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-               cub kafka-ready -b broker:29092 1 60 && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-               kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   schema-registry:
     image: confluentinc/cp-schema-registry:5.4.0
@@ -191,18 +185,15 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
-      - elasticsearch
-      - broker
+      - elasticsearch-setup
+      - kafka-setup
       - mysql
-      - schema-registry
       - neo4j
-    command: "sh -c 'dockerize -wait tcp://mysql:3306 -wait tcp://broker:29092 -wait http://elasticsearch:9200 \
-               -timeout 240s \
-               java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
@@ -230,23 +221,23 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
       - kafka-setup
-      - elasticsearch
+      - elasticsearch-setup
       - neo4j
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-               echo kafka-setup done! && \
-               dockerize -wait http://neo4j:7474 -timeout 240s && java -jar mae-consumer-job.jar'"
+               echo kafka-setup done! && /start.sh'"
 
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:
-     - "9090:9090"
+      - "9090:9090"
     environment:
       - KAFKA_BOOTSTRAP_SERVER=broker:29092
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
@@ -256,7 +247,7 @@ services:
       - kafka-setup
       - datahub-gms
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-               echo kafka-setup done! && java -jar mce-consumer-job.jar'"
+               echo kafka-setup done! && /start.sh'"
 
 networks:
   default:
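The `ping` loop kept in the quickstart file deserves a note: compose's embedded DNS resolves a container's name only while it is running, so once `kafka-setup` finishes and exits, its name stops resolving and the loop falls through to `/start.sh`. A standalone sketch (POSIX redirection spelled out, since `&>` is a bashism):

```sh
# Poll until the kafka-setup container exits (its DNS name disappears), then start.
while ping -c1 kafka-setup >/dev/null 2>&1; do
  echo "waiting for kafka-setup..." && sleep 1
done
echo "kafka-setup done!" && exec /start.sh
```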