2021-03-01 17:36:38 -08:00
|
|
|
# Build-time selectors, declared before the first FROM so they can be used in
# FROM lines:
#   APP_ENV        - prefix of the "<APP_ENV>-install" stage the final image is
#                    built from (defaults to prod).
#   DOCKER_VERSION - tag of the ingestion base image (defaults to latest).
ARG APP_ENV=prod
ARG DOCKER_VERSION=latest

# Common parent of the codegen and install stages below.
FROM acryldata/datahub-ingestion-base:${DOCKER_VERSION} AS base
|
2021-03-02 14:51:59 -08:00
|
|
|
|
2023-03-20 18:06:35 -05:00
|
|
|
# Java build stage: copies the whole build context and builds the
# :metadata-events:mxe-schemas gradle project.
FROM eclipse-temurin:11 AS prod-build
WORKDIR /datahub-src
COPY . .

# The gradle wrapper download failed frequently in CI on arm64 machines.
# The suspected cause is the QEMU emulation slowdown, combined with the arm64
# build being starved for CPU by the x86_64 build's codegen step.
#
# The loop below tries to download the gradle wrapper up to 5 times, sleeping
# for an exponentially growing interval after every failed attempt.
# `./gradlew --version` forces the wrapper download but is otherwise a no-op.
# Note that the retry loop always returns success, so the build step after it
# is always attempted.
# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335
# and https://unix.stackexchange.com/a/82610/378179.
# This is a workaround for https://github.com/gradle/gradle/issues/18124.
RUN (for attempt in 1 2 3 4 5; do \
        ./gradlew --version && break ; \
        echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; \
    done) && \
    ./gradlew :metadata-events:mxe-schemas:build
|
2021-03-02 14:51:59 -08:00
|
|
|
|
|
|
|
# Codegen stage: takes the source tree produced by prod-build, installs the
# metadata-ingestion package in editable mode with its "base" extra, and runs
# ./scripts/codegen.sh inside it.
FROM base AS prod-codegen
COPY --from=prod-build /datahub-src /datahub-src

# WORKDIR replaces the former `RUN cd ... &&` prefix. In this file the stage is
# only consumed through COPY --from, so its working directory does not reach
# the final image.
WORKDIR /datahub-src/metadata-ingestion

# Do not pass --no-cache-dir here: the prod-install stage copies
# /root/.cache/pip out of this stage.
RUN pip install -e ".[base]" && \
    ./scripts/codegen.sh
|
2021-03-02 14:51:59 -08:00
|
|
|
|
|
|
|
# Production install stage: installs the generated metadata-ingestion package
# (with the "all" extra) on top of the base image.
FROM base AS prod-install
COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion
COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip

# Version string stamped into src/datahub/__init__.py. Optional: when it is
# unset or empty the placeholder version in the source is left untouched.
ARG RELEASE_VERSION

# Steps, all in one layer:
#   1. Replace the 0.0.0.dev0 placeholder with RELEASE_VERSION (skipped when
#      RELEASE_VERSION is empty, so an empty version is never written).
#   2. Print __init__.py so the stamped version is visible in the build log.
#   3. Install the package with the "all" extra and print the resolved set.
#   4. Remove the htrace-core4 jar shipped with pyspark; this is required to
#      fix a security vulnerability in htrace-core4. The python3* glob keeps
#      the removal effective if the base image moves off Python 3.10.
#
# `cd` is used instead of WORKDIR on purpose: the final stage is built FROM
# this stage, and WORKDIR would change the working directory of the final
# image.
RUN cd /datahub-ingestion && \
    if [ -n "$RELEASE_VERSION" ]; then \
        sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py ; \
    fi && \
    cat src/datahub/__init__.py && \
    pip install ".[all]" && \
    pip freeze && \
    rm -f /usr/local/lib/python3*/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar
|
2021-03-01 17:36:38 -08:00
|
|
|
|
|
|
|
# Development stage: intentionally adds nothing on top of base. It assumes the
# code is built on your machine and mounted into this image.
# For background, see this excellent thread: https://github.com/docker/cli/issues/1134
FROM base AS dev-install
|
|
|
|
|
|
|
|
# Final image: built from the "<APP_ENV>-install" stage (prod-install unless
# APP_ENV is overridden at build time).
FROM ${APP_ENV}-install AS final

# Create a system group and a system user, both named datahub, then run as
# that user from here on.
RUN addgroup --system datahub \
    && adduser --system datahub --ingroup datahub
USER datahub

# Exec-form entrypoint: the container runs the datahub CLI directly.
ENTRYPOINT ["datahub"]
|