51 lines
2.2 KiB
Docker

# Build-time switches (declared before the first FROM so they can be used in FROM lines):
#   APP_ENV        - picks which "<APP_ENV>-install" stage the final image is built from.
#   DOCKER_VERSION - tag of the ingestion base image; pass an explicit tag for reproducible builds.
ARG APP_ENV=prod
ARG DOCKER_VERSION=latest

# Shared base image that the codegen and install stages start from.
FROM acryldata/datahub-ingestion-base:${DOCKER_VERSION} AS base
# Build stage: runs the Gradle schema build on a JDK 11 image.
FROM eclipse-temurin:11 AS prod-build
COPY . /datahub-src
WORKDIR /datahub-src

# The Gradle wrapper download failed frequently in CI on arm64 machines.
# The suspected cause is the QEMU emulation slowdown, combined with the arm64
# build being starved for CPU by the x86_64 build's codegen step.
#
# The loop below tries to download the Gradle wrapper up to 5 times, sleeping
# 2<<attempt seconds (4, 8, 16, 32, 64) after each failed attempt.
# `./gradlew --version` forces the wrapper download but is otherwise a no-op.
# The retry loop itself always exits successfully, so the schema build that
# follows it is always attempted.
#
# Workaround for https://github.com/gradle/gradle/issues/18124, inspired by
# https://github.com/gradle/gradle/issues/18124#issuecomment-958182335
# and https://unix.stackexchange.com/a/82610/378179.
RUN ( \
      for attempt in 1 2 3 4 5; do \
        ./gradlew --version && break; \
        echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)); \
      done \
    ) && \
    ./gradlew :metadata-events:mxe-schemas:build
# Codegen stage: takes the source tree produced by prod-build, installs the
# ingestion package in editable mode and runs its codegen script.
FROM base AS prod-codegen
COPY --from=prod-build /datahub-src /datahub-src

# WORKDIR replaces the former `RUN cd ... &&` (hadolint DL3003). This stage is only
# consumed through `COPY --from=prod-codegen`, so its working directory never reaches
# the final image.
WORKDIR /datahub-src/metadata-ingestion

# pip's download cache is kept on purpose (no --no-cache-dir): the prod-install
# stage copies /root/.cache/pip out of this stage.
RUN pip install -e ".[base]" && \
    ./scripts/codegen.sh
# Production install stage: installs the ingestion package (with all extras)
# from the sources generated in prod-codegen.
FROM base AS prod-install
COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion
# Bring over the pip cache populated during prod-codegen.
COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip

# Version stamped into src/datahub/__init__.py. It has no default; when it is not
# supplied (or is empty) the placeholder version in the source file is left as-is
# instead of being overwritten with an empty string.
ARG RELEASE_VERSION

# `cd` is used here rather than WORKDIR because the final image is built FROM this
# stage, and a WORKDIR would change the working directory of the resulting container.
RUN cd /datahub-ingestion && \
    if [ -n "$RELEASE_VERSION" ]; then \
      sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py; \
    fi && \
    cat src/datahub/__init__.py && \
    pip install ".[all]" && \
    pip freeze && \
    # This is required to fix security vulnerability in htrace-core4
    # NOTE(review): the path is tied to python3.10; `rm -f` is silent if the file is absent,
    # so re-check this path whenever the base image's Python version changes.
    rm -f /usr/local/lib/python3.10/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar
# Development placeholder stage: it adds nothing on top of `base`.
# It assumes the code is built on your machine and mounted into this image.
# For background, see this thread: https://github.com/docker/cli/issues/1134
FROM base AS dev-install
# Final image: built from the "<APP_ENV>-install" stage chosen via the APP_ENV build arg.
FROM ${APP_ENV}-install AS final

# Create a dedicated system group and user, then drop root for the runtime.
RUN addgroup --system datahub \
 && adduser --system datahub --ingroup datahub
USER datahub

# Exec-form entrypoint: the container runs the `datahub` command directly.
ENTRYPOINT ["datahub"]