datahub/docker/snippets/ingestion_base.template
2025-04-09 11:53:34 -07:00

47 lines
1.3 KiB
Plaintext

# This is the "base" image workflow.
# While it has a bunch of intermediate stages, it "exports" three
# stages for consumption:
# - base-empty: A basic stage, with basic deps, Python, and a venv.
# - base-slim: Currently the same as base-empty.
# - base-full: Adds a JRE and Oracle client.
# Stage base-empty: starts from Ubuntu 24.04; everything else in this stage
# comes from the two inlined snippets below.
FROM ubuntu:24.04 AS base-empty
# Build argument declared without a default, so the caller has to supply it.
ARG PYTHON_VERSION
# Fail the build early when PYTHON_VERSION is empty or unset (`test -n` exits
# non-zero on an empty string). The trailing `#` text is a shell comment left on
# the command line as a hint for whoever reads the failing step.
RUN test -n "${PYTHON_VERSION}" # PYTHON_VERSION must be set
# NOTE(review): the INLINE-BEGIN/INLINE-END pairs look like placeholders that a
# template preprocessor fills with the named snippet -- keep the marker lines
# exactly as written.
# INLINE-BEGIN @/docker/snippets/ubuntu_mirror_setup
# INLINE-END
# INLINE-BEGIN @/docker/snippets/ubuntu_python_base
# INLINE-END
# Stage full-deps-prebuild: an intermediate stage that installs a C toolchain
# and Maven, then installs python-ldap and pyspark with uv and runs
# pyspark_jars.sh. The base-full stage later copies this stage's venv, so the
# build tools installed here never reach the exported images.
FROM base-empty AS full-deps-prebuild
# Root (uid 0) is needed only for the apt install.
USER 0
# update + install + list cleanup are kept in one layer so the package index is
# never stale and never persisted.
RUN apt-get update && apt-get install --no-install-recommends -y -qq \
        build-essential \
        maven \
    && rm -rf /var/lib/apt/lists/*
USER datahub
# Install both packages in one step so every download/build goes through the uv
# cache mount. A separate, uncached `uv pip install python-ldap==3.4.4` used to
# precede this step; it installed the same pin that is installed here, so it was
# redundant and has been dropped.
# pyspark_jars.sh is bind-mounted from the build context rather than copied, so
# the script itself is not stored in a layer.
# NOTE(review): uid/gid 1000 is assumed to be the `datahub` user -- confirm
# against the ubuntu_python_base snippet.
RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000 \
    --mount=type=bind,source=./docker/datahub-ingestion/pyspark_jars.sh,target=/pyspark_jars.sh \
    uv pip install python-ldap==3.4.4 pyspark~=3.5.0 && \
    /pyspark_jars.sh
# Stage base-slim: currently an alias of base-empty with no extra layers; it
# exists as its own named stage so it can be consumed (and later extended)
# independently of base-empty.
FROM base-empty AS base-slim
# Nothing to do here.
# Stage base-full: base-slim plus whatever the ingestion_full_deps snippet
# installs, with the venv replaced by the one prepared in full-deps-prebuild.
FROM base-slim AS base-full
# The inlined snippet runs as root (uid 0); the marker lines must stay exactly
# as written so the snippet can be substituted between them.
USER 0
# INLINE-BEGIN @/docker/snippets/ingestion_full_deps
# INLINE-END
USER datahub
# Mount the prebuilt venv from full-deps-prebuild at /venv-full for the duration
# of this step only, then swap it in for the existing .venv.
# NOTE(review): `.venv` is relative to the current WORKDIR, presumably $HOME
# (the mount source is $HOME/.venv) -- confirm against ubuntu_python_base.
RUN --mount=type=bind,from=full-deps-prebuild,source=$HOME/.venv,target=/venv-full \
    rm -r .venv \
    && cp -r /venv-full .venv