mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
47 lines
1.3 KiB
Plaintext
47 lines
1.3 KiB
Plaintext
# This is the "base" image workflow.
# While it has a bunch of intermediate stages, it "exports" a couple of
# stages for consumption:
# - base-empty: A basic stage, with basic deps, Python, and a venv.
# - base-slim: Currently the same as base-empty.
# - base-full: Adds a JRE and Oracle client.
|
|
|
|
# Stage: base-empty -- the foundation that the other stages in this file
# derive from.
FROM ubuntu:24.04 AS base-empty

# PYTHON_VERSION is declared without a default, so the guard below fails the
# build when it is empty. The trailing shell comment is part of the command
# text and is kept so the reason sits next to the failing command.
ARG PYTHON_VERSION
RUN [ -n "${PYTHON_VERSION}" ] # PYTHON_VERSION must be set

# The INLINE-BEGIN/INLINE-END marker pairs are kept verbatim and adjacent.
# NOTE(review): presumably a preprocessing step expands the referenced
# snippets between each pair -- confirm before editing the markers.
# INLINE-BEGIN @/docker/snippets/ubuntu_mirror_setup
# INLINE-END

# INLINE-BEGIN @/docker/snippets/ubuntu_python_base
# INLINE-END
|
|
|
|
# Stage: full-deps-prebuild -- installs a compiler toolchain and Maven on top
# of base-empty, then installs python-ldap and pyspark and runs
# pyspark_jars.sh. The base-full stage later copies $HOME/.venv out of it.
FROM base-empty AS full-deps-prebuild

# apt needs root (UID 0). The package lists are deleted in the same RUN so
# they are not stored in this layer.
USER 0
RUN apt-get update \
    && apt-get install -y -qq --no-install-recommends \
        build-essential \
        maven \
    && rm -rf /var/lib/apt/lists/*
USER datahub

# NOTE(review): python-ldap==3.4.4 is pinned again in the next RUN; this
# standalone step looks like it exists only to give that install its own
# layer -- confirm before merging or removing it.
RUN uv pip install python-ldap==3.4.4

# Install the pinned packages with a uv cache mount, then run pyspark_jars.sh,
# which is bind-mounted from the build context for this step only.
# NOTE(review): $HOME inside --mount is expanded by the Dockerfile frontend,
# so this assumes HOME is set via ENV/ARG (presumably in the inlined base
# snippet), and that uid/gid 1000 is the datahub user -- confirm.
RUN --mount=type=bind,source=./docker/datahub-ingestion/pyspark_jars.sh,target=/pyspark_jars.sh \
    --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000 \
    uv pip install python-ldap==3.4.4 pyspark~=3.5.0 \
    && /pyspark_jars.sh
|
|
|
|
# Stage: base-slim -- currently an alias of base-empty; it adds no
# instructions of its own.
FROM base-empty AS base-slim
|
|
|
|
# Stage: base-full -- base-slim plus whatever the ingestion_full_deps snippet
# provides, with the virtualenv replaced by the one from full-deps-prebuild.
FROM base-slim AS base-full

# The snippet position is wrapped in USER 0 / USER datahub, so anything
# inlined there runs as root before the stage drops back to datahub.
USER 0
# INLINE-BEGIN @/docker/snippets/ingestion_full_deps
# INLINE-END
USER datahub

# Replace this stage's .venv with a copy of the venv from full-deps-prebuild.
# That venv is mounted at /venv-full for the duration of this RUN only, so it
# has to be copied into the image rather than referenced.
# NOTE(review): the relative ".venv" resolves against the current WORKDIR,
# which is presumably $HOME (set outside this view) -- confirm.
RUN --mount=from=full-deps-prebuild,source=$HOME/.venv,target=/venv-full \
    rm -r .venv \
    && cp -r /venv-full .venv
|