2023-08-17 00:10:17 -05:00
|
|
|
# Selects the final image flavor: the last stage of this file is `${APP_ENV}-install`.
ARG APP_ENV=full

# Image (or stage name) that the `full-install` and `slim-install` stages build on.
ARG BASE_IMAGE=base

# Defining custom repo urls for use in enterprise environments. Re-used between stages below.
# An ARG declared before the first FROM must be re-declared inside a stage before it can be used there.
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG UBUNTU_REPO_URL=http://ports.ubuntu.com/ubuntu-ports
ARG PIP_MIRROR_URL=https://pypi.python.org/simple
|
2023-11-28 21:52:11 +01:00
|
|
|
|
2024-08-28 17:18:41 -05:00
|
|
|
# Named stage wrapping the pinned dockerize image.
FROM powerman/dockerize:0.19 AS dockerize-binary

# Common base stage; this is the default value of BASE_IMAGE.
FROM ubuntu:22.04 AS base

# Re-declare the global ARG so it is visible inside this stage.
ARG GITHUB_REPO_URL

# Keep apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive
|
2022-09-07 17:51:06 +01:00
|
|
|
|
2024-02-29 18:05:14 -08:00
|
|
|
# Optionally set corporate mirror for deb
# Each rewrite of /etc/apt/sources.list runs only when the corresponding URL
# differs from the default declared at the top of this file. The sed pattern
# `http.*://` matches both http and https entries.
ARG DEBIAN_REPO_URL
ARG UBUNTU_REPO_URL
# The guard must compare against the declared default (https); comparing against
# the http form made the condition always true for the default value.
RUN if [ "${DEBIAN_REPO_URL}" != "https://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list ; fi
RUN if [ "${UBUNTU_REPO_URL}" != "http://ports.ubuntu.com/ubuntu-ports" ] ; then sed -i "s#http.*://ports.ubuntu.com/ubuntu-ports#${UBUNTU_REPO_URL}#g" /etc/apt/sources.list ; fi
|
2024-02-29 18:05:14 -08:00
|
|
|
|
|
|
|
# Optionally set corporate mirror for pip
ARG PIP_MIRROR_URL
# pip is not available yet at this point of the stage (python3-pip is installed
# by the apt-get step further down), so `pip config set` would fail with
# "pip: not found" whenever a custom mirror is supplied. Write the system-wide
# pip configuration file directly instead; pip reads /etc/pip.conf for every user.
RUN if [ "${PIP_MIRROR_URL}" != "https://pypi.python.org/simple" ] ; then printf '[global]\nindex-url = %s\n' "${PIP_MIRROR_URL}" > /etc/pip.conf ; fi
# uv does not read pip's config file; it takes its index from this variable.
ENV UV_INDEX_URL=${PIP_MIRROR_URL}
|
2023-11-28 21:52:11 +01:00
|
|
|
|
2024-08-28 17:18:41 -05:00
|
|
|
# Install Python, the LDAP/SASL/Kerberos/ODBC client libraries and a few CLI
# tools, then bootstrap pip/uv/wheel/setuptools for the system interpreter.
#
# Cleanup note: RUN executes through /bin/sh, which does not perform brace
# expansion, so a pattern like /var/lib/{apt,dpkg,...}/ is taken literally and
# removes nothing. Only the apt package lists are removed here; /var/lib/dpkg
# has to stay because the full-install stage runs apt-get install again.
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y -qq \
        lsb-release \
        python3 \
        python3-pip \
        python3-venv \
        python-is-python3 \
        python3-ldap \
        libldap2-dev \
        libsasl2-dev \
        libsasl2-modules \
        libaio1 \
        libsasl2-modules-gssapi-mit \
        krb5-user \
        krb5-config \
        libkrb5-dev \
        wget \
        zip \
        unzip \
        ldap-utils \
        unixodbc \
        libodbc2 \
    && python -m pip install --no-cache-dir --upgrade pip 'uv>=0.1.10' wheel setuptools \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
2023-08-17 00:10:17 -05:00
|
|
|
|
2024-08-29 20:34:00 -07:00
|
|
|
# Copy the dockerize binary out of the `dockerize-binary` stage declared at the
# top of this file, so the image name and tag are pinned in exactly one place.
COPY --from=dockerize-binary /usr/local/bin/dockerize /usr/local/bin
|
2022-09-07 17:51:06 +01:00
|
|
|
|
2023-08-17 00:10:17 -05:00
|
|
|
# Ship the container entrypoint script.
COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh

# Create the datahub group and user (uid/gid 1000, home /datahub-ingestion)
# and make the entrypoint script executable.
RUN addgroup --gid 1000 datahub \
    && adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub \
    && chmod +x /entrypoint.sh

# Everything below runs as the unprivileged user.
USER datahub
|
2024-04-23 18:54:49 -05:00
|
|
|
# CA bundle path for `requests`, and the location of the virtualenv.
ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
    VIRTUAL_ENV=/datahub-ingestion/.venv
# Separate instruction: PATH refers to VIRTUAL_ENV, which must already be set.
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"

# Create the virtualenv and refresh its packaging tools with uv.
RUN python3 -m venv $VIRTUAL_ENV \
    && uv pip install --no-cache --upgrade pip setuptools wheel

# Note: Normally uv will create hardlinks from the cache directory to the venv.
# In our docker files, we normally use `RUN --mount=type=cache,... uv pip install ...`,
# which means the cache directory is on a separate filesystem. uv will emit a warning:
#   Failed to hardlink files; falling back to full copy. This may lead to degraded performance.
#   If the cache and target directories are on different filesystems, hardlinking may not be supported.
#   If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.

ENTRYPOINT ["/entrypoint.sh"]
|
|
|
|
|
2024-08-14 14:23:11 +05:30
|
|
|
# "full" flavor: BASE_IMAGE plus a headless JRE (and the Oracle client below).
FROM ${BASE_IMAGE} AS full-install

# Package installation needs root.
USER 0

RUN apt-get update \
    && apt-get install -y -qq default-jre-headless \
    && rm -rf /var/lib/apt/lists/* /var/cache/apk/*
|
|
|
|
|
2022-09-07 17:51:06 +01:00
|
|
|
# Install the Oracle Instant Client "basic" package under /opt/oracle and
# register its directory with the dynamic linker.
# x86_64 gets release 21.15; every other architecture gets the arm64 19.23
# build. The branch only selects what to download; the install steps are
# shared, and the && chain makes any failing step fail the build.
RUN if [ "$(arch)" = "x86_64" ]; then \
        oracle_release=2115000; \
        oracle_zip=instantclient-basic-linux.x64-21.15.0.0.0dbru.zip; \
        oracle_dir=instantclient_21_15; \
    else \
        oracle_release=1923000; \
        oracle_zip=instantclient-basic-linux.arm64-19.23.0.0.0dbru.zip; \
        oracle_dir=instantclient_19_23; \
    fi \
    && mkdir /opt/oracle \
    && cd /opt/oracle \
    && wget --no-verbose -c "https://download.oracle.com/otn_software/linux/instantclient/${oracle_release}/${oracle_zip}" \
    && unzip "${oracle_zip}" \
    && rm "${oracle_zip}" \
    && echo "/opt/oracle/${oracle_dir}" > /etc/ld.so.conf.d/oracle-instantclient.conf \
    && ldconfig

# Drop back to the unprivileged user after the root-only install steps.
USER datahub
|
|
|
|
|
2024-08-14 14:23:11 +05:30
|
|
|
# "slim" flavor: BASE_IMAGE as-is.
FROM ${BASE_IMAGE} AS slim-install
# Do nothing else on top of base

# Final image: whichever *-install stage APP_ENV selects.
FROM ${APP_ENV}-install

# Put the datahub user's ~/.local/bin directory on PATH.
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
|