[CHORE] Reduce docker image size by removing pip cache (#12708)

* [CHORE] Reduce docker image size by removing pip cache

* [CHORE] Reduce image size for ingestion/operators as well

* [CHORE] Reduce image size for CI
This commit is contained in:
Matias Puerta 2023-08-02 14:36:27 +02:00 committed by GitHub
parent 191cee2da0
commit e203ece20c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 56 deletions

View File

@ -1,10 +1,12 @@
FROM apache/airflow:2.6.3-python3.9 FROM apache/airflow:2.6.3-python3.9
USER root USER root
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install Dependencies (listed in alphabetical order) # Install Dependencies (listed in alphabetical order)
RUN apt-get update \ ENV DEBIAN_FRONTEND=noninteractive
&& apt-get install -y alien \ RUN apt-get -qq update \
&& apt-get -qq install -y \
alien \
build-essential \ build-essential \
default-libmysqlclient-dev \ default-libmysqlclient-dev \
freetds-bin \ freetds-bin \
@ -31,7 +33,6 @@ RUN apt-get update \
unixodbc \ unixodbc \
unixodbc-dev \ unixodbc-dev \
unzip \ unzip \
vim \
wget --no-install-recommends \ wget --no-install-recommends \
# Accept MSSQL ODBC License # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
@ -39,11 +40,11 @@ RUN apt-get update \
RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \ RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \
then \ then \
wget https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
else \ else \
wget https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
fi fi
ENV LD_LIBRARY_PATH=/instantclient ENV LD_LIBRARY_PATH=/instantclient
@ -55,25 +56,31 @@ ENV LD_LIBRARY_PATH=/instantclient
# https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852 # https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852
# https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest # https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest
RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -t bullseye-backports -y \ && apt-get -qq install -t bullseye-backports -y \
curl \ curl \
libpcre2-8-0 \ libpcre2-8-0 \
postgresql-common \ postgresql-common \
expat \ expat \
bind9 bind9 \
&& rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose # Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:0 ingestion/ingestion_dependency.sh /opt/airflow COPY --chown=airflow:0 --chmod=775 ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data # Required for Ingesting Sample Data
COPY --chown=airflow:0 ingestion/examples/sample_data /home/airflow/ingestion/examples/sample_data COPY --chown=airflow:0 ingestion/examples/sample_data /home/airflow/ingestion/examples/sample_data
# Required for Airflow DAGs of Sample Data # Required for Airflow DAGs of Sample Data
COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow USER airflow
# Argument to provide for Ingestion Dependencies to install. Defaults to all # Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all" ARG INGESTION_DEPENDENCY="all"
# Disable pip cache dir
# https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching
ENV PIP_NO_CACHE_DIR=1
# Make pip silent
ENV PIP_QUIET=1
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install "openmetadata-managed-apis~=1.1.0.4" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt" RUN pip install "openmetadata-managed-apis~=1.1.0.4" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt"
RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=1.1.0.4" RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=1.1.0.4"

View File

@ -1,10 +1,11 @@
FROM apache/airflow:2.6.3-python3.9 FROM apache/airflow:2.6.3-python3.9
USER root USER root
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install Dependencies (listed in alphabetical order) # Install Dependencies (listed in alphabetical order)
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -y alien \ && apt-get -qq install -y \
alien \
build-essential \ build-essential \
default-libmysqlclient-dev \ default-libmysqlclient-dev \
freetds-bin \ freetds-bin \
@ -34,7 +35,7 @@ RUN apt-get update \
vim \ vim \
wget --no-install-recommends \ wget --no-install-recommends \
# Accept MSSQL ODBC License # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \ RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \
@ -55,8 +56,8 @@ ENV LD_LIBRARY_PATH=/instantclient
# https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852 # https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852
# https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest # https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest
RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -t bullseye-backports -y \ && apt-get -qq install -t bullseye-backports -y \
curl \ curl \
libpcre2-8-0 \ libpcre2-8-0 \
postgresql-common \ postgresql-common \
@ -64,24 +65,32 @@ RUN apt-get update \
bind9 bind9
# Required for Starting Ingestion Container in Docker Compose # Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:0 ingestion/ingestion_dependency.sh /opt/airflow # Provide Execute Permissions to shell script
COPY --chown=airflow:0 --chmod=775 ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data # Required for Ingesting Sample Data
COPY --chown=airflow:0 ingestion /home/airflow/ingestion COPY --chown=airflow:0 ingestion /home/airflow/ingestion
COPY --chown=airflow:0 openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis COPY --chown=airflow:0 openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis
# Required for Airflow DAGs of Sample Data # Required for Airflow DAGs of Sample Data
COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow USER airflow
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt" ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt"
# Argument to provide for Ingestion Dependencies to install. Defaults to all
# Disable pip cache dir
# https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching
ENV PIP_NO_CACHE_DIR=1
# Make pip silent
ENV PIP_QUIET=1
RUN pip install --upgrade pip RUN pip install --upgrade pip
WORKDIR /home/airflow/openmetadata-airflow-apis WORKDIR /home/airflow/openmetadata-airflow-apis
RUN pip install "." RUN pip install "."
WORKDIR /home/airflow/ingestion WORKDIR /home/airflow/ingestion
# Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all" ARG INGESTION_DEPENDENCY="all"
RUN pip install ".[${INGESTION_DEPENDENCY}]" RUN pip install ".[${INGESTION_DEPENDENCY}]"
@ -105,6 +114,7 @@ RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2 mysqlclient==2.1.1 RUN pip install psycopg2 mysqlclient==2.1.1
# Make required folders for openmetadata-airflow-apis # Make required folders for openmetadata-airflow-apis
RUN mkdir -p /opt/airflow/dag_generated_configs RUN mkdir -p /opt/airflow/dag_generated_configs
EXPOSE 8080 EXPOSE 8080
# This is required as it's responsible to create airflow.cfg file # This is required as it's responsible to create airflow.cfg file
RUN airflow db init && rm -f /opt/airflow/airflow.db RUN airflow db init && rm -f /opt/airflow/airflow.db

View File

@ -1,11 +1,12 @@
FROM python:3.9-bullseye FROM python:3.9-bullseye
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install Dependencies (listed in alphabetical order) # Install Dependencies (listed in alphabetical order)
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -y alien \ && apt-get -qq install -y \
alien \
build-essential \ build-essential \
default-libmysqlclient-dev \ default-libmysqlclient-dev \
freetds-bin \ freetds-bin \
@ -34,24 +35,24 @@ RUN apt-get update \
unzip \ unzip \
wget --no-install-recommends \ wget --no-install-recommends \
# Accept MSSQL ODBC License # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Add updated postgres/redshift dependencies based on libpq # Add updated postgres/redshift dependencies based on libpq
RUN curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - RUN curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
RUN echo "deb https://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list; \ RUN echo "deb https://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list; \
apt-get update; \ apt-get -qq update; \
apt-get install --no-install-recommends -y libpq-dev postgresql-client postgresql-common postgresql postgresql-contrib; \ apt-get -qq install --no-install-recommends -y libpq-dev postgresql-client postgresql-common postgresql postgresql-contrib; \
apt-get autoremove -yqq --purge; \ apt-get -qq autoremove -yqq --purge; \
apt-get clean && rm -rf /var/lib/apt/lists/* apt-get -qq clean && rm -rf /var/lib/apt/lists/*
RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \ RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \
then \ then \
wget https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
else \ else \
wget https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
fi fi
ENV LD_LIBRARY_PATH=/instantclient ENV LD_LIBRARY_PATH=/instantclient
@ -63,7 +64,7 @@ ENV LD_LIBRARY_PATH=/instantclient
# https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852 # https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852
# https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest # https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest
RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -t bullseye-backports -y \ && apt-get install -t bullseye-backports -y \
curl \ curl \
libpcre2-8-0 \ libpcre2-8-0 \
@ -76,6 +77,12 @@ WORKDIR ingestion/
# Required for Airflow DockerOperator, as we need to run the workflows from a `python main.py` command in the container. # Required for Airflow DockerOperator, as we need to run the workflows from a `python main.py` command in the container.
COPY ingestion/operators/docker/*.py . COPY ingestion/operators/docker/*.py .
# Disable pip cache dir
# https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching
ENV PIP_NO_CACHE_DIR=1
# Make pip silent
ENV PIP_QUIET=1
RUN pip install --upgrade pip RUN pip install --upgrade pip
ARG INGESTION_DEPENDENCY="all" ARG INGESTION_DEPENDENCY="all"

View File

@ -1,11 +1,12 @@
FROM python:3.9-bullseye FROM python:3.9-bullseye
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install Dependencies (listed in alphabetical order) # Install Dependencies (listed in alphabetical order)
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -y alien \ && apt-get -qq install -y \
alien \
build-essential \ build-essential \
default-libmysqlclient-dev \ default-libmysqlclient-dev \
freetds-bin \ freetds-bin \
@ -35,24 +36,24 @@ RUN apt-get update \
vim \ vim \
wget --no-install-recommends \ wget --no-install-recommends \
# Accept MSSQL ODBC License # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Add updated postgres/redshift dependencies based on libpq # Add updated postgres/redshift dependencies based on libpq
RUN curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - RUN curl -sS https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
RUN echo "deb https://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list; \ RUN echo "deb https://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list; \
apt-get update; \ apt-get -qq update; \
apt-get install --no-install-recommends -y libpq-dev postgresql-client postgresql-common postgresql postgresql-contrib; \ apt-get -qq install --no-install-recommends -y libpq-dev postgresql-client postgresql-common postgresql postgresql-contrib; \
apt-get autoremove -yqq --purge; \ apt-get -qq autoremove -yqq --purge; \
apt-get clean && rm -rf /var/lib/apt/lists/* apt-get -qq clean && rm -rf /var/lib/apt/lists/*
RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \ RUN if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; \
then \ then \
wget https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
else \ else \
wget https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \ wget -q https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip -O /oracle-instantclient.zip && \
unzip -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \ unzip -qq -d /instantclient -j /oracle-instantclient.zip && rm -f /oracle-instantclient.zip; \
fi fi
ENV LD_LIBRARY_PATH=/instantclient ENV LD_LIBRARY_PATH=/instantclient
@ -64,8 +65,8 @@ ENV LD_LIBRARY_PATH=/instantclient
# https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852 # https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852
# https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest # https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 we have already installed the latest
RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list
RUN apt-get update \ RUN apt-get -qq update \
&& apt-get install -t bullseye-backports -y \ && apt-get -qq install -t bullseye-backports -y \
curl \ curl \
libpcre2-8-0 \ libpcre2-8-0 \
postgresql-common \ postgresql-common \
@ -77,6 +78,12 @@ WORKDIR ingestion/
# For the dev build, we copy all files # For the dev build, we copy all files
COPY ingestion/ . COPY ingestion/ .
# Disable pip cache dir
# https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching
ENV PIP_NO_CACHE_DIR=1
# Make pip silent
ENV PIP_QUIET=1
RUN pip install --upgrade pip setuptools RUN pip install --upgrade pip setuptools
ARG INGESTION_DEPENDENCY="all" ARG INGESTION_DEPENDENCY="all"