OpenMetadata/ingestion/Dockerfile.ci
Imri Paran aade838020
Fixes #15388: Use native backup tools (#15393)
* feat: use native backup tools

1. added mysqldump 8.3 to the ingestion container.
2. documented how to use native tools to back up and restore.
3. added a deprecation message to the CLI backup and restore commands.

* added deprecation notice for 1.3 backup

* removed 1.3.x deprecation notice

* added another backup page in 1.3 introducing SQL dump tools

* added --set-gtid-purged=OFF to the mysql dump process
2024-03-12 06:23:05 +01:00

125 lines
4.4 KiB
Docker

# Helper stage: only used as the source of the mysqldump binary copied further down.
FROM mysql:8.3 AS mysql
# Final image is built on top of Apache Airflow 2.7.3 (Python 3.10).
FROM apache/airflow:2.7.3-python3.10
# Root is required for the apt repository setup and package installation below.
USER root
# Register the Microsoft package signing key and the Debian 11 package feed.
# --fail (-f) makes curl exit non-zero on an HTTP error instead of passing an
# error page to apt-key or writing it into the apt sources list.
RUN curl -fsS https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl -fsS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install OS-level dependencies (package list kept in alphabetical order),
# then the Microsoft ODBC driver; ACCEPT_EULA=Y accepts the MSSQL ODBC license.
# The apt package lists are deleted at the end of the same layer.
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        alien \
        build-essential \
        default-libmysqlclient-dev \
        freetds-bin \
        freetds-dev \
        gcc \
        git \
        gnupg \
        libaio1 \
        libevent-dev \
        libffi-dev \
        libkrb5-dev \
        libpq-dev \
        librdkafka-dev \
        libsasl2-2 \
        libsasl2-dev \
        libsasl2-modules \
        libsasl2-modules-gssapi-mit \
        libssl-dev \
        libxml2 \
        openjdk-11-jre \
        openssl \
        postgresql \
        postgresql-contrib \
        tdsodbc \
        unixodbc \
        unixodbc-dev \
        unzip \
        vim \
        wget \
    && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
    && rm -rf /var/lib/apt/lists/*
# Take the mysqldump binary from the mysql stage.
COPY --from=mysql /usr/bin/mysqldump /usr/bin/mysqldump
# Fetch the Oracle Instant Client archive that matches the build machine's
# architecture (19.10 for arm64/aarch64, 19.17 otherwise), unpack it flat
# into /instantclient and delete the archive in the same layer.
RUN case "$(uname -m)" in \
      arm64|aarch64) \
        instantclient_url="https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip" ;; \
      *) \
        instantclient_url="https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip" ;; \
    esac \
    && wget "${instantclient_url}" -O /oracle-instantclient.zip \
    && unzip -d /instantclient -j /oracle-instantclient.zip \
    && rm -f /oracle-instantclient.zip
# Let the dynamic loader find the unpacked Instant Client libraries.
ENV LD_LIBRARY_PATH=/instantclient
# Security patches for base image
# Keep monitoring; no fixed version is available yet for:
# https://security.snyk.io/vuln/SNYK-DEBIAN11-LIBTASN16-3061097
# https://security.snyk.io/vuln/SNYK-DEBIAN11-MARIADB105-2940589
# https://security.snyk.io/vuln/SNYK-DEBIAN11-BIND9-3027852
# https://security.snyk.io/vuln/SNYK-DEBIAN11-EXPAT-3023031 (the latest version is already installed)
RUN echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list
# Install the patched packages from bullseye-backports. The apt package lists
# are removed in the same layer so they are not shipped in the image.
RUN apt-get -qq update \
    && apt-get -qq install -t bullseye-backports -y \
        bind9 \
        curl \
        expat \
        libpcre2-8-0 \
        postgresql-common \
    && rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose
# The script is copied with owner airflow, group 0 and mode 775 so that it is executable.
COPY --chown=airflow:0 --chmod=775 ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data
# (this directory is also pip-installed later in the build)
COPY --chown=airflow:0 ingestion /home/airflow/ingestion
# Sources of openmetadata-airflow-apis, pip-installed from this path later in the build
COPY --chown=airflow:0 openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis
# Required for Airflow DAGs of Sample Data
COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags
# Everything from here on runs as the airflow user.
USER airflow
# NOTE(review): this build argument is declared but not referenced by any
# instruction visible in this file — confirm whether the pip installs below
# are meant to use it as a constraints file.
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
# pip settings for the rest of the build:
#  - no cache dir (https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching)
#  - quiet output
ENV PIP_NO_CACHE_DIR=1 \
    PIP_QUIET=1
RUN pip install --upgrade pip
# Install the openmetadata-airflow-apis package from its copied sources.
WORKDIR /home/airflow/openmetadata-airflow-apis
RUN pip install .
# Install the ingestion package with the extras selected by INGESTION_DEPENDENCY
# (build argument, defaults to "all").
WORKDIR /home/airflow/ingestion
ARG INGESTION_DEPENDENCY="all"
RUN pip install ".[${INGESTION_DEPENDENCY}]"
# Print the machine architecture the image is being built on.
RUN echo "Image built for $(uname -m)"
# Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593:
# ibm-db-sa is installed on every architecture except aarch64.
RUN case "$(uname -m)" in \
      aarch64) ;; \
      *) pip install "ibm-db-sa~=0.4" ;; \
    esac
# python-daemon is bumped because of https://github.com/apache/airflow/pull/29916
RUN pip install "python-daemon>=3.0.0"
# Uninstall every installed apache-airflow-providers package except those
# whose name matches docker, http or cncf.
RUN pip freeze \
    | grep "apache-airflow-providers" \
    | grep -v -E "docker|http|cncf" \
    | xargs pip uninstall -y
# Replace psycopg2-binary with psycopg2: the binary wheel produces an
# architecture-specific error when authenticating the connection with
# Airflow, which psycopg2 does not. mysqlclient 2.1.1 is installed in
# the same step.
RUN pip uninstall -y psycopg2-binary \
    && pip install psycopg2 mysqlclient==2.1.1
# Folder required by openmetadata-airflow-apis.
RUN mkdir -p /opt/airflow/dag_generated_configs
# Declare port 8080 for the container (presumably the Airflow webserver — confirm).
EXPOSE 8080
# Initialising the Airflow database is what creates the airflow.cfg file;
# the airflow.db file is removed again in the same layer.
RUN airflow db init \
    && rm -f /opt/airflow/airflow.db