Tamas Nemeth f4fb89e799
feat(ingest/spark): Promote beta plugin (#10881)
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2024-07-25 14:46:32 +02:00

42 lines
1.3 KiB
Docker

# Base image: CPython 3.9 on Debian bookworm. The Debian release is pinned
# because the Spark layer in this file relies on apt-key and
# software-properties-common, which newer Debian releases deprecate or drop;
# the floating python:3.9 tag can move to such a release without notice.
FROM python:3.9-bookworm
# Directory inside the image that receives the shared workspace; override with
# --build-arg shared_workspace=<path>.
ARG shared_workspace=/opt/workspace
# Exported as an environment variable so the path is available to later
# instructions and to processes in the running container.
ENV SHARED_WORKSPACE=${shared_workspace}
# -- Layer: Apache Spark
# Installs the Azul Zulu 17 JRE from Azul's apt repository, then downloads the
# requested Spark distribution from the Apache archive and unpacks it to
# /usr/bin/spark-<spark_version>-bin-hadoop<hadoop_version>.
# Both versions can be overridden with --build-arg.
ARG spark_version=3.2.0
ARG hadoop_version=2.7
# Everything runs in a single RUN so the downloaded archives and the apt
# package lists are deleted in the same layer that created them.
#   * curl uses -f so an HTTP error fails the build instead of saving an error
#     page as the artifact, and -L so redirects are followed.
#   * The local zulu-repo .deb is installed with -y so apt can never stop on a
#     confirmation prompt in the non-interactive build.
#   * The Spark tarball is staged in /tmp and extracted straight into /usr/bin;
#     the final rm -rf removes it together with the other temporary files.
#   * Debug aid: once the Zulu repository is registered, `apt-cache search zulu`
#     lists the installable Zulu packages.
# NOTE(review): apt-key is deprecated upstream; consider switching to a
# signed-by keyring when the Zulu repository setup is next revisited.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends curl gnupg software-properties-common && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 0xB1998361219BD9C9 && \
    curl -fsSL https://cdn.azul.com/zulu/bin/zulu-repo_1.0.0-3_all.deb -o /tmp/zulu-repo_1.0.0-3_all.deb && \
    apt-get install -y /tmp/zulu-repo_1.0.0-3_all.deb && \
    apt-get update && \
    apt-get install -y --no-install-recommends zulu17-jre && \
    apt-get clean && \
    curl -fsSL https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o /tmp/spark.tgz && \
    tar -xf /tmp/spark.tgz -C /usr/bin && \
    mkdir -p /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}/logs && \
    rm -rf /var/tmp/* /tmp/* /var/lib/apt/lists/*
# Install JPype1 into the image's Python environment.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): the package is unpinned, so rebuilds may pick up a newer
# release; pin a version once a known-good one is agreed on.
RUN pip install --no-cache-dir JPype1
# Runtime configuration for Spark, written in key=value form (the
# space-separated `ENV key value` form is deprecated).
# SPARK_HOME points at the directory the Spark distribution was unpacked to;
# PYSPARK_PYTHON names the python3.9 interpreter provided by the base image.
ENV SPARK_HOME=/usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version} \
    SPARK_MASTER_HOST=spark-master \
    SPARK_MASTER_PORT=7077 \
    PYSPARK_PYTHON=python3.9
# PATH is extended in its own instruction on purpose: variable substitution in
# an ENV instruction uses the values from before that instruction, so
# $SPARK_HOME would not yet be set if this were merged into the ENV above.
ENV PATH=$PATH:$SPARK_HOME/bin
# Copy the `workspace` directory from the build context into the path held by
# SHARED_WORKSPACE.
COPY workspace ${SHARED_WORKSPACE}
# Use the Spark installation directory as the working directory for any later
# instructions and for the container's default process.
WORKDIR $SPARK_HOME