2022-11-05 19:37:50 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Base image for every later instruction in this file.
								FROM python:3.9

								# Path of the shared workspace. Defaults to /opt/workspace and can be
								# overridden at build time with --build-arg shared_workspace=<path>.
								ARG shared_workspace=/opt/workspace
								# Re-export the build argument as an environment variable so later
								# instructions (and the running container) can read it.
								ENV SHARED_WORKSPACE="${shared_workspace}"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# -- Layer: Apache Spark
								#
								# Installs the zulu17-jre package from Azul's apt repository, then downloads
								# the Spark binary distribution selected by the two build arguments below
								# and moves it to /usr/bin/spark-<spark_version>-bin-hadoop<hadoop_version>,
								# creating a logs/ directory inside it.
								#
								# Every curl call uses -fsSL: -f makes the build fail on an HTTP error
								# instead of saving/piping the error page, -L follows redirects, and -sS
								# hides the progress meter while still printing errors.
								#
								# NOTE(review): Spark's documentation lists Java 17 support starting with
								# 3.3.0 -- confirm that the default spark_version (3.2.0) runs on zulu17-jre.
								# NOTE(review): the Azul repository is registered twice (the signed-by
								# entry written to zulu.list and the zulu-repo .deb); presumably only one
								# is needed -- verify before removing either.
								ARG spark_version=3.2.0
								ARG hadoop_version=2.7

								RUN apt-get update -y && \
								    apt-get install -y --no-install-recommends curl gnupg && \
								    curl -fsSL https://repos.azul.com/azul-repo.key | gpg --dearmor -o /usr/share/keyrings/azul.gpg && \
								    echo "deb [signed-by=/usr/share/keyrings/azul.gpg] https://repos.azul.com/zulu/deb stable main" | tee /etc/apt/sources.list.d/zulu.list && \
								    curl -fsSL https://cdn.azul.com/zulu/bin/zulu-repo_1.0.0-3_all.deb -o /tmp/zulu-repo_1.0.0-3_all.deb && \
								    apt-get install -y /tmp/zulu-repo_1.0.0-3_all.deb && \
								    apt-get update && \
								# Debug aid: uncomment the next line to list the available Zulu packages.
								#    apt-cache search zulu && \
								    apt-get install -y --no-install-recommends zulu17-jre && \
								    apt-get clean && \
								    curl -fsSL https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz && \
								    tar -xf spark.tgz && \
								    mv spark-${spark_version}-bin-hadoop${hadoop_version} /usr/bin/ && \
								    mkdir /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}/logs && \
								    rm spark.tgz && \
								    rm -rf /var/tmp/* /tmp/* /var/lib/apt/lists/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Install the JPype1 Python package with pip.
								# --no-cache-dir keeps pip's download cache out of the image layer.
								RUN set -e; \
								    pip install --no-cache-dir JPype1
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-23 09:03:21 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Spark runtime environment.
								# Written in the `ENV key=value` form: the whitespace-separated
								# `ENV key value` form is legacy syntax that Docker's build checks flag
								# (LegacyKeyValueFormat). The resulting values are unchanged.
								ENV SPARK_HOME=/usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}
								ENV SPARK_MASTER_HOST=spark-master
								ENV SPARK_MASTER_PORT=7077
								ENV PYSPARK_PYTHON=python3.9
								# Kept as its own instruction: $SPARK_HOME is only expanded here because an
								# earlier ENV instruction has already defined it.
								ENV PATH=$PATH:$SPARK_HOME/bin
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Copy the `workspace` path from the build context to the location held in
								# SHARED_WORKSPACE.
								COPY workspace ${SHARED_WORKSPACE}

								# Make the Spark installation the working directory for anything that follows.
								WORKDIR $SPARK_HOME
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 |