| 
									
										
										
										
											2021-03-01 17:36:38 -08:00
										 |  |  | # Defining environment
 | 
					
						
							|  |  |  | ARG APP_ENV=prod | 
					
						
							| 
									
										
										
										
											2023-03-20 18:06:35 -05:00
										 |  |  | ARG DOCKER_VERSION=latest | 
					
						
							| 
									
										
										
										
											2021-03-01 17:36:38 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-20 18:06:35 -05:00
										 |  |  | FROM acryldata/datahub-ingestion-base:$DOCKER_VERSION as base
 | 
					
						
							| 
									
										
										
										
											2021-03-02 14:51:59 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-20 18:06:35 -05:00
										 |  |  | FROM eclipse-temurin:11 as prod-build
 | 
					
						
							| 
									
										
										
										
											2021-03-02 14:51:59 -08:00
										 |  |  | COPY . /datahub-src
 | 
					
						
							| 
									
										
										
										
											2023-03-20 18:06:35 -05:00
										 |  |  | WORKDIR /datahub-src
 | 
					
						
							| 
									
										
										
										
											2022-12-08 22:37:29 -05:00
										 |  |  | # We noticed that the gradle wrapper download failed frequently in CI on arm64 machines.
 | 
					
						
							|  |  |  | # I suspect this was due to the QEMU emulation slowdown, combined with the arm64
 | 
					
						
							|  |  |  | # build being starved for CPU by the x86_64 build's codegen step.
 | 
					
						
							|  |  |  | #
 | 
					
						
							|  |  |  | # The retry loop will attempt to download the gradle wrapper up to 5 times with exponential backoff.
 | 
					
						
							|  |  |  | # The ./gradlew --version will force the download of the gradle wrapper but is otherwise a no-op.
 | 
					
						
							|  |  |  | # Note that the retry logic will always return success, so we should always attempt to run codegen.
 | 
					
						
							|  |  |  | # Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335.
 | 
					
						
							|  |  |  | # and https://unix.stackexchange.com/a/82610/378179.
 | 
					
						
							|  |  |  | # This is a workaround for https://github.com/gradle/gradle/issues/18124.
 | 
					
						
							| 
									
										
										
										
											2023-03-20 18:06:35 -05:00
										 |  |  | RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
 | 
					
						
							| 
									
										
										
										
											2022-12-08 22:37:29 -05:00
										 |  |  |     ./gradlew :metadata-events:mxe-schemas:build
 | 
					
						
							| 
									
										
										
										
											2021-03-02 14:51:59 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | FROM base as prod-codegen
 | 
					
						
							|  |  |  | COPY --from=prod-build /datahub-src /datahub-src
 | 
					
						
							| 
									
										
										
										
											2021-10-08 11:57:00 -07:00
										 |  |  | RUN cd /datahub-src/metadata-ingestion && \
 | 
					
						
							|  |  |  |     pip install -e ".[base]" && \
 | 
					
						
							|  |  |  |     ./scripts/codegen.sh
 | 
					
						
							| 
									
										
										
										
											2021-03-02 14:51:59 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | FROM base as prod-install
 | 
					
						
							|  |  |  | COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion
 | 
					
						
							| 
									
										
										
										
											2022-06-29 15:26:12 +05:30
										 |  |  | COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip
 | 
					
						
							| 
									
										
										
										
											2022-01-10 18:38:19 -05:00
										 |  |  | ARG RELEASE_VERSION
 | 
					
						
							| 
									
										
										
										
											2021-03-02 14:51:59 -08:00
										 |  |  | RUN cd /datahub-ingestion && \
 | 
					
						
							| 
									
										
										
										
											2022-01-10 18:38:19 -05:00
										 |  |  |     sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
 | 
					
						
							|  |  |  |     cat src/datahub/__init__.py && \
 | 
					
						
							| 
									
										
										
										
											2022-07-12 12:37:47 +05:30
										 |  |  |     pip install ".[all]" && \
 | 
					
						
							| 
									
										
										
										
											2023-01-25 04:43:34 +01:00
										 |  |  |     pip freeze && \
 | 
					
						
							|  |  |  |     # Remove the htrace-core4 jar bundled with pyspark to fix a security vulnerability; python3* avoids pinning the interpreter minor version, so the cleanup is not silently skipped if the base image's Python changes.
 | 
					
						
							|  |  |  |     rm -f /usr/local/lib/python3*/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar
 | 
					
						
							| 
									
										
										
										
											2021-03-01 17:36:38 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | FROM base as dev-install
 | 
					
						
							|  |  |  | # Dummy stage for development. Assumes code is built on your machine and mounted to this image.
 | 
					
						
							|  |  |  | # See this excellent thread https://github.com/docker/cli/issues/1134
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | FROM ${APP_ENV}-install as final
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | RUN addgroup --system datahub && adduser --system datahub --ingroup datahub
 | 
					
						
							|  |  |  | USER datahub
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-22 14:15:20 -05:00
										 |  |  | ENTRYPOINT [ "datahub" ]
 |