# Multi-stage build for the DataHub ingestion image.
#
# Stages:
#   base               - base image + optional mirrors + source tree with the version stamped in
#   slim-install       - base + a fixed subset of ingestion extras
#   full-install-build - base + maven + all extras (build-only stage)
#   full-install       - base + the virtualenv copied from full-install-build
#   dev-install        - base only; code is expected to be mounted at runtime
#   final              - whichever "<APP_ENV>-install" stage was selected

# Defining environment
# ARGs declared before the first FROM are only visible to FROM lines; any stage
# that needs one of them re-declares it (without a value) to inherit the default.
ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head-full
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG UBUNTU_REPO_URL=http://ports.ubuntu.com/ubuntu-ports
ARG PIP_MIRROR_URL=https://pypi.python.org/simple

FROM $BASE_IMAGE:$DOCKER_VERSION AS base

# Optionally set corporate mirror for deb
# Editing /etc/apt/sources.list is done as root (uid 0); the stage switches back
# to the unprivileged "datahub" user afterwards.
USER 0
ARG DEBIAN_REPO_URL
ARG UBUNTU_REPO_URL
# NOTE(review): the default DEBIAN_REPO_URL is https://... while this guard
# compares against http://..., so with the default value the sed still runs and
# rewrites any deb.debian.org entry to https. Confirm this is intentional.
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list ; fi
RUN if [ "${UBUNTU_REPO_URL}" != "http://ports.ubuntu.com/ubuntu-ports" ] ; then sed -i "s#http.*://ports.ubuntu.com/ubuntu-ports#${UBUNTU_REPO_URL}#g" /etc/apt/sources.list ; fi
USER datahub

# Optionally set corporate mirror for pip
# pip is only reconfigured when a non-default index is requested; UV_INDEX_URL is
# always set to the same URL so that uv resolves against the same index.
ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "https://pypi.python.org/simple" ] ; then pip config set global.index-url "${PIP_MIRROR_URL}" ; fi
ENV UV_INDEX_URL=${PIP_MIRROR_URL}

# Source trees, owned by the datahub user so later installs can write to them.
COPY --chown=datahub ./metadata-ingestion /metadata-ingestion
COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /metadata-ingestion/airflow-plugin

# Stamp the release version into both packages.
# RELEASE_VERSION has no default and must be supplied with --build-arg. Its first
# "-" is replaced by "+" before it is written over the placeholder version
# "1!0.0.0.dev0". The trailing grep prints the resulting line and fails the build
# if no __version__ line is present in the file.
ARG RELEASE_VERSION
WORKDIR /metadata-ingestion
RUN release="$(echo "${RELEASE_VERSION}" | sed 's/-/+/')" && \
    for init_py in src/datahub/__init__.py airflow-plugin/src/datahub_airflow_plugin/__init__.py; do \
        sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"${release}\"/" "${init_py}" && \
        grep __version__ "${init_py}" || exit 1; \
    done

FROM base AS slim-install

# Installs the package in editable mode with a fixed list of extras.
# The uv cache is a BuildKit cache mount; uid/gid 1000 is presumably the datahub
# user in the base image - TODO confirm. UV_LINK_MODE=copy makes uv copy files
# out of the cache instead of linking to them.
RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
    UV_LINK_MODE=copy uv pip install -e ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM base AS full-install-build

# Build-only stage: maven is installed here as root and never reaches the final
# image, because full-install copies only the virtualenv out of this stage.
# Presumably maven is needed by pyspark_jars.sh - verify against that script.
USER 0
RUN apt-get update && apt-get install -y -qq maven

USER datahub
COPY ./docker/datahub-ingestion/pyspark_jars.sh .

# Install every extra plus the airflow plugin, run the jar helper script, then
# smoke-test the CLI; any failing step fails the build.
RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
    UV_LINK_MODE=copy uv pip install -e ".[base,all]" "./airflow-plugin[plugin-v2]" && \
    ./pyspark_jars.sh && \
    datahub --version

FROM base AS full-install

# VIRTUAL_ENV is not defined in this file; it is assumed to be an ENV inherited
# from the base image - TODO confirm.
# NOTE(review): this COPY has no --chown, unlike the source COPYs in the base
# stage, so the copied virtualenv will not be owned by the datahub user.
# Confirm that is intended.
COPY --from=full-install-build ${VIRTUAL_ENV} ${VIRTUAL_ENV}

FROM base AS dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134

# APP_ENV (default "full") selects which "<APP_ENV>-install" stage becomes the
# final image: slim, full or dev.
FROM ${APP_ENV}-install AS final

WORKDIR /datahub-ingestion

USER datahub