mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
build(docker): metadata-ingestion images build and add slim version (#7412)
This commit is contained in:
parent
0ff7bea043
commit
0a9dc73402
40
.github/workflows/docker-ingestion-base.yml
vendored
40
.github/workflows/docker-ingestion-base.yml
vendored
@ -7,14 +7,14 @@ on:
|
||||
- master
|
||||
paths:
|
||||
- ".github/workflows/docker-ingestion-base.yml"
|
||||
- "docker/datahub-ingestion/**"
|
||||
- "docker/datahub-ingestion-base/**"
|
||||
- "gradle*"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- ".github/workflows/docker-ingestion-base.yml"
|
||||
- "docker/datahub-ingestion/**"
|
||||
- "docker/datahub-ingestion-base/**"
|
||||
- "gradle*"
|
||||
workflow_dispatch:
|
||||
|
||||
@ -40,38 +40,6 @@ jobs:
|
||||
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
|
||||
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
|
||||
publish: ${{ github.ref == 'refs/heads/master' }}
|
||||
context: ./docker/datahub-ingestion
|
||||
file: ./docker/datahub-ingestion/base.Dockerfile
|
||||
context: .
|
||||
file: ./docker/datahub-ingestion-base/Dockerfile
|
||||
platforms: linux/amd64,linux/arm64
|
||||
ingestion-base_scan:
|
||||
permissions:
|
||||
contents: read # for actions/checkout to fetch code
|
||||
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
|
||||
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
|
||||
name: "[Monitoring] Scan ingestion-base images for vulnerabilities"
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-base]
|
||||
steps:
|
||||
- name: Checkout # adding checkout step just to make trivy upload happy
|
||||
uses: actions/checkout@v3
|
||||
- name: Download image
|
||||
uses: ishworkh/docker-image-artifact-download@v1
|
||||
if: ${{ github.ref == 'refs/heads/master' }}
|
||||
with:
|
||||
image: acryldata/datahub-ingestion-base:latest
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@0.8.0
|
||||
env:
|
||||
TRIVY_OFFLINE_SCAN: true
|
||||
with:
|
||||
image-ref: acryldata/datahub-ingestion-base:latest
|
||||
format: "template"
|
||||
template: "@/contrib/sarif.tpl"
|
||||
output: "trivy-results.sarif"
|
||||
severity: "CRITICAL,HIGH"
|
||||
ignore-unfixed: true
|
||||
vuln-type: "os,library"
|
||||
- name: Upload Trivy scan results to GitHub Security tab
|
||||
uses: github/codeql-action/upload-sarif@v2
|
||||
with:
|
||||
sarif_file: "trivy-results.sarif"
|
4
.github/workflows/docker-ingestion-smoke.yml
vendored
4
.github/workflows/docker-ingestion-smoke.yml
vendored
@ -6,7 +6,7 @@ on:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- "docker/datahub-ingestion/**"
|
||||
- "docker/datahub-ingestion-base/**"
|
||||
- "smoke-test/**"
|
||||
- ".github/workflows/docker-ingestion-smoke.yml"
|
||||
workflow_dispatch:
|
||||
@ -64,7 +64,7 @@ jobs:
|
||||
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
|
||||
publish: ${{ needs.setup.outputs.publish == 'true' }}
|
||||
context: .
|
||||
file: ./docker/datahub-ingestion/smoke.Dockerfile
|
||||
file: ./docker/datahub-ingestion-base/smoke.Dockerfile
|
||||
platforms: linux/amd64,linux/arm64
|
||||
build-args: |
|
||||
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
|
||||
|
45
.github/workflows/docker-ingestion.yml
vendored
45
.github/workflows/docker-ingestion.yml
vendored
@ -13,6 +13,7 @@ on:
|
||||
- "metadata-ingestion/**"
|
||||
- "metadata-models/**"
|
||||
- "docker/datahub-ingestion/**"
|
||||
- "docker/datahub-ingestion-slim/**"
|
||||
- ".github/workflows/docker-ingestion.yml"
|
||||
release:
|
||||
types: [published, edited]
|
||||
@ -71,3 +72,47 @@ jobs:
|
||||
platforms: linux/amd64,linux/arm64
|
||||
build-args: |
|
||||
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
|
||||
- name: Build and Push image (slim)
|
||||
uses: ./.github/actions/docker-custom-build-and-push
|
||||
with:
|
||||
images: |
|
||||
linkedin/datahub-ingestion-slim
|
||||
tags: ${{ needs.setup.outputs.tag }}
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
publish: ${{ needs.setup.outputs.publish == 'true' }}
|
||||
context: .
|
||||
file: ./docker/datahub-ingestion-slim/Dockerfile
|
||||
platforms: linux/amd64,linux/arm64
|
||||
ingestion-slim_scan:
|
||||
permissions:
|
||||
contents: read # for actions/checkout to fetch code
|
||||
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
|
||||
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
|
||||
name: "[Monitoring] Scan datahub-ingestion-slim images for vulnerabilities"
|
||||
if: ${{ github.ref == 'refs/heads/master' }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [push_to_registries]
|
||||
steps:
|
||||
- name: Checkout # adding checkout step just to make trivy upload happy
|
||||
uses: actions/checkout@v3
|
||||
- name: Download image
|
||||
uses: ishworkh/docker-image-artifact-download@v1
|
||||
with:
|
||||
image: acryldata/datahub-ingestion-slim:latest
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@0.8.0
|
||||
env:
|
||||
TRIVY_OFFLINE_SCAN: true
|
||||
with:
|
||||
image-ref: acryldata/datahub-ingestion-slim:latest
|
||||
format: "template"
|
||||
template: "@/contrib/sarif.tpl"
|
||||
output: "trivy-results.sarif"
|
||||
severity: "CRITICAL,HIGH"
|
||||
ignore-unfixed: true
|
||||
vuln-type: "os,library"
|
||||
- name: Upload Trivy scan results to GitHub Security tab
|
||||
uses: github/codeql-action/upload-sarif@v2
|
||||
with:
|
||||
sarif_file: "trivy-results.sarif"
|
||||
|
@ -1,19 +1,22 @@
|
||||
FROM python:3.10 as base
|
||||
FROM golang:1-alpine3.17 AS binary
|
||||
|
||||
ENV DOCKERIZE_VERSION v0.6.1
|
||||
WORKDIR /go/src/github.com/jwilder
|
||||
|
||||
RUN apk --no-cache --update add openssl git tar curl
|
||||
|
||||
WORKDIR /go/src/github.com/jwilder/dockerize
|
||||
|
||||
RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION
|
||||
|
||||
FROM python:3.10 as base
|
||||
|
||||
ENV LIBRDKAFKA_VERSION=1.6.2
|
||||
ENV CONFLUENT_KAFKA_VERSION=1.6.1
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
&& if [ $(arch) = "aarch64" ]; then \
|
||||
DOCKERIZE_ARCH='aarch64';\
|
||||
elif [ $(arch) = "x86_64" ]; then \
|
||||
DOCKERIZE_ARCH='amd64'; \
|
||||
else \
|
||||
echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
|
||||
fi \
|
||||
&& apt-get install -y -qq \
|
||||
# gcc \
|
||||
make \
|
||||
@ -29,13 +32,13 @@ RUN apt-get update && apt-get install -y \
|
||||
unzip \
|
||||
ldap-utils \
|
||||
openjdk-11-jre-headless \
|
||||
&& curl -L https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv \
|
||||
&& python -m pip install --upgrade pip wheel setuptools==57.5.0 \
|
||||
&& curl -Lk -o /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz \
|
||||
&& tar -xzf /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz -C /root \
|
||||
&& cd /root/librdkafka-${LIBRDKAFKA_VERSION} \
|
||||
&& ./configure --prefix /usr && make && make install && make clean && ./configure --clean \
|
||||
&& apt-get remove -y make
|
||||
COPY --from=binary /go/bin/dockerize /usr/local/bin
|
||||
|
||||
RUN if [ $(arch) = "x86_64" ]; then \
|
||||
mkdir /opt/oracle && \
|
||||
@ -55,7 +58,7 @@ RUN if [ $(arch) = "x86_64" ]; then \
|
||||
ldconfig; \
|
||||
fi;
|
||||
|
||||
COPY ./base-requirements.txt requirements.txt
|
||||
COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
|
||||
|
||||
RUN pip install -r requirements.txt && \
|
||||
pip uninstall -y acryl-datahub
|
36
docker/datahub-ingestion-base/build.gradle
Normal file
36
docker/datahub-ingestion-base/build.gradle
Normal file
@ -0,0 +1,36 @@
|
||||
plugins {
|
||||
id 'com.palantir.docker'
|
||||
id 'java' // required for versioning
|
||||
}
|
||||
|
||||
apply from: "../../gradle/versioning/versioning.gradle"
|
||||
|
||||
ext {
|
||||
docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
|
||||
docker_repo = 'datahub-ingestion-base'
|
||||
docker_dir = 'datahub-ingestion-base'
|
||||
}
|
||||
|
||||
docker {
|
||||
name "${docker_registry}/${docker_repo}:v${version}"
|
||||
version "v${version}"
|
||||
dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
|
||||
files fileTree(rootProject.projectDir) {
|
||||
include "docker/${docker_dir}/*"
|
||||
}
|
||||
}
|
||||
tasks.getByPath('docker').dependsOn('build')
|
||||
|
||||
task mkdirBuildDocker {
|
||||
doFirst {
|
||||
mkdir "${project.buildDir}/docker"
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(mkdirBuildDocker)
|
||||
|
||||
task cleanLocalDockerImages {
|
||||
doLast {
|
||||
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString())
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(cleanLocalDockerImages)
|
9
docker/datahub-ingestion-slim/Dockerfile
Normal file
9
docker/datahub-ingestion-slim/Dockerfile
Normal file
@ -0,0 +1,9 @@
|
||||
# Defining environment
|
||||
ARG APP_ENV=prod
|
||||
ARG DOCKER_VERSION=latest
|
||||
|
||||
FROM acryldata/datahub-ingestion:$DOCKER_VERSION as base
|
||||
|
||||
USER 0
|
||||
RUN pip uninstall -y pyspark
|
||||
USER datahub
|
39
docker/datahub-ingestion-slim/build.gradle
Normal file
39
docker/datahub-ingestion-slim/build.gradle
Normal file
@ -0,0 +1,39 @@
|
||||
plugins {
|
||||
id 'com.palantir.docker'
|
||||
id 'java' // required for versioning
|
||||
}
|
||||
|
||||
apply from: "../../gradle/versioning/versioning.gradle"
|
||||
|
||||
ext {
|
||||
docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
|
||||
docker_repo = 'datahub-ingestion-slim'
|
||||
docker_dir = 'datahub-ingestion-slim'
|
||||
}
|
||||
|
||||
docker {
|
||||
name "${docker_registry}/${docker_repo}:v${version}"
|
||||
version "v${version}"
|
||||
dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
|
||||
files fileTree(rootProject.projectDir) {
|
||||
include "docker/${docker_dir}/*"
|
||||
}
|
||||
buildArgs([DOCKER_VERSION: version])
|
||||
|
||||
buildx(false)
|
||||
}
|
||||
tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion:docker'])
|
||||
|
||||
task mkdirBuildDocker {
|
||||
doFirst {
|
||||
mkdir "${project.buildDir}/docker"
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(mkdirBuildDocker)
|
||||
|
||||
task cleanLocalDockerImages {
|
||||
doLast {
|
||||
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString())
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(cleanLocalDockerImages)
|
@ -1,10 +1,12 @@
|
||||
# Defining environment
|
||||
ARG APP_ENV=prod
|
||||
ARG DOCKER_VERSION=latest
|
||||
|
||||
FROM acryldata/datahub-ingestion-base as base
|
||||
FROM acryldata/datahub-ingestion-base:$DOCKER_VERSION as base
|
||||
|
||||
FROM openjdk:11 as prod-build
|
||||
FROM eclipse-temurin:11 as prod-build
|
||||
COPY . /datahub-src
|
||||
WORKDIR /datahub-src
|
||||
# We noticed that the gradle wrapper download failed frequently in CI on arm64 machines.
|
||||
# I suspect this was because of the QEMU emulation slowdown, combined with the arm64
|
||||
# build being starved for CPU by the x86_64 build's codegen step.
|
||||
@ -15,8 +17,7 @@ COPY . /datahub-src
|
||||
# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335.
|
||||
# and https://unix.stackexchange.com/a/82610/378179.
|
||||
# This is a workaround for https://github.com/gradle/gradle/issues/18124.
|
||||
RUN cd /datahub-src && \
|
||||
(for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
|
||||
RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
|
||||
./gradlew :metadata-events:mxe-schemas:build
|
||||
|
||||
FROM base as prod-codegen
|
||||
|
45
docker/datahub-ingestion/build.gradle
Normal file
45
docker/datahub-ingestion/build.gradle
Normal file
@ -0,0 +1,45 @@
|
||||
plugins {
|
||||
id 'com.palantir.docker'
|
||||
id 'java' // required for versioning
|
||||
}
|
||||
|
||||
apply from: "../../gradle/versioning/versioning.gradle"
|
||||
|
||||
ext {
|
||||
docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
|
||||
docker_repo = 'datahub-ingestion'
|
||||
docker_dir = 'datahub-ingestion'
|
||||
}
|
||||
|
||||
docker {
|
||||
name "${docker_registry}/${docker_repo}:v${version}"
|
||||
version "v${version}"
|
||||
dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
|
||||
files fileTree(rootProject.projectDir) {
|
||||
include "docker/${docker_dir}/*"
|
||||
include "metadata-ingestion/**"
|
||||
include "metadata-events/**"
|
||||
include "metadata-models/**"
|
||||
include "li-utils/**"
|
||||
include "docs/**"
|
||||
include "gradle/**"
|
||||
include "buildSrc/**"
|
||||
include "*"
|
||||
}
|
||||
buildArgs([DOCKER_VERSION: version])
|
||||
}
|
||||
tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker'])
|
||||
|
||||
task mkdirBuildDocker {
|
||||
doFirst {
|
||||
mkdir "${project.buildDir}/docker"
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(mkdirBuildDocker)
|
||||
|
||||
task cleanLocalDockerImages {
|
||||
doLast {
|
||||
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString())
|
||||
}
|
||||
}
|
||||
dockerClean.finalizedBy(cleanLocalDockerImages)
|
@ -31,6 +31,9 @@ include ':docker:elasticsearch-setup'
|
||||
include ':docker:mysql-setup'
|
||||
include ':docker:postgres-setup'
|
||||
include ':docker:kafka-setup'
|
||||
include ':docker:datahub-ingestion'
|
||||
include ':docker:datahub-ingestion-base'
|
||||
include ':docker:datahub-ingestion-slim'
|
||||
include 'metadata-models'
|
||||
include 'metadata-models-validator'
|
||||
include 'metadata-testing:metadata-models-test-utils'
|
||||
|
Loading…
x
Reference in New Issue
Block a user