build(docker): metadata-ingestion images build and add slim version (#7412)

This commit is contained in:
david-leifker 2023-03-20 18:06:35 -05:00 committed by GitHub
parent 0ff7bea043
commit 0a9dc73402
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 201 additions and 52 deletions

View File

@ -7,14 +7,14 @@ on:
- master
paths:
- ".github/workflows/docker-ingestion-base.yml"
- "docker/datahub-ingestion/**"
- "docker/datahub-ingestion-base/**"
- "gradle*"
pull_request:
branches:
- master
paths:
- ".github/workflows/docker-ingestion-base.yml"
- "docker/datahub-ingestion/**"
- "docker/datahub-ingestion-base/**"
- "gradle*"
workflow_dispatch:
@ -40,38 +40,6 @@ jobs:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
publish: ${{ github.ref == 'refs/heads/master' }}
context: ./docker/datahub-ingestion
file: ./docker/datahub-ingestion/base.Dockerfile
context: .
file: ./docker/datahub-ingestion-base/Dockerfile
platforms: linux/amd64,linux/arm64
# Job: runs the Trivy scanner against acryldata/datahub-ingestion-base:latest and
# uploads the resulting SARIF file with github/codeql-action/upload-sarif.
# Declared to run after the build-base job.
ingestion-base_scan:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: "[Monitoring] Scan ingestion-base images for vulnerabilities"
runs-on: ubuntu-latest
needs: [build-base]
steps:
- name: Checkout # adding checkout step just to make trivy upload happy
uses: actions/checkout@v3
# NOTE(review): only this step is gated on the master ref; the Trivy and upload
# steps below carry no `if`, so they are not skipped on other refs — confirm
# that this is intended.
- name: Download image
uses: ishworkh/docker-image-artifact-download@v1
if: ${{ github.ref == 'refs/heads/master' }}
with:
image: acryldata/datahub-ingestion-base:latest
# Report only CRITICAL/HIGH findings that have a fix available, for both OS
# packages and libraries, rendered through the SARIF template.
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
env:
TRIVY_OFFLINE_SCAN: true
with:
image-ref: acryldata/datahub-ingestion-base:latest
format: "template"
template: "@/contrib/sarif.tpl"
output: "trivy-results.sarif"
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
# Uploads the file written by the Trivy step above.
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
sarif_file: "trivy-results.sarif"

View File

@ -6,7 +6,7 @@ on:
branches:
- master
paths:
- "docker/datahub-ingestion/**"
- "docker/datahub-ingestion-base/**"
- "smoke-test/**"
- ".github/workflows/docker-ingestion-smoke.yml"
workflow_dispatch:
@ -64,7 +64,7 @@ jobs:
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
publish: ${{ needs.setup.outputs.publish == 'true' }}
context: .
file: ./docker/datahub-ingestion/smoke.Dockerfile
file: ./docker/datahub-ingestion-base/smoke.Dockerfile
platforms: linux/amd64,linux/arm64
build-args: |
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}

View File

@ -13,6 +13,7 @@ on:
- "metadata-ingestion/**"
- "metadata-models/**"
- "docker/datahub-ingestion/**"
- "docker/datahub-ingestion-slim/**"
- ".github/workflows/docker-ingestion.yml"
release:
types: [published, edited]
@ -71,3 +72,47 @@ jobs:
platforms: linux/amd64,linux/arm64
build-args: |
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
# Step: builds linkedin/datahub-ingestion-slim from
# docker/datahub-ingestion-slim/Dockerfile for linux/amd64 and linux/arm64 using
# the repo-local docker-custom-build-and-push action. The `publish` input is true
# only when the setup job's `publish` output equals 'true'.
# NOTE(review): no `build-args` are passed here, so the slim Dockerfile's
# `ARG DOCKER_VERSION=latest` default applies and the base image resolves to
# acryldata/datahub-ingestion:latest — confirm this is the intended base rather
# than the image built earlier in this job.
- name: Build and Push image (slim)
uses: ./.github/actions/docker-custom-build-and-push
with:
images: |
linkedin/datahub-ingestion-slim
tags: ${{ needs.setup.outputs.tag }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
publish: ${{ needs.setup.outputs.publish == 'true' }}
context: .
file: ./docker/datahub-ingestion-slim/Dockerfile
platforms: linux/amd64,linux/arm64
# Job: runs the Trivy scanner against acryldata/datahub-ingestion-slim:latest and
# uploads the resulting SARIF file with github/codeql-action/upload-sarif.
# The whole job is gated on the master ref and declared to run after
# push_to_registries.
# NOTE(review): the build step in this workflow pushes
# `linkedin/datahub-ingestion-slim` tagged with the setup job's `tag` output,
# whereas this job downloads and scans `acryldata/datahub-ingestion-slim:latest`
# — confirm the two names/tags actually refer to the same image.
ingestion-slim_scan:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: "[Monitoring] Scan datahub-ingestion-slim images for vulnerabilities"
if: ${{ github.ref == 'refs/heads/master' }}
runs-on: ubuntu-latest
needs: [push_to_registries]
steps:
- name: Checkout # adding checkout step just to make trivy upload happy
uses: actions/checkout@v3
- name: Download image
uses: ishworkh/docker-image-artifact-download@v1
with:
image: acryldata/datahub-ingestion-slim:latest
# Report only CRITICAL/HIGH findings that have a fix available, for both OS
# packages and libraries, rendered through the SARIF template.
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
env:
TRIVY_OFFLINE_SCAN: true
with:
image-ref: acryldata/datahub-ingestion-slim:latest
format: "template"
template: "@/contrib/sarif.tpl"
output: "trivy-results.sarif"
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
# Uploads the file written by the Trivy step above.
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
sarif_file: "trivy-results.sarif"

View File

@ -1,19 +1,22 @@
FROM python:3.10 as base
FROM golang:1-alpine3.17 AS binary
ENV DOCKERIZE_VERSION v0.6.1
WORKDIR /go/src/github.com/jwilder
RUN apk --no-cache --update add openssl git tar curl
WORKDIR /go/src/github.com/jwilder/dockerize
RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION
FROM python:3.10 as base
ENV LIBRDKAFKA_VERSION=1.6.2
ENV CONFLUENT_KAFKA_VERSION=1.6.1
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y \
&& if [ $(arch) = "aarch64" ]; then \
DOCKERIZE_ARCH='aarch64';\
elif [ $(arch) = "x86_64" ]; then \
DOCKERIZE_ARCH='amd64'; \
else \
echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
fi \
&& apt-get install -y -qq \
# gcc \
make \
@ -29,13 +32,13 @@ RUN apt-get update && apt-get install -y \
unzip \
ldap-utils \
openjdk-11-jre-headless \
&& curl -L https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv \
&& python -m pip install --upgrade pip wheel setuptools==57.5.0 \
&& curl -Lk -o /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz \
&& tar -xzf /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz -C /root \
&& cd /root/librdkafka-${LIBRDKAFKA_VERSION} \
&& ./configure --prefix /usr && make && make install && make clean && ./configure --clean \
&& apt-get remove -y make
COPY --from=binary /go/bin/dockerize /usr/local/bin
RUN if [ $(arch) = "x86_64" ]; then \
mkdir /opt/oracle && \
@ -55,7 +58,7 @@ RUN if [ $(arch) = "x86_64" ]; then \
ldconfig; \
fi;
COPY ./base-requirements.txt requirements.txt
COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
RUN pip install -r requirements.txt && \
pip uninstall -y acryl-datahub

View File

@ -0,0 +1,36 @@
// Gradle build script for the datahub-ingestion-base Docker image.
plugins {
    id('com.palantir.docker')
    id('java') // required for versioning
}

apply(from: "../../gradle/versioning/versioning.gradle")

ext {
    // A root-level registry of 'linkedin' is mapped to 'acryldata'; any other
    // value is kept as-is.
    docker_registry = rootProject.ext.docker_registry != 'linkedin' ? docker_registry : 'acryldata'
    docker_repo = 'datahub-ingestion-base'
    docker_dir = 'datahub-ingestion-base'
}

docker {
    // The image is named <registry>/<repo> and tagged with the "v"-prefixed version.
    name("${docker_registry}/${docker_repo}:v${version}")
    version("v${version}")
    dockerfile(file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile"))
    // Only the files directly inside this image's docker directory are part of
    // the build context.
    files(fileTree(rootProject.projectDir) {
        include("docker/${docker_dir}/*")
    })
}

// The image build runs after the regular 'build' task.
tasks.getByPath('docker').dependsOn 'build'

// Creates build/docker under this project's build directory.
task mkdirBuildDocker {
    doFirst {
        mkdir("${project.buildDir}/docker")
    }
}

// Delegates to the root project's cleanLocalDockerImages helper for this image's tag.
task cleanLocalDockerImages {
    doLast {
        def imageTag = "v${version}".toString()
        rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, imageTag)
    }
}

// Both tasks above are finalizers of dockerClean.
dockerClean.finalizedBy mkdirBuildDocker
dockerClean.finalizedBy cleanLocalDockerImages

View File

@ -0,0 +1,9 @@
# Slim variant of the datahub-ingestion image: starts from that image and
# uninstalls pyspark.
# Defining environment
# NOTE(review): APP_ENV is declared but not referenced in the lines visible here.
ARG APP_ENV=prod
# Tag of the acryldata/datahub-ingestion image used as the base; defaults to `latest`.
ARG DOCKER_VERSION=latest
FROM acryldata/datahub-ingestion:$DOCKER_VERSION as base
# Switch to UID 0 for the uninstall, then back to the `datahub` user.
USER 0
RUN pip uninstall -y pyspark
USER datahub

View File

@ -0,0 +1,39 @@
// Gradle build script for the datahub-ingestion-slim Docker image, which is
// layered on top of the image produced by :docker:datahub-ingestion.
plugins {
    id 'com.palantir.docker'
    id 'java' // required for versioning
}

apply from: "../../gradle/versioning/versioning.gradle"

ext {
    // A root-level registry of 'linkedin' is mapped to 'acryldata'; any other
    // value is kept as-is.
    docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
    docker_repo = 'datahub-ingestion-slim'
    docker_dir = 'datahub-ingestion-slim'
}

docker {
    // The image is named <registry>/<repo> and tagged with the "v"-prefixed version.
    name "${docker_registry}/${docker_repo}:v${version}"
    version "v${version}"
    dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
    // Only the files directly inside this image's docker directory are part of
    // the build context.
    files fileTree(rootProject.projectDir) {
        include "docker/${docker_dir}/*"
    }
    // DOCKER_VERSION selects the tag of the datahub-ingestion base image in the
    // Dockerfile's FROM line. The :docker:datahub-ingestion build tags its image
    // "v${version}", so the same "v"-prefixed tag must be passed here; the bare
    // version would not match the locally built image.
    // NOTE(review): the Dockerfile hard-codes the `acryldata/` registry in FROM,
    // so a non-default docker_registry would still not line up — confirm.
    buildArgs([DOCKER_VERSION: "v${version}".toString()])
    buildx(false)
}

// The slim image needs the regular build and the datahub-ingestion image first.
tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion:docker'])

// Creates build/docker under this project's build directory.
task mkdirBuildDocker {
    doFirst {
        mkdir "${project.buildDir}/docker"
    }
}
dockerClean.finalizedBy(mkdirBuildDocker)

// Delegates to the root project's cleanLocalDockerImages helper for this image's tag.
task cleanLocalDockerImages {
    doLast {
        rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString())
    }
}
dockerClean.finalizedBy(cleanLocalDockerImages)

View File

@ -1,10 +1,12 @@
# Defining environment
ARG APP_ENV=prod
ARG DOCKER_VERSION=latest
FROM acryldata/datahub-ingestion-base as base
FROM acryldata/datahub-ingestion-base:$DOCKER_VERSION as base
FROM openjdk:11 as prod-build
FROM eclipse-temurin:11 as prod-build
COPY . /datahub-src
WORKDIR /datahub-src
# We noticed that the gradle wrapper download failed frequently in CI on arm64 machines.
# I suspect this was due to the QEMU emulation slowdown, combined with the arm64
# build being starved for CPU by the x86_64 build's codegen step.
@ -15,8 +17,7 @@ COPY . /datahub-src
# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335.
# and https://unix.stackexchange.com/a/82610/378179.
# This is a workaround for https://github.com/gradle/gradle/issues/18124.
RUN cd /datahub-src && \
(for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
./gradlew :metadata-events:mxe-schemas:build
FROM base as prod-codegen

View File

@ -0,0 +1,45 @@
// Gradle build script for the datahub-ingestion Docker image, which is layered
// on top of the image produced by :docker:datahub-ingestion-base.
plugins {
    id 'com.palantir.docker'
    id 'java' // required for versioning
}

apply from: "../../gradle/versioning/versioning.gradle"

ext {
    // A root-level registry of 'linkedin' is mapped to 'acryldata'; any other
    // value is kept as-is.
    docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
    docker_repo = 'datahub-ingestion'
    docker_dir = 'datahub-ingestion'
}

docker {
    // The image is named <registry>/<repo> and tagged with the "v"-prefixed version.
    name "${docker_registry}/${docker_repo}:v${version}"
    version "v${version}"
    dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
    // Build context: this image's docker directory, the listed source trees,
    // and the files at the repository root.
    files fileTree(rootProject.projectDir) {
        include "docker/${docker_dir}/*"
        include "metadata-ingestion/**"
        include "metadata-events/**"
        include "metadata-models/**"
        include "li-utils/**"
        include "docs/**"
        include "gradle/**"
        include "buildSrc/**"
        include "*"
    }
    // DOCKER_VERSION selects the tag of the datahub-ingestion-base image in the
    // Dockerfile's FROM line. The :docker:datahub-ingestion-base build tags its
    // image "v${version}", so the same "v"-prefixed tag must be passed here; the
    // bare version would not match the locally built base image.
    // NOTE(review): the Dockerfile hard-codes the `acryldata/` registry in FROM,
    // so a non-default docker_registry would still not line up — confirm.
    buildArgs([DOCKER_VERSION: "v${version}".toString()])
}

// This image needs the regular build and the datahub-ingestion-base image first.
tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker'])

// Creates build/docker under this project's build directory.
task mkdirBuildDocker {
    doFirst {
        mkdir "${project.buildDir}/docker"
    }
}
dockerClean.finalizedBy(mkdirBuildDocker)

// Delegates to the root project's cleanLocalDockerImages helper for this image's tag.
task cleanLocalDockerImages {
    doLast {
        rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString())
    }
}
dockerClean.finalizedBy(cleanLocalDockerImages)

View File

@ -31,6 +31,9 @@ include ':docker:elasticsearch-setup'
include ':docker:mysql-setup'
include ':docker:postgres-setup'
include ':docker:kafka-setup'
include ':docker:datahub-ingestion'
include ':docker:datahub-ingestion-base'
include ':docker:datahub-ingestion-slim'
include 'metadata-models'
include 'metadata-models-validator'
include 'metadata-testing:metadata-models-test-utils'