datahub/gradle/docker/docker.gradle
Chakru 51863325a5
build: use versioning in gradle consistent with ci (#13259)
Enable the use of gradle for all image builds for publishing, eliminating the per-image build action in docker-unified.yml, which duplicated what was in gradle but used slightly different mechanisms to determine the tag. The gradle build can now consume tags provided by the workflow and produces the same tags as before.

Use bake matrix builds to build slim/full versions of datahub-ingestion, datahub-actions.

Image publishing and scanning rely on gradle to get the list of images, via depot.

Image publish and scans run once a day on schedule or on manual triggers only.
Pending work: Separate the publish and scans into a separate workflow that runs on a schedule and could also run other tests.
2025-04-24 23:00:03 +05:30

339 lines
12 KiB
Groovy

import groovy.json.JsonSlurper
import org.apache.commons.io.output.TeeOutputStream
/**
 * Runs `docker images -q <fullImageTag>` and returns the distinct, non-empty
 * lines of its output as a List.
 *
 * @param fullImageTag image reference handed to `docker images`
 * @return de-duplicated list of output lines; empty when docker printed nothing
 */
def _getDockerImages(String fullImageTag) {
    def captured = new ByteArrayOutputStream()
    exec {
        commandLine "docker", "images", "-q", "${fullImageTag}"
        standardOutput = captured
    }
    // One entry per output line; blank lines (including the sole "" of empty output) are dropped.
    def lines = captured.toString().trim().split("\\R")
    def nonBlank = lines.findAll { line -> !line.empty }
    return nonBlank.unique() as List
}
/**
 * Runs `docker container ls -q --filter ancestor=<fullImageTag>` and returns the
 * distinct, non-empty lines of its output as a List.
 *
 * @param fullImageTag image reference used as the `ancestor` filter value
 * @return de-duplicated list of output lines; empty when docker printed nothing
 */
def _getDockerContainers(String fullImageTag) {
    def captured = new ByteArrayOutputStream()
    exec {
        commandLine "docker", "container", "ls", "-q", "--filter",
                "ancestor=${fullImageTag}"
        standardOutput = captured
    }
    // One entry per output line; blank lines (including the sole "" of empty output) are dropped.
    def lines = captured.toString().trim().split("\\R")
    def nonBlank = lines.findAll { line -> !line.empty }
    return nonBlank.unique() as List
}
/**
 * Stops and removes the containers listed for the given image reference, then
 * force-removes the matching local images.
 *
 * @param fullImageTag image reference to clean up
 */
def _cleanLocalDockerImages(String fullImageTag) {
    println("Docker image string: ${fullImageTag}")

    def containerIds = _getDockerContainers(fullImageTag)
    if (containerIds) {
        println "Stopping containers: $containerIds"
        // Stop first, then remove; each step is its own docker invocation.
        ["stop", "rm"].each { action ->
            exec {
                commandLine = ["docker", "container", action] + containerIds
            }
        }
    }

    def imageIds = _getDockerImages(fullImageTag)
    if (imageIds) {
        println "Removing images: $imageIds"
        exec {
            // Removal may fail when a downstream image still uses it, so the exit value is ignored.
            ignoreExitValue(true)
            commandLine = ["docker", "rmi", "-f"] + imageIds
        }
    }
}
// Create extension object
/**
 * Holds the settings a project supplies for its docker image build: the
 * Dockerfile, the files copied into the build context, build args, tags,
 * platforms, task dependencies, the build target and optional variants.
 */
class DockerPluginExtension {
    Project project
    Property<File> dockerfile
    CopySpec copySpec
    MapProperty<String, String> buildArgs
    // Keyed by a tag name; the empty key holds the tag registered through name().
    MapProperty<String, String> tags
    ListProperty<String> platforms
    ListProperty<Object> dependencies // May contain tasks or task names
    Property<String> target
    MapProperty<String, Map<String, String>> variants
    Property<String> defaultVariant
    List<String> defaultPlatforms = ["linux/amd64", "linux/arm64/v8"]
    // For quickStart debug builds that use APP_ENV=dev like pattern. Not used in matrix builds.
    MapProperty<String, String> debugBuildArgs

    /**
     * Creates every lazy property from the project's object factory.
     *
     * @param project project this extension is attached to
     */
    DockerPluginExtension(Project project) {
        this.project = project
        def factory = project.objects
        this.dockerfile = factory.property(File)
        this.buildArgs = factory.mapProperty(String, String)
        this.copySpec = project.copySpec()
        this.tags = factory.mapProperty(String, String)
        this.platforms = factory.listProperty(String)
        this.dependencies = factory.listProperty(Object)
        this.target = factory.property(String)
        this.variants = factory.mapProperty(String, Map)
        // Quickstart builds do not need every variant. When a default variant is specified and the
        // project property matrixBuild is not set to true, only the default variant is built to save time.
        // The defaultVariant does not use a suffix in the tag.
        this.defaultVariant = factory.property(String)
        this.debugBuildArgs = factory.mapProperty(String, String)
    }

    /** Adds the given sources to the copy spec. */
    def files(Object... files) {
        return this.copySpec.from(files)
    }

    /** Registers the given tag under the empty tag name. */
    def name(String value) {
        return additionalTag("", value)
    }

    /** Sets the Dockerfile. */
    def dockerfile(File value) {
        return this.dockerfile.set(value)
    }

    /** Adds the given entries to the build args. */
    def buildArgs(Map<String, String> values) {
        return this.buildArgs.putAll(values)
    }

    /** Adds the given entries to the debug build args. */
    def debugBuildArgs(Map<String, String> values) {
        return this.debugBuildArgs.putAll(values)
    }

    /** Appends the given platforms to the platform list. */
    def platform(String... platforms) {
        return this.platforms.addAll(platforms)
    }

    /** Registers a tag under the given name. */
    def additionalTag(String name, String tag) {
        return this.tags.put(name, tag)
    }

    /** Appends tasks (or task names) to the dependency list. */
    def dependsOn(Object... tasks) {
        return this.dependencies.addAll(tasks)
    }

    /** Sets the build target. */
    def target(String value) {
        return this.target.set(value)
    }
}
// Register the extension on the project under the name "docker"; the tasks below read their settings from it.
def extension = project.extensions.create("docker", DockerPluginExtension)
project.afterEvaluate {
def buildContext = "${rootProject.buildDir}/dockerBuildContext/${rootProject.relativePath(project.projectDir)}/docker"
// ensure this directory exists
new File(buildContext).mkdirs()
println("buildContext: ${buildContext}")
// Syncs the files configured on the extension, plus the Dockerfile, into the build context directory.
tasks.register("dockerPrepare", Sync) {
    group("docker")
    // Everything the extension lists as a dependency must run first.
    dependsOn(extension.dependencies.get())
    with(extension.copySpec)
    from(extension.dockerfile)
    into(buildContext)
}
// Assembles and runs `docker buildx build --load` for this project's image, applying every tag
// configured on the extension. A metadata file written by buildx serves as the task output and
// is consulted by the custom up-to-date check at the bottom.
project.tasks.register("docker", Exec) {
    group "docker"
    description "Builds the docker image and applies all tags defined"
    dependsOn dockerPrepare

    // buildx writes its metadata (read back below for the image digest) to this file.
    def marker = "${buildContext}/../imageCreated-${name}.json"
    inputs.file(extension.dockerfile)
    inputs.dir(buildContext)
    inputs.property("tags", extension.tags)
    inputs.property("buildArgs", extension.buildArgs)
    outputs.file(marker)

    def dockerCmd = []
    dockerCmd += ["docker", "buildx", "build", "--load"]
    if (extension.platforms.get()) {
        dockerCmd << "--platform=${extension.platforms.get().join(',')}"
    }
    if (extension.target.isPresent()) {
        dockerCmd += ["--target", extension.target.get()]
    }

    // Add GitHub Actions specific arguments if running in GitHub workflow and using gha cache.
    // Or set DOCKER_CACHE=DEPOT -- which transparently can cache without these args when running on depot runner.
    if (System.getenv("DOCKER_CACHE") == "GITHUB") {
        def githubToken = System.getenv("GITHUB_TOKEN")
        if (githubToken) {
            dockerCmd += ["--cache-from", "type=gha,token=${githubToken}"]
            dockerCmd += ["--cache-to", "type=gha,mode=max,token=${githubToken}"]
        } else {
            dockerCmd += ["--cache-from", "type=gha"]
            dockerCmd += ["--cache-to", "type=gha,mode=max"]
        }
    }

    // Generate image metadata (we really just want the sha256 hash of the image)
    dockerCmd += ["--metadata-file", marker]

    extension.buildArgs.get().each { k, v ->
        dockerCmd += ["--build-arg", "${k}=${v}"]
    }
    extension.tags.get().each { taskName, tag ->
        dockerCmd += ["-t", tag]
    }
    dockerCmd << buildContext

    // Some projects use a Dockerfile with the non-default name.
    dockerCmd += ["--file", extension.dockerfile.get().toPath()]

    // Mask the GitHub token in the printed command. The token can appear anywhere in a
    // `type=gha,...` value (the --cache-to form has `mode=max` before it), so replace the
    // `token=` field wherever it sits instead of matching a fixed prefix.
    def maskedCmd = dockerCmd.collect { arg ->
        def text = arg.toString()
        text.startsWith("type=gha") ? text.replaceAll(/token=[^,]*/, "token=****") : arg
    }
    println(maskedCmd.join(" "))

    commandLine dockerCmd

    outputs.upToDateWhen {
        try {
            /* The docker task is up-to-date if
             * 1. the last build generated a marker file
             * 2. An image with the same tag exists in local docker images
             * 3. that existing image sha256 matches what is written in the generated marker file
             */
            def jsonContent = new File(marker).text
            def jsonData = new JsonSlurper().parseText(jsonContent)
            def imageIdFromMarker = jsonData['containerimage.digest']
            if (imageIdFromMarker != null && imageIdFromMarker.startsWith("sha256:")) {
                imageIdFromMarker = imageIdFromMarker.substring(7); // "sha256:".length() == 7
            }
            for (String tag : extension.tags.get().values()) {
                def actualImage = _getDockerImages(tag)
                // Each "missing" case is checked explicitly so that a missing image or digest is
                // reported through the debug log rather than surfacing as an exception from
                // get(0) / startsWith on an empty list or null digest.
                if (!actualImage || imageIdFromMarker == null ||
                        !imageIdFromMarker.startsWith(actualImage.get(0))) {
                    logger.debug("UP-TO-DATE CHECK for ${name}: did not find image ${imageIdFromMarker}")
                    return false
                }
            }
            logger.debug("UP-TO-DATE CHECK for ${name}: Is up-to-date, skipping")
            return true
        } catch (Exception e) {
            // any exceptions also implicitly mean not-up-to-date
            return false
        }
    }
}
// Same build as the `docker` task, minus its dockerPrepare dependency. Intended for CI, where
// parallel jobs of one github workflow share the docker build context root folder; this is
// faster than uploading and downloading all images together.
project.tasks.register("dockerFromCache") {
    group("docker")
    description("Builds the docker image from cache and applies all tags defined but without the dependencies.")
    doLast {
        // Reuse the command line and working directory configured on the `docker` task,
        // running it directly so none of that task's dependencies are triggered.
        def dockerTask = tasks.named('docker').get()
        project.exec {
            commandLine(dockerTask.commandLine)
            workingDir(dockerTask.workingDir)
        }
    }
}
// Builds the bake target definition for this project and stores it on the task as
// `ext.bakeSpec` (shape: [target: [<project name>: <target spec>]]).
//
// Project properties read:
//   matrixBuild - when enabled, platforms are emitted and every variant is included
//   tag         - when set, only configured tags containing this value are kept
//   shaTag      - when set, an extra `<repo>:<shaTag>` tag is appended
project.tasks.register("generateBakeSnippet") {
    ext.bakeSpec = []
    group "docker"
    description "Generates bake snippets for the project"
    doLast {
        // if matrixBuild is true, this is to publish images, so all variants must be built.
        // A value given on the command line is a String, and any non-empty String (including
        // "false") is truthy in Groovy, so an explicit "false" is treated as disabled.
        def matrixBuildValue = project.getProperties().getOrDefault("matrixBuild", false)
        def matrixBuild = matrixBuildValue && !"false".equalsIgnoreCase(matrixBuildValue.toString())

        def bake_spec_target = [
            context: "${buildContext}",
            dockerfile: "${extension.dockerfile.get().toPath()}",
        ]

        if (project.hasProperty("tag")) {
            def dockerTag = project.property("tag")
            bake_spec_target.tags = extension.tags.get().values().findAll({ tag -> tag.contains(dockerTag) }).toList()
        } else {
            bake_spec_target.tags = extension.tags.get().values().toList()
        }

        // Work on a mutable copy: the map handed out by the property may be immutable, and the
        // debug args are merged into it below.
        bake_spec_target.args = new LinkedHashMap(extension.buildArgs.get())

        if (extension.debugBuildArgs.get() && bake_spec_target.tags.findAll { tag -> tag.contains("debug") }.size() > 0) {
            bake_spec_target.args.putAll(extension.debugBuildArgs.get())
        }

        if (matrixBuild) {
            if (extension.platforms.get()) {
                bake_spec_target.platforms = extension.platforms.get()
            } else {
                bake_spec_target.platforms = extension.defaultPlatforms
            }
        }

        def hasVariants = extension.variants.get() ? true : false
        if (hasVariants) {
            bake_spec_target.name = "${project.name}-\${variants.tagSuffix}"
            bake_spec_target.matrix = [variants: []]
            extension.variants.get().each { variant, variantSpec ->
                if (matrixBuild || variant == extension.defaultVariant.get()) {
                    // Its not easy to merge common buildArgs and variant buildArgs with json format.
                    // So, we just add common buildArgs to matrix.
                    bake_spec_target.matrix.variants.add([ 'tagSuffix': "${variantSpec.suffix}", 'args': variantSpec.args + bake_spec_target.args])
                }
            }
            // When variants are present, fix up tags to include the variant suffix
            // A bit of a hack that for quickstart builds, we need a tag named debug that contains the default variant without suffixes
            bake_spec_target.tags = bake_spec_target.tags.collect(
                { tag -> tag + (tag.contains("debug") ? "" : "\${variants.tagSuffix}") }
            ).toList()
            bake_spec_target.args = "\${variants.args}"
        }

        if (project.hasProperty("shaTag")) {
            def shaTag = project.property("shaTag")
            def dockerRepo = extension.tags.get().get("").split(":")[0] //Extract the repo name from the default tag
            // With variants the sha tag carries the variant suffix placeholder; without, it is plain.
            def variantSuffix = hasVariants ? "\${variants.tagSuffix}" : ""
            bake_spec_target.tags.add("${dockerRepo}:${shaTag}${variantSuffix}")
        }

        ext.bakeSpec = [
            target: [ "${project.name}": bake_spec_target]
        ]
    }
}
// Registers one `dockerTag<name>` task per configured tag name; each simply depends on `docker`.
// For backward compatibility, can be removed if we dont really have a need post migration
// TODO: Choice of task names is to retain current names so that downstream dependencies in quickstart still work
// without changes. Can be changed post full migration.
extension.tags.get().keySet().each { taskName ->
    project.tasks.register("dockerTag${taskName}") {
        dependsOn(project.tasks.named("docker"))
    }
}
// Runs the local docker cleanup helper for every tag configured on the extension.
project.task("dockerClean") {
    group("docker")
    doLast {
        extension.tags.get().values().each { tag ->
            _cleanLocalDockerImages(tag)
        }
    }
}
}