datahub/docker/build.gradle

694 lines
29 KiB
Groovy
Raw Normal View History

plugins {
id 'java' // required by versioning
id 'docker-compose'
}
import com.avast.gradle.dockercompose.tasks.ComposeUp
import com.avast.gradle.dockercompose.tasks.ComposeDownForced
import org.yaml.snakeyaml.Yaml
apply from: "../gradle/versioning/versioning.gradle"
ext {
// Compose file used by every quickstart variant; variants differ by the compose profile they select.
compose_base = "profiles/docker-compose.yml"
// Default docker compose project name; a variant can override it through additionalConfig.projectName.
project_name = "datahub"
// Gradle module paths shared by most quickstart variants (see quickstart_configs).
backend_profile_modules = [
':docker:elasticsearch-setup',
':docker:mysql-setup',
':datahub-upgrade',
':metadata-service:war',
]
// Currently empty; variants still prepend it to their module lists.
python_services_modules = [
]
// Collects the environment variables shared by all quickstart variants.
// 1. If DATAHUB_LOCAL_COMMON_ENV points at an existing file, every KEY=VALUE line of it is read
//    (blank lines, lines starting with '#', and lines without '=' are ignored; keys and values are trimmed).
// 2. Every process environment variable whose name starts with DATAHUB_ is then added,
//    overriding a same-named entry from the file.
// Returns the resulting map.
loadCommonEnvFile = {
    def collected = [:]
    def envFilePath = System.getenv("DATAHUB_LOCAL_COMMON_ENV")
    def envSource = envFilePath ? new File(envFilePath) : null
    if (envSource?.exists()) {
        logger.lifecycle("Loading environment variables from: ${envFilePath}")
        envSource.eachLine { rawLine ->
            def entry = rawLine.trim()
            if (!entry || entry.startsWith("#") || !entry.contains("=")) {
                return // nothing to record for this line
            }
            // Split on the first '=' only, so values may themselves contain '='
            def pieces = entry.split("=", 2)
            if (pieces.length == 2) {
                collected[pieces[0].trim()] = pieces[1].trim()
            }
        }
    }
    System.getenv()
        .findAll { name, val -> name.startsWith("DATAHUB_") }
        .each { name, val -> collected[name] = val }
    return collected
}
// Common configuration for all tasks.
// Each key is assigned as a property on every per-variant dockerCompose configuration.
common_config = [
captureContainersOutput: true,
captureContainersOutputToFiles: project.file('build/container-logs')
]
// declarative task configuration
// Keyed by quickstart task name. Keys read elsewhere in this script:
//   profile          - compose profile, passed to docker compose as `--profile <profile>`
//   modules          - Gradle module paths; their dockerPrepare / generateBakeSnippet tasks become dependencies
//   isDebug          - marks debug variants: eligible for reload/reloadEnv lookup, containers are retained on
//                      startup failure, and the 'debug' bake snippet task suffix is used
//   additionalEnv    - extra environment variables, applied after the common ones
//   additionalConfig - extra properties set on the variant's dockerCompose configuration (e.g. projectName)
//   preserveVolumes  - when true, the <task>Nuke task does not remove volumes
quickstart_configs = [
'quickstart': [
profile: 'quickstart-consumers',
modules: python_services_modules + backend_profile_modules + [
':datahub-frontend',
':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job',
':datahub-actions',
]
],
'quickstartCLI': [
profile: 'quickstart',
modules: python_services_modules + backend_profile_modules + [
':datahub-frontend',
':datahub-actions',
]
],
'quickstartDebug': [
profile: 'debug',
modules: python_services_modules + backend_profile_modules + [':datahub-frontend', ':datahub-actions'],
isDebug: true,
additionalEnv: [
DATAHUB_LOCAL_ACTIONS_ENV: "${rootProject.project(':smoke-test').projectDir}/test_resources/actions/actions.env"
]
],
// Same compose profile as quickstartDebug, but under a separate compose project name
'quickstartCypress': [
profile: 'debug',
modules: python_services_modules + backend_profile_modules + [':datahub-frontend', ':datahub-actions'],
isDebug: true,
additionalEnv: [
DATAHUB_LOCAL_ACTIONS_ENV: "${rootProject.project(':smoke-test').projectDir}/test_resources/actions/actions.env"
],
// Override project name for cypress dev environment
additionalConfig: [
projectName: 'dh-cypress'
]
],
'quickstartDebugMin': [
profile: 'debug-min',
modules: backend_profile_modules + [':datahub-frontend'],
isDebug: true
],
'quickstartCDC': [
profile: 'quickstart-consumers-cdc',
modules: backend_profile_modules + [':datahub-frontend',
':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job',
':datahub-actions',
],
],
'quickstartCDCDebug': [
profile: 'debug-consumers-cdc',
modules: python_services_modules + backend_profile_modules + [':datahub-frontend',
':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job',
':datahub-actions'
],
isDebug: true,
],
'quickstartDebugConsumers': [
profile: 'debug-consumers',
modules: python_services_modules + backend_profile_modules + [':datahub-frontend',
':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job',
':datahub-actions'
],
isDebug: true,
additionalEnv: [
DATAHUB_LOCAL_ACTIONS_ENV: "${rootProject.project(':smoke-test').projectDir}/test_resources/actions/actions.env"
]
],
// Postgres variants replace the mysql-setup module with postgres-setup
'quickstartPg': [
profile: 'quickstart-postgres',
modules: (backend_profile_modules - [':docker:mysql-setup']) + [
':docker:postgres-setup',
':datahub-frontend',
':datahub-actions',
]
],
'quickstartPgCdc': [
profile: 'quickstart-postgres-cdc',
modules: (backend_profile_modules - [':docker:mysql-setup']) + [
':docker:postgres-setup',
':datahub-frontend',
':datahub-actions',
]
],
'quickstartPgDebug': [
profile: 'debug-postgres',
modules: python_services_modules + (backend_profile_modules - [':docker:mysql-setup']) + [
':docker:postgres-setup',
':datahub-frontend'
],
isDebug: true
],
'quickstartPgCdcDebug': [
profile: 'debug-postgres-cdc',
modules: python_services_modules + (backend_profile_modules - [':docker:mysql-setup']) + [
':docker:postgres-setup',
':datahub-frontend'
],
isDebug: true
],
'quickstartSlim': [
profile: 'quickstart-backend',
modules: backend_profile_modules + [
':datahub-actions',
]
],
'quickstartSpark': [
profile: 'quickstart-backend',
modules: backend_profile_modules + [
':datahub-actions',
],
additionalEnv: [
'DATAHUB_LOCAL_COMMON_ENV': "${rootProject.project(':metadata-integration:java:spark-lineage-legacy').projectDir}/spark-smoke-test/smoke-gms.env",
'METADATA_SERVICE_AUTH_ENABLED': 'false'
]
],
// No modules: this variant has no image build dependencies, and its Nuke task keeps volumes
'quickstartStorage': [
profile: 'quickstart-storage',
preserveVolumes: true
],
'quickstartBackendDebug': [
profile: 'debug-backend-aws',
modules: python_services_modules + backend_profile_modules + [':datahub-frontend', ':datahub-actions'],
isDebug: true,
additionalEnv: [
DATAHUB_LOCAL_ACTIONS_ENV: "${rootProject.project(':smoke-test').projectDir}/test_resources/actions/actions.env"
]
],
'allImages': [ //This is a special task just to include all images as dependencies - and is useful when CI needs to publish all images
profile: 'quickstart-consumers',
modules: python_services_modules + backend_profile_modules + [
':datahub-frontend',
':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job',
':datahub-actions',
':docker:datahub-ingestion',
':docker:postgres-setup'
]
]
]
// Used only by the reload / reloadEnv tasks, which are supported for debug variants of quickstart.
// Each value is a service-name prefix. The suffix doesn't follow a fixed convention across all profiles,
// so running services are matched against the prefix with startsWith (see getRunningContainers).
// This list only contains modules that can be reloaded via the reload task. Python services support hot reload.
// To re-run setup tasks, quickstart* needs to be used.
moduleToContainer = [
':metadata-service:war': 'datahub-gms',
':datahub-frontend': 'frontend',
':metadata-jobs:mce-consumer-job': 'datahub-mce-consumer',
':metadata-jobs:mae-consumer-job': 'datahub-mae-consumer',
]
// Though these support hot reload, they need to be restarted if any ENVs have been modified and reloadEnv is run.
moduleToContainerWithHotReload = [
':datahub-actions': 'datahub-actions',
]
// Reads the compose profile name that a ComposeUp task captured earlier.
// The tracking file lives in the root project's build directory and is named after the compose file
// ('.yml' replaced by '-profile.txt').
// Returns the trimmed file content, or null when the tracking file does not exist.
readCapturedProfile = {
    def trackerName = new File(compose_base).name.replace('.yml', '-profile.txt')
    def tracker = new File(rootProject.buildDir, trackerName)
    if (tracker.exists()) {
        def captured = tracker.text.trim()
        logger.lifecycle("Using captured profile: ${captured}")
        return captured
    }
    return null
}
// Looks up the first debug quickstart configuration (isDebug set) that uses the given compose profile.
// Returns a map with 'taskName' and 'config' keys.
// Throws GradleException when no debug configuration uses the profile.
// NOTE(review): quickstartDebug and quickstartCypress both use profile 'debug', so this always resolves to
// whichever of the two is declared first - confirm that is acceptable for the cypress project name.
findTaskNameByProfile = { profile ->
    def hit = quickstart_configs.find { candidateName, candidate ->
        candidate.isDebug && candidate.profile == profile
    }
    if (!hit) {
        throw new GradleException("No debug quickstart configuration found for profile: ${profile}")
    }
    return [taskName: hit.key, config: hit.value]
}
// Helper function to get running container service names by matching prefixes from moduleToContainer.
//
// Parameters:
//   composeFilePath  - compose file path; it is passed to docker compose as "docker/<composeFilePath>",
//                      i.e. relative to the working directory of the Gradle process
//   projectName      - docker compose project name (-p)
//   includeHotReload - when true, modules from moduleToContainerWithHotReload are matched as well
// Returns a map of [modulePath: actualServiceName] containing only modules whose service is running.
// Throws GradleException when `docker compose ps` exits with a non-zero status.
getRunningContainers = { composeFilePath, projectName, includeHotReload = false ->
    // Pass the command as an argument list so values are never re-split on whitespace
    def psCmd = [
        'docker', 'compose',
        '-p', "${projectName}",
        '-f', "docker/${composeFilePath}",
        'ps', '--services', '--filter', 'status=running'
    ]
    def process = psCmd.execute()
    // Drain stdout and stderr while waiting: calling waitFor() before reading the streams can block
    // forever once the child fills a pipe buffer.
    def stdout = new StringBuilder()
    def stderr = new StringBuilder()
    process.waitForProcessOutput(stdout, stderr)
    if (process.exitValue() != 0) {
        throw new GradleException("Failed to list running containers: ${stderr}")
    }
    def runningServices = stdout.toString().readLines().collect { it.trim() }.findAll { it }
    logger.lifecycle("Running services: ${runningServices}")

    def runningModuleToContainer = [:]
    // Records, for every module in the given table, the first running service whose name starts with its prefix
    def collectMatches = { prefixByModule, label ->
        prefixByModule.each { modulePath, servicePrefix ->
            def matched = runningServices.find { service -> service.startsWith(servicePrefix) }
            if (matched) {
                runningModuleToContainer[modulePath] = matched
                logger.lifecycle("Matched ${label} '${modulePath}' (prefix: '${servicePrefix}') to running service '${matched}'")
            } else {
                logger.info("${label.capitalize()} '${modulePath}' (prefix: '${servicePrefix}') is not running, skipping")
            }
        }
    }
    collectMatches(moduleToContainer, 'module')
    // Include hot reload modules if requested
    if (includeHotReload) {
        collectMatches(moduleToContainerWithHotReload, 'hot-reload module')
    }
    return runningModuleToContainer
}
}
// Register one task per quickstart variant, all in the 'quickstart' group.
quickstart_configs.keySet().each { variantName ->
    tasks.register(variantName) {
        group = 'quickstart'
    }
}
// Dynamically create all quickstart tasks and configurations.
// One nested docker-compose configuration is declared per entry of quickstart_configs, named after the task.
dockerCompose {
// Configure default settings that apply to all configurations
useComposeFiles = [compose_base]
projectName = project_name
projectNamePrefix = ''
buildBeforeUp = false
buildBeforePull = false
stopContainers = false
removeVolumes = false
quickstart_configs.each { taskName, config ->
"${taskName}" {
// Tie this configuration to the quickstart task of the same name
isRequiredBy(tasks.named(taskName))
if (config.profile) {
composeAdditionalArgs = ['--profile', config.profile]
}
// Environment precedence (later puts win): common env file / DATAHUB_* variables,
// then the fixed values below, then the variant's additionalEnv.
// Load common environment variables from file if DATAHUB_LOCAL_COMMON_ENV is set
def commonEnvVars = loadCommonEnvFile.call()
commonEnvVars.each { key, value ->
environment.put key, value
}
// Common environment variables
environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}"
environment.put 'DATAHUB_APP_VERSION', System.getenv("DATAHUB_APP_VERSION") ?: "v${version}"
environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false'
environment.put "METADATA_TESTS_ENABLED", "true"
environment.put "DATAHUB_REPO", "${docker_registry}"
// Additional environment variables if specified
if (config.additionalEnv) {
config.additionalEnv.each { key, value ->
environment.put key, value
}
}
// Same values as the defaults above, repeated on the per-variant configuration
useComposeFiles = [compose_base]
projectName = project_name
projectNamePrefix = ''
buildBeforeUp = false
buildBeforePull = false
stopContainers = false
removeVolumes = false
retainContainersOnStartupFailure = config.isDebug ? true: false //forcing nulls to bool
// Apply common configuration
// NOTE(review): `delegate` is presumably resolved to this nested configuration - confirm before restructuring these loops
common_config.each { key, value ->
delegate."${key}" = value
}
// Apply additional task-specific configuration if specified (applied last, so it can override e.g. projectName)
if (config.additionalConfig) {
config.additionalConfig.each { key, value ->
delegate."${key}" = value
}
}
}
}
}
// Renders the fully resolved compose file for the quickstartCLI variant and writes it to
// quickstart/docker-compose.quickstart-profile.yml, with build-time values (version, home directory)
// replaced by ${...} references that are resolved when the file is used.
task generateQuickstartComposeConfig {
    description = "Generates the docker-compose config output for quickstart configuration for use with oss quickstart cli"
    group = "docker"
    doLast {
        def composeUpTask = tasks.named("quickstartCLIComposeUp")
        // Some of these details task gleaned from the docker-compose plugin source code
        // https://github.com/avast/gradle-docker-compose-plugin/blob/main/src/main/groovy/com/avast/gradle/dockercompose/ComposeExecutor.groovy
        def composeExecutor = composeUpTask.get().getComposeExecutor().get()
        // Both version variables are rendered with the same placeholder so it can be swapped back afterwards
        composeExecutor.parameters.environment.put("DATAHUB_VERSION", '__VERSION__')
        composeExecutor.parameters.environment.put("DATAHUB_APP_VERSION", '__VERSION__')
        composeExecutor.parameters.environment.put("METADATA_SERVICE_AUTH_ENABLED", 'false')
        def configOutput = composeExecutor.execute('config')

        // The `config` returns a fully resolved compose file. We need to replace the DATAHUB_VERSION and HOME with values not available at build time.
        // Every placeholder occurrence becomes ${DATAHUB_VERSION}; this includes values that came from
        // DATAHUB_APP_VERSION, because both share the placeholder. (A second replace of the same placeholder
        // with ${DATAHUB_APP_VERSION} used to follow here but could never match anything.)
        // NOTE(review): give DATAHUB_APP_VERSION its own placeholder if it must stay independently configurable.
        configOutput = configOutput.replace('__VERSION__', '${DATAHUB_VERSION}')
        // HOME may be unset (null would throw) or empty (would match between every character)
        def homeDir = System.getenv('HOME')
        if (homeDir) {
            configOutput = configOutput.replace(homeDir, '${HOME}')
        }

        // Parse configOutput as yaml and add UI_INGESTION_DEFAULT_CLI_VERSION as an env var under datahub-gms-quickstart.environment key
        // This needs to be matched with the version of the CLI used to launch quickstart
        // yaml.load is applied to output produced by this build, not to external input.
        def yaml = new Yaml()
        def config = yaml.load(configOutput)
        def gmsService = config?.get('services')?.get('datahub-gms-quickstart')
        if (gmsService == null) {
            throw new GradleException("Service 'datahub-gms-quickstart' not found in the generated compose config")
        }
        if (gmsService['environment'] == null) {
            gmsService['environment'] = [:]
        }
        gmsService['environment']['UI_INGESTION_DEFAULT_CLI_VERSION'] = '${UI_INGESTION_DEFAULT_CLI_VERSION}'

        // Configure YAML dump options to match Docker Compose format
        def dumpOptions = new org.yaml.snakeyaml.DumperOptions()
        dumpOptions.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK)
        dumpOptions.setIndent(2)
        dumpOptions.setWidth(120)
        yaml = new Yaml(dumpOptions)
        configOutput = yaml.dump(config)
        configOutput = "# This file is generated as part of build process. If any build changes cause this file to be modified, please check in the generated file\n" + configOutput

        def outputFile = project.file("${project.projectDir}/quickstart/docker-compose.quickstart-profile.yml")
        outputFile.getParentFile().mkdirs()
        outputFile.text = configOutput
        logger.lifecycle("quickstart Docker Compose config saved to: ${outputFile.absolutePath}")
    }
}
// Register a prepareAll<variant> task for every quickstart variant (group 'quickstart-ci').
quickstart_configs.keySet().each { variantName ->
    tasks.register("prepareAll${variantName}") {
        group = 'quickstart-ci'
    }
}
// Variants that list modules make their prepareAll task depend on each module's dockerPrepare task.
quickstart_configs.each { variantName, variant ->
    if (!variant.modules) {
        return
    }
    def prepareTargets = variant.modules.collect { modulePath -> modulePath + ':dockerPrepare' }
    tasks.getByName("prepareAll${variantName}").dependsOn(prepareTargets)
}
// Register a buildImages<variant> Exec task per quickstart variant.
// The task merges the bake snippets of the variant's modules into <rootBuildDir>/bake-spec-<variant>.json
// and runs `docker buildx bake` on it, or `depot bake` when DOCKER_CACHE=DEPOT.
// With depot, `--push` is added when the dockerPush project property is set to a non-negative value.
quickstart_configs.each { taskName, config ->
    tasks.register("buildImages${taskName}", Exec) {
        ext {
            bakeSpec = [:]
        }
        group = 'quickstart-ci'
        def taskSuffix = config.isDebug ? 'debug' : ''
        // Variants without a modules entry (e.g. quickstartStorage) simply have nothing to bake
        def modules = config.modules ?: []
        dependsOn(modules.collect { it + ":generateBakeSnippet${taskSuffix}" })
        dependsOn(tasks.getByName("prepareAll${taskName}"))
        dependsOn tasks.named("generateQuickstartComposeConfig")

        def jsonFile = new File(rootProject.buildDir, "bake-spec-${taskName}.json")
        def bakeCmdArgs = ["bake", "-f", "${jsonFile.absolutePath}"]

        // -PdockerPush=false reaches the build as the String "false", which Groovy treats as true.
        // Interpret the value explicitly so negative spellings (and an empty value) do not push.
        def dockerPushValue = project.findProperty("dockerPush")
        def pushRequested = dockerPushValue != null &&
            !(dockerPushValue.toString().trim().toLowerCase() in ['', 'false', '0', 'no'])

        def buildCmd = []
        if (System.getenv("DOCKER_CACHE") == "DEPOT") {
            buildCmd << "depot"
            buildCmd += bakeCmdArgs
            buildCmd += ['--save', '--metadata-file', "${rootProject.buildDir}/build-metadata.json"]
            if (pushRequested) {
                buildCmd << "--push"
            }
        } else {
            buildCmd += ["docker", "buildx"]
            buildCmd += bakeCmdArgs
        }
        commandLine buildCmd
        workingDir rootProject.projectDir

        doFirst {
            // Assemble the bake spec at execution time, once the generateBakeSnippet tasks have populated theirs
            def bakeSnippets = [:]
            def targets = []
            modules.each { module ->
                def moduleProject = project.project(module)
                def generateBakeSnippetsTask = moduleProject.tasks.getByName("generateBakeSnippet${taskSuffix}")
                bakeSnippets.putAll(generateBakeSnippetsTask.bakeSpec.target)
                targets.addAll(generateBakeSnippetsTask.bakeSpec.target.keySet())
            }
            ext.bakeSpec.group = [ "default": ["targets": targets] ]
            ext.bakeSpec.target = bakeSnippets
            jsonFile.parentFile.mkdirs()
            jsonFile.text = groovy.json.JsonOutput.prettyPrint(groovy.json.JsonOutput.toJson(ext.bakeSpec))
        }
    }
}
// For every variant that lists modules, make <variant>ComposeUp depend on buildImages<variant>.
def variantsWithModules = quickstart_configs.findAll { variantName, variant -> variant.modules }
variantsWithModules.keySet().each { variantName ->
    def imagesTask = tasks.getByName("buildImages${variantName}")
    tasks.getByName("${variantName}ComposeUp").dependsOn(imagesTask)
}
// Version gate for docker compose: prints the installed version followed by "2.20" and asks `sort` to check
// that the two lines are in descending version order, so the command exits non-zero for versions below 2.20.
tasks.register('minDockerCompose2.20', Exec) {
    commandLine 'bash', '-c', 'echo -e "$(docker compose version | sed -E \'s/.*v([0-9.]+).*/\\1/\')\n2.20"|sort --version-sort --check=quiet --reverse'
}
// Create a <variant>Nuke task for every configuration.
// When it runs, the task switches on volume removal for its variant (unless preserveVolumes is set) and
// deletes the captured-profile file; it is finalized by the variant's ComposeDownForced task.
quickstart_configs.each { taskName, config ->
    def actualProjectName = config.additionalConfig?.projectName ?: project_name
    def downForcedName = "${taskName}ComposeDownForced".toString()
    tasks.register("${taskName}Nuke") {
        group = 'quickstart'
        description = "Nuke containers and volumes for configuration: ${taskName} (project: ${actualProjectName})"
        doFirst {
            // Set removeVolumes for this specific configuration
            dockerCompose."${taskName}".removeVolumes = !config.preserveVolumes
            // Remove the captured profile file (this happens before the finalizing compose down runs)
            def trackerName = new File(compose_base).name.replace('.yml', '-profile.txt')
            def tracker = new File(rootProject.buildDir, trackerName)
            if (tracker.exists()) {
                tracker.delete()
                logger.lifecycle("Deleted profile file: ${tracker.absolutePath}")
            }
        }
        finalizedBy(tasks.matching { candidate -> candidate.name == downForcedName })
    }
}
// quickstartDown: finalized by every ComposeDownForced task; its own action deletes the captured-profile file.
tasks.register('quickstartDown') {
    group = 'quickstart'
    finalizedBy(tasks.withType(ComposeDownForced))
    doLast {
        def trackerName = new File(compose_base).name.replace('.yml', '-profile.txt')
        def tracker = new File(rootProject.buildDir, trackerName)
        if (tracker.exists()) {
            tracker.delete()
            logger.lifecycle("Deleted profile file: ${tracker.absolutePath}")
        }
    }
}
// Applied to every ComposeUp task: require the docker compose version check, order after quickstartNuke,
// and record the variant's compose profile in a file so reload / reloadEnv can find it later.
tasks.withType(ComposeUp).configureEach {
    shouldRunAfter('quickstartNuke')
    dependsOn tasks.named("minDockerCompose2.20")
    // Runs at execution time only, so merely configuring a ComposeUp task never overwrites the captured profile
    doFirst {
        // Strip the task-name suffix to find the variant (e.g., "quickstartDebugComposeUp" -> "quickstartDebug")
        def variantName = name.replaceAll('ComposeUp$', '')
        def variantProfile = quickstart_configs[variantName]?.profile
        if (variantProfile) {
            // Tracking file: named after the compose file, stored in the root project's build directory
            def trackerName = new File(compose_base).name.replace('.yml', '-profile.txt')
            def tracker = new File(rootProject.buildDir, trackerName)
            tracker.parentFile.mkdirs()
            tracker.text = variantProfile
            logger.lifecycle("Captured profile '${variantProfile}' to ${tracker.absolutePath}")
        }
    }
}
// reload: rebuilds the modules of the active debug variant and restarts only the running containers
// whose module's dockerPrepare task actually executed and was not up to date.
tasks.register("reload", Exec) {
    group = 'quickstart'
    description = "Build and reload only changed containers for the active profile"

    // Resolved at configuration time: the captured profile and the debug variant that uses it
    def activeProfile = readCapturedProfile.call()
    def resolved = activeProfile ? findTaskNameByProfile.call(activeProfile) : null
    def matchingTaskName = resolved?.taskName
    def matchingConfig = resolved?.config
    if (resolved) {
        // Dynamically depend on the correct prepareAll task
        dependsOn tasks.named("prepareAll${matchingTaskName}")
    }

    doFirst {
        if (!activeProfile) {
            throw new GradleException("Could not detect running profile. reload[Env] is supported only when one of the :docker:quickstartDebug* task is running.")
        }
        logger.lifecycle("Active profile '${activeProfile}' maps to task: ${matchingTaskName}")

        // Running containers matched by prefix (hot reload modules excluded)
        def actualProjectName = matchingConfig.additionalConfig?.projectName ?: project_name
        def runningModuleToContainer = getRunningContainers.call(compose_base, actualProjectName, false)
        def executedTasks = project.gradle.taskGraph.allTasks.findAll { it.state.executed }

        // Keep only the services whose module was rebuilt in this invocation
        def containersToRestart = runningModuleToContainer.findAll { modulePath, serviceName ->
            def prepareTask = project.project(modulePath).tasks.findByName('dockerPrepare')
            prepareTask && executedTasks.contains(prepareTask) && !prepareTask.state.upToDate
        }.collect { modulePath, serviceName -> serviceName }

        def shellCommand
        if (containersToRestart) {
            shellCommand = (["docker compose -p ${actualProjectName} --profile ${activeProfile} -f ${compose_base} restart"] + containersToRestart).join(" ")
            println(shellCommand)
        } else {
            // Nothing was rebuilt: run a harmless command so the Exec task still has something to execute
            shellCommand = 'echo "No containers need restarting - all modules are up to date"'
        }
        commandLine 'bash', '-c', shellCommand
    }
}
// reloadEnv: rebuilds the modules of the active debug variant and runs `docker compose up -d --no-deps`
// for the running reloadable services (hot reload modules included) that belong to that variant,
// using the same environment variables the quickstart* configuration sets.
tasks.register("reloadEnv", Exec) {
    group = 'quickstart'
    description = "Build changed containers but recreate all services for the active profile"
    // Read the captured profile name from file
    def activeProfile = readCapturedProfile.call()
    def matchingTask = null
    def matchingTaskName = null
    def matchingConfig = null
    if (activeProfile) {
        // Find the task and config that matches this profile
        matchingTask = findTaskNameByProfile.call(activeProfile)
        matchingTaskName = matchingTask.taskName
        matchingConfig = matchingTask.config
        // Dynamically depend on the correct prepareAll task
        dependsOn tasks.named("prepareAll${matchingTaskName}")
    }
    doFirst {
        if (!activeProfile) {
            throw new GradleException("Could not detect running profile. reload[Env] is supported only when one of the :docker:quickstartDebug* task is running.")
        }
        logger.lifecycle("Active profile '${activeProfile}' maps to task: ${matchingTaskName}")
        // Get running containers by matching prefixes (include hot reload modules)
        def actualProjectName = matchingConfig.additionalConfig?.projectName ?: project_name
        def runningModuleToContainer = getRunningContainers.call(compose_base, actualProjectName, true)
        // Restrict to the reloadable modules this variant actually uses
        def modulesInVariant = matchingConfig.modules ?: []
        def containersToRestart = []
        runningModuleToContainer.each { modulePath, actualServiceName ->
            if (modulesInVariant.contains(modulePath)) {
                containersToRestart << actualServiceName
            }
        }
        // Specify all environment variables specified during quickstart* for reloadEnv as well since containers are re-created.
        // Same precedence as the quickstart* compose configuration: common env file / DATAHUB_* variables first,
        // then the fixed values, then the variant's additionalEnv.
        loadCommonEnvFile.call().each { key, value ->
            environment.put key, value
        }
        environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}"
        environment.put 'DATAHUB_APP_VERSION', System.getenv("DATAHUB_APP_VERSION") ?: "v${version}"
        environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false'
        environment.put "METADATA_TESTS_ENABLED", "true"
        environment.put "DATAHUB_REPO", "${docker_registry}"
        // Additional environment variables if specified
        if (matchingConfig.additionalEnv) {
            matchingConfig.additionalEnv.each { key, value ->
                environment.put key, value
            }
        }
        def cmd = ["docker compose -p ${actualProjectName} --profile ${activeProfile} -f ${compose_base} up -d --no-deps"] + containersToRestart
        println(cmd.join(" "))
        commandLine 'bash', '-c', cmd.join(" ")
    }
}
// Redirects from the previous per-variant task names, kept for a transition period. To be deprecated.
// For every debug variant "quickstartX" this registers "XReload" and "XReloadEnv", which simply depend on
// `reload` / `reloadEnv` and then log a deprecation notice.
quickstart_configs.each { taskName, config ->
    if (config.isDebug) {
        def reloadTaskName = taskName.replaceFirst(/^quickstart/, "")
        tasks.register("${reloadTaskName}Reload") {
            dependsOn tasks.named("reload")
            doLast {
                logger.lifecycle("⚠Deprecated: Just run 'reload' instead of '${reloadTaskName}Reload' task - it will auto-detect the active profile and restart the modified services")
            }
        }
        tasks.register("${reloadTaskName}ReloadEnv") {
            dependsOn tasks.named("reloadEnv")
            doLast {
                logger.lifecycle("⚠Deprecated: Just run 'reloadEnv' instead of '${reloadTaskName}ReloadEnv' task - it will auto-detect the active profile and recreate the modified services")
            }
        }
    }
}
// :docker:build builds all docker images, via the buildImages task of the 'allImages' variant.
build.dependsOn(tasks.getByName('buildImagesallImages'))