mirror of https://github.com/datahub-project/datahub.git
synced 2025-11-28 03:35:56 +00:00
421 lines · 17 KiB · Groovy
// Spring Boot application build for the datahub-upgrade CLI.
plugins {
  id 'org.springframework.boot'
  id 'java'
}
|
|
|
|
// Shared build logic: version tag computation, JaCoCo coverage, and docker image tasks.
apply from: "../gradle/versioning/versioning.gradle"
apply from: "../gradle/coverage/java-coverage.gradle"
apply from: "../gradle/docker/docker.gradle"
|
|
|
|
ext {
  // Publish under 'acryldata' when the root project targets the 'linkedin' registry;
  // otherwise inherit the root project's registry unchanged.
  docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
  docker_repo = 'datahub-upgrade'
}
|
|
|
|
dependencies {
  // Internal DataHub modules the upgrade CLI drives.
  implementation project(':metadata-io')
  implementation project(':metadata-service:factories')
  implementation project(':metadata-service:restli-client-api')
  implementation project(':metadata-service:configuration')
  implementation project(':metadata-dao-impl:kafka-producer')
  implementation project(':metadata-utils')
  implementation externalDependency.charle

  implementation externalDependency.mustache
  implementation externalDependency.javaxInject
  implementation externalDependency.springActuator
  // Hadoop client, trimmed of transitive modules that conflict or carry known CVEs.
  implementation(externalDependency.hadoopClient) {
    exclude group: 'net.minidev', module: 'json-smart'
    exclude group: 'com.nimbusds', module: 'nimbus-jose-jwt'
    exclude group: "org.apache.htrace", module: "htrace-core4"
    exclude group: "org.eclipse.jetty"
    exclude group: "org.apache.hadoop.thirdparty", module: "hadoop-shaded-protobuf_3_7"
    exclude group: "com.charleskorn.kaml", module:"kaml"
    exclude group: "org.apache.kerby", module:"kerb-simplekdc"
  }

  // Minimum-version constraints pinning transitive dependencies past known CVEs.
  constraints {
    implementation(externalDependency.hadoopCommon3) {
      because("previous versions are vulnerable to CVE-2021-37404")
    }
    implementation(externalDependency.snakeYaml) {
      because("previous versions are vulnerable to CVE-2022-25857")
    }
    implementation(externalDependency.woodstoxCore) {
      because("previous versions are vulnerable to CVE-2022-40151-2")
    }
    implementation(externalDependency.jettison) {
      because("previous versions are vulnerable")
    }
    implementation(externalDependency.guava) {
      because("CVE-2023-2976")
    }
    implementation('io.airlift:aircompressor:0.27') {
      because("CVE-2024-36114")
    }
    implementation('dnsjava:dnsjava:3.6.1') {
      because("CVE-2024-25638")
    }
    implementation('commons-beanutils:commons-beanutils:1.11.0') {
      because("CVE-2025-48734")
    }
    implementation('io.netty:netty-codec-smtp:4.1.128.Final') {
      because("CVE: GHSA-jq43-27x9-3v86 - Netty SMTP Command Injection")
    }
  }

  // mock internal schema registry
  implementation externalDependency.kafkaAvroSerde
  implementation externalDependency.kafkaAvroSerializer

  implementation externalDependency.slf4jApi
  compileOnly externalDependency.lombok
  implementation externalDependency.picocli
  implementation externalDependency.resilience4j
  implementation externalDependency.parquet
  implementation externalDependency.protobuf
  implementation externalDependency.springBeans
  implementation externalDependency.springBootAutoconfigure
  implementation externalDependency.springCore
  implementation externalDependency.springKafka
  implementation externalDependency.kafkaClients
  // OpenTelemetry exporters are wired at runtime only.
  runtimeOnly externalDependency.opentelemetryExporter
  runtimeOnly externalDependency.openTelemetryExporterLogging
  runtimeOnly externalDependency.openTelemetryExporterCommon

  // Logging backend and JDBC drivers selected at runtime.
  runtimeOnly externalDependency.logbackClassic
  runtimeOnly externalDependency.mariadbConnector
  runtimeOnly externalDependency.mysqlConnector
  runtimeOnly externalDependency.postgresql

  // Cloud authentication integrations (AWS MSK IAM, Azure identity).
  implementation externalDependency.awsMskIamAuth
  implementation externalDependency.azureIdentityExtensions
  implementation externalDependency.azureIdentity

  // Align all Jackson modules through the BOM.
  implementation platform(externalDependency.jacksonBom)
  implementation externalDependency.jacksonJsr310

  annotationProcessor externalDependency.lombok
  annotationProcessor externalDependency.picocli

  testImplementation externalDependency.springBootTest
  testImplementation externalDependency.mockito
  testImplementation externalDependency.testng
  testImplementation 'uk.org.webcompere:system-stubs-testng:2.1.7'
  testRuntimeOnly externalDependency.logbackClassic

  testImplementation externalDependency.h2
  testImplementation testFixtures(project(':metadata-io'))

  // Test-scope CVE floor for parquet's hadoop transitive.
  constraints {
    implementation(externalDependency.parquetHadoop) {
      because("CVE-2022-42003")
    }
  }
}
|
|
|
|
// Package the upgrade CLI as an executable Spring Boot jar with a stable file name.
bootJar {
  mainClass = 'com.linkedin.datahub.upgrade.UpgradeCliApplication'
  archiveFileName = "${project.name}.jar"
}
|
|
|
|
// `./gradlew bootRun` runs the SystemUpdate upgrade against a locally running stack.
bootRun {
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  environment "SERVER_PORT", "8083"
  // Select the SystemUpdate upgrade via the CLI's -u flag.
  args += ["-u", "SystemUpdate"]
}
|
|
|
|
/**
 * Runs SystemUpdate on locally running system
 */
task run(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the datahub-upgrade SystemUpdate process locally."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  environment "SCHEMA_REGISTRY_TYPE", "INTERNAL"
  environment "SQL_SETUP_ENABLED", "true"
  // Runs the boot jar with a remote-debug agent listening on port 5003 (non-suspending).
  commandLine "java",
      "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdate"
}
|
|
|
|
/**
 * Runs the non-blocking system updates locally (includes lineage index fields backfill)
 * Sets up environment variables for local execution
 */
task runNonBlocking(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the non-blocking system updates locally (includes lineage index fields backfill)."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  // Executor pools are disabled so the non-blocking steps run inline for local debugging.
  environment "BOOTSTRAP_SYSTEM_UPDATE_EXECUTOR_POOLS_ENABLED", "false"
  environment "SCHEMA_REGISTRY_TYPE", "INTERNAL"
  // Runs the boot jar with a remote-debug agent listening on port 5003 (non-suspending).
  commandLine "java",
      "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdateNonBlocking"
}
|
|
|
|
// Runs the SystemUpdateCron upgrade (scheduled maintenance steps) against a local stack.
task runCron(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the datahub-upgrade SystemUpdate CRON process locally."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  // Runs the boot jar with a remote-debug agent listening on port 5003 (non-suspending).
  commandLine "java",
      "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdateCron"
}
|
|
|
|
// Same as runCron but passes dryRun=true so the CRON steps report what they
// would do without applying changes.
task runCronDryRun(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  // Fixed copy-paste: previous description was identical to runCron's and did
  // not mention that this variant is a dry run.
  description = "Run the datahub-upgrade SystemUpdate CRON process locally in dry-run mode (no changes applied)."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  // Runs the boot jar with a remote-debug agent listening on port 5003 (non-suspending).
  commandLine "java",
      "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdateCron", "-a", "dryRun=true"
}
|
|
|
|
/**
 * Runs RestoreIndices on locally running system.
 * This is useful for debugging or special situations to reindex, the index to reindex is given in -u index= argument"
 */
task runReindexDebug(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the datahub-upgrade SystemUpdate ReindexDebug"
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true"
  environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true"
  // Runs the boot jar with a remote-debug agent on port 5003; the target index
  // is hard-coded here — edit the index= argument to reindex something else.
  commandLine "java",
      "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "ReindexDebug", "-a", "index=datahubpolicyindex_v2"
}
|
|
|
|
/**
 * Runs RestoreIndices on locally running system. The batchSize are set to
 * test the process with pagination and not designed for optimal performance.
 */
task runRestoreIndices(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the restore indices process locally."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  // Debug agent on 5003; points the app at the locally exposed schema registry.
  commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api",
      "-Dserver.port=8083",
      bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "createDefaultAspects=true"
}
|
|
|
|
// RestoreIndices variant that pages through aspects by URN instead of offset.
task runRestoreIndicesUrn(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  // Fixed copy-paste: previous description was identical to runRestoreIndices'
  // and did not mention URN-based pagination.
  description = "Run the restore indices process locally using URN-based pagination."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"
  // Debug agent on 5003; points the app at the locally exposed schema registry.
  commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
      "-jar",
      "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api",
      "-Dserver.port=8083",
      bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "urnBasedPagination=true"
}
|
|
|
|
/**
 * Runs LoadIndices on locally running system. This ensures indices have the correct mappings
 * and then loads data from database to Elasticsearch.
 *
 * The process includes two steps:
 * 1. BuildIndicesStep: Ensures indices have the correct mappings (creates/updates index structure)
 * 2. LoadIndicesStep: Loads data from database to Elasticsearch
 *
 * The task automatically configures ES_BULK_REQUESTS_LIMIT to match the batch size
 * and disables ES_BULK_FLUSH_PERIOD for optimal bulk processor performance.
 *
 * Optional parameters:
 * - limit: Maximum number of records to process (default: no limit)
 * - batchSize: Number of records per batch (default: 10000)
 * - esThreadCount: Elasticsearch I/O thread count (default: 3, enables async bulk processing)
 * - urnLike: URN pattern filter (e.g., "urn:li:dataset:%")
 * - aspectNames: Comma-separated aspect names to filter
 * - lePitEpochMs: Process records created before this timestamp
 * - gePitEpochMs: Process records created after this timestamp
 *
 * Usage examples:
 *   ./gradlew runLoadIndices
 *   ./gradlew runLoadIndices -Plimit=5000
 *   ./gradlew runLoadIndices -Plimit=1000 -PbatchSize=2500
 *   ./gradlew runLoadIndices -PurnLike="urn:li:dataset:%"
 *   ./gradlew runLoadIndices -PesThreadCount=3
 */
task runLoadIndices(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the load indices process locally - ensures correct mappings and loads data from database to Elasticsearch."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"

  // Base command: debuggable (port 5003) `java -jar` invocation of the LoadIndices upgrade.
  def args = ["java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
              "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "LoadIndices"]

  // Add batchSize (default: 10000)
  def batchSize = project.hasProperty('batchSize') ? project.getProperty('batchSize') : '10000'
  args.addAll(["-a", "batchSize=${batchSize}"])

  // Optimize bulk request limit for sustained load
  environment "ES_BULK_REQUESTS_LIMIT", batchSize
  // Set flush period to 5 minutes to allow proper batching
  environment "ES_BULK_FLUSH_PERIOD", "300"

  // Configure Elasticsearch I/O threads for better BulkProcessor performance
  // Increase from default 2 to allow more concurrent HTTP connections
  def threadCount = project.hasProperty('esThreadCount') ? project.getProperty('esThreadCount') : '4'
  environment "ELASTICSEARCH_THREAD_COUNT", threadCount

  // Forward each optional filter property, when present, as a "-a key=value"
  // argument. (Replaces five identical copy-pasted if-blocks with one loop.)
  ['limit', 'urnLike', 'aspectNames', 'lePitEpochMs', 'gePitEpochMs'].each { prop ->
    if (project.hasProperty(prop)) {
      args.addAll(["-a", "${prop}=${project.getProperty(prop)}"])
    }
  }

  commandLine args
}
|
|
|
|
|
|
/**
 * Runs SqlSetup for MySQL on locally running system.
 * Configured for quickstart MySQL service running on localhost:3306
 * Uses environment variables for backwards compatibility with docker/mysql-setup
 *
 * Usage:
 *   ./gradlew runSqlSetupMysql
 *   MYSQL_USERNAME=datahub MYSQL_PASSWORD=datahub CREATE_USER=true ./gradlew runSqlSetupMysql
 *   CDC_MCL_PROCESSING_ENABLED=true CDC_USER=datahub_cdc CDC_PASSWORD=datahub_cdc ./gradlew runSqlSetupMysql
 */
task runSqlSetupMysql(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the SqlSetup process for MySQL locally - creates tables and optionally users/CDC. Uses environment variables for backwards compatibility."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"

  // Set standard Ebean environment variables for MySQL
  // NOTE(review): credentials below are local quickstart defaults, not secrets.
  environment "EBEAN_DATASOURCE_USERNAME", "datahub"
  environment "EBEAN_DATASOURCE_PASSWORD", "datahub"
  environment "EBEAN_DATASOURCE_URL", "jdbc:mysql://localhost:3306/datahub"
  environment "EBEAN_DATASOURCE_DRIVER", "com.mysql.cj.jdbc.Driver"
  environment "EBEAN_USE_IAM_AUTH", "false"

  // Set SqlSetup-specific environment variables
  environment "CREATE_TABLES", "true"
  environment "CREATE_USER", "false"
  environment "CDC_MCL_PROCESSING_ENABLED", "false"
  environment "CDC_USER", "datahub_cdc"
  environment "CDC_PASSWORD", "datahub_cdc"

  // Debuggable (port 5003) `java -jar` invocation of the SqlSetup upgrade.
  def args = ["java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
              "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SqlSetup"]

  commandLine args
}
|
|
|
|
/**
 * Runs SqlSetup for PostgreSQL on locally running system.
 * Configured for quickstart PostgreSQL service running on localhost:5432
 * Uses environment variables for backwards compatibility with docker/postgres-setup
 *
 * Usage:
 *   ./gradlew runSqlSetupPostgres
 *   POSTGRES_USERNAME=datahub POSTGRES_PASSWORD=datahub CREATE_USER=true ./gradlew runSqlSetupPostgres
 *   CDC_MCL_PROCESSING_ENABLED=true CDC_USER=datahub_cdc CDC_PASSWORD=datahub_cdc ./gradlew runSqlSetupPostgres
 */
task runSqlSetupPostgres(type: Exec) {
  dependsOn bootJar
  group = "Execution"
  description = "Run the SqlSetup process for PostgreSQL locally - creates database, tables and optionally users/CDC. Uses environment variables for backwards compatibility."
  environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml"

  // Set standard Ebean environment variables for PostgreSQL
  // NOTE(review): credentials below are local quickstart defaults, not secrets.
  environment "EBEAN_DATASOURCE_USERNAME", "datahub"
  environment "EBEAN_DATASOURCE_PASSWORD", "datahub"
  environment "EBEAN_DATASOURCE_URL", "jdbc:postgresql://localhost:5432/datahub"
  environment "EBEAN_DATASOURCE_DRIVER", "org.postgresql.Driver"
  environment "EBEAN_USE_IAM_AUTH", "false"

  // Set SqlSetup-specific environment variables
  environment "CREATE_TABLES", "true"
  environment "CREATE_USER", "false"
  environment "CDC_MCL_PROCESSING_ENABLED", "false"
  environment "CDC_USER", "datahub_cdc"
  environment "CDC_PASSWORD", "datahub_cdc"

  // Debuggable (port 5003) `java -jar` invocation of the SqlSetup upgrade.
  def args = ["java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n",
              "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SqlSetup"]

  commandLine args
}
|
|
|
|
// Builds the datahub-upgrade docker image from the boot jar plus the files the
// Dockerfile copies in (monitoring config, entity registry resources).
docker {
  dependsOn(bootJar)
  name "${docker_registry}/${docker_repo}:${versionTag}"
  dockerfile file("${rootProject.projectDir}/docker/${docker_repo}/Dockerfile")
  files bootJar.outputs.files
  // Build context: only the listed paths, minus hidden files (but keeping .dockerignore).
  files fileTree(rootProject.projectDir) {
    include '.dockerignore'
    include 'docker/monitoring/*'
    include "docker/${docker_repo}/*"
    include 'metadata-models/src/main/resources/*'
  }.exclude {
    i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden())
  }
  additionalTag("Debug", "${docker_registry}/${docker_repo}:debug")

  // Add build args if they are defined (needed for some CI or enterprise environments)
  def dockerBuildArgs = [:]
  if (project.hasProperty('alpineApkRepositoryUrl')) {
    dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl')
  }
  if (project.hasProperty('githubMirrorUrl')) {
    dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl')
  }
  if (project.hasProperty('mavenCentralRepositoryUrl')) {
    dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl')
  }

  if (dockerBuildArgs.size() > 0) {
    buildArgs(dockerBuildArgs)
  }
}
|