406 lines
15 KiB
Groovy
Raw Normal View History

// git-version is put on the classpath here but not applied; it is presumably
// applied by versioning.gradle below — TODO confirm.
plugins {
  id("com.palantir.git-version") apply false
}
apply plugin: 'java-library'
apply plugin: 'com.gradleup.shadow'
apply plugin: 'signing'
apply plugin: 'io.codearte.nexus-staging'
apply plugin: 'maven-publish'
// Shared coverage and versioning conventions from the repo root.
apply from: '../../../gradle/coverage/java-coverage.gradle'
apply from: '../versioning.gradle'
jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation
// Define supported Scala versions - always build for both
def scalaVersions = ['2.12', '2.13']
// Mark implementation dependencies which need to be excluded, along with their
// transitive dependencies, from the shadow jar.
// Functionality is otherwise exactly the same as "implementation".
configurations {
  provided
  implementation.extendsFrom provided
}
dependencies {
  // Version tie-breaks for transitive conflicts; these do not add dependencies
  // by themselves.
  constraints {
    provided(externalDependency.hadoopMapreduceClient) {
      because 'Needed for tie breaking of guava version need for spark and wiremock'
    }
    provided(externalDependency.hadoopCommon) {
      because 'required for org.apache.hadoop.util.StopWatch'
    }
    provided(externalDependency.commonsIo) {
      because 'required for org.apache.commons.io.Charsets that is used internally'
    }
  }

  // Spark itself is "provided": needed to compile, excluded from the shadow jar
  // (see the exclude_modules computation in the shadowJar_* tasks).
  provided(externalDependency.sparkSql)
  provided(externalDependency.sparkHive)

  implementation 'org.slf4j:slf4j-log4j12:2.0.7'
  implementation externalDependency.httpClient
  implementation externalDependency.typesafeConfig
  implementation externalDependency.commonsLang
  implementation externalDependency.slf4jApi
  compileOnly externalDependency.lombok
  annotationProcessor externalDependency.lombok
  implementation externalDependency.json
  implementation project(':metadata-integration:java:openlineage-converter')
  implementation project(path: ':metadata-integration:java:datahub-client')

  compileOnly("io.delta:delta-core_2.12:1.0.0")
  // Default to Scala 2.12 for main compilation; the per-version shadow jars
  // swap in the matching openlineage-spark artifact.
  implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion"
  compileOnly "org.apache.iceberg:iceberg-spark3-runtime:0.12.1"
  compileOnly("org.apache.spark:spark-sql_2.12:3.1.3") {
    // Jetty is provided by the Spark runtime; keep it off our compile classpath
    // to avoid version clashes.
    exclude group: 'org.eclipse.jetty', module: 'jetty-servlet'
    exclude group: 'org.eclipse.jetty', module: 'jetty-server'
    exclude group: 'org.eclipse.jetty', module: 'jetty-util'
    exclude group: 'org.eclipse.jetty', module: 'jetty-webapp'
    exclude group: 'org.eclipse.jetty', module: 'jetty-security'
  }
  compileOnly "io.github.spark-redshift-community:spark-redshift_2.12:6.2.0-spark_3.5"

  testCompileOnly externalDependency.lombok
  testAnnotationProcessor externalDependency.lombok

  // Tests need a concrete log4j available. Providing it here
  testImplementation 'org.apache.logging.log4j:log4j-api:2.17.1'
  testImplementation 'org.slf4j:slf4j-log4j12:2.0.7'

  // JUnit 5 dependencies (engine is runtime-only; the API is what tests compile against)
  testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
  testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.2'

  testImplementation(externalDependency.postgresql) {
    exclude group: "com.fasterxml.jackson.core"
  }
  testImplementation externalDependency.mockito
  testImplementation(externalDependency.mockServer) {
    exclude group: "com.fasterxml.jackson.core"
  } // older version to allow older guava
  testImplementation(externalDependency.mockServerClient) {
    exclude group: "com.fasterxml.jackson.core"
  } // older version to allow older guava
  testImplementation(externalDependency.testContainersPostgresql)
}
// Runs the repo shell script that validates the contents of the shadow jar.
tasks.register('checkShadowJar', Exec) {
  commandLine('sh', '-c', 'scripts/check_jar.sh')
}
// Maven Central requires sources and javadoc jars; register one pair per
// supported Scala version so each publication is self-contained.
scalaVersions.each { scalaVersion ->
  def suffix = scalaVersion.replace('.', '_')
  def jarBaseName = "acryl-spark-lineage_${scalaVersion}"

  tasks.register("sourcesJar_${suffix}", Jar) {
    archiveClassifier = 'sources'
    archiveBaseName = jarBaseName
    from sourceSets.main.allJava
  }

  tasks.register("javadocJar_${suffix}", Jar) {
    dependsOn javadoc
    archiveClassifier = 'javadoc'
    archiveBaseName = jarBaseName
    from javadoc.destinationDir
  }
}
// Create one shadow JAR task per Scala version. Each task builds a fat jar
// from the SAME compiled classes (sourceSets.main.output, compiled against
// Scala 2.12 — see the implementation dependency above) but bundles the
// Scala-version-specific openlineage-spark artifact and its transitive deps.
scalaVersions.each { sv ->
  tasks.register("shadowJar_${sv.replace('.', '_')}", com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar) {
    group = 'build'
    description = "Build shadow jar for Scala ${sv}"
    // zip64: the merged jar can exceed the 65k-entry classic zip limit.
    zip64 = true
    archiveClassifier = ''
    archiveBaseName = "acryl-spark-lineage_${sv}"
    // Merge META-INF/services files from all bundled jars instead of letting
    // the last one win (needed for service-loader based plugins).
    mergeServiceFiles()
    from(sourceSets.main.output)
    // Add manifest with version information
    manifest {
      attributes(
        'Implementation-Version': project.version,
        'Scala-Version': sv
      )
    }
    // Create a completely separate configuration for each Scala version.
    // A detached configuration is independent of the project's normal
    // configurations, so each task resolves its own dependency graph.
    def scalaConfig = project.configurations.detachedConfiguration()
    // Manually add all base dependencies except OpenLineage
    scalaConfig.dependencies.add(project.dependencies.create('org.slf4j:slf4j-log4j12:2.0.7'))
    scalaConfig.dependencies.add(project.dependencies.create(externalDependency.httpClient))
    scalaConfig.dependencies.add(project.dependencies.create(externalDependency.typesafeConfig))
    scalaConfig.dependencies.add(project.dependencies.create(externalDependency.commonsLang))
    scalaConfig.dependencies.add(project.dependencies.create(externalDependency.slf4jApi))
    scalaConfig.dependencies.add(project.dependencies.create(externalDependency.json))
    // Add project dependencies
    scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:openlineage-converter')))
    scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:datahub-client')))
    // Add the Scala-specific OpenLineage dependency - THIS IS THE KEY PART
    scalaConfig.dependencies.add(project.dependencies.create("io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"))
    scalaConfig.canBeResolved = true
    // Point the ShadowJar task at our detached configuration only.
    configurations = [scalaConfig]
    // Collect the names of everything reachable from the 'provided'
    // configuration (Spark, Hadoop, ...) so it can be excluded below.
    // NOTE(review): resolving 'provided' here happens at configuration time —
    // presumably acceptable for this build, but it does force early resolution.
    def exclude_modules = project
      .configurations
      .provided
      .resolvedConfiguration
      .getLenientConfiguration()
      .getAllModuleDependencies()
      .collect {
        it.name
      }
    dependencies {
      exclude(dependency {
        exclude_modules.contains(it.name)
      })
      // slf4j and logback are expected to come from the Spark runtime.
      exclude(dependency("org.slf4j::"))
      exclude(dependency("ch.qos.logback:"))
      exclude("org/apache/commons/logging/**")
    }
    exclude('module-info.class', 'META-INF/versions/**', 'LICENSE', 'NOTICE')
    // Drop native zstd binaries; Spark ships its own.
    exclude '**/libzstd-jni.*'
    exclude '**/com_github_luben_zstd_*'
    // Apply all the relocations: shade bundled third-party packages under
    // io.acryl.shaded.* so they cannot clash with the user's Spark classpath.
    relocate 'avro.com', 'io.acryl.shaded.avro.com'
    relocate 'org.json', 'io.acryl.shaded.org.json'
    relocate 'com.github', 'io.acryl.shaded.com.github'
    relocate 'avroutil1', 'io.acryl.shaded.avroutil1'
    relocate 'com.sun.activation', 'io.acryl.shaded.com.sun.activation'
    relocate 'com.sun.codemodel', 'io.acryl.shaded.com.sun.codemodel'
    relocate 'com.sun.mail', 'io.acryl.shaded.com.sun.mail'
    relocate 'org.apache.hc', 'io.acryl.shaded.http'
    relocate 'org.apache.commons.codec', 'io.acryl.shaded.org.apache.commons.codec'
    relocate 'org.apache.commons.compress', 'io.acryl.shaded.org.apache.commons.compress'
    relocate 'org.apache.commons.lang3', 'io.acryl.shaded.org.apache.commons.lang3'
    // NOTE(review): 'mozilla' is shaded under a datahub.spark2 prefix unlike
    // the others — looks historical; confirm before normalizing.
    relocate 'mozilla', 'datahub.spark2.shaded.mozilla'
    relocate 'com.typesafe', 'io.acryl.shaded.com.typesafe'
    relocate 'io.opentracing', 'io.acryl.shaded.io.opentracing'
    relocate 'io.netty', 'io.acryl.shaded.io.netty'
    relocate 'ch.randelshofer', 'io.acryl.shaded.ch.randelshofer'
    relocate 'ch.qos', 'io.acryl.shaded.ch.qos'
    relocate 'org.springframework', 'io.acryl.shaded.org.springframework'
    relocate 'com.fasterxml.jackson', 'io.acryl.shaded.jackson'
    relocate 'org.yaml', 'io.acryl.shaded.org.yaml'
    relocate 'net.jcip.annotations', 'io.acryl.shaded.annotations'
    relocate 'javassist', 'io.acryl.shaded.javassist'
    relocate 'edu.umd.cs.findbugs', 'io.acryl.shaded.findbugs'
    relocate 'com.google.common', 'io.acryl.shaded.com.google.common'
    relocate 'org.reflections', 'io.acryl.shaded.org.reflections'
    relocate 'st4hidden', 'io.acryl.shaded.st4hidden'
    relocate 'org.stringtemplate', 'io.acryl.shaded.org.stringtemplate'
    relocate 'org.abego.treelayout', 'io.acryl.shaded.treelayout'
    relocate 'javax.annotation', 'io.acryl.shaded.javax.annotation'
    relocate 'com.github.benmanes.caffeine', 'io.acryl.shaded.com.github.benmanes.caffeine'
    relocate 'org.checkerframework', 'io.acryl.shaded.org.checkerframework'
    relocate 'com.google.errorprone', 'io.acryl.shaded.com.google.errorprone'
    relocate 'com.sun.jna', 'io.acryl.shaded.com.sun.jna'
    // Debug output to verify we're using the right dependency
    doFirst {
      println "Building JAR for Scala ${sv}"
      println "OpenLineage dependency: io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"
      println "Configuration dependencies:"
      scalaConfig.allDependencies.each { dep ->
        println " - ${dep.group}:${dep.name}:${dep.version}"
      }
    }
  }
}
// The stock shadowJar task no longer produces an artifact itself; it is kept
// as an aggregate that fans out to the per-Scala-version shadow jar tasks so
// existing invocations of `shadowJar` keep working.
shadowJar {
  def versionedJarTasks = scalaVersions.collect { v -> "shadowJar_${v.replace('.', '_')}" }
  dependsOn versionedJarTasks
  // No JAR from this task — the versioned tasks create the real artifacts.
  enabled = false
  doLast {
    println "Built shadow JARs for all Scala versions: ${scalaVersions.join(', ')}"
  }
}

// Jar-content validation runs after every shadow jar has been built.
checkShadowJar {
  dependsOn shadowJar
}
// Convenience aggregate: builds the shadow jar for every supported Scala
// version in one invocation.
tasks.register('buildAllScalaVersions') {
  group = 'build'
  description = 'Build shadow jars for all Scala versions'
  scalaVersions.each { v ->
    dependsOn "shadowJar_${v.replace('.', '_')}"
  }
}
// Unit tests run on the JUnit 5 platform; fork a fresh JVM per test class to
// isolate Spark session state between classes.
test {
  forkEvery = 1
  useJUnitPlatform()
}

assemble {
  dependsOn shadowJar
}

// Spark smoke test against a quickstart DataHub instance. Registered lazily
// (tasks.register) for consistency with the rest of this build instead of the
// eager `task integrationTest(...)` form.
tasks.register('integrationTest', Exec) {
  dependsOn shadowJar, ':docker:quickstart'
  environment "RUN_QUICKSTART", "false"
  commandLine "spark-smoke-test/smoke.sh"
}
// Remove the old shared tasks since we now create version-specific ones
// task sourcesJar(type: Jar) {
// archiveClassifier = 'sources'
// from sourceSets.main.allJava
// }
// task javadocJar(type: Jar, dependsOn: javadoc) {
// archiveClassifier = 'javadoc'
// from javadoc.destinationDir
// }
// Task to debug dependency resolution for each Scala version.
// The detached configuration built here MUST mirror the one assembled inside
// the shadowJar_* tasks; previously several dependencies (httpClient,
// commonsLang, slf4jApi and both project modules) were missing, so the debug
// output did not reflect what actually goes into the jars.
tasks.register('debugDependencies') {
  group = 'help'
  description = 'Show what dependencies are resolved for each Scala version'
  doLast {
    println "=== Base Implementation Dependencies ==="
    project.configurations.implementation.allDependencies.each { dep ->
      println " ${dep.group}:${dep.name}:${dep.version}"
    }
    // Reuse the shared scalaVersions list instead of a duplicated local copy.
    scalaVersions.each { sv ->
      println "\n=== Dependencies for Scala ${sv} ==="
      // Create the same configuration as the shadow task
      def scalaConfig = project.configurations.detachedConfiguration()
      // Add the same dependencies as in the shadow task
      scalaConfig.dependencies.add(project.dependencies.create('org.slf4j:slf4j-log4j12:2.0.7'))
      scalaConfig.dependencies.add(project.dependencies.create(externalDependency.httpClient))
      scalaConfig.dependencies.add(project.dependencies.create(externalDependency.typesafeConfig))
      scalaConfig.dependencies.add(project.dependencies.create(externalDependency.commonsLang))
      scalaConfig.dependencies.add(project.dependencies.create(externalDependency.slf4jApi))
      scalaConfig.dependencies.add(project.dependencies.create(externalDependency.json))
      scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:openlineage-converter')))
      scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:datahub-client')))
      // Scala-specific OpenLineage artifact — the key per-version difference.
      scalaConfig.dependencies.add(project.dependencies.create("io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"))
      println "Configured dependencies for Scala ${sv}:"
      scalaConfig.allDependencies.each { dep ->
        println " ADDED: ${dep.group}:${dep.name}:${dep.version}"
      }
      try {
        scalaConfig.canBeResolved = true
        println "\nResolved dependencies for Scala ${sv}:"
        scalaConfig.resolvedConfiguration.resolvedArtifacts.each { artifact ->
          def id = artifact.moduleVersion.id
          if (id.name.contains('openlineage')) {
            println " ✅ OPENLINEAGE: ${id.group}:${id.name}:${id.version}"
          } else {
            println " ${id.group}:${id.name}:${id.version}"
          }
        }
      } catch (Exception e) {
        // Best-effort: resolution may fail offline; report and continue.
        println " ERROR resolving dependencies: ${e.message}"
      }
    }
    println "\n=== Summary ==="
    println "The key difference should be in the OpenLineage Spark dependency:"
    println " - Scala 2.12 should have: openlineage-spark_2.12"
    println " - Scala 2.13 should have: openlineage-spark_2.13"
    println "Note: Scala itself won't be in the JARs (it's provided/compileOnly)"
  }
}
// One Maven publication per Scala version, each bundling the shadow jar plus
// its sources and javadoc jars under the artifactId acryl-spark-lineage_<sv>.
publishing {
  publications {
    // Create publications for each Scala version - always build both
    scalaVersions.each { sv ->
      def scalaVersionUnderscore = sv.replace('.', '_')
      "shadow_${scalaVersionUnderscore}"(MavenPublication) { publication ->
        artifactId = "acryl-spark-lineage_${sv}"
        artifact tasks["shadowJar_${scalaVersionUnderscore}"]
        artifact tasks["javadocJar_${scalaVersionUnderscore}"]
        artifact tasks["sourcesJar_${scalaVersionUnderscore}"]
        pom {
          name = "Acryl Spark Lineage (Scala ${sv})"
          // NOTE(review): `group` and `artifactId` inside the pom {} block are
          // not standard MavenPom properties — the effective coordinates come
          // from project.group and the publication's artifactId above. Verify
          // these two lines actually take effect before relying on them.
          group = 'io.acryl'
          artifactId = "acryl-spark-lineage_${sv}"
          description = "Library to push data lineage from spark to datahub (Scala ${sv})"
          url = 'https://docs.datahub.com'
          scm {
            connection = 'scm:git:git://github.com/datahub-project/datahub.git'
            developerConnection = 'scm:git:ssh://github.com:datahub-project/datahub.git'
            url = 'https://github.com/datahub-project/datahub.git'
          }
          licenses {
            license {
              name = 'The Apache License, Version 2.0'
              url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
            }
          }
          developers {
            developer {
              id = 'datahub'
              name = 'Datahub'
              email = 'datahub@acryl.io'
            }
          }
        }
      }
    }
  }
  repositories {
    maven {
      // Sonatype Central (OSSRH staging API); snapshot vs release repo is
      // chosen from the project version suffix.
      def releasesRepoUrl = "https://ossrh-staging-api.central.sonatype.com/service/local/staging/deploy/maven2/"
      def snapshotsRepoUrl = "https://ossrh-staging-api.central.sonatype.com/content/repositories/snapshots/"
      // Credentials come from the CI environment, never from the repo.
      def ossrhUsername = System.getenv('RELEASE_USERNAME')
      def ossrhPassword = System.getenv('RELEASE_PASSWORD')
      credentials {
        username ossrhUsername
        password ossrhPassword
      }
      url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
    }
  }
}
// Sign every publication (required for Maven Central releases). The PGP key
// comes from a Gradle property and the passphrase from the environment, so
// neither is checked into version control.
signing {
  def signingKey = findProperty("signingKey")
  def signingPassword = System.getenv("SIGNING_PASSWORD")
  useInMemoryPgpKeys(signingKey, signingPassword)
  // Sign the publication container rather than eagerly iterating it:
  // `sign publishing.publications` also covers publications that are
  // registered after this block is evaluated.
  sign publishing.publications
}
// Automates Sonatype staging-repository close/release after upload.
nexusStaging {
  serverUrl = "https://ossrh-staging-api.central.sonatype.com/service/local/"
  // required only for projects registered in Sonatype after 2021-02-24;
  // same CI credentials as the publishing repository above.
  username = System.getenv("RELEASE_USERNAME")
  password = System.getenv("RELEASE_PASSWORD")
}