plugins { id("com.palantir.git-version") apply false }
apply plugin: 'java-library'
apply plugin: 'com.gradleup.shadow'
apply plugin: 'signing'
apply plugin: 'io.codearte.nexus-staging'
apply plugin: 'maven-publish'
apply from: '../../../gradle/coverage/java-coverage.gradle'
apply from: '../versioning.gradle'

// Since we only want to build shadow jars, disable the regular jar creation.
jar.enabled = false

// Define supported Scala versions - always build for both.
def scalaVersions = ['2.12', '2.13']

// "provided" marks implementation dependencies which need to be excluded, along
// with their transitive dependencies, from the shadow jar. Functionally it is
// exactly the same as "implementation" for compilation purposes.
configurations {
    provided
    implementation.extendsFrom provided
}

dependencies {
    constraints {
        provided(externalDependency.hadoopMapreduceClient) {
            because 'Needed for tie breaking of guava version need for spark and wiremock'
        }
        provided(externalDependency.hadoopCommon) {
            because 'required for org.apache.hadoop.util.StopWatch'
        }
        provided(externalDependency.commonsIo) {
            because 'required for org.apache.commons.io.Charsets that is used internally'
        }
    }

    provided(externalDependency.sparkSql)
    provided(externalDependency.sparkHive)

    implementation 'org.slf4j:slf4j-log4j12:2.0.7'
    implementation externalDependency.httpClient
    // NOTE(review): typesafeConfig was previously declared twice; deduplicated.
    implementation externalDependency.typesafeConfig
    implementation externalDependency.commonsLang
    implementation externalDependency.slf4jApi
    implementation externalDependency.json

    compileOnly externalDependency.lombok
    annotationProcessor externalDependency.lombok

    // NOTE(review): openlineage-converter was previously declared twice; deduplicated.
    implementation project(':metadata-integration:java:openlineage-converter')
    implementation project(path: ':metadata-integration:java:datahub-client')

    compileOnly("io.delta:delta-core_2.12:1.0.0")

    // Default to Scala 2.12 for main compilation; the Scala-specific shadow jar
    // tasks below swap in the matching openlineage-spark artifact per version.
    implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion"
    compileOnly "org.apache.iceberg:iceberg-spark3-runtime:0.12.1"
    compileOnly("org.apache.spark:spark-sql_2.12:3.1.3") {
        exclude group: 'org.eclipse.jetty', module: 'jetty-servlet'
        exclude group: 'org.eclipse.jetty', module: 'jetty-server'
        exclude group: 'org.eclipse.jetty', module: 'jetty-util'
        exclude group: 'org.eclipse.jetty', module: 'jetty-webapp'
        exclude group: 'org.eclipse.jetty', module: 'jetty-security'
    }
    compileOnly "io.github.spark-redshift-community:spark-redshift_2.12:6.2.0-spark_3.5"

    testCompileOnly externalDependency.lombok
    testAnnotationProcessor externalDependency.lombok

    // Tests need a concrete log4j available. Providing it here.
    testImplementation 'org.apache.logging.log4j:log4j-api:2.17.1'
    testImplementation 'org.slf4j:slf4j-log4j12:2.0.7'

    // JUnit 5 dependencies. Tests compile against the API only; the engine is a
    // runtime-only concern (it was previously also on testImplementation — removed).
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.2'

    testImplementation(externalDependency.postgresql) {
        exclude group: "com.fasterxml.jackson.core"
    }
    testImplementation externalDependency.mockito
    testImplementation(externalDependency.mockServer) {
        exclude group: "com.fasterxml.jackson.core"
    } // older version to allow older guava
    testImplementation(externalDependency.mockServerClient) {
        exclude group: "com.fasterxml.jackson.core"
    } // older version to allow older guava
    testImplementation(externalDependency.testContainersPostgresql)
}

// Sanity-check the produced shadow jar contents via an external script.
tasks.register('checkShadowJar', Exec) {
    commandLine 'sh', '-c', 'scripts/check_jar.sh'
}

// Create separate source and javadoc JARs for each Scala version.
scalaVersions.each { sv ->
    def scalaVersionUnderscore = sv.replace('.', '_')

    tasks.register("sourcesJar_${scalaVersionUnderscore}", Jar) {
        archiveClassifier = 'sources'
        archiveBaseName = "acryl-spark-lineage_${sv}"
        from sourceSets.main.allJava
    }

    tasks.register("javadocJar_${scalaVersionUnderscore}", Jar) {
        dependsOn javadoc
        archiveClassifier = 'javadoc'
        archiveBaseName = "acryl-spark-lineage_${sv}"
        from javadoc.destinationDir
    }
}

// Create shadow JAR tasks for each Scala version.
scalaVersions.each { sv ->
    tasks.register("shadowJar_${sv.replace('.', '_')}", com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar) {
        group = 'build'
        description = "Build shadow jar for Scala ${sv}"
        zip64 = true
        archiveClassifier = ''
        archiveBaseName = "acryl-spark-lineage_${sv}"
        mergeServiceFiles()
        from(sourceSets.main.output)

        // Add manifest with version information.
        manifest {
            attributes(
                'Implementation-Version': project.version,
                'Scala-Version': sv
            )
        }

        // Create a completely separate configuration for each Scala version so
        // the bundled openlineage-spark artifact matches the Scala binary version.
        def scalaConfig = project.configurations.detachedConfiguration()

        // Manually add all base dependencies except OpenLineage.
        scalaConfig.dependencies.add(project.dependencies.create('org.slf4j:slf4j-log4j12:2.0.7'))
        scalaConfig.dependencies.add(project.dependencies.create(externalDependency.httpClient))
        scalaConfig.dependencies.add(project.dependencies.create(externalDependency.typesafeConfig))
        scalaConfig.dependencies.add(project.dependencies.create(externalDependency.commonsLang))
        scalaConfig.dependencies.add(project.dependencies.create(externalDependency.slf4jApi))
        scalaConfig.dependencies.add(project.dependencies.create(externalDependency.json))

        // Add project dependencies.
        scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:openlineage-converter')))
        scalaConfig.dependencies.add(project.dependencies.create(project(':metadata-integration:java:datahub-client')))

        // Add the Scala-specific OpenLineage dependency - THIS IS THE KEY PART.
        scalaConfig.dependencies.add(project.dependencies.create("io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"))

        scalaConfig.canBeResolved = true
        configurations = [scalaConfig]

        // Everything reachable through "provided" must stay out of the shadow jar.
        def exclude_modules = project
            .configurations
            .provided
            .resolvedConfiguration
            .getLenientConfiguration()
            .getAllModuleDependencies()
            .collect { it.name }

        dependencies {
            exclude(dependency {
                exclude_modules.contains(it.name)
            })
            exclude(dependency("org.slf4j::"))
            exclude(dependency("ch.qos.logback:"))
            exclude("org/apache/commons/logging/**")
        }

        exclude('module-info.class', 'META-INF/versions/**', 'LICENSE', 'NOTICE')
        exclude '**/libzstd-jni.*'
        exclude '**/com_github_luben_zstd_*'

        // Apply all the relocations so bundled third-party classes cannot clash
        // with the versions already on the user's Spark classpath.
        relocate 'avro.com', 'io.acryl.shaded.avro.com'
        relocate 'org.json', 'io.acryl.shaded.org.json'
        relocate 'com.github', 'io.acryl.shaded.com.github'
        relocate 'avroutil1', 'io.acryl.shaded.avroutil1'
        relocate 'com.sun.activation', 'io.acryl.shaded.com.sun.activation'
        relocate 'com.sun.codemodel', 'io.acryl.shaded.com.sun.codemodel'
        relocate 'com.sun.mail', 'io.acryl.shaded.com.sun.mail'
        relocate 'org.apache.hc', 'io.acryl.shaded.http'
        relocate 'org.apache.commons.codec', 'io.acryl.shaded.org.apache.commons.codec'
        relocate 'org.apache.commons.compress', 'io.acryl.shaded.org.apache.commons.compress'
        relocate 'org.apache.commons.lang3', 'io.acryl.shaded.org.apache.commons.lang3'
        relocate 'mozilla', 'datahub.spark2.shaded.mozilla'
        relocate 'com.typesafe', 'io.acryl.shaded.com.typesafe'
        relocate 'io.opentracing', 'io.acryl.shaded.io.opentracing'
        relocate 'io.netty', 'io.acryl.shaded.io.netty'
        relocate 'ch.randelshofer', 'io.acryl.shaded.ch.randelshofer'
        relocate 'ch.qos', 'io.acryl.shaded.ch.qos'
        relocate 'org.springframework', 'io.acryl.shaded.org.springframework'
        relocate 'com.fasterxml.jackson', 'io.acryl.shaded.jackson'
        relocate 'org.yaml', 'io.acryl.shaded.org.yaml'
        relocate 'net.jcip.annotations', 'io.acryl.shaded.annotations'
        relocate 'javassist', 'io.acryl.shaded.javassist'
        relocate 'edu.umd.cs.findbugs', 'io.acryl.shaded.findbugs'
        relocate 'com.google.common', 'io.acryl.shaded.com.google.common'
        relocate 'org.reflections', 'io.acryl.shaded.org.reflections'
        relocate 'st4hidden', 'io.acryl.shaded.st4hidden'
        relocate 'org.stringtemplate', 'io.acryl.shaded.org.stringtemplate'
        relocate 'org.abego.treelayout', 'io.acryl.shaded.treelayout'
        relocate 'javax.annotation', 'io.acryl.shaded.javax.annotation'
        relocate 'com.github.benmanes.caffeine', 'io.acryl.shaded.com.github.benmanes.caffeine'
        relocate 'org.checkerframework', 'io.acryl.shaded.org.checkerframework'
        relocate 'com.google.errorprone', 'io.acryl.shaded.com.google.errorprone'
        relocate 'com.sun.jna', 'io.acryl.shaded.com.sun.jna'

        // Debug output to verify we're using the right dependency.
        doFirst {
            println "Building JAR for Scala ${sv}"
            println "OpenLineage dependency: io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"
            println "Configuration dependencies:"
            scalaConfig.allDependencies.each { dep ->
                println "  - ${dep.group}:${dep.name}:${dep.version}"
            }
        }
    }
}

// Keep the original shadowJar task and make it build all versions.
shadowJar {
    // Make shadowJar depend on all Scala version builds.
    dependsOn scalaVersions.collect { "shadowJar_${it.replace('.', '_')}" }

    // Disable actual JAR creation for this task since we create versioned ones.
    enabled = false

    doLast {
        println "Built shadow JARs for all Scala versions: ${scalaVersions.join(', ')}"
    }
}

checkShadowJar {
    dependsOn shadowJar
}

// Task to build all Scala versions (always runs).
tasks.register('buildAllScalaVersions') {
    group = 'build'
    description = 'Build shadow jars for all Scala versions'
    dependsOn scalaVersions.collect { "shadowJar_${it.replace('.', '_')}" }
}

test {
    forkEvery = 1
    useJUnitPlatform()
}

assemble {
    dependsOn shadowJar
}

task integrationTest(type: Exec, dependsOn: [shadowJar, ':docker:quickstart']) {
    environment "RUN_QUICKSTART", "false"
    commandLine "spark-smoke-test/smoke.sh"
}

// Task to debug dependency resolution for each Scala version.
tasks.register('debugDependencies') {
    group = 'help'
    description = 'Show what dependencies are resolved for each Scala version'

    doLast {
        def supportedScalaVersions = ['2.12', '2.13']

        println "=== Base Implementation Dependencies ==="
        project.configurations.implementation.allDependencies.each { dep ->
            println "  ${dep.group}:${dep.name}:${dep.version}"
        }

        supportedScalaVersions.each { sv ->
            println "\n=== Dependencies for Scala ${sv} ==="

            // Create the same configuration as the shadow task.
            def scalaConfig = project.configurations.detachedConfiguration()

            // Add the same dependencies as in the shadow task.
            scalaConfig.dependencies.add(project.dependencies.create('org.slf4j:slf4j-log4j12:2.0.7'))
            scalaConfig.dependencies.add(project.dependencies.create(externalDependency.typesafeConfig))
            scalaConfig.dependencies.add(project.dependencies.create(externalDependency.json))
            scalaConfig.dependencies.add(project.dependencies.create("io.openlineage:openlineage-spark_${sv}:${openLineageVersion}"))

            println "Configured dependencies for Scala ${sv}:"
            scalaConfig.allDependencies.each { dep ->
                println "  ADDED: ${dep.group}:${dep.name}:${dep.version}"
            }

            try {
                scalaConfig.canBeResolved = true
                println "\nResolved dependencies for Scala ${sv}:"
                scalaConfig.resolvedConfiguration.resolvedArtifacts.each { artifact ->
                    def id = artifact.moduleVersion.id
                    if (id.name.contains('openlineage')) {
                        println "  ✅ OPENLINEAGE: ${id.group}:${id.name}:${id.version}"
                    } else {
                        println "  ${id.group}:${id.name}:${id.version}"
                    }
                }
            } catch (Exception e) {
                println "  ERROR resolving dependencies: ${e.message}"
            }
        }

        println "\n=== Summary ==="
        println "The key difference should be in the OpenLineage Spark dependency:"
        println "  - Scala 2.12 should have: openlineage-spark_2.12"
        println "  - Scala 2.13 should have: openlineage-spark_2.13"
        println "Note: Scala itself won't be in the JARs (it's provided/compileOnly)"
    }
}

publishing {
    publications {
        // Create publications for each Scala version - always build both.
        scalaVersions.each { sv ->
            def scalaVersionUnderscore = sv.replace('.', '_')
            "shadow_${scalaVersionUnderscore}"(MavenPublication) { publication ->
                // NOTE(review): groupId/artifactId belong on the publication, not
                // inside pom{} (MavenPom has no such properties; the old
                // assignments silently delegated to the enclosing scopes).
                groupId = 'io.acryl'
                artifactId = "acryl-spark-lineage_${sv}"

                artifact tasks["shadowJar_${scalaVersionUnderscore}"]
                artifact tasks["javadocJar_${scalaVersionUnderscore}"]
                artifact tasks["sourcesJar_${scalaVersionUnderscore}"]

                pom {
                    name = "Acryl Spark Lineage (Scala ${sv})"
                    description = "Library to push data lineage from spark to datahub (Scala ${sv})"
                    url = 'https://docs.datahub.com'

                    scm {
                        connection = 'scm:git:git://github.com/datahub-project/datahub.git'
                        developerConnection = 'scm:git:ssh://github.com:datahub-project/datahub.git'
                        url = 'https://github.com/datahub-project/datahub.git'
                    }

                    licenses {
                        license {
                            name = 'The Apache License, Version 2.0'
                            url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
                        }
                    }

                    developers {
                        developer {
                            id = 'datahub'
                            name = 'Datahub'
                            email = 'datahub@acryl.io'
                        }
                    }
                }
            }
        }
    }

    repositories {
        maven {
            def releasesRepoUrl = "https://ossrh-staging-api.central.sonatype.com/service/local/staging/deploy/maven2/"
            def snapshotsRepoUrl = "https://ossrh-staging-api.central.sonatype.com/content/repositories/snapshots/"
            def ossrhUsername = System.getenv('RELEASE_USERNAME')
            def ossrhPassword = System.getenv('RELEASE_PASSWORD')
            credentials {
                username ossrhUsername
                password ossrhPassword
            }
            url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
        }
    }
}

signing {
    def signingKey = findProperty("signingKey")
    def signingPassword = System.getenv("SIGNING_PASSWORD")
    useInMemoryPgpKeys(signingKey, signingPassword)

    // Sign all publications.
    publishing.publications.each { publication ->
        sign publication
    }
}

nexusStaging {
    serverUrl = "https://ossrh-staging-api.central.sonatype.com/service/local/" // required only for projects registered in Sonatype after 2021-02-24
    username = System.getenv("RELEASE_USERNAME")
    password = System.getenv("RELEASE_PASSWORD")
}