fix(datahub-client): fix shadow jar build, correct spark-lineage url parsing (#3871)

This commit is contained in:
Swaroop Jagadish 2022-01-11 14:55:21 -08:00 committed by GitHub
parent 6f7c2120fd
commit adce0dea39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 68 additions and 23 deletions

View File

@ -33,8 +33,7 @@ jobs:
# running build first without datahub-web-react:yarnBuild and then with it is 100% stable
# datahub-frontend:unzipAssets depends on datahub-web-react:yarnBuild but gradle does not know about it
run: |
./gradlew :metadata-integration:java:spark-lineage:build
./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x datahub-web-react:yarnBuild -x datahub-frontend:unzipAssets -x :metadata-integration:java:spark-lineage:test
./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x datahub-web-react:yarnBuild -x datahub-frontend:unzipAssets
./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test
- uses: actions/upload-artifact@v2
if: always()

View File

@ -121,9 +121,9 @@ project.ext.externalDependency = [
'springKafka': 'org.springframework.kafka:spring-kafka:2.2.14.RELEASE',
'springActuator': 'org.springframework.boot:spring-boot-starter-actuator:2.1.4.RELEASE',
'testng': 'org.testng:testng:7.3.0',
'testContainers': 'org.testcontainers:testcontainers:1.15.1',
'testContainersJunit': 'org.testcontainers:junit-jupiter:1.15.1',
'testContainersPostgresql':'org.testcontainers:postgresql:1.2.0',
'testContainers': 'org.testcontainers:testcontainers:1.15.3',
'testContainersJunit': 'org.testcontainers:junit-jupiter:1.15.3',
'testContainersPostgresql':'org.testcontainers:postgresql:1.15.3',
'testContainersElasticsearch': 'org.testcontainers:elasticsearch:1.15.3',
'wiremock':'com.github.tomakehurst:wiremock:2.10.0',
'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14'
@ -135,14 +135,19 @@ allprojects {
apply plugin: 'checkstyle'
}
subprojects {
apply plugin: 'maven'
configure(subprojects.findAll {it.name != 'spark-lineage'}) {
configurations.all {
exclude group: "io.netty", module: "netty"
exclude group: "log4j", module: "log4j"
}
}
subprojects {
apply plugin: 'maven'
plugins.withType(JavaPlugin) {
dependencies {
testCompile externalDependency.testng

View File

@ -2,13 +2,16 @@ apply plugin: 'java'
apply plugin: 'com.github.johnrengelman.shadow'
apply plugin: 'jacoco'
jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation
dependencies {
compile project(':metadata-models')
compile externalDependency.httpAsyncClient
compile externalDependency.jacksonDataBind
implementation project(':metadata-models')
shadow externalDependency.httpAsyncClient // we want our clients to provide this
implementation externalDependency.jacksonDataBind
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
testCompile externalDependency.httpAsyncClient // needed as shadow excludes it
testCompile externalDependency.mockito
testCompile externalDependency.mockServer
testCompile externalDependency.mockServerClient
@ -23,9 +26,39 @@ test {
finalizedBy jacocoTestReport
}
// Exec task that runs scripts/check_jar.sh through `sh -c`.
// The script path is relative, so it is resolved against the task's working directory.
task checkShadowJar(type: Exec) {
commandLine 'sh', '-c', 'scripts/check_jar.sh'
}
shadowJar {
zip64=true
classifier=''
archiveClassifier = ''
dependencies {
exclude(dependency('org.apache.httpcomponents:httpasyncclient'))
}
mergeServiceFiles()
// we relocate namespaces manually, because we want to know exactly which libs we are exposing and why
// we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first
relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson'
relocate 'net.jcip.annotations', 'datahub.shaded.annotations'
relocate 'javassist', 'datahub.shaded.javassist'
relocate 'edu.umd.cs.findbugs', 'datahub.shaded.findbugs'
relocate 'org.antlr', 'datahub.shaded.org.antlr'
relocate 'antlr', 'datahub.shaded.antlr'
relocate 'com.google.common', 'datahub.shaded.com.google.common'
relocate 'org.apache.commons', 'datahub.shaded.org.apache.commons'
relocate 'org.reflections', 'datahub.shaded.org.reflections'
relocate 'st4hidden', 'datahub.shaded.st4hidden'
relocate 'org.stringtemplate', 'datahub.shaded.org.stringtemplate'
relocate 'org.abego.treelayout', 'datahub.shaded.treelayout'
relocate 'org.slf4j', 'datahub.shaded.slf4j'
relocate 'javax.annotation', 'datahub.shaded.javax.annotation'
finalizedBy checkShadowJar
}
// Make sure shadowJar has run before the jar check executes.
checkShadowJar {
dependsOn shadowJar
}
assemble {

View File

@ -0,0 +1,9 @@
# This script checks the shadow jar to ensure that we only have allowed classes being exposed through the jar
# Exit status: 0 when every entry is allowed; 1 when unexpected entries are found
# or when the jar cannot be listed at all.
jar_file=build/libs/datahub-client.jar

# List the jar up front. If `jar` fails (missing file, tool not on PATH) or prints nothing,
# the filter pipeline below would see no input and the check would wrongly report success.
if ! listing=$(jar -tvf "$jar_file") || [ -z "$listing" ]; then
    echo "Could not list the contents of $jar_file" >&2
    exit 1
fi

# Drop every allowed entry; anything that survives the filters is unexpected and is
# printed so it shows up in the build log. The last grep exits 0 only when at least
# one line survived.
if printf '%s\n' "$listing" | grep -v "datahub/shaded" | grep -v "META-INF" | grep -v "com/linkedin" | grep -v "com/datahub" | grep -v "datahub" | grep -v "entity-registry" | grep -v "pegasus" | grep -v "legacyPegasusSchemas/" | grep -v " com/$"; then
    echo "Found other packages than what we were expecting"
    exit 1
else
    echo "No other packages found. Great"
    exit 0
fi

View File

@ -22,18 +22,16 @@ dependencies {
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
implementation(project(':metadata-integration:java:datahub-client')) {
exclude group: "org.antlr"
exclude group: "com.google.guava" // causes issues with Guava Stopwatch constructor
exclude group: "com.fasterxml.jackson.core"
}
implementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow')
implementation(externalDependency.sparkSql)
implementation(externalDependency.sparkHive)
// Tests need a concrete log4j available. Providing it here
testImplementation 'org.apache.logging.log4j:log4j-api:2.17.1'
testImplementation 'org.apache.logging.log4j:log4j-core:2.17.1'
implementation(externalDependency.sparkSql){
exclude group: "org.apache.hadoop"
}
implementation(externalDependency.sparkHive){
exclude group: "org.apache.hadoop"
}
testImplementation(externalDependency.postgresql){
exclude group: "com.fasterxml.jackson.core"
@ -48,7 +46,7 @@ dependencies {
exclude group: "com.fasterxml.jackson.core"
} // older version to allow older guava
testImplementation(externalDependency.testContainersPostgresql) // older version to allow older jackson
testImplementation(externalDependency.testContainersPostgresql)
}

View File

@ -32,6 +32,7 @@ public class JdbcDataset implements SparkDataset {
url = url.replaceFirst("jdbc:", "");
if (url.contains("postgres")) {
url = url.substring(url.lastIndexOf('/') + 1);
url = url.substring(0, url.indexOf('?'));
}
// TODO different DBs have different formats. TBD mapping to data source names
return url + "." + tbl;