diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 45ba88ba3e..5f96dceb5b 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -25,10 +25,10 @@ jobs:
     timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml
index a1d7a1f47d..757e02a835 100644
--- a/.github/workflows/check-datahub-jars.yml
+++ b/.github/workflows/check-datahub-jars.yml
@@ -30,10 +30,10 @@ jobs:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index bb652fbbfa..b73b43b923 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -359,10 +359,10 @@ jobs:
     steps:
      - name: Check out the repo
        uses: actions/checkout@v2
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index a1b25565aa..adeaa3eb60 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -19,10 +19,10 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.10"
diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml
index a5e36bb63e..6e5d1c59f8 100644
--- a/.github/workflows/metadata-io.yml
+++ b/.github/workflows/metadata-io.yml
@@ -29,10 +29,10 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v2
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml
index 39081a1f98..9b7fc1f8a4 100644
--- a/.github/workflows/publish-datahub-jars.yml
+++ b/.github/workflows/publish-datahub-jars.yml
@@ -53,10 +53,10 @@ jobs:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml
index 98e2a80c71..ba16333096 100644
--- a/.github/workflows/spark-smoke-test.yml
+++ b/.github/workflows/spark-smoke-test.yml
@@ -28,10 +28,10 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
-      - name: Set up JDK 1.8
+      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
-          java-version: 1.8
+          java-version: 11
      - uses: actions/setup-python@v2
        with:
          python-version: "3.7"
diff --git a/.gitignore b/.gitignore
index aeba44d279..d1981b2674 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,7 +37,7 @@ MANIFEST
 **/build
 /config
 */i18n
-/out
+out/
 
 # Mac OS
 **/.DS_Store
diff --git a/build.gradle b/build.gradle
index d10f4b1e20..55a4eca1a1 100644
--- a/build.gradle
+++ b/build.gradle
@@ -11,9 +11,6 @@ buildscript {
     classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4'
     classpath 'com.commercehub.gradle.plugin:gradle-avro-plugin:0.8.1'
     classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion
-    classpath('com.github.jengelman.gradle.plugins:shadow:5.2.0') {
-      exclude group: 'org.apache.logging.log4j', module: 'log4j-core'
-    }
     classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0"
     classpath "com.palantir.gradle.gitversion:gradle-git-version:0.12.3"
     classpath "org.gradle.playframework:gradle-playframework:0.12"
@@ -23,6 +20,7 @@ buildscript {
 
 plugins {
   id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2'
+  id 'com.github.johnrengelman.shadow' version '6.1.0'
 }
 
 project.ext.spec = [
@@ -171,7 +169,7 @@ allprojects {
   apply plugin: 'checkstyle'
 }
 
-configure(subprojects.findAll {it.name != 'spark-lineage'}) {
+configure(subprojects.findAll {! it.name.startsWith('spark-lineage') }) {
 
   configurations.all {
     exclude group: "io.netty", module: "netty"
@@ -221,37 +219,14 @@ subprojects {
     }
   }
 
-  if (project.name != 'datahub-protobuf') {
-    tasks.withType(JavaCompile).configureEach {
-      javaCompiler = javaToolchains.compilerFor {
-        languageVersion = JavaLanguageVersion.of(8)
-      }
+  tasks.withType(JavaCompile).configureEach {
+    javaCompiler = javaToolchains.compilerFor {
+      languageVersion = JavaLanguageVersion.of(11)
     }
-    tasks.withType(Test).configureEach {
-      javaLauncher = javaToolchains.launcherFor {
-        languageVersion = JavaLanguageVersion.of(8)
-      }
-    }
-  } else {
-    tasks.withType(JavaExec).configureEach {
-      javaLauncher = javaToolchains.launcherFor {
-        languageVersion = JavaLanguageVersion.of(11)
-      }
-    }
-    tasks.withType(Javadoc).configureEach {
-      javadocTool = javaToolchains.javadocToolFor {
-        languageVersion = JavaLanguageVersion.of(11)
-      }
-    }
-    tasks.withType(JavaCompile).configureEach {
-      javaCompiler = javaToolchains.compilerFor {
-        languageVersion = JavaLanguageVersion.of(11)
-      }
-    }
-    tasks.withType(Test).configureEach {
-      javaLauncher = javaToolchains.launcherFor {
-        languageVersion = JavaLanguageVersion.of(11)
+  }
+  tasks.withType(Test).configureEach {
+    javaLauncher = javaToolchains.launcherFor {
+      languageVersion = JavaLanguageVersion.of(11)
     }
   }
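With the shadow plugin moved into the `plugins` block and the root `subprojects` toolchain bumped from 8 to 11, every module that is not explicitly pinned back to Java 8 now compiles and runs tests on JDK 11. As a quick illustration of what the new toolchain permits (not part of this patch; the endpoint URL is made up), code like the following uses the `java.net.http.HttpClient` API that only exists on Java 11+, so it compiles only once the toolchain above is in effect:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class Java11ToolchainCheck {
    public static void main(String[] args) throws Exception {
        // java.net.http is a JDK 11 module; this class fails to compile
        // under the old JavaLanguageVersion.of(8) toolchain.
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:8080/health")) // hypothetical endpoint
            .GET()
            .build();
        HttpResponse<String> response =
            client.send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode());
    }
}
```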
diff --git a/docker/broker/env/docker.env b/docker/broker/env/docker.env
index f398527f63..18115697c2 100644
--- a/docker/broker/env/docker.env
+++ b/docker/broker/env/docker.env
@@ -5,3 +5,4 @@ KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9
 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
 KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
 KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
+KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
\ No newline at end of file
diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile
index 8dea591f9f..3759ee260f 100644
--- a/docker/datahub-frontend/Dockerfile
+++ b/docker/datahub-frontend/Dockerfile
@@ -14,7 +14,7 @@ FROM --platform=$BUILDPLATFORM node:16.13.0-alpine3.14 AS prod-build
 
 # Upgrade Alpine and base packages
 RUN apk --no-cache --update-cache --available upgrade \
-    && apk --no-cache add perl openjdk8
+    && apk --no-cache add perl openjdk8 openjdk11
 
 ARG USE_SYSTEM_NODE="true"
 ENV CI=true
diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile
index 1f16e849ac..2eb2c85f9f 100644
--- a/docker/datahub-gms/Dockerfile
+++ b/docker/datahub-gms/Dockerfile
@@ -28,7 +28,7 @@ FROM --platform=$BUILDPLATFORM alpine:3.14 AS prod-build
 
 # Upgrade Alpine and base packages
 RUN apk --no-cache --update-cache --available upgrade \
-    && apk --no-cache add openjdk8 perl
+    && apk --no-cache add openjdk8 openjdk11 perl
 
 COPY . /datahub-src
 RUN cd /datahub-src && ./gradlew :metadata-service:war:build -x test
diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile
index d3d7631b08..3428f2aa55 100644
--- a/docker/datahub-ingestion/Dockerfile
+++ b/docker/datahub-ingestion/Dockerfile
@@ -3,7 +3,7 @@ ARG APP_ENV=prod
 
 FROM acryldata/datahub-ingestion-base as base
 
-FROM openjdk:8 as prod-build
+FROM openjdk:11 as prod-build
 
 COPY . /datahub-src
 RUN cd /datahub-src && ./gradlew :metadata-events:mxe-schemas:build
diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile
index e08d6bb655..c86738e9ff 100644
--- a/docker/datahub-mae-consumer/Dockerfile
+++ b/docker/datahub-mae-consumer/Dockerfile
@@ -25,7 +25,7 @@ FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
 
 # Upgrade Alpine and base packages
 RUN apk --no-cache --update-cache --available upgrade \
-    && apk --no-cache add openjdk8 perl
+    && apk --no-cache add openjdk8 openjdk11 perl
 
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build -x test
diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile
index fce1c59741..5f0e98abe2 100644
--- a/docker/datahub-mce-consumer/Dockerfile
+++ b/docker/datahub-mce-consumer/Dockerfile
@@ -25,7 +25,7 @@ FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
 
 # Upgrade Alpine and base packages
 RUN apk --no-cache --update-cache --available upgrade \
-    && apk --no-cache add openjdk8 perl
+    && apk --no-cache add openjdk8 openjdk11 perl
 
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build
diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile
index 9d927ef85c..17fe9f0392 100644
--- a/docker/datahub-upgrade/Dockerfile
+++ b/docker/datahub-upgrade/Dockerfile
@@ -26,7 +26,7 @@ FROM --platform=$BUILDPLATFORM alpine:3.14 AS prod-build
 
 # Upgrade Alpine and base packages
 RUN apk --no-cache --update-cache --available upgrade \
-    && apk --no-cache add openjdk8 perl
+    && apk --no-cache add openjdk8 openjdk11 perl
 
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :datahub-upgrade:build
diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml
index c67b3afc9b..3fabf84b6b 100644
--- a/docker/docker-compose-with-cassandra.yml
+++ b/docker/docker-compose-with-cassandra.yml
@@ -14,7 +14,7 @@ services:
     ports:
       - "2181:2181"
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 
   broker:
     image: confluentinc/cp-kafka:5.4.0
diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml
index 9f72921f84..fd609b1624 100644
--- a/docker/docker-compose-without-neo4j.yml
+++ b/docker/docker-compose-without-neo4j.yml
@@ -14,7 +14,7 @@ services:
     ports:
       - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 
   broker:
     image: confluentinc/cp-kafka:5.4.0
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 455b2a62cc..58ee367126 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -14,7 +14,7 @@ services:
     ports:
       - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 
   broker:
     image: confluentinc/cp-kafka:5.4.0
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index 40dad574ff..37c80cda2e 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -14,6 +14,7 @@ services:
       - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
       - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
       - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
+      - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
     hostname: broker
     image: confluentinc/cp-kafka:7.2.0
     ports:
@@ -176,7 +177,7 @@ services:
     ports:
       - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 version: '2.3'
 volumes:
   esdata: null
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index f1d3327486..05ca657678 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -14,6 +14,7 @@ services:
       - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
       - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
       - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
+      - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
     hostname: broker
     image: confluentinc/cp-kafka:5.4.0
     ports:
@@ -178,7 +179,7 @@ services:
     ports:
       - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 version: '2.3'
 volumes:
   esdata: null
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index 00a495b636..dc35e0c935 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -14,6 +14,7 @@ services:
       - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
       - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
       - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
+      - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
     hostname: broker
     image: confluentinc/cp-kafka:5.4.0
     ports:
@@ -198,7 +199,7 @@ services:
     ports:
       - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
     volumes:
-      - zkdata:/var/opt/zookeeper
+      - zkdata:/var/lib/zookeeper
 version: '2.3'
 volumes:
   broker: null
diff --git a/docs/developers.md b/docs/developers.md
index 125caf4ae0..ac793ad9e9 100644
--- a/docs/developers.md
+++ b/docs/developers.md
@@ -5,14 +5,14 @@ title: "Local Development"
 
 # DataHub Developer's Guide
 
 ## Pre-requirements
- - [Java 1.8 SDK](https://adoptopenjdk.net/?variant=openjdk8&jvmVariant=hotspot)
+ - [Java 11 SDK](https://openjdk.org/projects/jdk/11/)
 - [Docker](https://www.docker.com/)
 - [Docker Compose](https://docs.docker.com/compose/)
 - Docker engine with at least 8GB of memory to run tests.
 
 :::note
 
-Do not try to use a JDK newer than JDK 8. The build process does not work with newer JDKs currently.
+Do not try to use a JDK newer than JDK 11. The build process does not work with newer JDKs currently.
 
 :::
@@ -101,7 +101,7 @@ You're probably using a Java version that's too new for gradle. Run the following
 ```
 java --version
 ```
-While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 1.8](https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html) (aka Java 8). Plan for Java 11 migration is being discussed in [this issue](https://github.com/datahub-project/datahub/issues/1699).
+While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 11](https://openjdk.org/projects/jdk/11/).
 
 ### Getting `cannot find symbol` error for `javax.annotation.Generated`
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 3e81867c40..d182afdbd0 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -5,6 +5,7 @@ This file documents any backwards-incompatible changes in DataHub and assists people
 ## Next
 
 ### Breaking Changes
+- Java version 11 or greater is required.
 
 ### Potential Downtime
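The docs now state a hard minimum of Java 11. A minimal sketch (not from the repository; class name is illustrative) for asserting that at build time, using `Runtime.version()`, which exists since Java 9:

```java
public class RequireJava11 {
    public static void main(String[] args) {
        // Runtime.version().feature() returns the major Java version, e.g. 11.
        int major = Runtime.version().feature();
        if (major < 11) {
            throw new IllegalStateException(
                "DataHub requires Java 11 or greater, found Java " + major);
        }
        System.out.println("OK: building with Java " + major);
    }
}
```

(On a JDK 8 compiler this class itself will not build, so in practice `java --version`, as the guide suggests, remains the first check.)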
diff --git a/li-utils/build.gradle b/li-utils/build.gradle
index a0d50f56cb..6a6971589a 100644
--- a/li-utils/build.gradle
+++ b/li-utils/build.gradle
@@ -1,6 +1,17 @@
 apply plugin: 'java'
 apply plugin: 'pegasus'
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+
 dependencies {
   compile spec.product.pegasus.data
   compile externalDependency.commonsLang
diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle
index 46562f87b2..fd13e31d3e 100644
--- a/metadata-integration/java/datahub-client/build.gradle
+++ b/metadata-integration/java/datahub-client/build.gradle
@@ -13,6 +13,17 @@ import org.apache.tools.ant.filters.ReplaceTokens
 
 jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+
 dependencies {
   implementation project(':metadata-models')
 
@@ -22,7 +33,7 @@ dependencies {
       because 'Vulnerability Issue'
     }
   }
-  shadow externalDependency.httpAsyncClient // we want our clients to provide this
+  compileOnly externalDependency.httpAsyncClient
   implementation externalDependency.jacksonDataBind
   implementation externalDependency.javaxValidation
   implementation externalDependency.springContext
@@ -33,11 +44,11 @@ dependencies {
 
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
-  testCompile externalDependency.httpAsyncClient // needed as shadow excludes it
   testCompile externalDependency.mockito
   testCompile externalDependency.mockServer
   testCompile externalDependency.mockServerClient
   testCompile externalDependency.testContainers
+  testCompile externalDependency.httpAsyncClient
 
   swaggerCodegen 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.33'
 }
@@ -72,12 +83,10 @@ task checkShadowJar(type: Exec) {
 shadowJar {
   zip64=true
   archiveClassifier = ''
-  dependencies {
-    exclude(dependency('org.apache.httpcomponents:httpasyncclient'))
-    exclude 'LICENSE'
-    exclude 'NOTICE'
-    exclude 'LICENSE.txt'
-  }
+  // preventing java multi-release JAR leakage
+  // https://github.com/johnrengelman/shadow/issues/729
+  exclude('module-info.class', 'META-INF/versions/**',
+      '**/LICENSE', '**/LICENSE.txt', '**/NOTICE', '**/NOTICE.txt')
   mergeServiceFiles()
   // we relocate namespaces manually, because we want to know exactly which libs we are exposing and why
   // we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first
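The new `exclude('module-info.class', 'META-INF/versions/**', ...)` lines address the multi-release JAR leakage referenced in shadow issue #729: shaded multi-release dependencies can carry `module-info.class` and `META-INF/versions/**` class files into the fat jar without relocation. A small sketch (not part of this patch; the jar path is passed as an argument) for inspecting a built shadow jar for such leftovers:

```java
import java.io.IOException;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

public class CheckShadowJarEntries {
    public static void main(String[] args) throws IOException {
        // args[0]: path to a locally built shadow jar, e.g. build/libs/<artifact>.jar
        try (JarFile jar = new JarFile(args[0])) {
            jar.stream()
                .map(JarEntry::getName)
                .filter(name -> name.equals("module-info.class")
                    || name.startsWith("META-INF/versions/"))
                .forEach(name -> System.out.println("leaked entry: " + name));
        }
    }
}
```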
diff --git a/metadata-integration/java/spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/build.gradle
index 18fc84bda0..ee4f02438d 100644
--- a/metadata-integration/java/spark-lineage/build.gradle
+++ b/metadata-integration/java/spark-lineage/build.gradle
@@ -11,6 +11,17 @@ apply from: '../versioning.gradle'
 
 jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+
 //to rename artifacts for publish
 project.archivesBaseName = 'datahub-'+project.name
 
@@ -44,15 +55,14 @@ dependencies {
 
   implementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow')
-
   provided(externalDependency.sparkSql)
   provided(externalDependency.sparkHive)
+  implementation externalDependency.httpAsyncClient
 
   // Tests need a concrete log4j available. Providing it here
   testImplementation 'org.apache.logging.log4j:log4j-api:2.17.1'
   testImplementation 'org.apache.logging.log4j:log4j-core:2.17.1'
-
   testImplementation(externalDependency.postgresql){
     exclude group: "com.fasterxml.jackson.core"
   }
@@ -92,8 +102,12 @@ shadowJar {
     exclude(dependency {
       exclude_modules.contains(it.name)
     })
-  }
+  }
+
+  // preventing java multi-release JAR leakage
+  // https://github.com/johnrengelman/shadow/issues/729
+  exclude('module-info.class', 'META-INF/versions/**')
+
   relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson'
   relocate 'org.apache.http','datahub.spark2.shaded.http'
   relocate 'org.apache.commons.codec', 'datahub.spark2.shaded.o.a.c.codec'
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut1.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut1.json
index 76e7bf24f9..754c4f59ac 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut1.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut1.json
@@ -28,7 +28,7 @@
         {
           "com.linkedin.common.BrowsePaths": {
             "paths": [
-              "/spark/spark_spark-master_7077/javahdfsin2hdfsout1"
+              "/spark/spark_spark-master_7077"
             ]
           }
         }
@@ -69,7 +69,7 @@
         {
           "com.linkedin.common.BrowsePaths": {
             "paths": [
-              "/spark/javahdfsin2hdfsout1/queryexecid_4"
+              "/spark/spark_spark-master_7077"
             ]
           }
         },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut2.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut2.json
index a34781b838..b64aaf5c87 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut2.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut2.json
@@ -19,7 +19,7 @@
         {
           "com.linkedin.common.BrowsePaths": {
             "paths": [
-              "/spark/spark_spark-master_7077/javahdfsin2hdfsout2"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -55,7 +55,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hdfsout2/queryexecid_4"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -132,7 +132,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hdfsout2/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateInsertTable.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateInsertTable.json
index ce78df1562..a1d24dfdf5 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateInsertTable.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateInsertTable.json
@@ -28,7 +28,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/javahdfsin2hivecreateinserttable"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -61,7 +61,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hivecreateinserttable/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -107,7 +107,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hivecreateinserttable/queryexecid_6"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -167,7 +167,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hivecreateinserttable/queryexecid_7"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateTable.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateTable.json
index c04341f371..2d1d8930c8 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateTable.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HiveCreateTable.json
@@ -19,7 +19,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/javahdfsin2hivecreatetable"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -64,7 +64,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahdfsin2hivecreatetable/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHiveInHiveOut.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHiveInHiveOut.json
index fbd5a700da..b999b07c4f 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHiveInHiveOut.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHiveInHiveOut.json
@@ -14,7 +14,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/javahiveinhiveout"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -55,7 +55,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahiveinhiveout/queryexecid_8"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -131,7 +131,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahiveinhiveout/queryexecid_9"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -163,7 +163,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahiveinhiveout/queryexecid_10"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -233,7 +233,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/javahiveinhiveout/queryexecid_11"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut1.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut1.json
index b469519ff5..b7ea7abf73 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut1.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut1.json
@@ -28,7 +28,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/pythonhdfsin2hdfsout1"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -61,7 +61,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hdfsout1/queryexecid_4"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut2.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut2.json
index 9b9d95c5a0..146bafe322 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut2.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HdfsOut2.json
@@ -28,7 +28,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/pythonhdfsin2hdfsout2"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -80,7 +80,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hdfsout2/queryexecid_4"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -118,7 +118,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hdfsout2/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateInsertTable.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateInsertTable.json
index 3b54202df0..34d751486d 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateInsertTable.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateInsertTable.json
@@ -28,7 +28,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/pythonhdfsin2hivecreateinserttable"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -66,7 +66,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hivecreateinserttable/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -102,7 +102,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hivecreateinserttable/queryexecid_6"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -165,7 +165,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hivecreateinserttable/queryexecid_7"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateTable.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateTable.json
index b179784bb8..dd206c4ce5 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateTable.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHdfsIn2HiveCreateTable.json
@@ -23,7 +23,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/pythonhdfsin2hivecreatetable"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -69,7 +69,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhdfsin2hivecreatetable/queryexecid_5"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHiveInHiveOut.json b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHiveInHiveOut.json
index cbc6e7ae6f..ff9c31633c 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHiveInHiveOut.json
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/PythonHiveInHiveOut.json
@@ -28,7 +28,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/spark_spark-master_7077/pythonhiveinhiveout"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -79,7 +79,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhiveinhiveout/queryexecid_10"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -130,7 +130,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhiveinhiveout/queryexecid_9"
+              "/spark/spark_spark-master_7077"
            ]
          }
        }
@@ -157,7 +157,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhiveinhiveout/queryexecid_8"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
@@ -219,7 +219,7 @@
        {
          "com.linkedin.common.BrowsePaths": {
            "paths": [
-              "/spark/pythonhiveinhiveout/queryexecid_11"
+              "/spark/spark_spark-master_7077"
            ]
          }
        },
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/setup_spark_smoke_test.sh b/metadata-integration/java/spark-lineage/spark-smoke-test/setup_spark_smoke_test.sh
index d721027149..6572ffa0f5 100755
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/setup_spark_smoke_test.sh
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/setup_spark_smoke_test.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 
+set -e
 pip install -r requirements.txt
 
 echo "--------------------------------------------------------------------"
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/smoke.sh b/metadata-integration/java/spark-lineage/spark-smoke-test/smoke.sh
index 5be37d2825..5e3e70ca7f 100755
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/smoke.sh
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/smoke.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-
+set -e
 # Script assumptions:
 #   - The gradle build has already been run.
 #   - Python 3.6+ is installed and in the PATH.
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle
index 6337f8c9be..12aa1775d6 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle
@@ -17,6 +17,17 @@ repositories {
   jcenter()
 }
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+
 dependencies {
   implementation 'org.apache.spark:spark-sql_2.11:2.4.8'
 }
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties
index 5028f28f8e..ec991f9aa1 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.4-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.2-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test_e2e.py b/metadata-integration/java/spark-lineage/spark-smoke-test/test_e2e.py
index dafbc43db1..47e86bdb56 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/test_e2e.py
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test_e2e.py
@@ -74,4 +74,6 @@ def test_ingestion_via_rest(json_file):
         data = response.json()
         diff = json_compare.check(value, data)
         print(urn)
+        if diff != NO_DIFF:
+            print("Expected: {} Actual: {}".format(value, data))
         assert diff == NO_DIFF
diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java
index 68615fb1d0..447200d855 100644
--- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java
+++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java
@@ -17,7 +17,6 @@ import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
-import org.junit.ClassRule;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestRule;
@@ -29,7 +28,6 @@ import org.mockserver.model.HttpResponse;
 import org.mockserver.model.JsonBody;
 import org.mockserver.socket.PortFactory;
 import org.mockserver.verify.VerificationTimes;
-import org.testcontainers.containers.PostgreSQLContainer;
 
 import com.linkedin.common.FabricType;
 
@@ -58,9 +56,6 @@ public class TestCoalesceJobLineage {
   private static final String PIPELINE_PLATFORM_INSTANCE = "test_machine";
   private static final String DATASET_PLATFORM_INSTANCE = "test_dev_dataset";
 
-  @ClassRule
-  public static PostgreSQLContainer db = new PostgreSQLContainer<>("postgres:9.6.12")
-      .withDatabaseName("sparkcoalescetestdb");
   private static SparkSession spark;
   private static Properties jdbcConnnProperties;
   private static ClientAndServer mockServer;
diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java
index 1b5ff4dac2..26c0eab25e 100644
--- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java
+++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java
@@ -10,6 +10,7 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.sql.Connection;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -54,6 +55,7 @@ import datahub.spark.model.dataset.CatalogTableDataset;
 import datahub.spark.model.dataset.HdfsPathDataset;
 import datahub.spark.model.dataset.JdbcDataset;
 import datahub.spark.model.dataset.SparkDataset;
+import org.testcontainers.containers.wait.strategy.Wait;
 
 //!!!! IMP !!!!!!!!
 //Add the test number before naming the test. This will ensure that tests run in specified order.
@@ -86,8 +88,12 @@ public class TestSparkJobsLineage {
   private static final String DATASET_PLATFORM_INSTANCE = "test_dev_dataset";
 
   @ClassRule
-  public static PostgreSQLContainer db = new PostgreSQLContainer<>("postgres:9.6.12")
-      .withDatabaseName("sparktestdb");
+  public static PostgreSQLContainer db;
+  static {
+    db = new PostgreSQLContainer<>("postgres:9.6.12")
+        .withDatabaseName("sparktestdb");
+    db.waitingFor(Wait.forListeningPort()).withStartupTimeout(Duration.ofMinutes(15)).start();
+  }
   private static SparkSession spark;
   private static Properties jdbcConnnProperties;
   private static DatasetLineageAccumulator acc;
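`TestSparkJobsLineage` previously let the `@ClassRule` manage container startup; the patch keeps the rule field but starts the Postgres container eagerly in a static initializer, with an explicit wait strategy and a generous 15-minute startup timeout for slow CI hosts. The same Testcontainers pattern in isolation (a sketch; class and database names are illustrative):

```java
import java.time.Duration;
import org.testcontainers.containers.PostgreSQLContainer;
import org.testcontainers.containers.wait.strategy.Wait;

public class EagerPostgresExample {
    static final PostgreSQLContainer<?> DB =
        new PostgreSQLContainer<>("postgres:9.6.12").withDatabaseName("sparktestdb");

    static {
        // Block until the mapped port is actually accepting connections,
        // allowing up to 15 minutes before giving up.
        DB.waitingFor(Wait.forListeningPort())
          .withStartupTimeout(Duration.ofMinutes(15))
          .start();
    }

    public static void main(String[] args) {
        System.out.println(DB.getJdbcUrl()); // e.g. jdbc:postgresql://localhost:<mapped-port>/sparktestdb
    }
}
```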
diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle
index 336039fd21..c53c835773 100644
--- a/metadata-io/build.gradle
+++ b/metadata-io/build.gradle
@@ -101,7 +101,7 @@ tasks.register('generateOpenApiPojos', GenerateSwaggerCode) {
   it.setAdditionalProperties([
       "group-id" : "io.datahubproject",
       "dateLibrary" : "java8",
-      "java8" : "true",
+      "java11" : "true",
       "modelPropertyNaming" : "original",
       "modelPackage" : "io.datahubproject.openapi.generated"] as Map)
diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle
index 12e9711296..928a947c7f 100644
--- a/metadata-models/build.gradle
+++ b/metadata-models/build.gradle
@@ -3,6 +3,16 @@ import io.datahubproject.GenerateJsonSchemaTask
 
 apply plugin: 'pegasus'
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
 
 dependencies {
   compile spec.product.pegasus.data
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
index 638f334c7c..c0474d7125 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
@@ -34,7 +34,6 @@ import io.datahubproject.openapi.generated.MetadataChangeProposal;
 import io.datahubproject.openapi.generated.OneOfEnvelopedAspectValue;
 import io.datahubproject.openapi.generated.OneOfGenericAspectValue;
 import io.datahubproject.openapi.generated.Status;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -46,11 +45,7 @@ import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 import lombok.extern.slf4j.Slf4j;
 import org.reflections.Reflections;
-import org.reflections.scanners.ResourcesScanner;
 import org.reflections.scanners.SubTypesScanner;
-import org.reflections.util.ClasspathHelper;
-import org.reflections.util.ConfigurationBuilder;
-import org.reflections.util.FilterBuilder;
 import org.springframework.beans.factory.config.BeanDefinition;
 import org.springframework.context.annotation.ClassPathScanningCandidateComponentProvider;
 import org.springframework.core.type.filter.AssignableTypeFilter;
@@ -98,17 +93,10 @@ public class MappingUtil {
 
     components = provider.findCandidateComponents("io/datahubproject/openapi/generated");
     components.forEach(MappingUtil::putGenericAspectEntry);
-    List classLoadersList = new ArrayList<>();
-    classLoadersList.add(ClasspathHelper.contextClassLoader());
-    classLoadersList.add(ClasspathHelper.staticClassLoader());
 
     // Build a map from fully qualified Pegasus generated class name to class
-    Reflections reflections = new Reflections(new ConfigurationBuilder()
-        .setScanners(new SubTypesScanner(false), new ResourcesScanner())
-        .setUrls(ClasspathHelper.forClassLoader(classLoadersList.toArray(new ClassLoader[0])))
-        .filterInputsBy(new FilterBuilder().include(FilterBuilder.prefix(PEGASUS_PACKAGE))));
-    Set<Class<? extends RecordTemplate>> pegasusComponents = reflections.getSubTypesOf(RecordTemplate.class);
-    pegasusComponents.forEach(aClass -> PEGASUS_TYPE_MAP.put(aClass.getSimpleName(), aClass));
+    new Reflections(PEGASUS_PACKAGE, new SubTypesScanner(false))
+        .getSubTypesOf(RecordTemplate.class)
+        .forEach(aClass -> PEGASUS_TYPE_MAP.put(aClass.getSimpleName(), aClass));
   }
 
   public static Map mapServiceResponse(Map serviceResponse,
diff --git a/test-models/build.gradle b/test-models/build.gradle
index a2656f1f50..4cfbcc1399 100644
--- a/test-models/build.gradle
+++ b/test-models/build.gradle
@@ -1,6 +1,17 @@
 apply plugin: 'pegasus'
 apply plugin: 'java'
 
+tasks.withType(JavaCompile).configureEach {
+  javaCompiler = javaToolchains.compilerFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+tasks.withType(Test).configureEach {
+  javaLauncher = javaToolchains.launcherFor {
+    languageVersion = JavaLanguageVersion.of(8)
+  }
+}
+
 dependencies {
   compile spec.product.pegasus.data
   compile externalDependency.commonsIo
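The `MappingUtil.java` change above collapses a hand-built `ConfigurationBuilder` into the Reflections convenience constructor, which accepts a package prefix plus scanners directly. A stripped-down sketch of the same pattern (package name and supertype are placeholders, assuming the Reflections 0.9.x API used in this codebase):

```java
import java.util.HashMap;
import java.util.Map;
import org.reflections.Reflections;
import org.reflections.scanners.SubTypesScanner;

public class SubtypeIndexExample {
    public static void main(String[] args) {
        // SubTypesScanner(false) disables the default exclusion of
        // java.lang.Object, so every scanned class lands in the type store.
        Reflections reflections =
            new Reflections("com.example.models", new SubTypesScanner(false));

        Map<String, Class<?>> bySimpleName = new HashMap<>();
        reflections.getSubTypesOf(Object.class)
            .forEach(clazz -> bySimpleName.put(clazz.getSimpleName(), clazz));
        System.out.println(bySimpleName.size() + " classes indexed");
    }
}
```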