feat(spark): OpenLineage 1.24.2 upgrade (#11830)

Tamas Nemeth 2024-11-20 00:13:24 +01:00 committed by GitHub
parent 85c8e605be
commit 1f396e87c1
13 changed files with 89 additions and 53 deletions

View File

@@ -56,7 +56,7 @@ buildscript {
   ext.hazelcastVersion = '5.3.6'
   ext.ebeanVersion = '15.5.2'
   ext.googleJavaFormatVersion = '1.18.1'
-  ext.openLineageVersion = '1.19.0'
+  ext.openLineageVersion = '1.24.2'
   ext.logbackClassicJava8 = '1.2.12'
   ext.docker_registry = 'acryldata'

View File

@@ -25,6 +25,8 @@ dependencies {
       because("previous versions are vulnerable to CVE-2022-25857")
     }
   }
+  api project(path: ':li-utils')
+  api project(path: ':li-utils', configuration: "dataTemplate")
   dataModel project(':li-utils')
   annotationProcessor externalDependency.lombok

View File

@@ -165,6 +165,7 @@ information like tokens.
 | spark.datahub.rest.server | | http://localhost:8080 | Datahub server url eg:<http://localhost:8080> |
 | spark.datahub.rest.token | | | Authentication token. |
 | spark.datahub.rest.disable_ssl_verification | | false | Disable SSL certificate validation. Caution: Only use this if you know what you are doing! |
+| spark.datahub.rest.disable_chunked_encoding | | false | Disable Chunked Transfer Encoding. In some environments chunked encoding causes issues; this option disables it. |
 | spark.datahub.rest.max_retries | | 0 | Number of times a request retried if failed |
 | spark.datahub.rest.retry_interval | | 10 | Number of seconds to wait between retries |
 | spark.datahub.file.filename | | | The file where metadata will be written if file emitter is set |
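
For reference, the new option is read from the Spark configuration like the other `spark.datahub.*` settings. Below is a minimal sketch, not part of this commit; the listener class name and server URL are assumptions to be adapted to your deployment.

```java
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class DisableChunkedEncodingExample {
  public static void main(String[] args) {
    SparkConf conf =
        new SparkConf()
            .setMaster("local[*]")
            .setAppName("datahub-lineage-example")
            // Listener class name as documented for the DataHub Spark agent (assumption here).
            .set("spark.extraListeners", "datahub.spark.DatahubSparkListener")
            .set("spark.datahub.rest.server", "http://localhost:8080")
            // Send requests with an explicit Content-Length instead of chunked
            // transfer encoding, for proxies/load balancers that reject chunked requests.
            .set("spark.datahub.rest.disable_chunked_encoding", "true");

    SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
    // ... run jobs; lineage is emitted by the listener ...
    spark.stop();
  }
}
```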

View File

@@ -1,7 +1,7 @@
 plugins {
   id("com.palantir.git-version") apply false
 }
-apply plugin: 'java'
+apply plugin: 'java-library'
 apply plugin: 'com.github.johnrengelman.shadow'
 apply plugin: 'signing'
 apply plugin: 'io.codearte.nexus-staging'
@@ -51,8 +51,8 @@ dependencies {
   implementation project(':metadata-integration:java:openlineage-converter')
-  implementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow')
-  implementation project(path: ':metadata-integration:java:openlineage-converter', configuration: 'shadow')
+  implementation project(path: ':metadata-integration:java:datahub-client')
+  implementation project(path: ':metadata-integration:java:openlineage-converter')
   //implementation "io.acryl:datahub-client:0.10.2"
   implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion"
@@ -91,6 +91,8 @@ shadowJar {
   zip64 = true
   archiveClassifier = ''
   mergeServiceFiles()
+  project.configurations.implementation.canBeResolved = true
+  configurations = [project.configurations.implementation]
   def exclude_modules = project
       .configurations
@@ -106,6 +108,8 @@ shadowJar {
     exclude(dependency {
       exclude_modules.contains(it.name)
     })
+    exclude(dependency("org.slf4j::"))
+    exclude("org/apache/commons/logging/**")
   }
   // preventing java multi-release JAR leakage
@@ -123,39 +127,36 @@ shadowJar {
   relocate 'com.sun.activation', 'io.acryl.shaded.com.sun.activation'
   relocate 'com.sun.codemodel', 'io.acryl.shaded.com.sun.codemodel'
   relocate 'com.sun.mail', 'io.acryl.shaded.com.sun.mail'
-  relocate 'com.fasterxml.jackson', 'datahub.spark2.shaded.jackson'
-  relocate 'org.slf4j', 'datahub.spark2.shaded.org.slf4j'
   //
   relocate 'org.apache.hc', 'io.acryl.shaded.http'
-  relocate 'org.apache.commons.codec', 'datahub.spark2.shaded.o.a.c.codec'
-  relocate 'org.apache.commons.compress', 'datahub.spark2.shaded.o.a.c.compress'
-  relocate 'org.apache.commons.lang3', 'datahub.spark2.shaded.o.a.c.lang3'
+  relocate 'org.apache.commons.codec', 'io.acryl.shaded.org.apache.commons.codec'
+  relocate 'org.apache.commons.compress', 'io.acryl.shaded.org.apache.commons.compress'
+  relocate 'org.apache.commons.lang3', 'io.acryl.shaded.org.apache.commons.lang3'
   relocate 'mozilla', 'datahub.spark2.shaded.mozilla'
-  relocate 'com.typesafe', 'datahub.spark2.shaded.typesafe'
-  relocate 'io.opentracing', 'datahub.spark2.shaded.io.opentracing'
-  relocate 'io.netty', 'datahub.spark2.shaded.io.netty'
-  relocate 'ch.randelshofer', 'datahub.spark2.shaded.ch.randelshofer'
-  relocate 'ch.qos', 'datahub.spark2.shaded.ch.qos'
+  relocate 'com.typesafe', 'io.acryl.shaded.com.typesafe'
+  relocate 'io.opentracing', 'io.acryl.shaded.io.opentracing'
+  relocate 'io.netty', 'io.acryl.shaded.io.netty'
+  relocate 'ch.randelshofer', 'io.acryl.shaded.ch.randelshofer'
+  relocate 'ch.qos', 'io.acryl.shaded.ch.qos'
   relocate 'org.springframework', 'io.acryl.shaded.org.springframework'
   relocate 'com.fasterxml.jackson', 'io.acryl.shaded.jackson'
   relocate 'org.yaml', 'io.acryl.shaded.org.yaml' // Required for shading snakeyaml
   relocate 'net.jcip.annotations', 'io.acryl.shaded.annotations'
   relocate 'javassist', 'io.acryl.shaded.javassist'
   relocate 'edu.umd.cs.findbugs', 'io.acryl.shaded.findbugs'
-  relocate 'org.antlr', 'io.acryl.shaded.org.antlr'
-  relocate 'antlr', 'io.acryl.shaded.antlr'
+  //relocate 'org.antlr', 'io.acryl.shaded.org.antlr'
+  //relocate 'antlr', 'io.acryl.shaded.antlr'
   relocate 'com.google.common', 'io.acryl.shaded.com.google.common'
-  relocate 'org.apache.commons', 'io.acryl.shaded.org.apache.commons'
   relocate 'org.reflections', 'io.acryl.shaded.org.reflections'
   relocate 'st4hidden', 'io.acryl.shaded.st4hidden'
   relocate 'org.stringtemplate', 'io.acryl.shaded.org.stringtemplate'
   relocate 'org.abego.treelayout', 'io.acryl.shaded.treelayout'
-  relocate 'org.slf4j', 'io.acryl.shaded.slf4j'
   relocate 'javax.annotation', 'io.acryl.shaded.javax.annotation'
   relocate 'com.github.benmanes.caffeine', 'io.acryl.shaded.com.github.benmanes.caffeine'
   relocate 'org.checkerframework', 'io.acryl.shaded.org.checkerframework'
   relocate 'com.google.errorprone', 'io.acryl.shaded.com.google.errorprone'
   relocate 'com.sun.jna', 'io.acryl.shaded.com.sun.jna'
 }
 checkShadowJar {

View File

@@ -120,7 +120,9 @@ public class DatahubSparkListener extends SparkListener {
     boolean disableSslVerification =
         sparkConf.hasPath(SparkConfigParser.DISABLE_SSL_VERIFICATION_KEY)
             && sparkConf.getBoolean(SparkConfigParser.DISABLE_SSL_VERIFICATION_KEY);
+    boolean disableChunkedEncoding =
+        sparkConf.hasPath(SparkConfigParser.REST_DISABLE_CHUNKED_ENCODING)
+            && sparkConf.getBoolean(SparkConfigParser.REST_DISABLE_CHUNKED_ENCODING);
     int retry_interval_in_sec =
         sparkConf.hasPath(SparkConfigParser.RETRY_INTERVAL_IN_SEC)
             ? sparkConf.getInt(SparkConfigParser.RETRY_INTERVAL_IN_SEC)
@@ -150,6 +152,7 @@ public class DatahubSparkListener extends SparkListener {
               .disableSslVerification(disableSslVerification)
               .maxRetries(max_retries)
               .retryIntervalSec(retry_interval_in_sec)
+              .disableChunkedEncoding(disableChunkedEncoding)
               .build();
       return Optional.of(new RestDatahubEmitterConfig(restEmitterConf));
     case "kafka":
@@ -374,7 +377,8 @@ public class DatahubSparkListener extends SparkListener {
     String disabledFacets;
     if (openLineageConfig.getFacetsConfig() != null
         && openLineageConfig.getFacetsConfig().getDisabledFacets() != null) {
-      disabledFacets = String.join(";", openLineageConfig.getFacetsConfig().getDisabledFacets());
+      disabledFacets =
+          String.join(";", openLineageConfig.getFacetsConfig().getEffectiveDisabledFacets());
     } else {
       disabledFacets = "";
     }

View File

@@ -30,6 +30,8 @@ public class SparkConfigParser {
   public static final String GMS_AUTH_TOKEN = "rest.token";
   public static final String FILE_EMITTER_FILE_NAME = "file.filename";
   public static final String DISABLE_SSL_VERIFICATION_KEY = "rest.disable_ssl_verification";
+  public static final String REST_DISABLE_CHUNKED_ENCODING = "rest.disable_chunked_encoding";
   public static final String MAX_RETRIES = "rest.max_retries";
   public static final String RETRY_INTERVAL_IN_SEC = "rest.retry_interval_in_sec";
   public static final String KAFKA_MCP_TOPIC = "kafka.mcp_topic";

View File

@@ -5,14 +5,13 @@
 package io.openlineage.spark.agent.util;
-import static io.openlineage.spark.agent.lifecycle.ExecutionContext.CAMEL_TO_SNAKE_CASE;
 import com.typesafe.config.Config;
 import com.typesafe.config.ConfigFactory;
 import datahub.spark.conf.SparkLineageConf;
 import io.datahubproject.openlineage.dataset.HdfsPathDataset;
 import io.openlineage.client.OpenLineage;
 import io.openlineage.spark.agent.Versions;
+import io.openlineage.spark.api.naming.NameNormalizer;
 import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
@@ -21,7 +20,6 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.UUID;
@@ -186,7 +184,7 @@ public class PlanUtils {
         .run(new OpenLineage.ParentRunFacetRunBuilder().runId(parentRunId).build())
         .job(
             new OpenLineage.ParentRunFacetJobBuilder()
-                .name(parentJob.replaceAll(CAMEL_TO_SNAKE_CASE, "_$1").toLowerCase(Locale.ROOT))
+                .name(NameNormalizer.normalize(parentJob))
                 .namespace(parentJobNamespace)
                 .build())
         .build();
@@ -287,8 +285,6 @@ public class PlanUtils {
    * @param pfn
    * @param x
    * @return
-   * @param <T>
-   * @param <D>
    */
   public static <T, D> List<T> safeApply(PartialFunction<D, List<T>> pfn, D x) {
     try {

View File

@@ -7,6 +7,7 @@ package io.openlineage.spark.agent.util;
 import java.util.Arrays;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Stream;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.reflect.FieldUtils;
@@ -18,6 +19,7 @@ import org.apache.spark.rdd.HadoopRDD;
 import org.apache.spark.rdd.MapPartitionsRDD;
 import org.apache.spark.rdd.ParallelCollectionRDD;
 import org.apache.spark.rdd.RDD;
+import org.apache.spark.sql.execution.datasources.FilePartition;
 import org.apache.spark.sql.execution.datasources.FileScanRDD;
 import scala.Tuple2;
 import scala.collection.immutable.Seq;
@@ -90,7 +92,7 @@ public class RddPathUtils {
     @SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
     public Stream<Path> extract(FileScanRDD rdd) {
       return ScalaConversionUtils.fromSeq(rdd.filePartitions()).stream()
-          .flatMap(fp -> Arrays.stream(fp.files()))
+          .flatMap((FilePartition fp) -> Arrays.stream(fp.files()))
           .map(
               f -> {
                 if ("3.4".compareTo(package$.MODULE$.SPARK_VERSION()) <= 0) {
@@ -115,11 +117,15 @@ public class RddPathUtils {
     @Override
     public Stream<Path> extract(ParallelCollectionRDD rdd) {
+      int SEQ_LIMIT = 1000;
+      AtomicBoolean loggingDone = new AtomicBoolean(false);
       try {
         Object data = FieldUtils.readField(rdd, "data", true);
         log.debug("ParallelCollectionRDD data: {}", data);
-        if (data instanceof Seq) {
-          return ScalaConversionUtils.fromSeq((Seq) data).stream()
+        if ((data instanceof Seq) && ((Seq) data).head() instanceof Tuple2) {
+          // exit if the first element is invalid
+          Seq data_slice = (Seq) ((Seq) data).slice(0, SEQ_LIMIT);
+          return ScalaConversionUtils.fromSeq(data_slice).stream()
               .map(
                   el -> {
                     Path path = null;
@@ -127,9 +133,9 @@
                       // we're able to extract path
                       path = parentOf(((Tuple2) el)._1.toString());
                       log.debug("Found input {}", path);
-                    } else {
-                      // Change to debug to silence error
-                      log.debug("unable to extract Path from {}", el.getClass().getCanonicalName());
+                    } else if (!loggingDone.get()) {
+                      log.warn("unable to extract Path from {}", el.getClass().getCanonicalName());
+                      loggingDone.set(true);
                     }
                     return path;
                   })

View File

@@ -1,6 +1,6 @@
 plugins {
   id("com.palantir.git-version") apply false
-  id 'java'
+  id 'java-library'
   id 'com.github.johnrengelman.shadow'
   id 'jacoco'
   id 'signing'
@@ -12,11 +12,13 @@ apply from: "../versioning.gradle"
 import org.apache.tools.ant.filters.ReplaceTokens
-jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation
+jar {
+  archiveClassifier = "lib"
+}
 dependencies {
-  implementation project(':entity-registry')
-  implementation project(':metadata-integration:java:datahub-event')
+  api project(':entity-registry')
+  api project(':metadata-integration:java:datahub-event')
   implementation(externalDependency.kafkaAvroSerializer) {
     exclude group: "org.apache.avro"
   }
@@ -33,7 +35,7 @@ dependencies {
   implementation externalDependency.jacksonDataBind
   runtimeOnly externalDependency.jna
-  implementation externalDependency.slf4jApi
+  api externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
   // VisibleForTesting
@@ -78,6 +80,11 @@ shadowJar {
   // https://github.com/johnrengelman/shadow/issues/729
   exclude('module-info.class', 'META-INF/versions/**',
       '**/LICENSE', '**/LICENSE*.txt', '**/NOTICE', '**/NOTICE.txt', 'licenses/**', 'log4j2.*', 'log4j.*')
+  dependencies {
+    exclude(dependency("org.slf4j::"))
+    exclude(dependency("antlr::"))
+    exclude("org/apache/commons/logging/**")
+  }
   mergeServiceFiles()
   // we relocate namespaces manually, because we want to know exactly which libs we are exposing and why
   // we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first
@@ -88,15 +95,20 @@ shadowJar {
   relocate 'javassist', 'datahub.shaded.javassist'
   relocate 'edu.umd.cs.findbugs', 'datahub.shaded.findbugs'
   relocate 'org.antlr', 'datahub.shaded.org.antlr'
-  relocate 'antlr', 'datahub.shaded.antlr'
+  //relocate 'antlr', 'datahub.shaded.antlr'
   relocate 'com.google.common', 'datahub.shaded.com.google.common'
-  relocate 'org.apache.commons', 'datahub.shaded.org.apache.commons'
+  relocate 'org.apache.commons.codec', 'datahub.shaded.org.apache.commons.codec'
+  relocate 'org.apache.commons.compress', 'datahub.shaded.org.apache.commons.compress'
+  relocate 'org.apache.commons.lang3', 'datahub.shaded.org.apache.commons.lang3'
+  relocate 'org.apache.commons.lang', 'datahub.shaded.org.apache.commons.lang'
+  relocate 'org.apache.commons.cli', 'datahub.shaded.org.apache.commons.cli'
+  relocate 'org.apache.commons.text', 'datahub.shaded.org.apache.commons.text'
+  relocate 'org.apache.commons.io', 'datahub.shaded.org.apache.commons.io'
   relocate 'org.apache.maven', 'datahub.shaded.org.apache.maven'
   relocate 'org.reflections', 'datahub.shaded.org.reflections'
   relocate 'st4hidden', 'datahub.shaded.st4hidden'
   relocate 'org.stringtemplate', 'datahub.shaded.org.stringtemplate'
   relocate 'org.abego.treelayout', 'datahub.shaded.treelayout'
-  relocate 'org.slf4j', 'datahub.shaded.slf4j'
   relocate 'javax.annotation', 'datahub.shaded.javax.annotation'
   relocate 'com.github.benmanes.caffeine', 'datahub.shaded.com.github.benmanes.caffeine'
   relocate 'org.checkerframework', 'datahub.shaded.org.checkerframework'

View File

@@ -48,7 +48,6 @@ public class DatahubHttpRequestRetryStrategy extends DefaultHttpRequestRetryStra
   @Override
   public boolean retryRequest(HttpResponse response, int execCount, HttpContext context) {
-    log.warn("Retrying request due to error: {}", response);
     return super.retryRequest(response, execCount, context);
   }
 }

View File

@@ -1,6 +1,7 @@
 package datahub.client.rest;
 import static com.linkedin.metadata.Constants.*;
+import static org.apache.hc.core5.http.HttpHeaders.*;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.core.StreamReadConstraints;
@@ -18,6 +19,7 @@ import datahub.event.MetadataChangeProposalWrapper;
 import datahub.event.UpsertAspectRequest;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.security.KeyManagementException;
 import java.security.KeyStoreException;
 import java.security.NoSuchAlgorithmException;
@@ -26,6 +28,7 @@ import java.util.Objects;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Consumer;
 import javax.annotation.concurrent.ThreadSafe;
@@ -97,17 +100,20 @@ public class RestEmitter implements Emitter {
     this.config = config;
     HttpAsyncClientBuilder httpClientBuilder = this.config.getAsyncHttpClientBuilder();
     httpClientBuilder.setRetryStrategy(new DatahubHttpRequestRetryStrategy());
-    // Override httpClient settings with RestEmitter configs if present
-    if (config.getTimeoutSec() != null) {
-      httpClientBuilder.setDefaultRequestConfig(
-          RequestConfig.custom()
-              .setConnectionRequestTimeout(
-                  config.getTimeoutSec() * 1000, java.util.concurrent.TimeUnit.MILLISECONDS)
-              .setResponseTimeout(
-                  config.getTimeoutSec() * 1000, java.util.concurrent.TimeUnit.MILLISECONDS)
-              .build());
+    if ((config.getTimeoutSec() != null) || (config.isDisableChunkedEncoding())) {
+      RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
+      // Override httpClient settings with RestEmitter configs if present
+      if (config.getTimeoutSec() != null) {
+        requestConfigBuilder
+            .setConnectionRequestTimeout(config.getTimeoutSec() * 1000, TimeUnit.MILLISECONDS)
+            .setResponseTimeout(config.getTimeoutSec() * 1000, TimeUnit.MILLISECONDS);
+      }
+      if (config.isDisableChunkedEncoding()) {
+        requestConfigBuilder.setContentCompressionEnabled(false);
+      }
+      httpClientBuilder.setDefaultRequestConfig(requestConfigBuilder.build());
     }
     PoolingAsyncClientConnectionManagerBuilder poolingAsyncClientConnectionManagerBuilder =
         PoolingAsyncClientConnectionManagerBuilder.create();
@@ -223,8 +229,13 @@ public class RestEmitter implements Emitter {
     if (this.config.getToken() != null) {
       simpleRequestBuilder.setHeader("Authorization", "Bearer " + this.config.getToken());
     }
-    simpleRequestBuilder.setBody(payloadJson, ContentType.APPLICATION_JSON);
+    if (this.config.isDisableChunkedEncoding()) {
+      byte[] payloadBytes = payloadJson.getBytes(StandardCharsets.UTF_8);
+      simpleRequestBuilder.setBody(payloadBytes, ContentType.APPLICATION_JSON);
+    } else {
+      simpleRequestBuilder.setBody(payloadJson, ContentType.APPLICATION_JSON);
+    }
     AtomicReference<MetadataWriteResponse> responseAtomicReference = new AtomicReference<>();
     CountDownLatch responseLatch = new CountDownLatch(1);
     FutureCallback<SimpleHttpResponse> httpCallback =

View File

@@ -30,6 +30,8 @@ public class RestEmitterConfig {
   Integer timeoutSec;
   @Builder.Default boolean disableSslVerification = false;
+  @Builder.Default boolean disableChunkedEncoding = false;
   @Builder.Default int maxRetries = 0;
   @Builder.Default int retryIntervalSec = 10;
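
For reference, a minimal sketch of how the new builder flag might be used when constructing a REST emitter directly; illustrative only, the server URL is a placeholder and the surrounding calls follow the existing Java emitter examples.

```java
import datahub.client.rest.RestEmitter;

public class ChunkedEncodingOffExample {
  public static void main(String[] args) throws Exception {
    // Build an emitter whose requests carry an explicit Content-Length
    // (payload sent as bytes) rather than chunked transfer encoding.
    try (RestEmitter emitter =
        RestEmitter.create(
            b ->
                b.server("http://localhost:8080")
                    .disableChunkedEncoding(true)
                    .maxRetries(3)
                    .retryIntervalSec(10))) {
      System.out.println("GMS reachable: " + emitter.testConnection());
    }
  }
}
```

When the flag is set, the emitter serializes the payload to a byte array before setting the request body, so the request is sent with a fixed Content-Length instead of chunked transfer encoding.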

View File

@@ -1,4 +1,4 @@
-apply plugin: 'java'
+apply plugin: 'java-library'
 apply plugin: 'com.github.johnrengelman.shadow'
 apply plugin: 'signing'
 apply plugin: 'maven-publish'