mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-25 18:38:55 +00:00
35 lines
1.5 KiB
Bash
Executable File
35 lines
1.5 KiB
Bash
Executable File
#!/bin/bash
#
# Replaces selected jars shipped inside the user-site PySpark installation
# with versions resolved through Maven (see the replace_jar calls below).
# Requires `python` and `mvn` on PATH.

# -e: abort on the first failing command; -x: trace every command.
set -ex

# Jar directory of the pyspark package under the Python user site-packages
# directory reported by `python -m site --user-site`.
PYSPARK_JARS="$(python -m site --user-site)/pyspark/jars"

#######################################
# Replace the jars in the PySpark jar directory whose file name starts with a
# given prefix by the jars of a freshly resolved Maven artifact.
# Globals:
#   PYSPARK_JARS (read) - directory whose jars are replaced
#   HOME (read)         - "$HOME/.m2" is deleted on every call
# Arguments:
#   $1 - jar file name prefix, e.g. "zookeeper-"
#   $2 - value passed to Maven as -Dtransitive
#   $3 - artifact coordinates passed to Maven as -Dartifact; when empty the
#        matching jars are only removed and nothing is resolved
# Outputs:
#   Progress messages and the copied jar paths on stdout, errors on stderr.
# Returns:
#   0 on success, Maven's exit status when the artifact cannot be resolved.
#######################################
replace_jar() {
  local jar_prefix=$1
  local transitive=$2
  local dependency=$3

  echo "Removing version conflicts for $PYSPARK_JARS/$jar_prefix*.jar"
  # Listing and removal are best-effort: the glob may match nothing.
  ls "$PYSPARK_JARS/$jar_prefix"*.jar || true
  rm "$PYSPARK_JARS/$jar_prefix"*.jar || true
  # Wipe the local Maven repository so that the find below only matches jars
  # downloaded by this call (assuming mvn uses the default "$HOME/.m2").
  rm -r "$HOME/.m2" || true

  if [[ -z "$dependency" ]]; then
    return 0
  fi

  echo "Resolving $dependency"
  local status=0
  mvn dependency:get -Dtransitive="$transitive" -Dartifact="$dependency" >/dev/null \
    || status=$?
  if (( status != 0 )); then
    # Maven's stdout is discarded above, so say explicitly what failed.
    echo "Failed to resolve $dependency (mvn exit status $status)" >&2
    return "$status"
  fi

  echo "Moving jars to $PYSPARK_JARS"
  # Print each matching jar and copy it in a single pass over the repository.
  find "$HOME/.m2" -type f -name "$jar_prefix*.jar" \
    -print -exec cp -- {} "$PYSPARK_JARS/" \;
}

# Jars to replace. Each default artifact can be overridden through the
# environment variable named in the third argument.
replace_jar "zookeeper-" "false" "${ZOOKEEPER_DEPENDENCY:-org.apache.zookeeper:zookeeper:3.7.2}"
# NOTE(review): the two hadoop-client calls share the "hadoop-client-" prefix,
# so the second call first removes the jars copied by the first one.
# Presumably the transitive resolution of hadoop-client-runtime brings the
# hadoop-client-api jar back - verify.
replace_jar "hadoop-client-" "true" "${HADOOP_CLIENT_API_DEPENDENCY:-org.apache.hadoop:hadoop-client-api:3.3.6}"
replace_jar "hadoop-client-" "true" "${HADOOP_CLIENT_RUNTIME_DEPENDENCY:-org.apache.hadoop:hadoop-client-runtime:3.3.6}"
replace_jar "hadoop-yarn-" "true" "${HADOOP_YARN_DEPENDENCY:-org.apache.hadoop:hadoop-yarn-server-web-proxy:3.3.6}"
replace_jar "snappy-java-" "false" "${SNAPPY_JAVA_DEPENDENCY:-org.xerial.snappy:snappy-java:1.1.10.5}"
replace_jar "libthrift-" "false" "${LIBTHRIFT_DEPENDENCY:-org.apache.thrift:libthrift:0.19.0}"
replace_jar "ivy-" "false" "${IVY_DEPENDENCY:-org.apache.ivy:ivy:2.5.2}"
replace_jar "parquet-jackson-" "false" "${PARQUET_JACKSON_DEPENDENCY:-org.apache.parquet:parquet-jackson:1.13.1}"