Mirror of https://github.com/datahub-project/datahub.git (synced 2025-09-02 05:43:17 +00:00)
fix(spark-lineage): smoke test fixes, M1 support (#6372)
parent 4c6dd0656f
commit 8cfcefb19c
@@ -1,4 +1,4 @@
-FROM rappdw/docker-java-python:openjdk1.8.0_171-python3.6.6
+FROM python:3.9
 
 ARG shared_workspace=/opt/workspace
 
@@ -7,21 +7,32 @@ ENV SHARED_WORKSPACE=${shared_workspace}
 
 # -- Layer: Apache Spark
 
-ARG spark_version=2.4.8
+ARG spark_version=3.2.0
 ARG hadoop_version=2.7
 
 RUN apt-get update -y && \
-    apt-get install -y curl && \
+    apt-get install -y --no-install-recommends curl gnupg software-properties-common && \
+    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 0xB1998361219BD9C9 && \
+    curl https://cdn.azul.com/zulu/bin/zulu-repo_1.0.0-3_all.deb -o /tmp/zulu-repo_1.0.0-3_all.deb && \
+    apt-get install /tmp/zulu-repo_1.0.0-3_all.deb && \
+    apt-get update && \
+    # apt-cache search zulu && \
+    apt-get install -y --no-install-recommends zulu11-jre && \
+    apt-get clean && \
     curl -sS https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz && \
     tar -xf spark.tgz && \
     mv spark-${spark_version}-bin-hadoop${hadoop_version} /usr/bin/ && \
     mkdir /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}/logs && \
-    rm spark.tgz
+    rm spark.tgz && \
+    rm -rf /var/tmp/* /tmp/* /var/lib/apt/lists/*
 
+RUN set -e; \
+    pip install JPype1
+
 ENV SPARK_HOME /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}
 ENV SPARK_MASTER_HOST spark-master
 ENV SPARK_MASTER_PORT 7077
-ENV PYSPARK_PYTHON python2.7
+ENV PYSPARK_PYTHON python3.9
 ENV PATH=$PATH:$SPARK_HOME/bin
 
 COPY workspace $SHARED_WORKSPACE
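Note: the rebuilt image pairs CPython 3.9 with Azul's Zulu 11 JRE, which publishes arm64 packages, so the Spark 3.2.0 test containers can also run on Apple M1 hosts. A minimal sanity check one could run inside such an image, assuming a pyspark matching the bundled Spark is installed; the app name and local master below are illustrative, not part of the commit:

# Sketch: confirm the container's Python and Java runtimes can start a local Spark session.
import sys

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("local[1]")          # run in-process; no cluster needed for the check
    .appName("runtime-check")    # illustrative name
    .getOrCreate()
)

print("Python:", sys.version.split()[0])   # expect 3.9.x
print("Spark:", spark.version)             # expect 3.2.0
spark.stop()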
@@ -1,4 +1,4 @@
-
+#!/bin/bash -xe
 #Remove old configuration
 rm -rf workspace
 
@@ -1,5 +1,4 @@
 version: "3.6"
 
 services:
   spark-master:
     image: spark-master
@@ -76,11 +76,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut1/out.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut1/out.csv,PROD)"
     ]
   }
 }
@@ -62,11 +62,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut2/out.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut2/out.csv,PROD)"
     ]
   }
 },
@@ -50,8 +50,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,JavaHdfsIn2HiveCreateInsertTable.foo4,PROD)"
@@ -114,8 +114,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,javahdfsin2hivecreateinserttable.foo4,PROD)"
@@ -179,8 +179,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,javahdfsin2hivecreateinserttable.foo4,PROD)"
@@ -76,11 +76,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hive,JavaHdfsIn2HiveCreateTable.foo3,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,JavaHdfsIn2HiveCreateTable.foo3,PROD)"
     ]
   }
 }
@@ -62,11 +62,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hive,JavaHiveInHiveOut.foo5,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,JavaHiveInHiveOut.foo5,PROD)"
     ]
   }
 },
@@ -50,11 +50,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut1/out.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut1/out.csv,PROD)"
     ]
   }
 },
@@ -102,11 +102,11 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/PythonHdfsIn2HdfsOut2/out2.csv,PROD)"
     ]
   }
 },
@@ -55,8 +55,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,PythonHdfsIn2HiveCreateInsertTable.foo4,PROD)"
@@ -123,8 +123,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,pythonhdfsin2hivecreateinserttable.foo4,PROD)"
@@ -154,8 +154,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,pythonhdfsin2hivecreateinserttable.foo4,PROD)"
@@ -76,8 +76,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,PythonHdfsIn2HiveCreateTable.foo3,PROD)"
@@ -164,8 +164,8 @@
 {
   "com.linkedin.datajob.DataJobInputOutput": {
     "inputDatasets": [
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
-      "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in1.csv,PROD)",
+      "urn:li:dataset:(urn:li:dataPlatform:file,file:/opt/workspace/resources/data/in2.csv,PROD)"
     ],
     "outputDatasets": [
       "urn:li:dataset:(urn:li:dataPlatform:hive,PythonHiveInHiveOut.foo5,PROD)"
@@ -7,25 +7,25 @@
 
 saluation () {
     echo "--------------------------------------------------------"
-    echo "Starting exectuion"
+    echo "Starting execution $1"
     echo "--------------------------------------------------------"
 
 }
 
-saluation
+saluation "HdfsIn2HdfsOut1.py"
 
 spark-submit --properties-file $2 HdfsIn2HdfsOut1.py
 
-saluation
+saluation "HdfsIn2HdfsOut2.py"
 spark-submit --properties-file $2 HdfsIn2HdfsOut2.py
 
-saluation
+saluation "HdfsIn2HiveCreateTable.py"
 spark-submit --properties-file $2 HdfsIn2HiveCreateTable.py
 
-saluation
+saluation "HdfsIn2HiveCreateInsertTable.py"
 spark-submit --properties-file $2 HdfsIn2HiveCreateInsertTable.py
 
-saluation
+saluation "HiveInHiveOut.py"
 spark-submit --properties-file $2 HiveInHiveOut.py
 
 
@@ -1,6 +1,9 @@
-#!/bin/bash
+#!/bin/bash -x
 
 set -e
 
+SMOKE_TEST_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
 pip install -r requirements.txt
 
 echo "--------------------------------------------------------------------"
@@ -25,6 +28,7 @@ echo "--------------------------------------------------------------------"
 echo "Bringing up spark cluster"
 echo "--------------------------------------------------------------------"
 
+cd "${SMOKE_TEST_ROOT_DIR}"/docker
 #bring up spark cluster
 docker-compose -f spark-docker-compose.yml up -d
 
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -x
 
 set -e
 # Script assumptions:
@@ -7,6 +7,24 @@ set -e
 # - pytest is installed
 # - requests is installed
 
+is_healthy() {
+    local service="$1"
+    local -r -i max_attempts="$2"; shift
+    local -i attempt_num=1
+
+    until [ -n "$(docker ps -f name="$service" -f "health=healthy"|tail -n +2)" ]
+    do
+        if (( attempt_num == max_attempts ))
+        then
+            echo "Attempt $attempt_num failed and there are no more attempts left!"
+            return 1
+        else
+            echo "Attempt $attempt_num failed! Trying again in $attempt_num seconds..."
+            sleep $(( attempt_num++ ))
+        fi
+    done
+}
+
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd "$DIR"
 
@@ -22,12 +40,8 @@ echo "--------------------------------------------------------------------"
 
 pwd ../../../
 
-datahub docker quickstart \
-    --build-locally \
-    --quickstart-compose-file ../../../../docker/docker-compose.yml \
-    --quickstart-compose-file ../../../../docker/docker-compose.override.yml \
-    --quickstart-compose-file ../../../../docker/docker-compose.dev.yml \
-    --dump-logs-on-failure
+../../../../docker/dev.sh -d
+is_healthy "datahub-gms" 60
 
 echo "--------------------------------------------------------------------"
 echo "Setup environment for pytest"
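Note: the new is_healthy helper replaces the quickstart's built-in wait with a poll of `docker ps` that backs off by one extra second per attempt. A rough Python equivalent of that waiting loop, shown only to illustrate the pattern; the wait_until_healthy function is hypothetical, while the container name and attempt count mirror the values the script passes:

# Sketch: wait until a named Docker container reports a "healthy" status,
# sleeping 1s, 2s, 3s, ... between attempts, mirroring the shell helper above.
import subprocess
import sys
import time


def wait_until_healthy(service: str, max_attempts: int) -> bool:
    for attempt in range(1, max_attempts + 1):
        out = subprocess.run(
            ["docker", "ps", "-f", f"name={service}", "-f", "health=healthy",
             "--format", "{{.Names}}"],
            capture_output=True, text=True, check=False,
        ).stdout.strip()
        if out:
            return True
        if attempt == max_attempts:
            print(f"Attempt {attempt} failed and there are no more attempts left!")
            return False
        print(f"Attempt {attempt} failed! Trying again in {attempt} seconds...")
        time.sleep(attempt)
    return False


if __name__ == "__main__":
    sys.exit(0 if wait_until_healthy("datahub-gms", 60) else 1)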
@@ -1,24 +1,24 @@
 saluation () {
     echo "--------------------------------------------------------"
-    echo "Starting exectuion"
+    echo "Starting execution $1"
     echo "--------------------------------------------------------"
 
 }
 
 
-saluation
+saluation "test.spark.lineage.HdfsIn2HdfsOut1"
 $1/bin/spark-submit --properties-file $2 --class test.spark.lineage.HdfsIn2HdfsOut1 build/libs/test-spark-lineage.jar
 
-saluation
+saluation "test.spark.lineage.HdfsIn2HdfsOut2"
 $1/bin/spark-submit --properties-file $2 --class test.spark.lineage.HdfsIn2HdfsOut2 build/libs/test-spark-lineage.jar
 
-saluation
+saluation "test.spark.lineage.HdfsIn2HiveCreateTable"
 $1/bin/spark-submit --properties-file $2 --class test.spark.lineage.HdfsIn2HiveCreateTable build/libs/test-spark-lineage.jar
 
-saluation
+saluation "test.spark.lineage.HdfsIn2HiveCreateInsertTable"
 $1/bin/spark-submit --properties-file $2 --class test.spark.lineage.HdfsIn2HiveCreateInsertTable build/libs/test-spark-lineage.jar
 
-saluation
+saluation "test.spark.lineage.HiveInHiveOut"
 $1/bin/spark-submit --properties-file $2 --class test.spark.lineage.HiveInHiveOut build/libs/test-spark-lineage.jar
 
 
@@ -17,6 +17,7 @@ from jsoncomparison import Compare, NO_DIFF
 GMS_ENDPOINT = "http://localhost:8080"
 GOLDEN_FILES_PATH = "./spark-smoke-test/golden_json/"
 golden_files = os.listdir(GOLDEN_FILES_PATH)
+
 print(golden_files)
 [file_name.strip(".json") for file_name in golden_files]
 restli_default_headers = {
@@ -59,6 +60,14 @@ def test_healthchecks(wait_for_healthchecks):
     pass
 
 
+def sort_aspects(input):
+    print(input)
+    item_id = list(input["value"].keys())[0]
+    input["value"][item_id]["aspects"] = sorted(
+        input["value"][item_id]["aspects"], key=lambda x: list(x.keys())[0]
+    )
+
+
 @pytest.mark.dependency(depends=["test_healthchecks"])
 @pytest.mark.parametrize("json_file", golden_files, )
 def test_ingestion_via_rest(json_file):
@@ -71,7 +80,9 @@ def test_ingestion_via_rest(json_file):
     print(url)
     response = requests.get(url)
     response.raise_for_status()
-    data = response.json()
+
+    data = sort_aspects(response.json())
+    value = sort_aspects(value)
     diff = json_compare.check(value, data)
     print(urn)
     if diff != NO_DIFF:
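Note: sort_aspects makes the golden-file comparison independent of the order in which GMS returns aspects. A small self-contained illustration, using a made-up snapshot payload and a sketch of the sort that returns its input for convenience (the real helper mutates in place):

# Sketch: normalize aspect ordering before comparing a response to a golden file.
def sort_aspects(snapshot):
    # Each payload has a single top-level item under "value"; sort its aspect
    # dicts by their one key so list order no longer affects the comparison.
    item_id = list(snapshot["value"].keys())[0]
    snapshot["value"][item_id]["aspects"] = sorted(
        snapshot["value"][item_id]["aspects"], key=lambda x: list(x.keys())[0]
    )
    return snapshot


# Hypothetical golden file and server response that differ only in aspect order.
golden = {"value": {"com.linkedin.metadata.snapshot.DataJobSnapshot": {"aspects": [
    {"com.linkedin.datajob.DataJobInfo": {"name": "job"}},
    {"com.linkedin.datajob.DataJobInputOutput": {"inputDatasets": []}},
]}}}

response = {"value": {"com.linkedin.metadata.snapshot.DataJobSnapshot": {"aspects": [
    {"com.linkedin.datajob.DataJobInputOutput": {"inputDatasets": []}},
    {"com.linkedin.datajob.DataJobInfo": {"name": "job"}},
]}}}

assert sort_aspects(golden) == sort_aspects(response)  # order differences vanish
print("aspect order normalized")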