#!/usr/bin/env bash
#
# Runs every destination-connector ingest test script in sequence.
# Test scripts that exit with status 8, and test scripts listed in
# tests_to_ignore, are recorded in skipped-files.txt next to this script
# instead of failing the run.

set -u -o pipefail

# Absolute directory containing this script; abort if it cannot be resolved,
# otherwise every path derived from it would silently point at "/".
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || exit 1

SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt
# Start every run with an empty log (creates the file if it does not exist yet)
: >"$SKIPPED_FILES_LOG"

cd "$SCRIPT_DIR"/.. || exit 1

# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
export OMP_THREAD_LIMIT=1

# Destination test scripts to run, in execution order.
# Each entry is a file name under ./test_unstructured_ingest/dest/.
all_tests=(
  'astradb.sh'
  'azure.sh'
  'azure-cognitive-search.sh'
  'box.sh'
  'chroma.sh'
  'clarifai.sh'
  'delta-table.sh'
  'dropbox.sh'
  'elasticsearch.sh'
  'gcs.sh'
  'kafka-local.sh'
  'mongodb.sh'
  'opensearch.sh'
  'pgvector.sh'
  'pinecone.sh'
  'qdrant.sh'
  's3.sh'
  'sharepoint-embed-cog-index.sh'
  'sqlite.sh'
  'vectara.sh'
  'singlestore.sh'
  'weaviate.sh'
  'databricks-volumes.sh'
)

# Tests that run on every Python version; all other tests run only when the
# interpreter reports Python 3.10.x.
full_python_matrix_tests=(
  'azure.sh'
  'gcs.sh'
  's3.sh'
)

# Name of the test script currently being processed ("none" until the first one starts)
CURRENT_TEST="none"

#######################################
# Prints a short run summary: the last test script that was started (if any)
# followed by the contents of the skipped-tests log.
# Globals:   CURRENT_TEST (read), SKIPPED_FILES_LOG (read)
# Arguments: none
# Outputs:   summary on stdout
#######################################
function print_last_run() {
  if [[ "$CURRENT_TEST" != "none" ]]; then
    echo "Last ran script: $CURRENT_TEST"
  fi
  echo "######## SKIPPED TESTS: ########"
  cat "$SKIPPED_FILES_LOG"
}

# Always print the run summary, whether the script finishes or exits early
trap print_last_run EXIT

# Version banner of the active interpreter, e.g. "Python 3.10.12"
# (stderr is captured too, so an error message ends up here if python cannot be run)
python_version=$(python --version 2>&1)

# Tests whose exit code is logged to the skipped-files log but never fails the run
tests_to_ignore=(
  'notion.sh'
  'dropbox.sh'
  'sharepoint.sh'
  'databricks-volumes.sh'
  'vectara.sh'
)

#######################################
# Exact-match membership check.
# Arguments: $1 - value to look for; remaining arguments - the list to search
# Returns:   0 if $1 equals one of the list entries, 1 otherwise
#######################################
function list_contains() {
  local needle=$1
  shift
  local candidate
  for candidate in "$@"; do
    if [[ "$candidate" == "$needle" ]]; then
      return 0
    fi
  done
  return 1
}

# Run each test script in order; stop at the first failure that is not excused.
for test in "${all_tests[@]}"; do
  CURRENT_TEST="$test"
  # IF: python_version is not 3.10 (wildcarded to match any subminor version) AND the current test is not in full_python_matrix_tests
  # Note: membership is an exact comparison. A regex match against the joined
  # array would also accept substrings (e.g. 'box.sh' inside 'dropbox.sh').
  if [[ "$python_version" != "Python 3.10"* ]] && ! list_contains "$test" "${full_python_matrix_tests[@]}"; then
    echo "--------- SKIPPING SCRIPT $test ---------"
    continue
  fi
  echo "--------- RUNNING SCRIPT $test ---------"
  echo "Running ./test_unstructured_ingest/dest/$test"
  ./test_unstructured_ingest/dest/"$test"
  rc=$?
  if [[ $rc -eq 8 ]]; then
    # Exit status 8 is reported by the test script when a required env var is missing
    echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG"
  elif list_contains "$test" "${tests_to_ignore[@]}"; then
    # Ignored tests are logged with their exit code but never fail the run
    echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
    continue
  elif [[ $rc -ne 0 ]]; then
    # Any other failure aborts the whole run with the test's own exit code
    exit $rc
  fi
  echo "--------- FINISHED SCRIPT $test ---------"
done