mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-04 03:53:45 +00:00
### Summary Closes #2959. Updates the dependency and CI to add support for Python 3.12. The MongoDB ingest tests were disabled due to jobs like [this one](https://github.com/Unstructured-IO/unstructured/actions/runs/9133383127/job/25116767333) failing due to issues with the `bson` package. `bson` is a dependency for the AstraDB connector, but `pymongo` does not work when `bson` is installed from `pip`. This issue is documented by MongoDB [here](https://pymongo.readthedocs.io/en/stable/installation.html). Spun off #3049 to resolve this. Issue seems unrelated to Python 3.12, though unsure why this didn't surface previously. Disables the `argilla` tests because `argilla` does not yet support Python 3.12. We can add the `argilla` tests back in once the PR referenced below is merged. You can still use the `stage_for_argilla` function if you're on `python<3.12` and you install `argilla` yourself. - https://github.com/argilla-io/argilla/pull/4837 --------- Co-authored-by: Nicolò Boschi <boschi1997@gmail.com>
91 lines
2.4 KiB
Bash
Executable File
91 lines
2.4 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Runs the ingest test scripts under ./test_unstructured_ingest/dest/ one
# after another and keeps a log of the ones that were skipped.

# errexit (-e) is not enabled: the loop further down reads each test script's
# exit code through $? and decides itself whether to stop.
set -u -o pipefail

# Absolute path of the directory containing this script. Abort when it cannot
# be resolved; otherwise the log path and the cd below would silently be
# computed relative to the filesystem root.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || exit 1
SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt

# Start every run with a fresh, empty skipped-files log.
rm -f -- "$SKIPPED_FILES_LOG"
touch -- "$SKIPPED_FILES_LOG"

# Test scripts are invoked relative to the parent of the script directory.
cd "$SCRIPT_DIR"/.. || exit 1

# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
export OMP_THREAD_LIMIT=1
|
|
|
|
# Test scripts to run, in order. Each name is resolved against
# ./test_unstructured_ingest/dest/ when it is executed.
all_tests=(
  'astra.sh'
  'azure.sh'
  'azure-cognitive-search.sh'
  'box.sh'
  'chroma.sh'
  'clarifai.sh'
  'delta-table.sh'
  'dropbox.sh'
  'elasticsearch.sh'
  'gcs.sh'
  'opensearch.sh'
  'pgvector.sh'
  'pinecone.sh'
  'qdrant.sh'
  's3.sh'
  'sharepoint-embed-cog-index.sh'
  'sqlite.sh'
  'vectara.sh'
  'weaviate.sh'
  # NOTE(robinson) - mongo conflicts with astra because it ships with its
  # own version of bson, and installing bson from pip causes mongo to fail
  # ref: https://pymongo.readthedocs.io/en/stable/installation.html
  # 'mongodb.sh'
)

# Tests that run on every Python version. Tests not listed here are only run
# when the interpreter reports Python 3.10.
full_python_matrix_tests=(
  'azure.sh'
  'gcs.sh'
  's3.sh'
)
|
|
|
|
# Test script the main loop most recently reached; stays "none" until the
# loop picks up its first entry.
CURRENT_TEST="none"

#######################################
# Prints an end-of-run summary: the last test script the loop reached (when
# there is one) followed by the contents of the skipped-files log.
# Globals:   CURRENT_TEST (read), SKIPPED_FILES_LOG (read)
# Arguments: none
# Outputs:   summary text on stdout
# Returns:   0
#######################################
print_last_run() {
  if [[ "$CURRENT_TEST" != "none" ]]; then
    echo "Last ran script: $CURRENT_TEST"
  fi
  echo "######## SKIPPED TESTS: ########"
  # This runs from the EXIT trap, so stay quiet when the log is unset or has
  # disappeared instead of letting cat report an error during shutdown.
  local skipped_log=${SKIPPED_FILES_LOG:-}
  if [[ -f "$skipped_log" ]]; then
    cat -- "$skipped_log"
  fi
  return 0
}

# Emit the summary on every exit path, including early aborts.
trap print_last_run EXIT
|
|
|
|
# Version banner printed by the active interpreter, e.g. "Python 3.10.13".
python_version=$(python --version 2>&1)

# Tests whose exit code is logged rather than checked, so a failure in one of
# them does not abort the run.
tests_to_ignore=(
  'notion.sh'
  'dropbox.sh'
  'sharepoint.sh'
)

#######################################
# Reports whether a name equals one of the candidates exactly.
# An exact comparison is required: a regex match against the joined list
# treats "box.sh" as a hit for "dropbox.sh" and "." as a wildcard.
# Arguments: $1 - name to look for; remaining arguments - candidate names
# Returns:   0 when the name is among the candidates, 1 otherwise
#######################################
is_listed() {
  local needle=${1-}
  shift
  local candidate
  for candidate in "$@"; do
    if [[ "$candidate" == "$needle" ]]; then
      return 0
    fi
  done
  return 1
}

for test in "${all_tests[@]}"; do
  CURRENT_TEST="$test"
  # Unless the interpreter is Python 3.10 (any patch release), only the tests
  # listed in full_python_matrix_tests are run.
  if [[ "$python_version" != "Python 3.10"* ]] &&
    ! is_listed "$test" "${full_python_matrix_tests[@]}"; then
    echo "--------- SKIPPING SCRIPT $test ---------"
    continue
  fi
  echo "--------- RUNNING SCRIPT $test ---------"
  echo "Running ./test_unstructured_ingest/dest/$test"
  ./test_unstructured_ingest/dest/"$test"
  rc=$?
  if [[ $rc -eq 8 ]]; then
    # Exit code 8 is how a test script reports a missing environment variable.
    echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG"
  elif is_listed "$test" "${tests_to_ignore[@]}"; then
    # Record the exit code but never fail the run for an ignored test.
    echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
    continue
  elif [[ $rc -ne 0 ]]; then
    # Any other failure stops the run and propagates the test's exit code.
    exit $rc
  fi
  echo "--------- FINISHED SCRIPT $test ---------"
done
|