mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-10 02:25:55 +00:00

Intermittently, the various destination tests fail with: ``` {noformat}--- Cleanup done --- gs://utic-test-ingest-fixtures-output/1699377964/example-docs/ deleting gs://utic-test-ingest-fixtures-output/1699377964 Removing objects: ERROR: (gcloud.storage.rm) The following URLs matched no objects or files: -gs://utic-test-ingest-fixtures-output/1699377964 Last ran script: gcs.sh Error: Process completed with exit code 1.{noformat} ``` Reference trace [here](https://github.com/Unstructured-IO/unstructured/actions/runs/6787927424/job/18452240764?pr=2020). After some investigation, it looks like this error is due to collisions that occur because we’re assuming that 1-second date resolution is sufficient when generating (and deleting) "unique" test destination location names. The likelihood of a collision is actually pretty high given that we run these tests across a test matrix. Instead, we should just use a UUID for these unique destinations. ## Changes - Use `uuidgen` instead of `date +%s` for unique destinations
56 lines
1.7 KiB
Bash
Executable File
56 lines
1.7 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#TODO: the Box API/SDK currently cannot be used to create folders and check their contents the way the other fsspec ingest tests do
|
|
|
|
#
|
|
#set -e
|
|
#
|
|
#DEST_PATH=$(dirname "$(realpath "$0")")
|
|
#SCRIPT_DIR=$(dirname "$DEST_PATH")
|
|
#cd "$SCRIPT_DIR"/.. || exit 1
|
|
#OUTPUT_FOLDER_NAME=box-dest
|
|
#OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME
|
|
#WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
|
|
#max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
|
|
#DESTINATION_BOX="box://utic-dev-tech-fixtures/utic-ingest-test-fixtures-output/$(uuidgen)/"
|
|
#
|
|
#CI=${CI:-"false"}
|
|
#
|
|
#if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
|
# echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set."
|
|
# exit 0
|
|
#fi
|
|
#
|
|
#if [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
|
# # Create temporary service key file
|
|
# BOX_APP_CONFIG_PATH=$(mktemp)
|
|
# echo "$BOX_APP_CONFIG" >"$BOX_APP_CONFIG_PATH"
|
|
#fi
|
|
#
|
|
## shellcheck disable=SC1091
|
|
#source "$SCRIPT_DIR"/cleanup.sh
|
|
#function cleanup() {
|
|
# cleanup_dir "$OUTPUT_DIR"
|
|
# cleanup_dir "$WORK_DIR"
|
|
# if [ "$CI" == "true" ]; then
|
|
# cleanup_dir "$DOWNLOAD_DIR"
|
|
# fi
|
|
#}
|
|
#trap cleanup EXIT
|
|
#
|
|
#RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
|
#PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \
|
|
# local \
|
|
# --num-processes "$max_processes" \
|
|
# --output-dir "$OUTPUT_DIR" \
|
|
# --strategy fast \
|
|
# --verbose \
|
|
# --reprocess \
|
|
# --input-path example-docs/fake-memo.pdf \
|
|
# --work-dir "$WORK_DIR" \
|
|
# box \
|
|
# --box-app-config "$BOX_APP_CONFIG_PATH" \
|
|
# --remote-url "$DESTINATION_BOX" \
|
|
#
|
|
## Simply check the number of files uploaded
|
|
#expected_num_files=1
|