build(CI): add access token for github-ingest test (#482)

Avoids the occasional CI test failures in test-ingest-github.sh that were due to
rate-limited non-auth'ed requests against a GitHub repo.
This commit is contained in:
cragwolfe 2023-04-14 11:14:21 -07:00 committed by GitHub
parent 137b4b9a2e
commit 46ac2a2226
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 3 deletions

View File

@ -101,6 +101,8 @@ jobs:
source .venv/bin/activate
make install-ci
- name: Test
env:
GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }}
run: |
source .venv/bin/activate
make install-detectron2

View File

@ -3,7 +3,17 @@
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR"/.. || exit 1
if [[ "$CI" == "true" ]]; then
GH_READ_ONLY_ACCESS_TOKEN=${GH_READ_ONLY_ACCESS_TOKEN:-none}
ACCESS_TOKEN_FLAGS=""
# to update test fixtures, "export OVERWRITE_FIXTURES=true" and rerun this script
if [[ "$GH_READ_ONLY_ACCESS_TOKEN" != "none" ]]; then
ACCESS_TOKEN_FLAGS="--git-access-token $GH_READ_ONLY_ACCESS_TOKEN"
elif [[ "$CI" == "true" ]]; then
echo "Warning: GH_READ_ONLY_ACCESS_TOKEN is not defined in the CI environment."
echo "This can lead to intermittent failures in test-ingest-github.sh, as non-auth'ed"
echo "requests are severely rate limited by GitHub."
echo
if [ "$(( RANDOM % 10))" -lt 1 ] ; then
# NOTE(crag): proper fix is being tracked here: https://github.com/Unstructured-IO/unstructured/issues/306
echo "Skipping ingest 90% of github ingest tests to avoid rate limiting issue."
@ -11,7 +21,7 @@ if [[ "$CI" == "true" ]]; then
fi
fi
#shellcheck disable=SC2086
PYTHONPATH=. ./unstructured/ingest/main.py \
--metadata-exclude filename \
--github-url dcneiner/Downloadify \
@ -19,7 +29,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
--structured-output-dir github-downloadify-output \
--reprocess \
--preserve-downloads \
--verbose
--verbose $ACCESS_TOKEN_FLAGS
OVERWRITE_FIXTURES=${OVERWRITE_FIXTURES:-false}