mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-18 13:45:45 +00:00

Add GitLab data connector for ingest. Involves more general Git functionality that is shared between the GitHub and GitLab data connectors. Prevent code duplication for functionality between GitHub and GitLab ingest connectors. Renamed github-access-token, github-branch and github-file-glob to git-access-token, git-branch and git-file-glob, respectively. These work for GitHub and GitLab.
28 lines
1.2 KiB
Bash
Executable File
28 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Ingest test for the GitHub connector.
#
# Runs the ingest entry point against the dcneiner/Downloadify repository and
# compares the structured output with the checked-in expected output.
#
# Environment:
#   CI - when set to "true", the test is randomly skipped on some runs
#        (see the rate-limiting note below).
#
# Exit status:
#   0 - outputs match the expected outputs, or the run was skipped in CI
#   1 - could not change directory, ingest failed, or the outputs differ

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Every relative path below is resolved from the parent of this script's directory.
cd "$SCRIPT_DIR"/.. || exit 1

# Single source of truth for the directories used by the ingest command, the
# diff, and the help text printed on mismatch.
OUTPUT_DIR=github-downloadify-output
EXPECTED_DIR=test_unstructured_ingest/expected-structured-output/github-downloadify

if [[ "$CI" == "true" ]]; then
    # NOTE(review): this branch is taken when RANDOM % 10 is 0 or 1, i.e. on
    # roughly 20% of CI runs, while the message below says 80%. Confirm which
    # one is intended before changing either.
    if (( RANDOM % 10 < 2 )); then
        # NOTE(crag): proper fix is being tracked here: https://github.com/Unstructured-IO/unstructured/issues/306
        echo "Skipping ingest 80% of github ingest tests to avoid rate limiting issue."
        exit 0
    fi
fi

# Fail fast if ingest itself fails; otherwise the diff below could compare a
# stale output directory left over from an earlier run.
if ! PYTHONPATH=. ./unstructured/ingest/main.py --github-url dcneiner/Downloadify --git-file-glob '*.html,*.txt' --structured-output-dir "$OUTPUT_DIR" --verbose; then
    echo "Ingest command failed; not comparing outputs." >&2
    exit 1
fi

if ! diff -ru "$OUTPUT_DIR" "$EXPECTED_DIR"; then
    echo
    echo "There are differences from the previously checked-in structured outputs."
    echo
    echo "If these differences are acceptable, copy the outputs from"
    echo "$OUTPUT_DIR/ to $EXPECTED_DIR/ after running"
    echo
    # Keep this hint in sync with the ingest command above (the file-glob flag
    # is --git-file-glob).
    echo " PYTHONPATH=. ./unstructured/ingest/main.py --github-url dcneiner/Downloadify --git-file-glob '*.html,*.txt' --structured-output-dir $OUTPUT_DIR --verbose"
    echo
    exit 1
fi