mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

Add a GitLab data connector for ingest. General Git functionality is factored out and shared between the GitHub and GitLab data connectors to prevent code duplication. Renamed github-access-token, github-branch and github-file-glob to git-access-token, git-branch and git-file-glob, respectively. These options work for both GitHub and GitLab.
18 lines
526 B
Bash
Executable File
18 lines
526 B
Bash
Executable File
#!/usr/bin/env bash
#
# Smoke test for the GitLab ingest connector.
#
# Runs the ingest entry point against a public GitLab repository at a fixed
# ref, restricted to Markdown and text files, and then checks that exactly
# two files exist under the structured output directory.
#
# Exit status: 0 when exactly two output files are found, 1 otherwise (or
# when the working directory cannot be changed).
#
# NOTE(review): the output directory is not cleared first, so files left by
# an earlier run are counted as well.

# Resolve the directory holding this script so it can be launched from any cwd.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# main.py and the output directory are addressed relative to the parent dir.
cd "$SCRIPT_DIR"/.. || exit 1

PYTHONPATH=. ./unstructured/ingest/main.py \
  --gitlab-url https://gitlab.com/gitlab-com/content-sites/docsy-gitlab \
  --git-file-glob '*.md,*.txt' \
  --structured-output-dir gitlab-ingest-output \
  --git-branch 'v0.0.7' \
  --verbose

# Emit one '.' per regular file and count the bytes. `-exec printf` is POSIX,
# unlike GNU-only `-printf`, so this also works with BSD/macOS find.
output_file_count=$(find 'gitlab-ingest-output' -type f -exec printf '.' \; | wc -c)

# Compare numerically: some `wc` implementations pad the count with leading
# blanks, which would defeat a plain string comparison.
if (( output_file_count != 2 )); then
  echo
  echo "2 files should have been created."
  exit 1
fi