chore: install all extras in Dockerfile (#419)

* Adds step to install all extras
* Adds a smoke test of the Wikipedia ingest to validate the image in CI
This commit is contained in:
ryannikolaidis 2023-03-30 13:23:30 -07:00 committed by GitHub
parent 32c79caee3
commit 59785e4332
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 58 additions and 0 deletions

View File

@ -47,6 +47,7 @@ jobs:
# Runs `make docker-test` against the amd64 image tag, then `make docker-smoke-test`
# against that same tag (passed to the smoke-test script through IMAGE_NAME).
- name: Test AMD image
run: |
DOCKER_PLATFORM="linux/amd64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA" make docker-test
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA make docker-smoke-test
- name: Push AMD image
run: |
# write to the build repository to cache for the publish-images job
@ -80,6 +81,7 @@ jobs:
run: |
# only run a subset of tests on ARM, since they take a long time with emulation
DOCKER_PLATFORM="linux/arm64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA" make docker-test TEST_NAME=partition/test_text.py
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA make docker-smoke-test
- name: Push ARM image
run: |
# write to the build repository to cache for the publish-images job

View File

@ -72,6 +72,13 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
pip install --no-cache -r requirements/test.txt && \
pip install --no-cache -r requirements/huggingface.txt && \
pip install --no-cache -r requirements/dev.txt && \
pip install --no-cache -r requirements/ingest-azure.txt && \
pip install --no-cache -r requirements/ingest-github.txt && \
pip install --no-cache -r requirements/ingest-gitlab.txt && \
pip install --no-cache -r requirements/ingest-google-drive.txt && \
pip install --no-cache -r requirements/ingest-reddit.txt && \
pip install --no-cache -r requirements/ingest-s3.txt && \
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
pip install --no-cache -r requirements/local-inference.txt && \
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"

View File

@ -203,3 +203,7 @@ docker-test:
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \
$(DOCKER_IMAGE) \
bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
# Smoke-test an image by running scripts/docker-smoke-test.sh. The script reads the
# image to test from the IMAGE_NAME environment variable and falls back to
# unstructured:latest when it is unset.
.PHONY: docker-smoke-test
docker-smoke-test:
./scripts/docker-smoke-test.sh

45
scripts/docker-smoke-test.sh Executable file
View File

@ -0,0 +1,45 @@
#!/bin/bash
# Start the containerized repository and run ingest tests
#
# Environment:
#   IMAGE_NAME  image to start the container from (default: unstructured:latest)
#
# Shell options set below: -e aborts on the first failing command, -u treats unset
# variables as errors, -x echoes each command, pipefail fails a pipeline if any
# stage fails.
# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable...
set -eux -o pipefail
# Fixed name given to the container so later commands can address it
CONTAINER_NAME=unstructured-smoke-test
# Honor a caller-supplied IMAGE_NAME; otherwise use the default tag
IMAGE_NAME="${IMAGE_NAME:-unstructured:latest}"
# Change to the root of the repository
# (the parent of the directory that contains this script)
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR"/.. || exit 1
# Launch the image under test as a detached container with a TTY attached.
# --rm makes Docker remove the container once it stops.
# Reads: CONTAINER_NAME (name to give the container), IMAGE_NAME (image to run).
start_container() {
  printf '%s\n' "Starting container $CONTAINER_NAME"
  docker run \
    --detach \
    --tty \
    --rm \
    --name "$CONTAINER_NAME" \
    "$IMAGE_NAME"
}
# Block until the container reports the "running" state, polling once per second.
#
# The wait is bounded: if the container has not reached "running" after
# CONTAINER_START_TIMEOUT polls (default 60), print an error to stderr and return 1
# instead of spinning forever. Without the bound, a container that exits right away
# (and is then removed because it was started with --rm) would make `docker inspect`
# fail on every poll and hang the caller indefinitely.
#
# Reads:   CONTAINER_NAME, CONTAINER_START_TIMEOUT (optional, number of 1s polls)
# Returns: 0 once the container is running, 1 on timeout
await_container() {
  local max_polls="${CONTAINER_START_TIMEOUT:-60}"
  local polls=0
  echo Waiting for container to start
  # A failing `docker inspect` yields an empty string here, which simply counts as
  # "not running yet"; errexit does not apply inside a loop condition.
  until [ "$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME")" == "running" ]; do
    if [ "$polls" -ge "$max_polls" ]; then
      echo "Timed out after ${max_polls}s waiting for container $CONTAINER_NAME to start" >&2
      return 1
    fi
    sleep 1
    polls=$((polls + 1))
  done
}
# Announce and stop the container named by CONTAINER_NAME.
# Installed as the script's EXIT trap, so it runs whether or not the tests pass.
stop_container() {
  printf '%s\n' "Stopping container $CONTAINER_NAME"
  docker stop "$CONTAINER_NAME"
}
# Bring the container up first; the EXIT trap is registered only afterwards, so a
# failed start does not trigger a `docker stop` for a container that never existed.
start_container
# Regardless of test result, stop the container
trap stop_container EXIT
await_container
# Run the tests: copy the ingest test directory into the container, then execute
# the Wikipedia ingest script inside it.
docker cp test_unstructured_ingest "${CONTAINER_NAME}:/home"
# No explicit status capture is needed: with `set -e` in effect a failing
# `docker exec` ends the script with that command's status, and on success the
# script exits with the status of this, its final command.
docker exec "$CONTAINER_NAME" /bin/bash -c "/home/test_unstructured_ingest/test-ingest-wikipedia.sh"