mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-03 19:43:24 +00:00
We are seeing some .eml files come through the VLM partitioner, which then downgrades to hi-res, I believe. For some reason they have a date format that is not the standard email format, but it is still legitimate. This change uses a more robust date package to parse the date. This package is already installed. --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: potter-potter <potter-potter@users.noreply.github.com>
49 lines
1.3 KiB
Bash
Executable File
49 lines
1.3 KiB
Bash
Executable File
#!/usr/bin/env bash

# Start the containerized repository and run ingest tests

# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable...
# shellcheck disable=SC2329 # Functions are invoked indirectly

# -e: abort on the first failing command, -u: error on unset variables,
# -x: trace every command, pipefail: a pipeline fails if any stage fails.
set -eux -o pipefail

# Name given to the container started by this script.
CONTAINER_NAME=unstructured-smoke-test
# Image to run; callers may override it through the environment.
DOCKER_IMAGE="${DOCKER_IMAGE:-unstructured:dev}"

# Change to the root of the repository
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
cd "$SCRIPT_DIR"/.. || exit 1

#######################################
# Start the test container in the background.
# Globals:  CONTAINER_NAME (read), DOCKER_IMAGE (read)
# Outputs:  a progress message, followed by whatever `docker run` prints.
# Returns:  the exit status of `docker run`.
#######################################
start_container() {
  echo "Starting container $CONTAINER_NAME"
  # -d: detached, -t: allocate a TTY, --rm: remove the container once it stops.
  docker run -dt --rm --name "$CONTAINER_NAME" "$DOCKER_IMAGE"
}

#######################################
# Poll the container state until it reports "running".
# Globals:  CONTAINER_NAME (read)
#           CONTAINER_START_TIMEOUT (read, optional): maximum number of
#             one-second polls to wait before giving up; defaults to 60.
# Outputs:  a progress message on stdout; a timeout message on stderr.
# Returns:  0 once the container is running, 1 if the wait times out.
#######################################
await_container() {
  local max_wait="${CONTAINER_START_TIMEOUT:-60}"
  local waited=0
  local status

  echo Waiting for container to start
  while true; do
    # A failing `docker inspect` (e.g. the container does not exist) is
    # treated as "not running yet" rather than as a fatal error.
    status=$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME") || status=""
    if [[ "$status" == "running" ]]; then
      return 0
    fi
    # Bound the wait so a container that never comes up cannot hang the run.
    if ((waited >= max_wait)); then
      echo "Timed out after ${max_wait}s waiting for container $CONTAINER_NAME (last status: ${status:-unknown})" >&2
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done
}

#######################################
# Stop the test container.
# Globals:  CONTAINER_NAME (read)
# Outputs:  a progress message, followed by whatever `docker stop` prints.
# Returns:  the exit status of `docker stop`.
#######################################
stop_container() {
  echo "Stopping container $CONTAINER_NAME"
  docker stop "$CONTAINER_NAME"
}

start_container

# Regardless of test result, stop the container
trap stop_container EXIT

await_container

# Run the tests
# Copy the ingest tests and their requirements into the container.
docker cp test_unstructured_ingest "$CONTAINER_NAME":/app
docker cp requirements/ingest "$CONTAINER_NAME":/app/requirements/ingest
# Hand the copied tests to notebook-user (done as root inside the container).
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R notebook-user:notebook-user /app/test_unstructured_ingest"

# Capture the status explicitly: with errexit active a bare `result=$?` after
# the command would never run on failure.
result=0
docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/local.sh" || result=$?

exit "$result"