cragwolfe bd8a74d686
chore: shell scripts default indent of 2 instead of 4 (#2287)
Given the tendency for shell scripts to easily enter into a few levels
of indentation and long line lengths, update the default to 2 spaces.
2023-12-19 07:48:21 +00:00

68 lines
2.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# Usage:
# - Set the required environment variables (listed below)
# - Run the script: ./scripts/performance/benchmark.sh
# Environment Variables:
# - DOCKER_TEST: Set to "true" to run benchmark inside a Docker container (default: false)
# - NUM_ITERATIONS: Number of iterations for benchmark (e.g., 100) (default: 2)
# - INSTANCE_TYPE: Type of benchmark instance (e.g., "c5.xlarge") (default: "unspecified")
# - PUBLISH_RESULTS: Set to "true" to publish results to S3 bucket (default: false)
# File lists forwarded (space-joined) to benchmark-local.sh through the
# environment variables of the same name.
SLOW_FILES=("DA-619p.pdf" "layout-parser-paper-hi_res-16p.pdf" "layout-parser-paper-10p.jpg")
HI_RES_STRATEGY_FILES=("layout-parser-paper-hi_res-16p.pdf")

# Caller-overridable settings; the value after ':-' applies when the variable
# is unset or empty.
NUM_ITERATIONS=${NUM_ITERATIONS:-2}
INSTANCE_TYPE=${INSTANCE_TYPE:-"unspecified"}

# Destination used when results are published to S3.
S3_BUCKET="utic-dev-tech-fixtures"
S3_RESULTS_DIR="performance-test/results"

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Short hash of the current HEAD commit, forwarded to the benchmark run.
GIT_HASH="$(git rev-parse --short HEAD)"

# Save the results filename to a temporary file
RESULTS_FILENAME_FILE=$(mktemp) || {
  echo "Error: could not create a temporary file." >&2
  exit 1
}
# Single quotes defer expansion until the trap fires; the inner double quotes
# keep the path intact even if it contains whitespace or glob characters.
trap 'rm -f -- "$RESULTS_FILENAME_FILE"' EXIT
#######################################
# Scan one benchmark log line for the "Results saved to: <name>" marker and,
# when it is present, persist <name> to the results-filename temp file.
# Globals:
#   line                   (read)    fallback input when no argument is given
#   RESULTS_FILENAME_FILE  (read)    path of the file that receives <name>
#   results_filename       (written) the captured <name>
# Arguments:
#   $1 - log line to inspect (optional; defaults to the caller's $line)
# Outputs:
#   Prints "CSV file value found: <name>" to stdout on a match.
#######################################
function read_benchmark_logs_for_results() {
  # Prefer the explicit argument; fall back to the caller's loop variable so
  # existing call sites that pass nothing keep working.
  local log_line="${1-${line-}}"

  if [[ $log_line =~ Results\ saved\ to:\ ([^\ ]+) ]]; then
    # The capture group is the first run of non-space characters after the marker.
    results_filename="${BASH_REMATCH[1]}"
    echo "CSV file value found: $results_filename"
    echo "$results_filename" >"$RESULTS_FILENAME_FILE" # Store the value in the temporary file
  fi
}
#######################################
# Copy stdin to stdout line by line while scanning each line for the results
# filename. Intended to be the last stage of the benchmark pipeline: unlike a
# `tee >(...)` process substitution, the shell waits for a pipeline stage to
# finish, so the scan is complete before the script moves on.
# Outputs:
#   Every input line, plus any notice printed by the scanner.
#######################################
function stream_benchmark_logs() {
  local line
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf '%s\n' "$line"
    read_benchmark_logs_for_results "$line"
  done
}

if [[ "$DOCKER_TEST" == "true" ]]; then
  # Build the image under test; stop here rather than risk benchmarking a
  # previously built image.
  if ! DOCKER_IMAGE=unstructured:perf-test make docker-build; then
    echo "Error: docker image build failed." >&2
    exit 1
  fi
  # Remove any container with the same name; output and failure are ignored.
  docker rm -f unstructured-perf-test >/dev/null 2>&1
  docker run \
    --name unstructured-perf-test \
    --rm \
    -e NUM_ITERATIONS="$NUM_ITERATIONS" \
    -e INSTANCE_TYPE="$INSTANCE_TYPE" \
    -e GIT_HASH="$GIT_HASH" \
    -e SLOW_FILES="${SLOW_FILES[*]}" \
    -e HI_RES_STRATEGY_FILES="${HI_RES_STRATEGY_FILES[*]}" \
    -v "${SCRIPT_DIR}":/home/notebook-user/scripts/performance \
    unstructured:perf-test \
    bash /home/notebook-user/scripts/performance/benchmark-local.sh 2>&1 |
    stream_benchmark_logs
else
  # Run the benchmark directly on this host with the same settings.
  NUM_ITERATIONS="$NUM_ITERATIONS" \
    INSTANCE_TYPE="$INSTANCE_TYPE" \
    GIT_HASH="$GIT_HASH" \
    SLOW_FILES="${SLOW_FILES[*]}" \
    HI_RES_STRATEGY_FILES="${HI_RES_STRATEGY_FILES[*]}" \
    "$SCRIPT_DIR"/benchmark-local.sh 2>&1 |
    stream_benchmark_logs
fi
# Read the result filename from the temporary file
results_filename=$(<"$RESULTS_FILENAME_FILE")
if [[ -z "$results_filename" ]]; then
  # Diagnostics go to stderr so they are not mixed into the benchmark output.
  echo "Error: Results filename value not found in the benchmark logs." >&2
  exit 1
fi

# Upload the results file only when explicitly requested.
if [[ "$PUBLISH_RESULTS" == "true" ]]; then
  S3_RESULTS_PATH="$S3_BUCKET/$S3_RESULTS_DIR"
  echo "Publishing results to S3 bucket: $S3_RESULTS_PATH"
  # The file is looked up under benchmark_results/ next to this script.
  aws s3 cp "$SCRIPT_DIR/benchmark_results/$results_filename" "s3://$S3_RESULTS_PATH/"
fi