From 9787d007b99c6413a522b9ed9c0e961951b39dc8 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Thu, 12 Jun 2025 21:02:46 +0000 Subject: [PATCH] Pulling in bigger benchmark script from vllm branch to main --- scripts/run_benchmark.sh | 190 ++++++++++++++++++++++++++++++++------- 1 file changed, 156 insertions(+), 34 deletions(-) diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh index e7667b0..4d00b70 100755 --- a/scripts/run_benchmark.sh +++ b/scripts/run_benchmark.sh @@ -1,7 +1,39 @@ #!/bin/bash +# Runs an olmocr-bench run using the full pipeline (no fallback) +# Without model parameter (default behavior): uses the default image from Hugging Face +# ./scripts/run_benchmark.sh +# With model parameter: for testing custom models +# ./scripts/run_benchmark.sh --model your-model-name + set -e +# Parse command line arguments +MODEL="" +while [[ $# -gt 0 ]]; do + case $1 in + --model) + MODEL="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--model MODEL_NAME]" + exit 1 + ;; + esac +done + +# Check for uncommitted changes +if ! git diff-index --quiet HEAD --; then + echo "Error: There are uncommitted changes in the repository." + echo "Please commit or stash your changes before running the benchmark." + echo "" + echo "Uncommitted changes:" + git status --short + exit 1 +fi + # Use conda environment Python if available, otherwise use system Python if [ -n "$CONDA_PREFIX" ]; then PYTHON="$CONDA_PREFIX/bin/python" @@ -36,63 +68,153 @@ BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name') echo "Beaker user: $BEAKER_USER" # Push image to beaker -echo "Pushing image to Beaker..." -beaker image create --workspace ai2/oe-data-pdf --name $IMAGE_TAG $IMAGE_TAG +echo "Trying to push image to Beaker..." +if ! beaker image create --workspace ai2/oe-data-pdf --name $IMAGE_TAG $IMAGE_TAG 2>/dev/null; then + echo "Warning: Beaker image with tag $IMAGE_TAG already exists. Using existing image." 
+fi # Create Python script to run beaker experiment cat << 'EOF' > /tmp/run_benchmark_experiment.py import sys -from beaker import Beaker, ExperimentSpec, TaskSpec, TaskContext, ResultSpec, TaskResources, ImageSource, Priority, Constraints +from beaker import Beaker, ExperimentSpec, TaskSpec, TaskContext, ResultSpec, TaskResources, ImageSource, Priority, Constraints, EnvVar -# Get image tag, beaker user, git branch, and git hash from command line +# Get image tag, beaker user, git branch, git hash, and optional model from command line image_tag = sys.argv[1] beaker_user = sys.argv[2] git_branch = sys.argv[3] git_hash = sys.argv[4] +model = sys.argv[5] if len(sys.argv) > 5 else None # Initialize Beaker client b = Beaker.from_env(default_workspace="ai2/olmocr") -# Create experiment spec +# Build the pipeline command with optional model parameter +pipeline_cmd = "python -m olmocr.pipeline ./localworkspace --markdown --pdfs ./olmOCR-bench/bench_data/pdfs/**/*.pdf" +if model: + pipeline_cmd += f" --model {model}" + +# Check if AWS credentials secret exists +aws_creds_secret = f"{beaker_user}-AWS_CREDENTIALS_FILE" +try: + # Try to get the secret to see if it exists + b.secret.get(aws_creds_secret, workspace="ai2/olmocr") + has_aws_creds = True + print(f"Found AWS credentials secret: {aws_creds_secret}") +except: + has_aws_creds = False + print(f"AWS credentials secret not found: {aws_creds_secret}") + +# First experiment: Original benchmark job +commands = [] +if has_aws_creds: + commands.extend([ + "mkdir -p ~/.aws", + 'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials' + ]) +commands.extend([ + "git clone https://huggingface.co/datasets/allenai/olmOCR-bench", + "cd olmOCR-bench && git lfs pull && cd ..", + pipeline_cmd, + "python olmocr/bench/scripts/workspace_to_bench.py localworkspace/ olmOCR-bench/bench_data/olmocr --bench-path ./olmOCR-bench/", + "python -m olmocr.bench.benchmark --dir ./olmOCR-bench/bench_data" +]) + +# Build task spec with optional env vars 
+task_spec_args = { + "name": "olmocr-benchmark", + "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"), + "command": [ + "bash", "-c", + " && ".join(commands) + ], + "context": TaskContext( + priority=Priority.normal, + preemptible=True, + ), + "resources": TaskResources(gpu_count=1), + "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]), + "result": ResultSpec(path="/noop-results"), +} + +# Add env vars if AWS credentials exist +if has_aws_creds: + task_spec_args["env_vars"] = [ + EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret) + ] + +# Create first experiment spec experiment_spec = ExperimentSpec( description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}", budget="ai2/oe-data", - tasks=[ - TaskSpec( - name="olmocr-benchmark", - image=ImageSource(beaker=f"{beaker_user}/{image_tag}"), - command=[ - "bash", "-c", - " && ".join([ - "git clone https://huggingface.co/datasets/allenai/olmOCR-bench", - "cd olmOCR-bench && git lfs pull && cd ..", - "python -m olmocr.pipeline ./localworkspace --markdown --pdfs ./olmOCR-bench/bench_data/pdfs/**/*.pdf", - "python olmocr/bench/scripts/workspace_to_bench.py localworkspace/ olmOCR-bench/bench_data/olmocr --bench-path ./olmOCR-bench/", - "python -m olmocr.bench.benchmark --dir ./olmOCR-bench/bench_data" - ]) - ], - context=TaskContext( - priority=Priority.normal, - preemptible=True, - ), - resources=TaskResources(gpu_count=1), - constraints=Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]), - result=ResultSpec(path="/noop-results"), - ) - ], + tasks=[TaskSpec(**task_spec_args)], ) -# Create the experiment +# Create the first experiment experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr") -print(f"Created experiment: {experiment.id}") +print(f"Created benchmark experiment: {experiment.id}") print(f"View at: https://beaker.org/ex/{experiment.id}") +print("-------") +print("") + +# Second experiment: 
Performance test job +perf_pipeline_cmd = "python -m olmocr.pipeline ./localworkspace --markdown --pdfs s3://ai2-oe-data/jakep/olmocr/olmOCR-mix-0225/benchmark_set/*.pdf" +if model: + perf_pipeline_cmd += f" --model {model}" + +perf_commands = [] +if has_aws_creds: + perf_commands.extend([ + "mkdir -p ~/.aws", + 'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials' + ]) +perf_commands.append(perf_pipeline_cmd) + +# Build performance task spec +perf_task_spec_args = { + "name": "olmocr-performance", + "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"), + "command": [ + "bash", "-c", + " && ".join(perf_commands) + ], + "context": TaskContext( + priority=Priority.normal, + preemptible=True, + ), + "resources": TaskResources(gpu_count=1), + "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]), + "result": ResultSpec(path="/noop-results"), +} + +# Add env vars if AWS credentials exist +if has_aws_creds: + perf_task_spec_args["env_vars"] = [ + EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret) + ] + +# Create performance experiment spec +perf_experiment_spec = ExperimentSpec( + description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}", + budget="ai2/oe-data", + tasks=[TaskSpec(**perf_task_spec_args)], +) + +# Create the performance experiment +perf_experiment = b.experiment.create(spec=perf_experiment_spec, workspace="ai2/olmocr") +print(f"Created performance experiment: {perf_experiment.id}") +print(f"View at: https://beaker.org/ex/{perf_experiment.id}") EOF -# Run the Python script to create the experiment -echo "Creating Beaker experiment..." -$PYTHON /tmp/run_benchmark_experiment.py $IMAGE_TAG $BEAKER_USER $GIT_BRANCH $GIT_HASH +# Run the Python script to create the experiments +echo "Creating Beaker experiments..." 
+if [ -n "$MODEL" ]; then + echo "Using model: $MODEL" + $PYTHON /tmp/run_benchmark_experiment.py $IMAGE_TAG $BEAKER_USER $GIT_BRANCH $GIT_HASH "$MODEL" +else + $PYTHON /tmp/run_benchmark_experiment.py $IMAGE_TAG $BEAKER_USER $GIT_BRANCH $GIT_HASH +fi # Clean up temporary file rm /tmp/run_benchmark_experiment.py -echo "Benchmark experiment submitted successfully!" \ No newline at end of file +echo "Benchmark experiments submitted successfully!" \ No newline at end of file