#!/bin/bash
#
# scripts/train/grpotrainer-beaker.sh
#
# Optionally builds the Docker image from ./Dockerfile, registers it with
# Beaker, and then submits a single-task Beaker experiment that runs
# `python -m olmocr.train.grpo_train` against the olmOCR-bench snapshot.
#
# Must be run from the repository root (it reads ./Dockerfile and calls git).
# Requires: docker, beaker, jq, git, and a Python that can import both
# `olmocr` and `beaker`.
#
# Run with --help for the list of options.

set -e
# Without pipefail a failing `beaker ... | jq` would be masked by jq's status.
set -o pipefail

# Defaults for the command line options
MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
SKIP_DOCKER_BUILD=false
PREEMPTIBLE=false
MAX_TRAIN_SAMPLES=""
MAX_EVAL_SAMPLES=""
NUM_EPOCHS=1
LEARNING_RATE="1e-6"
BATCH_SIZE=1
GRAD_ACCUM_STEPS=4
USE_WANDB=false
WANDB_PROJECT="olmocr-grpo"
WANDB_RUN_NAME=""

# Print the option summary to stdout.
usage() {
    echo "Usage: $0 [options]"
    echo "Options:"
    echo "  --model MODEL_NAME         Model to use (default: Qwen/Qwen2.5-VL-7B-Instruct)"
    echo "  --skip-docker-build        Skip Docker build"
    echo "  --preemptible              Use preemptible instances"
    echo "  --max-train-samples N      Max training samples"
    echo "  --max-eval-samples N       Max evaluation samples"
    echo "  --num-epochs N             Number of training epochs (default: 1)"
    echo "  --learning-rate LR         Learning rate (default: 1e-6)"
    echo "  --batch-size N             Batch size per device (default: 1)"
    echo "  --grad-accum-steps N       Gradient accumulation steps (default: 4)"
    echo "  --use-wandb                Enable W&B logging"
    echo "  --wandb-project NAME       W&B project name"
    echo "  --wandb-run-name NAME      W&B run name"
    echo "  -h, --help                 Show this help and exit"
}

# Abort with a clear message when a value-taking option is the last argument.
# Call as: require_value "$@"   (so $1 is the option, $2 its value).
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Error: option $1 requires a value" >&2
        exit 1
    fi
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --model)
            require_value "$@"
            MODEL_NAME="$2"
            shift 2
            ;;
        --skip-docker-build)
            SKIP_DOCKER_BUILD=true
            shift
            ;;
        --preemptible)
            PREEMPTIBLE=true
            shift
            ;;
        --max-train-samples)
            require_value "$@"
            MAX_TRAIN_SAMPLES="$2"
            shift 2
            ;;
        --max-eval-samples)
            require_value "$@"
            MAX_EVAL_SAMPLES="$2"
            shift 2
            ;;
        --num-epochs)
            require_value "$@"
            NUM_EPOCHS="$2"
            shift 2
            ;;
        --learning-rate)
            require_value "$@"
            LEARNING_RATE="$2"
            shift 2
            ;;
        --batch-size)
            require_value "$@"
            BATCH_SIZE="$2"
            shift 2
            ;;
        --grad-accum-steps)
            require_value "$@"
            GRAD_ACCUM_STEPS="$2"
            shift 2
            ;;
        --use-wandb)
            USE_WANDB=true
            shift
            ;;
        --wandb-project)
            require_value "$@"
            WANDB_PROJECT="$2"
            shift 2
            ;;
        --wandb-run-name)
            require_value "$@"
            WANDB_RUN_NAME="$2"
            shift 2
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            usage
            exit 1
            ;;
    esac
done

echo "Model: $MODEL_NAME"
echo "Preemptible: $PREEMPTIBLE"
echo "Use W&B: $USE_WANDB"

# Use conda environment Python if available, otherwise use system Python
if [ -n "$CONDA_PREFIX" ]; then
    PYTHON="$CONDA_PREFIX/bin/python"
    echo "Using conda Python from: $CONDA_PREFIX"
else
    PYTHON="python"
    echo "Warning: No conda environment detected, using system Python"
fi

# Get version from version.py
VERSION=$("$PYTHON" -c 'import olmocr.version; print(olmocr.version.VERSION)')
echo "OlmOCR version: $VERSION"

# Get first 10 characters of git hash
GIT_HASH=$(git rev-parse HEAD | cut -c1-10)
echo "Git hash: $GIT_HASH"

# Get current git branch name
GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
echo "Git branch: $GIT_BRANCH"

# Create full image tag
IMAGE_TAG="olmocr-grpo-${VERSION}-${GIT_HASH}"
echo "Building Docker image with tag: $IMAGE_TAG"

# Build and push Docker image if not skipping
if [ "$SKIP_DOCKER_BUILD" = false ]; then
    echo "Building Docker image..."
    docker build --platform linux/amd64 -f ./Dockerfile -t "$IMAGE_TAG" .

    # Push image to beaker. This stays best-effort (the usual failure is that
    # the image name is already taken), but stderr is left visible so that
    # auth or network errors are not mistaken for "already exists".
    echo "Trying to push image to Beaker..."
    if ! beaker image create --workspace ai2/oe-data-pdf --name "$IMAGE_TAG" "$IMAGE_TAG"; then
        echo "Warning: Could not create Beaker image $IMAGE_TAG (it may already exist). Using existing image."
    fi
else
    echo "Skipping Docker build as requested"
fi

# Get Beaker username; it is used as the owner prefix of the image reference.
BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name')
if [ -z "$BEAKER_USER" ] || [ "$BEAKER_USER" = "null" ]; then
    echo "Error: could not determine Beaker username from 'beaker account whoami'" >&2
    exit 1
fi
echo "Beaker user: $BEAKER_USER"

# Write the experiment launcher to a private temp file. mktemp avoids clashes
# between concurrent runs, and the EXIT trap removes the file even when
# `set -e` aborts the script early.
EXPERIMENT_SCRIPT=$(mktemp "${TMPDIR:-/tmp}/run_grpo_experiment.XXXXXX")
trap 'rm -f "$EXPERIMENT_SCRIPT"' EXIT

# Create Python script to run beaker experiment.
# The heredoc delimiter is quoted, so nothing below is expanded by the shell;
# all values reach Python through sys.argv.
cat << 'EOF' > "$EXPERIMENT_SCRIPT"
import shlex
import sys

from beaker import (
    Beaker,
    Constraints,
    DataMount,
    EnvVar,
    ExperimentSpec,
    ImageSource,
    Priority,
    ResultSpec,
    TaskContext,
    TaskResources,
    TaskSpec,
)

# Positional parameters, in the order the wrapping shell script passes them.
(
    image_tag,
    beaker_user,
    git_branch,
    git_hash,
    model_name,
    preemptible_flag,
    max_train_samples,
    max_eval_samples,
    num_epochs,
    learning_rate,
    batch_size,
    grad_accum_steps,
    use_wandb_flag,
    wandb_project,
    wandb_run_name,
) = sys.argv[1:16]

# Booleans arrive as the literal strings "true" / "false".
preemptible = preemptible_flag == "true"
use_wandb = use_wandb_flag == "true"

# Every user-supplied value is spliced into a `bash -c` string below, so it
# must be shell-quoted; values made only of safe characters are unchanged.
q = shlex.quote

# Initialize Beaker client
b = Beaker.from_env(default_workspace="ai2/olmocr")

# Shell commands run inside the task, joined with " && " so that the first
# failure stops the chain.
commands = [
    # Install dependencies
    "pip install '.[train]'",  # quoted so bash never treats [train] as a glob
    "pip install trl wandb",
    "pip install transformers==4.55.2",  # Updated for GRPO compatibility
    "pip install flash-attn==2.8.0.post2 --no-build-isolation",
    "pip install s5cmd",

    # Sync the bench data from S3
    "echo 'Syncing bench data from S3...'",
    "mkdir -p /data/olmOCR-bench",
    "s5cmd sync 's3://ai2-oe-data/jakep/olmocr/olmOCR-bench-snapshot-082225/*' /data/olmOCR-bench/",

    # Build GRPO training command
    "echo 'Starting GRPO training...'",
]

# Build the python command with all parameters
grpo_cmd = [
    "python -m olmocr.train.grpo_train",
    "--train_bench_data_folder /data/olmOCR-bench",
    "--eval_bench_data_folder /data/olmOCR-bench",  # Using same data for now
    f"--model_name {q(model_name)}",
    "--output_dir /weka/oe-training-default/olmocr-grpo-checkpoints",
    f"--num_train_epochs {q(num_epochs)}",
    f"--learning_rate {q(learning_rate)}",
    f"--per_device_train_batch_size {q(batch_size)}",
    f"--per_device_eval_batch_size {q(batch_size)}",
    f"--gradient_accumulation_steps {q(grad_accum_steps)}",
]

# Add optional parameters (empty string means "not set")
if max_train_samples:
    grpo_cmd.append(f"--max_train_samples {q(max_train_samples)}")
if max_eval_samples:
    grpo_cmd.append(f"--max_eval_samples {q(max_eval_samples)}")
if use_wandb:
    grpo_cmd.append("--use_wandb")
    grpo_cmd.append(f"--wandb_project {q(wandb_project)}")
    if wandb_run_name:
        grpo_cmd.append(f"--wandb_run_name {q(wandb_run_name)}")

# Add the GRPO command to the commands list
commands.append(" ".join(grpo_cmd))

# Build task spec
task_spec = TaskSpec(
    name="olmocr-grpo-training",
    image=ImageSource(beaker=f"{beaker_user}/{image_tag}"),
    command=[
        "bash", "-c",
        " && ".join(commands),
    ],
    context=TaskContext(
        priority=Priority.normal,
        preemptible=preemptible,
    ),
    resources=TaskResources(
        gpu_count=1,
        shared_memory="10GiB",
    ),
    constraints=Constraints(cluster=["ai2/titan-cirrascale"]),
    result=ResultSpec(path="/noop-results"),
    env_vars=[
        EnvVar(name="LOG_FILTER_TYPE", value="local_rank0_only"),
        EnvVar(name="OMP_NUM_THREADS", value="8"),
        EnvVar(name="BEAKER_USER_ID", value=beaker_user),
        EnvVar(name="AWS_ACCESS_KEY_ID", secret="ALLENNLP_AWS_ACCESS_KEY_ID"),
        EnvVar(name="AWS_SECRET_ACCESS_KEY", secret="ALLENNLP_AWS_SECRET_ACCESS_KEY"),
        EnvVar(name="WANDB_API_KEY", secret="JAKE_WANDB_API_KEY"),
        EnvVar(name="HF_TOKEN", secret="HF_TOKEN"),  # For accessing gated models
    ],
    datasets=[
        DataMount.new(mount_path="/weka/oe-data-default", weka="oe-data-default"),
        DataMount.new(mount_path="/weka/oe-training-default", weka="oe-training-default"),
    ],
)

# Create experiment spec
experiment_spec = ExperimentSpec(
    description=f"OlmOCR GRPO Training - Model: {model_name}, Branch: {git_branch}, Commit: {git_hash}",
    budget="ai2/oe-base",
    tasks=[task_spec],
)

# Create the experiment
experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr")
print(f"Created GRPO training experiment: {experiment.id}")
print(f"View at: https://beaker.org/ex/{experiment.id}")
EOF

# Run the Python script to create the experiment.
# Argument order must match the sys.argv unpacking in the heredoc above.
echo "Creating Beaker GRPO experiment..."
"$PYTHON" "$EXPERIMENT_SCRIPT" \
    "$IMAGE_TAG" \
    "$BEAKER_USER" \
    "$GIT_BRANCH" \
    "$GIT_HASH" \
    "$MODEL_NAME" \
    "$PREEMPTIBLE" \
    "$MAX_TRAIN_SAMPLES" \
    "$MAX_EVAL_SAMPLES" \
    "$NUM_EPOCHS" \
    "$LEARNING_RATE" \
    "$BATCH_SIZE" \
    "$GRAD_ACCUM_STEPS" \
    "$USE_WANDB" \
    "$WANDB_PROJECT" \
    "$WANDB_RUN_NAME"

# The temporary launcher is removed by the EXIT trap.

echo "GRPO training experiment submitted successfully!"