Using new budget code

2025-12-25 06:06:23 +00:00 · 2025-08-06 16:31:08 +00:00 · 2025-08-06 16:31:08 +00:00 · 2fca448105
commit 2fca448105
parent e664dc5f36
12 changed files with 15 additions and 63 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -115,7 +115,7 @@ jobs:
    env:
      BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
      BEAKER_IMAGE: jakep/olmocr-gpu-ci
-      BEAKER_BUDGET: ai2/oe-data
+      BEAKER_BUDGET: ai2/oe-base
      BEAKER_WORKSPACE: ai2/olmocr
    steps:
      - name: Determine current commit SHA (pull request)
--- a/scripts/compare_vllm.sh
+++ b/scripts/compare_vllm.sh
@@ -176,7 +176,7 @@ if has_aws_creds:
 # Create experiment spec
 experiment_spec = ExperimentSpec(
    description=f"OlmOCR vLLM vs HF Comparison - Branch: {git_branch}, Commit: {git_hash}, Model: {model_path}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**task_spec_args)],
 )

--- a/scripts/compress_model.sh
+++ b/scripts/compress_model.sh
@@ -159,7 +159,7 @@ if has_aws_creds:
 # Create experiment spec
 experiment_spec = ExperimentSpec(
    description=f"OlmOCR Model Compression - Branch: {git_branch}, Commit: {git_hash}, Recipe: {recipe}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**task_spec_args)],
 )

--- a/scripts/molmo-7b-lora-gantry.sh
+++ b/scripts/molmo-7b-lora-gantry.sh
@@ -1,48 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-# check if jq is installed
-if ! command -v jq &> /dev/null
-then
-    echo "jq could not be found. Please install it."
-    exit
-fi
-
-
-EXTRA_ARGS="-c olmocr/train/config/molmo-o-lora-8192.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/molmo-pdf/v1/models/\${BEAKER_USER_ID}\""
-
-run_name=$(basename "$0" .sh)
-
-# --cluster 'ai2/jupiter*' \
-# --cluster 'ai2/pluto*' \
-# --cluster 'ai2/allennlp-cirrascale' \
-# --priority high \
-
-CLUSTER='jupiter'
-
-gantry run \
-    --description "${run_name}-8192"\
-    --task-name "${run_name}-8192"\
-    --allow-dirty \
-    --host-networking \
-    --workspace ai2/oe-data-model-based-cleanup \
-    --beaker-image 'jakep/jakep-pdf-finetunev1.2' \
-    --venv 'base' \
-    --pip gantry-requirements.txt \
-    --priority high \
-    --gpus 8 \
-    --cluster "ai2/${CLUSTER}*" \
-    --budget ai2/oe-data \
-    --weka "oe-data-default:/data" \
-    --env LOG_FILTER_TYPE=local_rank0_only \
-    --env OMP_NUM_THREADS=8 \
-    --env BEAKER_USER_ID=$(beaker account whoami --format json | jq '.[0].name' -cr) \
-    --env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
-    --env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
-    --env-secret DS_AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
-    --env-secret DS_AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
-    --env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
-    --shared-memory 10GiB \
-    --yes \
-    -- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && python -m olmocr.train.loaddataset ${EXTRA_ARGS} && accelerate launch --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --mixed_precision bf16 -m olmocr.train.train ${EXTRA_ARGS}"
--- a/scripts/rich_tagging_pipeline.py
+++ b/scripts/rich_tagging_pipeline.py
@@ -754,7 +754,7 @@ def submit_beaker_job(args):

    # Create the experiment spec
    experiment_spec = ExperimentSpec(
-        budget="ai2/oe-data",
+        budget="ai2/oe-base",
        description=task_name,
        tasks=[
            TaskSpec(
--- a/scripts/run_benchmark.sh
+++ b/scripts/run_benchmark.sh
@@ -145,7 +145,7 @@ if has_aws_creds:
 # Create first experiment spec
 experiment_spec = ExperimentSpec(
    description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**task_spec_args)],
 )

@@ -196,7 +196,7 @@ if has_aws_creds:
 # Create performance experiment spec
 perf_experiment_spec = ExperimentSpec(
    description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**perf_task_spec_args)],
 )

--- a/scripts/run_benchmark_guided_decoding.sh
+++ b/scripts/run_benchmark_guided_decoding.sh
@@ -145,7 +145,7 @@ if has_aws_creds:
 # Create first experiment spec
 experiment_spec = ExperimentSpec(
    description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**task_spec_args)],
 )

@@ -195,7 +195,7 @@ if has_aws_creds:
 # Create performance experiment spec
 perf_experiment_spec = ExperimentSpec(
    description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**perf_task_spec_args)],
 )

--- a/scripts/run_marker_benchmark.sh
+++ b/scripts/run_marker_benchmark.sh
@@ -128,7 +128,7 @@ if has_aws_creds:
 # Create first experiment spec
 experiment_spec = ExperimentSpec(
    description=f"Marker {marker_version} Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**task_spec_args)],
 )

@@ -182,7 +182,7 @@ if has_aws_creds:
 # Create performance experiment spec
 perf_experiment_spec = ExperimentSpec(
    description=f"Marker {marker_version} Performance Test - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[TaskSpec(**perf_task_spec_args)],
 )

--- a/scripts/run_tagging_pipeline.sh
+++ b/scripts/run_tagging_pipeline.sh
@@ -2,6 +2,6 @@

 set -e

-gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"
+gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"

-gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench &&  olmocr/bench/scripts/convert_all.sh"
+gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench &&  olmocr/bench/scripts/convert_all.sh"
--- a/scripts/tagging_pipeline.py
+++ b/scripts/tagging_pipeline.py
@@ -628,7 +628,7 @@ def submit_beaker_job(args):

    # Create the experiment spec
    experiment_spec = ExperimentSpec(
-        budget="ai2/oe-data",
+        budget="ai2/oe-base",
        description=task_name,
        tasks=[
            TaskSpec(
--- a/scripts/tagging_pipeline_v2.py
+++ b/scripts/tagging_pipeline_v2.py
@@ -642,7 +642,7 @@ def submit_beaker_job(args):

    # Create the experiment spec
    experiment_spec = ExperimentSpec(
-        budget="ai2/oe-data",
+        budget="ai2/oe-base",
        description=task_name,
        tasks=[
            TaskSpec(
--- a/scripts/train/newtrainer-beaker.sh
+++ b/scripts/train/newtrainer-beaker.sh
@@ -135,7 +135,7 @@ task_spec = TaskSpec(
 # Create experiment spec
 experiment_spec = ExperimentSpec(
    description=f"OlmOCR Training Run - Branch: {git_branch}, Commit: {git_hash}",
-    budget="ai2/oe-data",
+    budget="ai2/oe-base",
    tasks=[task_spec],
 )