Using new budget code

This commit is contained in:
Jake Poznanski 2025-08-06 16:31:08 +00:00
parent e664dc5f36
commit 2fca448105
12 changed files with 15 additions and 63 deletions

View File

@ -115,7 +115,7 @@ jobs:
env:
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
BEAKER_IMAGE: jakep/olmocr-gpu-ci
- BEAKER_BUDGET: ai2/oe-data
+ BEAKER_BUDGET: ai2/oe-base
BEAKER_WORKSPACE: ai2/olmocr
steps:
- name: Determine current commit SHA (pull request)

View File

@ -176,7 +176,7 @@ if has_aws_creds:
# Create experiment spec
experiment_spec = ExperimentSpec(
description=f"OlmOCR vLLM vs HF Comparison - Branch: {git_branch}, Commit: {git_hash}, Model: {model_path}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**task_spec_args)],
)

View File

@ -159,7 +159,7 @@ if has_aws_creds:
# Create experiment spec
experiment_spec = ExperimentSpec(
description=f"OlmOCR Model Compression - Branch: {git_branch}, Commit: {git_hash}, Recipe: {recipe}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**task_spec_args)],
)

View File

@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Submits an 8-GPU Beaker job through gantry that first runs
# olmocr.train.loaddataset and then launches olmocr.train.train via
# `accelerate launch`, both with the molmo-o-lora-8192 config.
# The gantry description and task name are derived from this script's filename.
set -ex

# jq is required below to pull the account name out of the beaker CLI's JSON.
if ! command -v jq &> /dev/null
then
    # Report on stderr and exit non-zero so callers can detect the failure
    # (a bare `exit` would return echo's status, i.e. success).
    echo "jq could not be found. Please install it." >&2
    exit 1
fi

# NOTE: \${BEAKER_USER_ID} is escaped on purpose. It stays literal here and is
# expanded later by the `/bin/bash -c` shell that gantry starts, where the
# variable is provided through `--env BEAKER_USER_ID=...`.
EXTRA_ARGS="-c olmocr/train/config/molmo-o-lora-8192.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/molmo-pdf/v1/models/\${BEAKER_USER_ID}\""

run_name=$(basename "$0" .sh)

# Alternative options kept for reference:
# --cluster 'ai2/jupiter*' \
# --cluster 'ai2/pluto*' \
# --cluster 'ai2/allennlp-cirrascale' \
# --priority high \

# CLUSTER selects both the gantry cluster glob and the matching
# scripts/beaker/<CLUSTER>-ib.sh file sourced inside the job.
CLUSTER='jupiter'

gantry run \
    --description "${run_name}-8192" \
    --task-name "${run_name}-8192" \
    --allow-dirty \
    --host-networking \
    --workspace ai2/oe-data-model-based-cleanup \
    --beaker-image 'jakep/jakep-pdf-finetunev1.2' \
    --venv 'base' \
    --pip gantry-requirements.txt \
    --priority high \
    --gpus 8 \
    --cluster "ai2/${CLUSTER}*" \
    --budget ai2/oe-data \
    --weka "oe-data-default:/data" \
    --env LOG_FILTER_TYPE=local_rank0_only \
    --env OMP_NUM_THREADS=8 \
    --env "BEAKER_USER_ID=$(beaker account whoami --format json | jq '.[0].name' -cr)" \
    --env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
    --env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
    --env-secret DS_AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
    --env-secret DS_AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
    --env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
    --shared-memory 10GiB \
    --yes \
    -- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && python -m olmocr.train.loaddataset ${EXTRA_ARGS} && accelerate launch --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --mixed_precision bf16 -m olmocr.train.train ${EXTRA_ARGS}"

View File

@ -754,7 +754,7 @@ def submit_beaker_job(args):
# Create the experiment spec
experiment_spec = ExperimentSpec(
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
description=task_name,
tasks=[
TaskSpec(

View File

@ -145,7 +145,7 @@ if has_aws_creds:
# Create first experiment spec
experiment_spec = ExperimentSpec(
description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**task_spec_args)],
)
@ -196,7 +196,7 @@ if has_aws_creds:
# Create performance experiment spec
perf_experiment_spec = ExperimentSpec(
description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**perf_task_spec_args)],
)

View File

@ -145,7 +145,7 @@ if has_aws_creds:
# Create first experiment spec
experiment_spec = ExperimentSpec(
description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**task_spec_args)],
)
@ -195,7 +195,7 @@ if has_aws_creds:
# Create performance experiment spec
perf_experiment_spec = ExperimentSpec(
description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**perf_task_spec_args)],
)

View File

@ -128,7 +128,7 @@ if has_aws_creds:
# Create first experiment spec
experiment_spec = ExperimentSpec(
description=f"Marker {marker_version} Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**task_spec_args)],
)
@ -182,7 +182,7 @@ if has_aws_creds:
# Create performance experiment spec
perf_experiment_spec = ExperimentSpec(
description=f"Marker {marker_version} Performance Test - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[TaskSpec(**perf_task_spec_args)],
)

View File

@ -2,6 +2,6 @@
set -e
- gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"
+ gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"
- gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench && olmocr/bench/scripts/convert_all.sh"
+ gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench && olmocr/bench/scripts/convert_all.sh"

View File

@ -628,7 +628,7 @@ def submit_beaker_job(args):
# Create the experiment spec
experiment_spec = ExperimentSpec(
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
description=task_name,
tasks=[
TaskSpec(

View File

@ -642,7 +642,7 @@ def submit_beaker_job(args):
# Create the experiment spec
experiment_spec = ExperimentSpec(
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
description=task_name,
tasks=[
TaskSpec(

View File

@ -135,7 +135,7 @@ task_spec = TaskSpec(
# Create experiment spec
experiment_spec = ExperimentSpec(
description=f"OlmOCR Training Run - Branch: {git_branch}, Commit: {git_hash}",
- budget="ai2/oe-data",
+ budget="ai2/oe-base",
tasks=[task_spec],
)