mirror of
https://github.com/allenai/olmocr.git
synced 2025-12-25 06:06:23 +00:00
Using new budget code
This commit is contained in:
parent
e664dc5f36
commit
2fca448105
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@ -115,7 +115,7 @@ jobs:
|
||||
env:
|
||||
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
|
||||
BEAKER_IMAGE: jakep/olmocr-gpu-ci
|
||||
BEAKER_BUDGET: ai2/oe-data
|
||||
BEAKER_BUDGET: ai2/oe-base
|
||||
BEAKER_WORKSPACE: ai2/olmocr
|
||||
steps:
|
||||
- name: Determine current commit SHA (pull request)
|
||||
|
||||
@ -176,7 +176,7 @@ if has_aws_creds:
|
||||
# Create experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR vLLM vs HF Comparison - Branch: {git_branch}, Commit: {git_hash}, Model: {model_path}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**task_spec_args)],
|
||||
)
|
||||
|
||||
|
||||
@ -159,7 +159,7 @@ if has_aws_creds:
|
||||
# Create experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Model Compression - Branch: {git_branch}, Commit: {git_hash}, Recipe: {recipe}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**task_spec_args)],
|
||||
)
|
||||
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
# check if jq is installed
|
||||
if ! command -v jq &> /dev/null
|
||||
then
|
||||
echo "jq could not be found. Please install it."
|
||||
exit
|
||||
fi
|
||||
|
||||
|
||||
EXTRA_ARGS="-c olmocr/train/config/molmo-o-lora-8192.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/molmo-pdf/v1/models/\${BEAKER_USER_ID}\""
|
||||
|
||||
run_name=$(basename "$0" .sh)
|
||||
|
||||
# --cluster 'ai2/jupiter*' \
|
||||
# --cluster 'ai2/pluto*' \
|
||||
# --cluster 'ai2/allennlp-cirrascale' \
|
||||
# --priority high \
|
||||
|
||||
CLUSTER='jupiter'
|
||||
|
||||
gantry run \
|
||||
--description "${run_name}-8192"\
|
||||
--task-name "${run_name}-8192"\
|
||||
--allow-dirty \
|
||||
--host-networking \
|
||||
--workspace ai2/oe-data-model-based-cleanup \
|
||||
--beaker-image 'jakep/jakep-pdf-finetunev1.2' \
|
||||
--venv 'base' \
|
||||
--pip gantry-requirements.txt \
|
||||
--priority high \
|
||||
--gpus 8 \
|
||||
--cluster "ai2/${CLUSTER}*" \
|
||||
--budget ai2/oe-data \
|
||||
--weka "oe-data-default:/data" \
|
||||
--env LOG_FILTER_TYPE=local_rank0_only \
|
||||
--env OMP_NUM_THREADS=8 \
|
||||
--env BEAKER_USER_ID=$(beaker account whoami --format json | jq '.[0].name' -cr) \
|
||||
--env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
|
||||
--env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
|
||||
--env-secret DS_AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
|
||||
--env-secret DS_AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
|
||||
--env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
|
||||
--shared-memory 10GiB \
|
||||
--yes \
|
||||
-- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && python -m olmocr.train.loaddataset ${EXTRA_ARGS} && accelerate launch --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --mixed_precision bf16 -m olmocr.train.train ${EXTRA_ARGS}"
|
||||
@ -754,7 +754,7 @@ def submit_beaker_job(args):
|
||||
|
||||
# Create the experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
description=task_name,
|
||||
tasks=[
|
||||
TaskSpec(
|
||||
|
||||
@ -145,7 +145,7 @@ if has_aws_creds:
|
||||
# Create first experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**task_spec_args)],
|
||||
)
|
||||
|
||||
@ -196,7 +196,7 @@ if has_aws_creds:
|
||||
# Create performance experiment spec
|
||||
perf_experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**perf_task_spec_args)],
|
||||
)
|
||||
|
||||
|
||||
@ -145,7 +145,7 @@ if has_aws_creds:
|
||||
# Create first experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**task_spec_args)],
|
||||
)
|
||||
|
||||
@ -195,7 +195,7 @@ if has_aws_creds:
|
||||
# Create performance experiment spec
|
||||
perf_experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**perf_task_spec_args)],
|
||||
)
|
||||
|
||||
|
||||
@ -128,7 +128,7 @@ if has_aws_creds:
|
||||
# Create first experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"Marker {marker_version} Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**task_spec_args)],
|
||||
)
|
||||
|
||||
@ -182,7 +182,7 @@ if has_aws_creds:
|
||||
# Create performance experiment spec
|
||||
perf_experiment_spec = ExperimentSpec(
|
||||
description=f"Marker {marker_version} Performance Test - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[TaskSpec(**perf_task_spec_args)],
|
||||
)
|
||||
|
||||
|
||||
@ -2,6 +2,6 @@
|
||||
|
||||
set -e
|
||||
|
||||
gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"
|
||||
gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && pip install --upgrade sglang==0.4.5.post3 transformers==4.51.3 && python scripts/tagging_pipeline.py s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini s3://ai2-oe-data/jakep/s2pdf_dedupe_minhash_v1_mini_scratch"
|
||||
|
||||
gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-data --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench && olmocr/bench/scripts/convert_all.sh"
|
||||
gantry run --gpus 1 --workspace ai2/olmocr --beaker-image ai2/pytorch2.5.1-cuda12.1-python3.11 --cluster ai2/jupiter-cirrascale-2 --budget ai2/oe-base --priority normal --env-secret AWS_CREDENTIALS_FILE=jakep-AWS_CREDENTIALS_FILE --env-secret HF_TOKEN=jake-HF_TOKEN --allow-dirty -- /bin/bash -c "pip install -e .[gpu,bench] --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ && huggingface-cli download allenai/olmOCR-bench --repo-type dataset --local-dir ./olmOCR-bench && olmocr/bench/scripts/convert_all.sh"
|
||||
@ -628,7 +628,7 @@ def submit_beaker_job(args):
|
||||
|
||||
# Create the experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
description=task_name,
|
||||
tasks=[
|
||||
TaskSpec(
|
||||
|
||||
@ -642,7 +642,7 @@ def submit_beaker_job(args):
|
||||
|
||||
# Create the experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
description=task_name,
|
||||
tasks=[
|
||||
TaskSpec(
|
||||
|
||||
@ -135,7 +135,7 @@ task_spec = TaskSpec(
|
||||
# Create experiment spec
|
||||
experiment_spec = ExperimentSpec(
|
||||
description=f"OlmOCR Training Run - Branch: {git_branch}, Commit: {git_hash}",
|
||||
budget="ai2/oe-data",
|
||||
budget="ai2/oe-base",
|
||||
tasks=[task_spec],
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user