mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-17 11:12:33 +00:00
Some cleanup stuff
This commit is contained in:
parent
e86511e11b
commit
5c2d69a3d7
@ -10,7 +10,7 @@ model:
|
||||
trust_remote_code: true
|
||||
torch_dtype: auto
|
||||
use_flash_attention: true
|
||||
attn_implementation: sdpa
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# LoRA settings (disabled by default)
|
||||
use_lora: false
|
||||
@ -65,9 +65,11 @@ training:
|
||||
per_device_train_batch_size: 1
|
||||
per_device_eval_batch_size: 1
|
||||
gradient_accumulation_steps: 8
|
||||
|
||||
gradient_checkpointing: False
|
||||
|
||||
# Learning rate
|
||||
learning_rate: 2e-5
|
||||
learning_rate: 1e-6
|
||||
lr_scheduler_type: cosine
|
||||
warmup_ratio: 0.1
|
||||
|
||||
|
@ -162,9 +162,13 @@ def main():
|
||||
total_eval_samples = sum(len(dataset) for dataset in eval_datasets.values())
|
||||
logger.info(f"Total evaluation samples across {len(eval_datasets)} datasets: {total_eval_samples}")
|
||||
|
||||
# Construct full output directory by appending run_name to base output_dir
|
||||
full_output_dir = os.path.join(config.training.output_dir, config.run_name)
|
||||
logger.info(f"Setting output directory to: {full_output_dir}")
|
||||
|
||||
# Set up training arguments
|
||||
training_args = TrainingArguments(
|
||||
output_dir=config.training.output_dir,
|
||||
output_dir=full_output_dir,
|
||||
num_train_epochs=config.training.num_train_epochs,
|
||||
per_device_train_batch_size=config.training.per_device_train_batch_size,
|
||||
per_device_eval_batch_size=config.training.per_device_eval_batch_size,
|
||||
|
@ -37,7 +37,7 @@ dependencies = [
|
||||
"boto3",
|
||||
"httpx",
|
||||
"torch>=2.7.0",
|
||||
"transformers>=4.51.1",
|
||||
"transformers==4.52.4",
|
||||
"img2pdf",
|
||||
"beaker-py",
|
||||
]
|
||||
|
@ -52,7 +52,7 @@ gantry run \
|
||||
--priority normal \
|
||||
--gpus 1 \
|
||||
--preemptible \
|
||||
--cluster "ai2/jupiter-cirrascale-2" \
|
||||
--cluster "ai2/titan-cirrascale" \
|
||||
--budget ai2/oe-data \
|
||||
--env LOG_FILTER_TYPE=local_rank0_only \
|
||||
--env OMP_NUM_THREADS=8 \
|
||||
@ -64,4 +64,4 @@ gantry run \
|
||||
--weka oe-training-default:/weka/oe-training-default \
|
||||
--shared-memory 10GiB \
|
||||
--yes \
|
||||
-- /bin/bash -c "source scripts/beaker/jupiter-ib.sh && python -m olmocr.train.train --config olmocr/train/configs/example_config.yaml"
|
||||
-- /bin/bash -c "pip install flash-attn==2.8.0.post2 --no-build-isolation && python -m olmocr.train.train --config olmocr/train/configs/example_config.yaml"
|
Loading…
x
Reference in New Issue
Block a user