mirror of
https://github.com/allenai/olmocr.git
synced 2025-06-27 04:00:02 +00:00
Hopefully will train now
This commit is contained in:
parent
e864b9d88f
commit
65a9c9981e
@ -143,8 +143,8 @@ def run_train(config: TrainConfig):
|
||||
train_ds = dataset["train"].to_iterable_dataset(num_shards=64)
|
||||
validation_ds = dataset["validation"]
|
||||
|
||||
train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len)
|
||||
validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=validation_ds.column_names)
|
||||
train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len)
|
||||
validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=validation_ds.column_names)
|
||||
|
||||
print(train_ds)
|
||||
print(validation_ds)
|
||||
|
@ -67,6 +67,7 @@ train = [
|
||||
"wandb",
|
||||
"omegaconf",
|
||||
"s3fs",
|
||||
"necessary",
|
||||
"transformers @ git+https://github.com/huggingface/transformers.git@f9b44097261270377199bb0fcbceb01fd9f7d0c0"
|
||||
]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user