Hopefully will train now

This commit is contained in:
Jake Poznanski 2024-09-27 15:16:12 +00:00
parent e864b9d88f
commit 65a9c9981e
2 changed files with 3 additions and 2 deletions

View File

@@ -143,8 +143,8 @@ def run_train(config: TrainConfig):
train_ds = dataset["train"].to_iterable_dataset(num_shards=64)
validation_ds = dataset["validation"]
-train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len)
-validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=validation_ds.column_names)
+train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len)
+validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=validation_ds.column_names)
print(train_ds)
print(validation_ds)

View File

@@ -67,6 +67,7 @@ train = [
"wandb",
"omegaconf",
"s3fs",
"necessary",
"transformers @ git+https://github.com/huggingface/transformers.git@f9b44097261270377199bb0fcbceb01fd9f7d0c0"
]