mirror of
				https://github.com/allenai/olmocr.git
				synced 2025-10-31 10:04:26 +00:00 
			
		
		
		
	Hopefully will train now
This commit is contained in:
		
							parent
							
								
									e864b9d88f
								
							
						
					
					
						commit
						65a9c9981e
					
				| @ -143,8 +143,8 @@ def run_train(config: TrainConfig): | |||||||
|     train_ds = dataset["train"].to_iterable_dataset(num_shards=64) |     train_ds = dataset["train"].to_iterable_dataset(num_shards=64) | ||||||
|     validation_ds = dataset["validation"] |     validation_ds = dataset["validation"] | ||||||
| 
 | 
 | ||||||
|     train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len) |     train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len) | ||||||
|     validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=validation_ds.column_names) |     validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=validation_ds.column_names) | ||||||
| 
 | 
 | ||||||
|     print(train_ds) |     print(train_ds) | ||||||
|     print(validation_ds) |     print(validation_ds) | ||||||
|  | |||||||
| @ -67,6 +67,7 @@ train = [ | |||||||
|     "wandb", |     "wandb", | ||||||
|     "omegaconf", |     "omegaconf", | ||||||
|     "s3fs", |     "s3fs", | ||||||
|  |     "necessary", | ||||||
|     "transformers @ git+https://github.com/huggingface/transformers.git@f9b44097261270377199bb0fcbceb01fd9f7d0c0" |     "transformers @ git+https://github.com/huggingface/transformers.git@f9b44097261270377199bb0fcbceb01fd9f7d0c0" | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Jake Poznanski
						Jake Poznanski