mirror of
				https://github.com/allenai/olmocr.git
				synced 2025-10-31 10:04:26 +00:00 
			
		
		
		
	Hopefully will train now
This commit is contained in:
		
							parent
							
								
									e864b9d88f
								
							
						
					
					
						commit
						65a9c9981e
					
				| @ -143,8 +143,8 @@ def run_train(config: TrainConfig): | ||||
|     train_ds = dataset["train"].to_iterable_dataset(num_shards=64) | ||||
|     validation_ds = dataset["validation"] | ||||
| 
 | ||||
|     train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len) | ||||
|     validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor, add_batch_dim=True), remove_columns=validation_ds.column_names) | ||||
|     train_ds = train_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=train_ds.column_names).filter(filter_by_max_seq_len) | ||||
|     validation_ds = validation_ds.map(partial(prepare_data_for_qwen2_training, processor=processor), remove_columns=validation_ds.column_names) | ||||
| 
 | ||||
|     print(train_ds) | ||||
|     print(validation_ds) | ||||
|  | ||||
| @ -67,6 +67,7 @@ train = [ | ||||
|     "wandb", | ||||
|     "omegaconf", | ||||
|     "s3fs", | ||||
|     "necessary", | ||||
|     "transformers @ git+https://github.com/huggingface/transformers.git@f9b44097261270377199bb0fcbceb01fd9f7d0c0" | ||||
| ] | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Jake Poznanski
						Jake Poznanski