mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-19 14:22:26 +00:00
Convert list configs to plain lists
This commit is contained in:
parent
ffe470bf0e
commit
f13d0a5741
@ -93,7 +93,7 @@ def prepare_data_for_qwen2_training(example, processor, target_longest_image_dim
|
||||
}
|
||||
|
||||
|
||||
def batch_prepare_data_for_qwen2_training(batch, processor, target_longest_image_dim: int, target_anchor_text_len: int):
|
||||
def batch_prepare_data_for_qwen2_training(batch, processor, target_longest_image_dim: list[int], target_anchor_text_len: list[int]):
|
||||
# Process each example in the batch using the helper function
|
||||
processed_examples = []
|
||||
for i in range(len(batch["response"])):
|
||||
|
@ -74,8 +74,8 @@ def make_dataset(config: TrainConfig, processor: AutoProcessor) -> tuple[Dataset
|
||||
partial(
|
||||
batch_prepare_data_for_qwen2_training,
|
||||
processor=processor,
|
||||
target_longest_image_dim=target_longest_image_dim,
|
||||
target_anchor_text_len=target_anchor_text_len,
|
||||
target_longest_image_dim=list(target_longest_image_dim),
|
||||
target_anchor_text_len=list(target_anchor_text_len),
|
||||
)
|
||||
)
|
||||
|
||||
@ -86,8 +86,8 @@ def make_dataset(config: TrainConfig, processor: AutoProcessor) -> tuple[Dataset
|
||||
partial(
|
||||
batch_prepare_data_for_qwen2_training,
|
||||
processor=processor,
|
||||
target_longest_image_dim=source.target_longest_image_dim,
|
||||
target_anchor_text_len=source.target_anchor_text_len,
|
||||
target_longest_image_dim=list(source.target_longest_image_dim),
|
||||
target_anchor_text_len=list(source.target_anchor_text_len),
|
||||
)
|
||||
)
|
||||
for source in config.valid_data.sources
|
||||
|
Loading…
x
Reference in New Issue
Block a user