This commit is contained in:
Jake Poznanski 2025-01-23 23:32:56 +00:00
commit aa59d38a5b
2 changed files with 8 additions and 1 deletion

View File

@@ -20,7 +20,13 @@ def main():
print("Training dataset........")
print(train_dataset)
print(train_dataset[0])
train_example = train_dataset[0]
print(train_example)
print({(x, y.shape) for x,y in train_example.items()})
print("\nTokens")
print(processor.tokenizer.batch_decode(train_example["input_ids"]))
print("\n\n")
print("Validation dataset........")

View File

@@ -37,6 +37,7 @@ class MolmoProcessorTest(unittest.TestCase):
print(inputs)
print("\nShapes")
# {('input_ids', torch.Size([1, 589])), ('images', torch.Size([1, 5, 576, 588])), ('image_masks', torch.Size([1, 5, 576])), ('image_input_idx', torch.Size([1, 5, 144]))}
print({(x, y.shape) for x,y in inputs.items()})
print("\nTokens")