mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-20 04:29:08 +00:00
Lints
This commit is contained in:
parent
369fd4d23a
commit
aa239eb34c
@@ -18,7 +18,7 @@ import torch
|
||||
import torch.distributed as dist
|
||||
import wandb
|
||||
from PIL import Image
|
||||
from rapidfuzz import fuzz, distance
|
||||
from rapidfuzz import distance, fuzz
|
||||
from torch.utils.data import Dataset
|
||||
from transformers import (
|
||||
AutoProcessor,
|
||||
|
@@ -84,8 +84,8 @@ def test_repackage_and_prepare_olmocrmix():
|
||||
if relative_path.suffix == ".jsonl":
|
||||
# For JSONL files, compare as sets of lines (order doesn't matter)
|
||||
# Filter out empty lines
|
||||
sample_lines = set(line for line in sample_file.read_text().strip().split('\n') if line.strip())
|
||||
unpacked_lines = set(line for line in unpacked_file.read_text().strip().split('\n') if line.strip())
|
||||
sample_lines = set(line for line in sample_file.read_text().strip().split("\n") if line.strip())
|
||||
unpacked_lines = set(line for line in unpacked_file.read_text().strip().split("\n") if line.strip())
|
||||
assert sample_lines == unpacked_lines, f"JSONL file contents differ for {relative_path}"
|
||||
else:
|
||||
# For other files, compare as bytes
|
||||
|
Loading…
x
Reference in New Issue
Block a user