mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-01 18:43:45 +00:00
Lints
This commit is contained in:
parent
702c42f8e7
commit
875337f962
@ -22,7 +22,6 @@ import threading
|
||||
import unittest
|
||||
import weakref
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@ -16,7 +16,6 @@ import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
from threading import Lock
|
||||
|
||||
import torch
|
||||
from paddleocr import PPStructureV3
|
||||
|
||||
# Run's paddle paddle as in the docs here: https://huggingface.co/PaddlePaddle/PP-OCRv5_server_det
|
||||
|
||||
@ -20,7 +20,7 @@ from playwright.async_api import async_playwright
|
||||
from syntok.segmenter import process
|
||||
from tqdm import tqdm
|
||||
|
||||
from olmocr.bench.tests import TableTest, TestType, load_single_test, parse_html_tables
|
||||
from olmocr.bench.tests import TableTest, TestType, parse_html_tables
|
||||
from olmocr.data.renderpdf import (
|
||||
get_png_dimensions_from_base64,
|
||||
render_pdf_to_base64png,
|
||||
|
||||
@ -13,17 +13,12 @@ The script:
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import List, Optional
|
||||
|
||||
import pypdf
|
||||
from pypdf import PageObject, Transformation
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ import sys
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -9,12 +9,11 @@
|
||||
import argparse
|
||||
import csv
|
||||
import hashlib
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
from typing import Dict, Optional, Set, Tuple
|
||||
|
||||
import img2pdf
|
||||
import requests
|
||||
|
||||
@ -16,10 +16,9 @@ import json
|
||||
import tarfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterator, List, Optional, Tuple
|
||||
from typing import Dict, Iterator, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
import yaml
|
||||
from tqdm import tqdm
|
||||
|
||||
from olmocr.prompts import PageResponse
|
||||
|
||||
@ -32,14 +32,12 @@ from pypdf import PdfReader
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from olmocr.bench.katex.render import render_equation
|
||||
from olmocr.data.renderpdf import render_pdf_to_base64png
|
||||
from olmocr.prompts.anchor import get_anchor_text
|
||||
from olmocr.prompts.prompts import (
|
||||
PageResponse,
|
||||
build_finetuning_prompt,
|
||||
build_no_anchoring_v4_yaml_prompt,
|
||||
build_no_anchoring_yaml_prompt,
|
||||
)
|
||||
|
||||
# Type alias for samples
|
||||
|
||||
@ -3,27 +3,23 @@ GRPO (Generative Reward-based Policy Optimization) training script for OlmOCR.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import base64
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from functools import lru_cache, partial
|
||||
from functools import lru_cache
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import wandb
|
||||
from PIL import Image
|
||||
from rapidfuzz import fuzz
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
from torch.utils.data import Dataset
|
||||
from transformers import (
|
||||
AutoProcessor,
|
||||
Qwen2_5_VLForConditionalGeneration,
|
||||
|
||||
@ -25,7 +25,6 @@
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user