mirror of
https://github.com/allenai/olmocr.git
synced 2025-06-27 04:00:02 +00:00
Ruff fixes to CI
This commit is contained in:
parent
1348a29ce8
commit
dc7cb5c8b5
@ -2,7 +2,6 @@ import argparse
|
||||
import asyncio
|
||||
import glob
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
|
||||
from tqdm import tqdm
|
||||
@ -83,7 +82,7 @@ if __name__ == "__main__":
|
||||
"methods",
|
||||
nargs="+",
|
||||
help="Methods to run in the format method[:key=value ...]. "
|
||||
"Example: gotocr mineru:temperature=2 marker:runs=3. "
|
||||
"Example: gotocr mineru:temperature=2 marker:u=3. "
|
||||
"Use 'name=folder_name' to specify a custom output folder name.",
|
||||
)
|
||||
parser.add_argument("--repeats", type=int, default=1, help="Number of times to repeat the conversion for each PDF.")
|
||||
|
@ -7,7 +7,6 @@ from collections import Counter
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
import syntok.segmenter as segmenter
|
||||
import syntok.tokenizer as tokenizer
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
|
@ -1,12 +1,7 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
# Import necessary components from olmocr
|
||||
from olmocr.pipeline import (
|
||||
@ -58,13 +53,13 @@ async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8)
|
||||
semaphore = asyncio.Semaphore(1)
|
||||
|
||||
# Ensure server is running
|
||||
server_task = None
|
||||
_server_task = None
|
||||
try:
|
||||
await asyncio.wait_for(sglang_server_ready(), timeout=5)
|
||||
print("Using existing sglang server")
|
||||
except Exception:
|
||||
print("Starting new sglang server")
|
||||
server_task = asyncio.create_task(sglang_server_host(args, semaphore))
|
||||
_server_task = asyncio.create_task(sglang_server_host(args, semaphore))
|
||||
await sglang_server_ready()
|
||||
|
||||
try:
|
||||
|
@ -4,7 +4,7 @@ from typing import Type
|
||||
import regex as re
|
||||
from tqdm import tqdm
|
||||
|
||||
from .aligners import BaseAligner
|
||||
from .aligners import BaseAligner, AlignerRegistry
|
||||
from .registry import BaseRegistry
|
||||
from .segmenters import BaseSegmenter, SegmenterRegistry
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
import re
|
||||
from typing import Type
|
||||
|
||||
import torch
|
||||
from spacy.lang.en import English
|
||||
|
||||
from .registry import BaseRegistry
|
||||
|
@ -50,7 +50,7 @@ def query_infinigram(ngram, index="v4_rpj_llama_s4", retries=3):
|
||||
result = response.json()
|
||||
if "count" in result:
|
||||
return result["count"]
|
||||
except Exception as e: # type: ignore
|
||||
except Exception: # type: ignore
|
||||
time.sleep(1)
|
||||
return 0
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user