Ruff fixes to CI

This commit is contained in:
Jake Poznanski 2025-03-03 15:56:39 -08:00
parent 1348a29ce8
commit dc7cb5c8b5
6 changed files with 5 additions and 14 deletions

View File

@@ -2,7 +2,6 @@ import argparse
import asyncio
import glob
import importlib
import inspect
import os
from tqdm import tqdm
@@ -83,7 +82,7 @@ if __name__ == "__main__":
"methods",
nargs="+",
help="Methods to run in the format method[:key=value ...]. "
-"Example: gotocr mineru:temperature=2 marker:runs=3. "
+"Example: gotocr mineru:temperature=2 marker:u=3. "
"Use 'name=folder_name' to specify a custom output folder name.",
)
parser.add_argument("--repeats", type=int, default=1, help="Number of times to repeat the conversion for each PDF.")

View File

@@ -7,7 +7,6 @@ from collections import Counter
from difflib import SequenceMatcher
import syntok.segmenter as segmenter
import syntok.tokenizer as tokenizer
from google import genai
from google.genai import types

View File

@@ -1,12 +1,7 @@
import argparse
import asyncio
import json
import logging
import os
import tempfile
from dataclasses import dataclass
from functools import partial
from typing import Optional
# Import necessary components from olmocr
from olmocr.pipeline import (
@@ -58,13 +53,13 @@ async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8)
semaphore = asyncio.Semaphore(1)
# Ensure server is running
-server_task = None
+_server_task = None
try:
await asyncio.wait_for(sglang_server_ready(), timeout=5)
print("Using existing sglang server")
except Exception:
print("Starting new sglang server")
-server_task = asyncio.create_task(sglang_server_host(args, semaphore))
+_server_task = asyncio.create_task(sglang_server_host(args, semaphore))
await sglang_server_ready()
try:

View File

@@ -4,7 +4,7 @@ from typing import Type
import regex as re
from tqdm import tqdm
-from .aligners import BaseAligner
+from .aligners import BaseAligner, AlignerRegistry
from .registry import BaseRegistry
from .segmenters import BaseSegmenter, SegmenterRegistry

View File

@@ -1,7 +1,5 @@
import re
from typing import Type
import torch
from spacy.lang.en import English
from .registry import BaseRegistry

View File

@@ -50,7 +50,7 @@ def query_infinigram(ngram, index="v4_rpj_llama_s4", retries=3):
result = response.json()
if "count" in result:
return result["count"]
-except Exception as e: # type: ignore
+except Exception: # type: ignore
time.sleep(1)
return 0