Ruff fixes to CI

This commit is contained in:
Jake Poznanski 2025-03-03 15:56:39 -08:00
parent 1348a29ce8
commit dc7cb5c8b5
6 changed files with 5 additions and 14 deletions

View File

@@ -2,7 +2,6 @@ import argparse
import asyncio import asyncio
import glob import glob
import importlib import importlib
import inspect
import os import os
from tqdm import tqdm from tqdm import tqdm
@@ -83,7 +82,7 @@ if __name__ == "__main__":
"methods", "methods",
nargs="+", nargs="+",
help="Methods to run in the format method[:key=value ...]. " help="Methods to run in the format method[:key=value ...]. "
"Example: gotocr mineru:temperature=2 marker:runs=3. " "Example: gotocr mineru:temperature=2 marker:u=3. "
"Use 'name=folder_name' to specify a custom output folder name.", "Use 'name=folder_name' to specify a custom output folder name.",
) )
parser.add_argument("--repeats", type=int, default=1, help="Number of times to repeat the conversion for each PDF.") parser.add_argument("--repeats", type=int, default=1, help="Number of times to repeat the conversion for each PDF.")

View File

@@ -7,7 +7,6 @@ from collections import Counter
from difflib import SequenceMatcher from difflib import SequenceMatcher
import syntok.segmenter as segmenter import syntok.segmenter as segmenter
import syntok.tokenizer as tokenizer
from google import genai from google import genai
from google.genai import types from google.genai import types

View File

@@ -1,12 +1,7 @@
import argparse
import asyncio import asyncio
import json import json
import logging import logging
import os
import tempfile
from dataclasses import dataclass from dataclasses import dataclass
from functools import partial
from typing import Optional
# Import necessary components from olmocr # Import necessary components from olmocr
from olmocr.pipeline import ( from olmocr.pipeline import (
@@ -58,13 +53,13 @@ async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8)
semaphore = asyncio.Semaphore(1) semaphore = asyncio.Semaphore(1)
# Ensure server is running # Ensure server is running
server_task = None _server_task = None
try: try:
await asyncio.wait_for(sglang_server_ready(), timeout=5) await asyncio.wait_for(sglang_server_ready(), timeout=5)
print("Using existing sglang server") print("Using existing sglang server")
except Exception: except Exception:
print("Starting new sglang server") print("Starting new sglang server")
server_task = asyncio.create_task(sglang_server_host(args, semaphore)) _server_task = asyncio.create_task(sglang_server_host(args, semaphore))
await sglang_server_ready() await sglang_server_ready()
try: try:

View File

@@ -4,7 +4,7 @@ from typing import Type
import regex as re import regex as re
from tqdm import tqdm from tqdm import tqdm
from .aligners import BaseAligner from .aligners import BaseAligner, AlignerRegistry
from .registry import BaseRegistry from .registry import BaseRegistry
from .segmenters import BaseSegmenter, SegmenterRegistry from .segmenters import BaseSegmenter, SegmenterRegistry

View File

@@ -1,7 +1,5 @@
import re
from typing import Type from typing import Type
import torch
from spacy.lang.en import English from spacy.lang.en import English
from .registry import BaseRegistry from .registry import BaseRegistry

View File

@@ -50,7 +50,7 @@ def query_infinigram(ngram, index="v4_rpj_llama_s4", retries=3):
result = response.json() result = response.json()
if "count" in result: if "count" in result:
return result["count"] return result["count"]
except Exception as e: # type: ignore except Exception: # type: ignore
time.sleep(1) time.sleep(1)
return 0 return 0