From dc7cb5c8b5af29fd64a63e6e15238beca3840e5c Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Mon, 3 Mar 2025 15:56:39 -0800 Subject: [PATCH] Ruff fixes to CI --- olmocr/bench/convert.py | 3 +-- olmocr/bench/miners/mine_diffs.py | 1 - olmocr/bench/runners/run_olmocr.py | 9 ++------- olmocr/eval/dolma_refine/metrics.py | 2 +- olmocr/eval/dolma_refine/segmenters.py | 2 -- scripts/infinigram_count.py | 2 +- 6 files changed, 5 insertions(+), 14 deletions(-) diff --git a/olmocr/bench/convert.py b/olmocr/bench/convert.py index 4dd2846..72d4092 100644 --- a/olmocr/bench/convert.py +++ b/olmocr/bench/convert.py @@ -2,7 +2,6 @@ import argparse import asyncio import glob import importlib -import inspect import os from tqdm import tqdm @@ -83,7 +82,7 @@ if __name__ == "__main__": "methods", nargs="+", help="Methods to run in the format method[:key=value ...]. " - "Example: gotocr mineru:temperature=2 marker:runs=3. " + "Example: gotocr mineru:temperature=2 marker:u=3. " "Use 'name=folder_name' to specify a custom output folder name.", ) parser.add_argument("--repeats", type=int, default=1, help="Number of times to repeat the conversion for each PDF.") diff --git a/olmocr/bench/miners/mine_diffs.py b/olmocr/bench/miners/mine_diffs.py index 4f88420..aaea6db 100644 --- a/olmocr/bench/miners/mine_diffs.py +++ b/olmocr/bench/miners/mine_diffs.py @@ -7,7 +7,6 @@ from collections import Counter from difflib import SequenceMatcher import syntok.segmenter as segmenter -import syntok.tokenizer as tokenizer from google import genai from google.genai import types diff --git a/olmocr/bench/runners/run_olmocr.py b/olmocr/bench/runners/run_olmocr.py index 980ff42..0e82cfc 100644 --- a/olmocr/bench/runners/run_olmocr.py +++ b/olmocr/bench/runners/run_olmocr.py @@ -1,12 +1,7 @@ -import argparse import asyncio import json import logging -import os -import tempfile from dataclasses import dataclass -from functools import partial -from typing import Optional # Import necessary components from olmocr from olmocr.pipeline import ( @@ -58,13 +53,13 @@ async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8) semaphore = asyncio.Semaphore(1) # Ensure server is running - server_task = None + _server_task = None try: await asyncio.wait_for(sglang_server_ready(), timeout=5) print("Using existing sglang server") except Exception: print("Starting new sglang server") - server_task = asyncio.create_task(sglang_server_host(args, semaphore)) + _server_task = asyncio.create_task(sglang_server_host(args, semaphore)) await sglang_server_ready() try: diff --git a/olmocr/eval/dolma_refine/metrics.py b/olmocr/eval/dolma_refine/metrics.py index 0f1b752..4bb2aa9 100644 --- a/olmocr/eval/dolma_refine/metrics.py +++ b/olmocr/eval/dolma_refine/metrics.py @@ -4,7 +4,7 @@ from typing import Type import regex as re from tqdm import tqdm -from .aligners import BaseAligner +from .aligners import BaseAligner, AlignerRegistry from .registry import BaseRegistry from .segmenters import BaseSegmenter, SegmenterRegistry diff --git a/olmocr/eval/dolma_refine/segmenters.py b/olmocr/eval/dolma_refine/segmenters.py index 3e70db6..47dfa65 100644 --- a/olmocr/eval/dolma_refine/segmenters.py +++ b/olmocr/eval/dolma_refine/segmenters.py @@ -1,7 +1,5 @@ -import re from typing import Type -import torch from spacy.lang.en import English from .registry import BaseRegistry diff --git a/scripts/infinigram_count.py b/scripts/infinigram_count.py index 10efe07..b0eb5f2 100644 --- a/scripts/infinigram_count.py +++ b/scripts/infinigram_count.py @@ -50,7 +50,7 @@ def query_infinigram(ngram, index="v4_rpj_llama_s4", retries=3): result = response.json() if "count" in result: return result["count"] - except Exception as e: # type: ignore + except Exception: # type: ignore time.sleep(1) return 0