Jake Poznanski 2025-06-17 15:58:16 +00:00
parent 6fcd26d66a
commit e489b28421
4 changed files with 32 additions and 32 deletions

View File

@@ -1,10 +1,10 @@
 import os
 import tempfile
 
+from marker.config.parser import ConfigParser
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
 from marker.output import text_from_rendered
-from marker.config.parser import ConfigParser
 from pypdf import PdfReader, PdfWriter
 
 _marker_converter = None
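
The dangling _marker_converter = None is the usual lazy-singleton pattern: build one marker PdfConverter on first use and reuse it across pages. A minimal sketch of that pattern, assuming marker-pdf's documented API and a hypothetical get_marker_converter helper (not necessarily this module's real code):

    # Hypothetical helper: lazily build and cache a single marker converter.
    def get_marker_converter():
        global _marker_converter
        if _marker_converter is None:
            _marker_converter = PdfConverter(artifact_dict=create_model_dict())
        return _marker_converter

    def pdf_to_markdown(pdf_path: str) -> str:
        rendered = get_marker_converter()(pdf_path)
        text, _, _ = text_from_rendered(rendered)
        return text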

View File

@@ -91,11 +91,7 @@ class MetricsKeeper:
         current_time = time.time()
         elapsed_time = current_time - self.start_time
 
-        summary = {
-            "elapsed_time_seconds": elapsed_time,
-            "total_metrics": dict(self.total_metrics),
-            "rates": {}
-        }
+        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
 
         # Calculate rates for each metric
         if elapsed_time > 0:
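
For context, the summary dict above is then filled with per-metric rates. A minimal sketch of how a MetricsKeeper like this can derive them, assuming total_metrics maps counter names to cumulative counts (method names other than those visible in the diff are assumptions):

    import time
    from collections import defaultdict

    class MetricsKeeper:
        def __init__(self):
            self.start_time = time.time()
            self.total_metrics = defaultdict(int)  # counter name -> cumulative count

        def add_metrics(self, **counts):
            for name, value in counts.items():
                self.total_metrics[name] += value

        def get_total_metrics_summary(self):
            elapsed_time = time.time() - self.start_time
            summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
            # Calculate rates for each metric
            if elapsed_time > 0:
                for name, count in self.total_metrics.items():
                    summary["rates"][f"{name}_per_sec"] = count / elapsed_time
            return summary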

View File

@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         "vllm",
         "serve",
         model_name_or_path,
-        "--port", str(BASE_SERVER_PORT),
+        "--port",
+        str(BASE_SERVER_PORT),
         "--disable-log-requests",
-        "--uvicorn-log-level", "warning",
-        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
+        "--uvicorn-log-level",
+        "warning",
+        "--served-model-name",
+        "Qwen/Qwen2-VL-7B-Instruct",
     ]
 
     cmd.extend(mem_fraction_arg)
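
A command list like this is typically handed to asyncio's subprocess API so the server's output can be watched line by line, which is what the log-parsing hunk below does. A minimal sketch under that assumption (not the pipeline's exact launch code):

    import asyncio

    async def start_vllm_server(cmd):
        # Merge stderr into stdout so a single reader sees all log lines.
        return await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )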
@@ -615,11 +618,11 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
                 server_printed_ready_message = True
                 last_semaphore_release = time.time()
 
-            match = re.search(r'Running: (\d+)', line)
+            match = re.search(r"Running: (\d+)", line)
             if match:
                 last_running_req = int(match.group(1))
 
-            match = re.search(r'Waiting: (\d+)', line)
+            match = re.search(r"Waiting: (\d+)", line)
             if match:
                 last_queue_req = int(match.group(1))
                 logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
     if retry >= MAX_RETRIES:
         logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
         logger.error("")
-        logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
+        logger.error(
+            "Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
+        )
         sys.exit(1)
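
This check sits inside a restart loop: vllm_server_host keeps relaunching vllm_server_task until it exceeds MAX_RETRIES. A minimal sketch of that shape, with everything beyond the names visible in the diff assumed:

    import logging
    import sys

    logger = logging.getLogger(__name__)
    MAX_RETRIES = 5  # assumed value

    async def vllm_server_host(model_name_or_path, args, semaphore):
        retry = 0
        while True:
            # Assumed contract: vllm_server_task returns when the server process dies.
            await vllm_server_task(model_name_or_path, args, semaphore)
            retry += 1
            if retry >= MAX_RETRIES:
                logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
                sys.exit(1)
            logger.warning(f"vllm server exited, restarting (attempt {retry})")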
@@ -1140,7 +1145,7 @@ async def main():
         return
 
     # If you get this far, then you are doing inference and need a GPU
-    #check_sglang_version()
+    # check_sglang_version()
     check_torch_gpu_available()
 
     logger.info(f"Starting pipeline with PID {os.getpid()}")
@@ -1189,8 +1194,8 @@ async def main():
     logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
 
     # Output token counts and rates
-    total_metrics = metrics_summary['total_metrics']
-    rates = metrics_summary['rates']
+    total_metrics = metrics_summary["total_metrics"]
+    rates = metrics_summary["rates"]
 
     logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
     logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
@@ -1199,9 +1204,9 @@ async def main():
     logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
 
     # Output rates
-    if 'server_input_tokens_per_sec' in rates:
+    if "server_input_tokens_per_sec" in rates:
         logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
-    if 'server_output_tokens_per_sec' in rates:
+    if "server_output_tokens_per_sec" in rates:
         logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
 
     logger.info("=" * 80)

View File

@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
 
 MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
 
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 from llmcompressor import oneshot
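
The trailing import suggests the script goes on to quantize the checkpoint with llmcompressor's oneshot entry point. A minimal sketch of a typical FP8 dynamic recipe; the scheme, targets, ignore list, and save path are assumptions, not the script's actual settings:

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import QuantizationModifier

    # Quantize all Linear layers to FP8 with dynamic activation scales,
    # skipping the LM head and (assumed) the Qwen2-VL vision tower.
    recipe = QuantizationModifier(
        targets="Linear",
        scheme="FP8_DYNAMIC",
        ignore=["lm_head", "re:visual.*"],
    )
    oneshot(model=model, recipe=recipe)

    save_dir = MODEL_ID + "-FP8-Dynamic"  # assumed output location
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)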