From e489b2842115755fbcb02dec9896f6361d214345 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Tue, 17 Jun 2025 15:58:16 +0000
Subject: [PATCH] Lints

---
 olmocr/bench/runners/run_marker.py      |  4 +--
 olmocr/metrics.py                       | 16 ++++------
 olmocr/pipeline.py                      | 39 ++++++++++++++-----------
 olmocr/train/compressqwen2checkpoint.py |  5 ++--
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/olmocr/bench/runners/run_marker.py b/olmocr/bench/runners/run_marker.py
index d444408..86bd75f 100644
--- a/olmocr/bench/runners/run_marker.py
+++ b/olmocr/bench/runners/run_marker.py
@@ -1,10 +1,10 @@
 import os
 import tempfile
 
+from marker.config.parser import ConfigParser
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
 from marker.output import text_from_rendered
-from marker.config.parser import ConfigParser
 from pypdf import PdfReader, PdfWriter
 
 _marker_converter = None
@@ -17,7 +17,7 @@ def run_marker(pdf_path: str, page_num: int = 1) -> str:
     # Create a configuration dictionary with the necessary settings
     config = {
         "force_ocr": True,  # This enables conversion of inline math to LaTeX
-        "use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
+        "use_llm": False,  # We would prefer to run just plain marker for reporting bench results, not hybrid mode
         "disable_tqdm": True,  # Disable tqdm for cleaner output
         "recognition_batch_size": 256,
         "layout_batch_size": 48,
diff --git a/olmocr/metrics.py b/olmocr/metrics.py
index d95cb37..40fd6ed 100644
--- a/olmocr/metrics.py
+++ b/olmocr/metrics.py
@@ -75,7 +75,7 @@ class MetricsKeeper:
     def get_total_metrics(self):
         """
         Returns the total cumulative metrics since the MetricsKeeper was created.
-        
+
         Returns:
             dict: Dictionary of metric names to their total values.
         """
@@ -84,24 +84,20 @@ class MetricsKeeper:
     def get_metrics_summary(self):
         """
         Returns a summary of metrics including totals and rates.
-        
+
         Returns:
             dict: Dictionary containing total metrics and overall rates.
         """
         current_time = time.time()
         elapsed_time = current_time - self.start_time
-        
-        summary = {
-            "elapsed_time_seconds": elapsed_time,
-            "total_metrics": dict(self.total_metrics),
-            "rates": {}
-        }
-        
+
+        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
+
         # Calculate rates for each metric
         if elapsed_time > 0:
             for key, value in self.total_metrics.items():
                 summary["rates"][f"{key}_per_sec"] = value / elapsed_time
-        
+
         return summary
 
 
diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 410389f..0ec1db5 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         "vllm",
         "serve",
         model_name_or_path,
-        "--port", str(BASE_SERVER_PORT),
+        "--port",
+        str(BASE_SERVER_PORT),
         "--disable-log-requests",
-        "--uvicorn-log-level", "warning",
-        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
+        "--uvicorn-log-level",
+        "warning",
+        "--served-model-name",
+        "Qwen/Qwen2-VL-7B-Instruct",
     ]
 
     cmd.extend(mem_fraction_arg)
@@ -615,11 +618,11 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
             server_printed_ready_message = True
             last_semaphore_release = time.time()
 
-        match = re.search(r'Running: (\d+)', line)
+        match = re.search(r"Running: (\d+)", line)
         if match:
             last_running_req = int(match.group(1))
 
-        match = re.search(r'Waiting: (\d+)', line)
+        match = re.search(r"Waiting: (\d+)", line)
         if match:
             last_queue_req = int(match.group(1))
             logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
         if retry >= MAX_RETRIES:
             logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
             logger.error("")
-            logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
+            logger.error(
+                "Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
+            )
             sys.exit(1)
 
 
@@ -1140,7 +1145,7 @@ async def main():
         return
 
     # If you get this far, then you are doing inference and need a GPU
-    #check_sglang_version()
+    # check_sglang_version()
     check_torch_gpu_available()
 
     logger.info(f"Starting pipeline with PID {os.getpid()}")
@@ -1180,30 +1185,30 @@ async def main():
 
     vllm_server.cancel()
     metrics_task.cancel()
-    
+
     # Output final metrics summary
     metrics_summary = metrics.get_metrics_summary()
     logger.info("=" * 80)
    logger.info("FINAL METRICS SUMMARY")
     logger.info("=" * 80)
     logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
-    
+
     # Output token counts and rates
-    total_metrics = metrics_summary['total_metrics']
-    rates = metrics_summary['rates']
-    
+    total_metrics = metrics_summary["total_metrics"]
+    rates = metrics_summary["rates"]
+
     logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
     logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
-    
+
     logger.info(f"Finished input tokens: {total_metrics.get('finished_input_tokens', 0):,}")
     logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
-    
+
     # Output rates
-    if 'server_input_tokens_per_sec' in rates:
+    if "server_input_tokens_per_sec" in rates:
         logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
-    if 'server_output_tokens_per_sec' in rates:
+    if "server_output_tokens_per_sec" in rates:
         logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
-    
+
     logger.info("=" * 80)
     logger.info("Work done")
 
diff --git a/olmocr/train/compressqwen2checkpoint.py b/olmocr/train/compressqwen2checkpoint.py
index 70c6320..093c28a 100644
--- a/olmocr/train/compressqwen2checkpoint.py
+++ b/olmocr/train/compressqwen2checkpoint.py
@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
 
 MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
 
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 from llmcompressor import oneshot
@@ -28,4 +27,4 @@ oneshot(model=model, recipe=recipe)
 # Save the model.
 SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic-Recipe"
 model.save_pretrained(SAVE_DIR)
-tokenizer.save_pretrained(SAVE_DIR)
\ No newline at end of file
+tokenizer.save_pretrained(SAVE_DIR)