Jake Poznanski 2025-06-17 15:58:16 +00:00
parent 6fcd26d66a
commit e489b28421
4 changed files with 32 additions and 32 deletions

View File

@@ -1,10 +1,10 @@
import os
import tempfile
from marker.config.parser import ConfigParser
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
from marker.config.parser import ConfigParser
from pypdf import PdfReader, PdfWriter
_marker_converter = None
@@ -17,7 +17,7 @@ def run_marker(pdf_path: str, page_num: int = 1) -> str:
# Create a configuration dictionary with the necessary settings
config = {
"force_ocr": True, # This enables conversion of inline math to LaTeX
"use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
"use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
"disable_tqdm": True, # Disable tqdm for cleaner output
"recognition_batch_size": 256,
"layout_batch_size": 48,

View File

@@ -75,7 +75,7 @@ class MetricsKeeper:
def get_total_metrics(self):
"""
Returns the total cumulative metrics since the MetricsKeeper was created.
Returns:
dict: Dictionary of metric names to their total values.
"""
@@ -84,24 +84,20 @@ class MetricsKeeper:
def get_metrics_summary(self):
"""
Returns a summary of metrics including totals and rates.
Returns:
dict: Dictionary containing total metrics and overall rates.
"""
current_time = time.time()
elapsed_time = current_time - self.start_time
summary = {
"elapsed_time_seconds": elapsed_time,
"total_metrics": dict(self.total_metrics),
"rates": {}
}
summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
# Calculate rates for each metric
if elapsed_time > 0:
for key, value in self.total_metrics.items():
summary["rates"][f"{key}_per_sec"] = value / elapsed_time
return summary
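
A quick usage sketch of the summary API shown above (add_metrics is an assumed name for the recording method; the actual MetricsKeeper interface may differ):

import time

metrics = MetricsKeeper()
metrics.add_metrics(server_input_tokens=4096, server_output_tokens=512)  # assumed recorder method
time.sleep(2.0)  # let wall-clock time elapse so the rates are meaningful

summary = metrics.get_metrics_summary()
print(summary["total_metrics"])  # {'server_input_tokens': 4096, 'server_output_tokens': 512}
print(f"{summary['rates']['server_output_tokens_per_sec']:.1f} tok/s")  # roughly 512 / elapsed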

View File

@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
"vllm",
"serve",
model_name_or_path,
"--port", str(BASE_SERVER_PORT),
"--port",
str(BASE_SERVER_PORT),
"--disable-log-requests",
"--uvicorn-log-level", "warning",
"--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
"--uvicorn-log-level",
"warning",
"--served-model-name",
"Qwen/Qwen2-VL-7B-Instruct",
]
cmd.extend(mem_fraction_arg)
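
This hunk only reflows the argv list to one token per element (a black-style formatting change). For reference, a trimmed sketch of how such a list is typically launched asynchronously; the real vllm_server_task also appends mem_fraction_arg and monitors the process output:

import asyncio

async def launch_vllm(model_name_or_path: str, port: int) -> asyncio.subprocess.Process:
    cmd = [
        "vllm", "serve", model_name_or_path,
        "--port", str(port),
        "--disable-log-requests",
        "--uvicorn-log-level", "warning",
        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
    ]
    # Merge stderr into stdout so a single reader loop can scan all server logs.
    return await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
    )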
@@ -615,11 +618,11 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
server_printed_ready_message = True
last_semaphore_release = time.time()
match = re.search(r'Running: (\d+)', line)
match = re.search(r"Running: (\d+)", line)
if match:
last_running_req = int(match.group(1))
match = re.search(r'Waiting: (\d+)', line)
match = re.search(r"Waiting: (\d+)", line)
if match:
last_queue_req = int(match.group(1))
logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
if retry >= MAX_RETRIES:
logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
logger.error("")
logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
logger.error(
"Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
)
sys.exit(1)
@@ -1140,7 +1145,7 @@ async def main():
return
# If you get this far, then you are doing inference and need a GPU
#check_sglang_version()
# check_sglang_version()
check_torch_gpu_available()
logger.info(f"Starting pipeline with PID {os.getpid()}")
@@ -1180,30 +1185,30 @@ async def main():
vllm_server.cancel()
metrics_task.cancel()
# Output final metrics summary
metrics_summary = metrics.get_metrics_summary()
logger.info("=" * 80)
logger.info("FINAL METRICS SUMMARY")
logger.info("=" * 80)
logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
# Output token counts and rates
total_metrics = metrics_summary['total_metrics']
rates = metrics_summary['rates']
total_metrics = metrics_summary["total_metrics"]
rates = metrics_summary["rates"]
logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
logger.info(f"Finished input tokens: {total_metrics.get('finished_input_tokens', 0):,}")
logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
# Output rates
if 'server_input_tokens_per_sec' in rates:
if "server_input_tokens_per_sec" in rates:
logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
if 'server_output_tokens_per_sec' in rates:
if "server_output_tokens_per_sec" in rates:
logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
logger.info("=" * 80)
logger.info("Work done")

View File

@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
model = Qwen2VLForConditionalGeneration.from_pretrained(
MODEL_ID, device_map="auto", torch_dtype="auto")
model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
from llmcompressor import oneshot
@@ -28,4 +27,4 @@ oneshot(model=model, recipe=recipe)
# Save the model.
SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic-Recipe"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)
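
The recipe itself is elided from this diff. For orientation, a sketch following llm-compressor's documented FP8_DYNAMIC flow, which needs no calibration data; the recipe, ignore list, and model path below are assumptions, not the commit's actual values:

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"  # placeholder; the script points at a local checkpoint

model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# FP8-dynamic quantizes weights ahead of time and activations on the fly,
# so oneshot() can run without a calibration dataset. Skipping lm_head and
# the vision tower is a common choice for VLMs (assumed here).
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head", "re:visual.*"],
)
oneshot(model=model, recipe=recipe)

SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic-Recipe"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)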