From e489b2842115755fbcb02dec9896f6361d214345 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Tue, 17 Jun 2025 15:58:16 +0000
Subject: [PATCH] Lints

---
 olmocr/bench/runners/run_marker.py      |  4 +--
 olmocr/metrics.py                       | 16 ++++------
 olmocr/pipeline.py                      | 39 ++++++++++++++-----------
 olmocr/train/compressqwen2checkpoint.py |  5 ++--
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/olmocr/bench/runners/run_marker.py b/olmocr/bench/runners/run_marker.py
index d444408..86bd75f 100644
--- a/olmocr/bench/runners/run_marker.py
+++ b/olmocr/bench/runners/run_marker.py
@@ -1,10 +1,10 @@
 import os
 import tempfile
 
+from marker.config.parser import ConfigParser
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
 from marker.output import text_from_rendered
-from marker.config.parser import ConfigParser
 from pypdf import PdfReader, PdfWriter
 
 _marker_converter = None
@@ -17,7 +17,7 @@ def run_marker(pdf_path: str, page_num: int = 1) -> str:
     # Create a configuration dictionary with the necessary settings
     config = {
         "force_ocr": True,  # This enables conversion of inline math to LaTeX
-        "use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
+        "use_llm": False,  # We would prefer to run just plain marker for reporting bench results, not hybrid mode
         "disable_tqdm": True,  # Disable tqdm for cleaner output
         "recognition_batch_size": 256,
         "layout_batch_size": 48,
diff --git a/olmocr/metrics.py b/olmocr/metrics.py
index d95cb37..40fd6ed 100644
--- a/olmocr/metrics.py
+++ b/olmocr/metrics.py
@@ -75,7 +75,7 @@ class MetricsKeeper:
     def get_total_metrics(self):
         """
         Returns the total cumulative metrics since the MetricsKeeper was created.
-        
+
         Returns:
             dict: Dictionary of metric names to their total values.
         """
@@ -84,24 +84,20 @@ class MetricsKeeper:
     def get_metrics_summary(self):
         """
         Returns a summary of metrics including totals and rates.
-        
+
         Returns:
             dict: Dictionary containing total metrics and overall rates.
         """
         current_time = time.time()
         elapsed_time = current_time - self.start_time
-        
-        summary = {
-            "elapsed_time_seconds": elapsed_time,
-            "total_metrics": dict(self.total_metrics),
-            "rates": {}
-        }
-        
+
+        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
+
         # Calculate rates for each metric
         if elapsed_time > 0:
             for key, value in self.total_metrics.items():
                 summary["rates"][f"{key}_per_sec"] = value / elapsed_time
-        
+
         return summary
 
 
diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 410389f..0ec1db5 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         "vllm",
         "serve",
         model_name_or_path,
-        "--port", str(BASE_SERVER_PORT),
+        "--port",
+        str(BASE_SERVER_PORT),
         "--disable-log-requests",
-        "--uvicorn-log-level", "warning",
-        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
+        "--uvicorn-log-level",
+        "warning",
+        "--served-model-name",
+        "Qwen/Qwen2-VL-7B-Instruct",
     ]
 
     cmd.extend(mem_fraction_arg)
@@ -615,11 +618,11 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
             server_printed_ready_message = True
             last_semaphore_release = time.time()
 
-        match = re.search(r'Running: (\d+)', line)
+        match = re.search(r"Running: (\d+)", line)
         if match:
             last_running_req = int(match.group(1))
 
-        match = re.search(r'Waiting: (\d+)', line)
+        match = re.search(r"Waiting: (\d+)", line)
         if match:
             last_queue_req = int(match.group(1))
             logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
         if retry >= MAX_RETRIES:
             logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
             logger.error("")
-            logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
+            logger.error(
+                "Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
+            )
             sys.exit(1)
 
 
@@ -1140,7 +1145,7 @@ async def main():
         return
 
     # If you get this far, then you are doing inference and need a GPU
-    #check_sglang_version()
+    # check_sglang_version()
     check_torch_gpu_available()
 
     logger.info(f"Starting pipeline with PID {os.getpid()}")
@@ -1180,30 +1185,30 @@ async def main():
 
     vllm_server.cancel()
     metrics_task.cancel()
-    
+
     # Output final metrics summary
     metrics_summary = metrics.get_metrics_summary()
     logger.info("=" * 80)
    logger.info("FINAL METRICS SUMMARY")
     logger.info("=" * 80)
     logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
-    
+
     # Output token counts and rates
-    total_metrics = metrics_summary['total_metrics']
-    rates = metrics_summary['rates']
-    
+    total_metrics = metrics_summary["total_metrics"]
+    rates = metrics_summary["rates"]
+
     logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
     logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
-    
+
     logger.info(f"Finished input tokens: {total_metrics.get('finished_input_tokens', 0):,}")
     logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
-    
+
     # Output rates
-    if 'server_input_tokens_per_sec' in rates:
+    if "server_input_tokens_per_sec" in rates:
         logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
-    if 'server_output_tokens_per_sec' in rates:
+    if "server_output_tokens_per_sec" in rates:
         logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
-    
+
     logger.info("=" * 80)
     logger.info("Work done")
 
diff --git a/olmocr/train/compressqwen2checkpoint.py b/olmocr/train/compressqwen2checkpoint.py
index 70c6320..093c28a 100644
--- a/olmocr/train/compressqwen2checkpoint.py
+++ b/olmocr/train/compressqwen2checkpoint.py
@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
 
 MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
 
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 from llmcompressor import oneshot
@@ -28,4 +27,4 @@ oneshot(model=model, recipe=recipe)
 # Save the model.
 SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic-Recipe"
 model.save_pretrained(SAVE_DIR)
-tokenizer.save_pretrained(SAVE_DIR)
\ No newline at end of file
+tokenizer.save_pretrained(SAVE_DIR)