From b5b1de98dd2851a77fc429cf471c7752c102b734 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Mon, 29 Sep 2025 22:12:27 +0000 Subject: [PATCH] Allowing more max tokens in pipeline for new models --- olmocr/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py index 1dfa9bc..41c3582 100644 --- a/olmocr/pipeline.py +++ b/olmocr/pipeline.py @@ -105,7 +105,7 @@ class PageResult: async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, image_rotation: int = 0) -> dict: - MAX_TOKENS = 4500 + MAX_TOKENS = 8000 assert image_rotation in [0, 90, 180, 270], "Invalid image rotation provided in build_page_query" # Allow the page rendering to process in the background, but limit the number of workers otherwise you can overload the system @@ -678,7 +678,7 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non # Check if we should release the semaphore should_release = ( server_printed_ready_message - and last_queue_req <= int(peak_running_req * 0.1) + and last_queue_req <= int(peak_running_req * 0.2) and time.time() - last_semaphore_release > 30 and semaphore.locked() and (last_running_req == 0 or running_reqs_decreased)