From b5b1de98dd2851a77fc429cf471c7752c102b734 Mon Sep 17 00:00:00 2001
From: Jake Poznanski <jakep@allenai.org>
Date: Mon, 29 Sep 2025 22:12:27 +0000
Subject: [PATCH] Allowing more max tokens in pipeline for new models

---
 olmocr/pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 1dfa9bc..41c3582 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -105,7 +105,7 @@ class PageResult:
 
 
 async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, image_rotation: int = 0) -> dict:
-    MAX_TOKENS = 4500
+    MAX_TOKENS = 8000
     assert image_rotation in [0, 90, 180, 270], "Invalid image rotation provided in build_page_query"
 
     # Allow the page rendering to process in the background, but limit the number of workers otherwise you can overload the system
@@ -678,7 +678,7 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
                 # Check if we should release the semaphore
                 should_release = (
                     server_printed_ready_message
-                    and last_queue_req <= int(peak_running_req * 0.1)
+                    and last_queue_req <= int(peak_running_req * 0.2)
                     and time.time() - last_semaphore_release > 30
                     and semaphore.locked()
                     and (last_running_req == 0 or running_reqs_decreased)