mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-13 17:22:13 +00:00
Allowing more max tokens in pipeline for new models
This commit is contained in:
parent
7f4b728dcd
commit
b5b1de98dd
@ -105,7 +105,7 @@ class PageResult:
|
|||||||
|
|
||||||
|
|
||||||
async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, image_rotation: int = 0) -> dict:
|
async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, image_rotation: int = 0) -> dict:
|
||||||
MAX_TOKENS = 4500
|
MAX_TOKENS = 8000
|
||||||
assert image_rotation in [0, 90, 180, 270], "Invalid image rotation provided in build_page_query"
|
assert image_rotation in [0, 90, 180, 270], "Invalid image rotation provided in build_page_query"
|
||||||
|
|
||||||
# Allow the page rendering to process in the background, but limit the number of workers otherwise you can overload the system
|
# Allow the page rendering to process in the background, but limit the number of workers otherwise you can overload the system
|
||||||
@ -678,7 +678,7 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
|
|||||||
# Check if we should release the semaphore
|
# Check if we should release the semaphore
|
||||||
should_release = (
|
should_release = (
|
||||||
server_printed_ready_message
|
server_printed_ready_message
|
||||||
and last_queue_req <= int(peak_running_req * 0.1)
|
and last_queue_req <= int(peak_running_req * 0.2)
|
||||||
and time.time() - last_semaphore_release > 30
|
and time.time() - last_semaphore_release > 30
|
||||||
and semaphore.locked()
|
and semaphore.locked()
|
||||||
and (last_running_req == 0 or running_reqs_decreased)
|
and (last_running_req == 0 or running_reqs_decreased)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user