mirror of https://github.com/allenai/olmocr.git, synced 2025-10-11 08:12:22 +00:00
Add chunked prefill and limit mm per prompt options
parent 8f88a98e5d
commit fe425fde20
@@ -599,6 +599,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=None
             str(args.tensor_parallel_size),
             "--data-parallel-size",
             str(args.data_parallel_size),
+            "--enable-chunked-prefill",
+            "--limit-mm-per-prompt '{\"video\": 0}'"
         ]
 
         if args.gpu_memory_utilization is not None:
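
For orientation, here is a minimal sketch of how a launcher such as vllm_server_task might assemble a "vllm serve" command line with these new flags and start the server via asyncio. The Namespace fields, the separate-argument form of --limit-mm-per-prompt, and the subprocess handling are illustrative assumptions, not the repository's actual code.

import asyncio
from argparse import Namespace


async def launch_vllm(model_name_or_path: str, args: Namespace) -> asyncio.subprocess.Process:
    # Build the `vllm serve` command line. The two flags added by this commit
    # enable chunked prefill and cap multimodal video inputs at zero per prompt.
    cmd = [
        "vllm", "serve", model_name_or_path,
        "--tensor-parallel-size", str(args.tensor_parallel_size),
        "--data-parallel-size", str(args.data_parallel_size),
        "--enable-chunked-prefill",
        # Passed here as two argv elements; the diff above embeds the flag and
        # its JSON value in a single shell-quoted string instead.
        "--limit-mm-per-prompt", '{"video": 0}',
    ]
    if args.gpu_memory_utilization is not None:
        cmd += ["--gpu-memory-utilization", str(args.gpu_memory_utilization)]
    # Start the server as a subprocess; the caller owns the returned handle.
    return await asyncio.create_subprocess_exec(*cmd)

A real caller would typically wait for the server to become healthy and tear the process down under the surrounding semaphore; those details are omitted here.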