Mirror of https://github.com/allenai/olmocr.git, synced 2025-10-11 16:22:29 +00:00
Add chunked prefill and limit mm per prompt options
This commit is contained in:
parent 8f88a98e5d
commit fe425fde20
@@ -599,6 +599,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
             str(args.tensor_parallel_size),
             "--data-parallel-size",
             str(args.data_parallel_size),
+            "--enable-chunked-prefill",
+            "--limit-mm-per-prompt '{\"video\": 0}'"
         ]

         if args.gpu_memory_utilization is not None:
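For context, here is a minimal sketch of how the two new options could end up in the vLLM launch command. It assumes the surrounding code assembles an argument list and starts the server with asyncio.create_subprocess_exec; the function name and everything other than the vLLM flags themselves are illustrative, not the exact olmocr implementation. Note that this sketch passes --limit-mm-per-prompt and its JSON value as two separate argv entries, whereas the commit keeps them in a single quoted string.

import asyncio


async def launch_vllm_server_sketch(model_name_or_path, args):
    # Illustrative only: build the vLLM server command line with the two
    # options added by this commit. Everything except the vLLM flags is an
    # assumption, not the exact olmocr code.
    cmd = [
        "vllm", "serve", model_name_or_path,
        "--tensor-parallel-size",
        str(args.tensor_parallel_size),
        "--data-parallel-size",
        str(args.data_parallel_size),
        "--enable-chunked-prefill",               # process long prompt prefills in chunks
        "--limit-mm-per-prompt", '{"video": 0}',  # disallow video inputs in multimodal prompts
    ]

    if args.gpu_memory_utilization is not None:
        cmd += ["--gpu-memory-utilization", str(args.gpu_memory_utilization)]

    # Each list element is passed to the subprocess verbatim, so the JSON
    # value needs no extra shell quoting here.
    return await asyncio.create_subprocess_exec(*cmd)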