mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-02 02:54:53 +00:00
Merge pull request #341 from charitarthchugh/charitarthchugh/vllm-defaults-speedup
Add chunked prefill and limit mm per prompt options
This commit is contained in:
commit
2b70b50312
@ -636,6 +636,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
|
||||
str(args.tensor_parallel_size),
|
||||
"--data-parallel-size",
|
||||
str(args.data_parallel_size),
|
||||
"--enable-chunked-prefill",
|
||||
"--limit-mm-per-prompt '{\"video\": 0}'"
|
||||
]
|
||||
|
||||
if args.gpu_memory_utilization is not None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user