mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-03 11:35:29 +00:00
Merge pull request #341 from charitarthchugh/charitarthchugh/vllm-defaults-speedup
Add chunked prefill and limit mm per prompt options
This commit is contained in:
commit
2b70b50312
@@ -636,6 +636,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
|
|||||||
str(args.tensor_parallel_size),
|
str(args.tensor_parallel_size),
|
||||||
"--data-parallel-size",
|
"--data-parallel-size",
|
||||||
str(args.data_parallel_size),
|
str(args.data_parallel_size),
|
||||||
|
"--enable-chunked-prefill",
|
||||||
|
"--limit-mm-per-prompt '{\"video\": 0}'"
|
||||||
]
|
]
|
||||||
|
|
||||||
if args.gpu_memory_utilization is not None:
|
if args.gpu_memory_utilization is not None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user