mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-07 05:39:49 +00:00
Trying idea of priority scheduler to get more throughput on cluster
This commit is contained in:
parent
ec1bf2471c
commit
1de9e4ba76
@ -269,6 +269,10 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
|
|||||||
# Change temperature as number of attempts increases to overcome repetition issues at expense of quality
|
# Change temperature as number of attempts increases to overcome repetition issues at expense of quality
|
||||||
query["temperature"] = TEMPERATURE_BY_ATTEMPT[lookup_attempt]
|
query["temperature"] = TEMPERATURE_BY_ATTEMPT[lookup_attempt]
|
||||||
|
|
||||||
|
# Add priority optionally, to help get retries done faster and the queue cleared sooner
|
||||||
|
# this helps in situations where your jobs are preemptible on a cluster
|
||||||
|
query["priority"] = MAX_RETRIES - attempt
|
||||||
|
|
||||||
# Enable guided decoding regex if needed
|
# Enable guided decoding regex if needed
|
||||||
if args.guided_decoding:
|
if args.guided_decoding:
|
||||||
query["guided_regex"] = (
|
query["guided_regex"] = (
|
||||||
@ -639,6 +643,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
|
|||||||
str(args.tensor_parallel_size),
|
str(args.tensor_parallel_size),
|
||||||
"--data-parallel-size",
|
"--data-parallel-size",
|
||||||
str(args.data_parallel_size),
|
str(args.data_parallel_size),
|
||||||
|
"--scheduling-policy",
|
||||||
|
"priority",
|
||||||
"--limit-mm-per-prompt",
|
"--limit-mm-per-prompt",
|
||||||
'{"video": 0}', # Disabling video encoder saves RAM that you can put towards the KV cache, thanks @charitarthchugh
|
'{"video": 0}', # Disabling video encoder saves RAM that you can put towards the KV cache, thanks @charitarthchugh
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user