Trying out the idea of a priority scheduler to get more throughput on the cluster

2025-11-07 05:39:49 +00:00 · 2025-10-30 16:52:13 +00:00 · 2025-10-30 16:52:13 +00:00 · 1de9e4ba76
commit 1de9e4ba76
parent ec1bf2471c
1 changed files with 6 additions and 0 deletions
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -269,6 +269,10 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
        # Change temperature as number of attempts increases to overcome repetition issues at expense of quality
        query["temperature"] = TEMPERATURE_BY_ATTEMPT[lookup_attempt]
        # Optionally add a priority to help get retries done faster and the queue cleared sooner;
        # this helps in situations where your jobs are preemptible on a cluster
        query["priority"] = MAX_RETRIES - attempt
        # Enable guided decoding regex if needed
        if args.guided_decoding:
            query["guided_regex"] = (
@@ -639,6 +643,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
        str(args.tensor_parallel_size),
        "--data-parallel-size",
        str(args.data_parallel_size),
        "--scheduling-policy",
        "priority",
        "--limit-mm-per-prompt",
        '{"video": 0}',  # Disabling video encoder saves RAM that you can put towards the KV cache, thanks @charitarthchugh
    ]