diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 1560c3a..50aa4ea 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -269,6 +269,10 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
         # Change temperature as number of attempts increases to overcome repetition issues at expense of quality
         query["temperature"] = TEMPERATURE_BY_ATTEMPT[lookup_attempt]
 
+        # Set a request priority that drops with each attempt (vLLM serves lower values first), so retries finish
+        # sooner and the queue clears faster; this helps in situations where your jobs are preemptible on a cluster
+        query["priority"] = MAX_RETRIES - attempt
+
         # Enable guided decoding regex if needed
         if args.guided_decoding:
             query["guided_regex"] = (
@@ -639,6 +643,8 @@ async def vllm_server_task(model_name_or_path, args, semaphore, unknown_args=Non
         str(args.tensor_parallel_size),
         "--data-parallel-size",
         str(args.data_parallel_size),
+        "--scheduling-policy",
+        "priority",
         "--limit-mm-per-prompt",
         '{"video": 0}',  # Disabling video encoder saves RAM that you can put towards the KV cache, thanks @charitarthchugh
     ]