diff --git a/README.md b/README.md
index e385e70..23a87a4 100644
--- a/README.md
+++ b/README.md
@@ -260,12 +260,14 @@ export DEEPINFRA_API_KEY="your-api-key-here"
 python -m olmocr.pipeline ./localworkspace \
     --server https://api.deepinfra.com/v1/openai \
     --api_key $DEEPINFRA_API_KEY \
+    --pages_per_group 100 \
     --model allenai/olmOCR-7B-0725-FP8 \
     --markdown \
     --pdfs path/to/your/*.pdf
 ```
 
 - `--server`: DeepInfra's OpenAI-compatible endpoint: `https://api.deepinfra.com/v1/openai`
 - `--api_key`: Your DeepInfra API key
+- `--pages_per_group`: You may want a smaller number of pages per group, as many external providers have lower concurrent request limits
 - `--model`: The model identifier on DeepInfra: `allenai/olmOCR-7B-0725-FP8`
 - Other arguments work the same as with local inference

diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 1c5febb..fb3893f 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -284,6 +284,8 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
             if status_code == 400:
                 raise ValueError(f"Got BadRequestError from server: {response_body}, skipping this response")
+            elif status_code == 429:
+                raise ConnectionError("Too many requests, doing exponential backoff")
             elif status_code == 500:
                 raise ValueError(f"Got InternalServerError from server: {response_body}, skipping this response")
             elif status_code != 200:
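
For context on why the 429 handler raises `ConnectionError` rather than `ValueError`: exceptions of this type are meant to be treated as retryable by the caller, which backs off and retries instead of skipping the page. Below is a minimal sketch of such a retry loop, assuming a caller-side loop around the server request; the function name `query_server` and the backoff constants are illustrative, not the pipeline's actual code.

```python
import asyncio
import random

# Illustrative constants; the pipeline's real retry/backoff settings may differ.
MAX_RETRIES = 6
INITIAL_BACKOFF_SECONDS = 1.0

async def call_with_backoff(query_server, *args):
    """Retry an async server call with exponential backoff on ConnectionError.

    `query_server` stands in for the request function that raises
    ConnectionError on HTTP 429 (as in the diff above); ValueError is
    treated as non-retryable and propagates immediately.
    """
    for attempt in range(MAX_RETRIES):
        try:
            return await query_server(*args)
        except ConnectionError:
            # Transient failure (e.g. 429): wait with jitter, then retry.
            delay = INITIAL_BACKOFF_SECONDS * (2 ** attempt) + random.uniform(0, 1)
            await asyncio.sleep(delay)
    raise RuntimeError(f"Giving up after {MAX_RETRIES} attempts")
```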