diff --git a/olmocr/bench/runners/run_server.py b/olmocr/bench/runners/run_server.py
index 4425368..d2646b5 100644
--- a/olmocr/bench/runners/run_server.py
+++ b/olmocr/bench/runners/run_server.py
@@ -22,6 +22,7 @@ async def run_server(
     target_longest_image_dim: int = 1024,
     prompt_template: Literal["full", "basic", "finetune"] = "finetune",
     response_template: Literal["plain", "json"] = "json",
+    prompt_image_first: bool = False,
 ) -> str:
     """
     Convert page of a PDF file to markdown by calling a request
@@ -48,20 +49,36 @@ async def run_server(
     else:
         raise ValueError("Unknown prompt template")
 
-    request = {
-        "model": model,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-                ],
-            }
-        ],
-        "temperature": temperature,
-        "max_tokens": 3000,
-    }
+    if prompt_image_first:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                        {"type": "text", "text": prompt},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }
+    else:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }
 
     # Make request and get response using httpx
     url = f"http://{server}/v1/chat/completions"
diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index e14e6bb..a884458 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -138,8 +138,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                 ],
             }
         ],
@@ -500,7 +500,7 @@ async def worker(args, work_queue: WorkQueue, semaphore, worker_id):
 async def sglang_server_task(model_name_or_path, args, semaphore):
     # Check GPU memory, lower mem devices need a bit less KV cache space because the VLM takes additional memory
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # Convert to GB
-    mem_fraction_arg = ["--mem-fraction-static", "0.80"] if gpu_memory < 60 else []
+    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []
 
     cmd = [
         "python3",
diff --git a/olmocr/prompts/prompts.py b/olmocr/prompts/prompts.py
index d5dff9b..84899a9 100644
--- a/olmocr/prompts/prompts.py
+++ b/olmocr/prompts/prompts.py
@@ -98,7 +98,7 @@ def openai_response_format_schema() -> dict:
 
 # This is a base prompt that will be used for training and running the fine tuned model
 # It's simplified from the prompt which was used to generate the silver data, and can change from dataset to dataset
-def build_finetuning_prompt_qwen2(base_text: str) -> str:
+def build_finetuning_prompt(base_text: str) -> str:
     return (
         f"Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. "
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
@@ -107,11 +107,11 @@ def build_finetuning_prompt_qwen2(base_text: str) -> str:
     )
 
 # This is the new fine tuning prompt we are trying for qwen2.5 vl
-def build_finetuning_prompt(base_text: str) -> str:
-    return (
-        f"Below is the image of one page of a document. "
-        f"Just return the plain text representation of this document as if you were reading it naturally.\n"
-    )
+# def build_finetuning_prompt(base_text: str) -> str:
+#     return (
+#         f"Below is the image of one page of a document. "
+#         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
+#     )
 
 
 # Extracts the anchor text component from an existing prompt string