mirror of https://github.com/allenai/olmocr.git (synced 2025-09-26 17:04:02 +00:00)
Convert script work with server backends
parent 87875b3e2f
commit 5cb32c3289
@@ -74,7 +74,8 @@ async def process_pdfs(config, pdf_directory, data_directory, repeats, force):
             else:
                 # Run synchronous function
                 markdown = method(pdf_path, page_num=1, **kwargs)
-        except:
+        except Exception as ex:
+            print(f"Exception {str(ex)} occurred while processing {base_name}_{i}")
             markdown = None
 
         if markdown is None:
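For context, the except block above sits inside a dispatch that awaits async runners and calls synchronous ones directly. A minimal sketch of that shape (assumed from the hunk, not the repo's exact code):

import inspect

async def call_runner(method, pdf_path, base_name, i, **kwargs):
    # Hypothetical helper; only the else/except lines mirror the diff above.
    markdown = None
    try:
        if inspect.iscoroutinefunction(method):
            # Await async runners such as run_server
            markdown = await method(pdf_path, page_num=1, **kwargs)
        else:
            # Run synchronous function
            markdown = method(pdf_path, page_num=1, **kwargs)
    except Exception as ex:
        # The change in this commit: report the failure instead of silently passing
        print(f"Exception {str(ex)} occurred while processing {base_name}_{i}")
        markdown = None
    return markdown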
@@ -106,6 +107,7 @@ if __name__ == "__main__":
         "marker": ("olmocr.bench.runners.run_marker", "run_marker"),
         "mineru": ("olmocr.bench.runners.run_mineru", "run_mineru"),
         "chatgpt": ("olmocr.bench.runners.run_chatgpt", "run_chatgpt"),
+        "server": ("olmocr.bench.runners.run_server", "run_server"),
     }
 
     # Build config by importing only requested methods.
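The methods table maps each backend name to a (module path, function name) pair so that only the requested runner's module is imported, keeping heavy optional dependencies out of unrelated runs. A minimal sketch of that lazy-loading pattern (an assumption about the mechanism, not the repo's exact code):

import importlib

METHODS = {
    "server": ("olmocr.bench.runners.run_server", "run_server"),
    "chatgpt": ("olmocr.bench.runners.run_chatgpt", "run_chatgpt"),
}

def load_method(name: str):
    # Import the runner module only when that method is requested,
    # so e.g. marker/mineru dependencies are not needed to benchmark "server".
    module_path, function_name = METHODS[name]
    module = importlib.import_module(module_path)
    return getattr(module, function_name)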
@@ -11,13 +11,11 @@ from olmocr.prompts.prompts import (
     openai_response_format_schema,
 )
 
 
 def run_chatgpt(pdf_path: str, page_num: int = 1, model: str = "gpt-4o-2024-08-06", temperature: float = 0.1) -> str:
     """
-    Convert page of a PDF file to markdown using GOT-OCR.
+    Convert a page of a PDF file to markdown using the commercial OpenAI APIs.
 
-    This function renders the first page of the PDF to an image, runs OCR on that image,
-    and returns the OCR result as a markdown-formatted string.
+    See run_server.py for running against an OpenAI-compatible server.
 
     Args:
         pdf_path (str): The local path to the PDF file.
@@ -32,7 +32,10 @@ class Args:
 
 
 async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8) -> str:
     """
-    Process a single page of a PDF using the olmocr pipeline.
+    Process a single page of a PDF using the official olmocr pipeline, as in pipeline.py.
+
+    The idea is that this gets called exactly as it would be in the pipeline script, so we can do comparisons.
+    This method is slow, though, because it only makes one request at a time.
 
     Args:
         pdf_path: Path to the PDF file
olmocr/bench/runners/run_server.py (new file, 74 lines)
@@ -0,0 +1,74 @@
import json
import os
from typing import Literal

import httpx

from olmocr.data.renderpdf import render_pdf_to_base64png
from olmocr.prompts.anchor import get_anchor_text
from olmocr.prompts.prompts import (
    PageResponse,
    build_openai_silver_data_prompt,
    build_finetuning_prompt,
)


async def run_server(pdf_path: str, page_num: int = 1, server: str = "localhost:30000", model: str = "allenai/olmOCR-7B-0225-preview",
                     temperature: float = 0.1, target_longest_image_dim: int = 1024,
                     prompt_template: Literal["full", "finetune"] = "finetune",
                     response_template: Literal["plain", "json"] = "json") -> str:
    """
    Convert a page of a PDF file to markdown by making a request
    to an OpenAI-compatible server.

    You can use this for running against vllm or sglang servers,
    as well as for mixing and matching different models.

    It will only make one direct request, with no retries or error checking.

    Returns:
        str: The OCR result in markdown format.
    """
    # Convert the requested page of the PDF to a base64-encoded PNG image.
    image_base64 = render_pdf_to_base64png(pdf_path, page_num=page_num, target_longest_image_dim=target_longest_image_dim)
    anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport")

    if prompt_template == "full":
        prompt = build_openai_silver_data_prompt(anchor_text)
    else:
        prompt = build_finetuning_prompt(anchor_text)

    request = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
                ],
            }
        ],
        "temperature": temperature,
        "max_tokens": 3000,
    }

    # Make the request and get the response using httpx
    url = f"http://{server}/v1/chat/completions"

    async with httpx.AsyncClient(timeout=300) as client:
        response = await client.post(url, json=request)

    # Debug output
    print(response.status_code)
    data = response.json()

    print(data)
    choice = data["choices"][0]
    print(choice)
    assert choice["finish_reason"] == "stop", "Response from server did not finish with finish_reason stop as expected, this is probably going to lead to bad data"

    if response_template == "json":
        content = choice["message"]["content"]
        page_data = json.loads(content)
        page_response = PageResponse(**page_data)
        return page_response.natural_text
    elif response_template == "plain":
        return choice["message"]["content"]
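With an OpenAI-compatible server already listening (for example, the sglang server started in the benchmark script below), a call might look like the following usage sketch; the PDF path is a placeholder:

import asyncio

from olmocr.bench.runners.run_server import run_server

# "sample.pdf" is a placeholder; assumes a server is already up on localhost:30000.
markdown = asyncio.run(
    run_server(
        "sample.pdf",
        page_num=1,
        server="localhost:30000",
        model="allenai/olmOCR-7B-0225-preview",
        temperature=0.1,
        response_template="json",
    )
)
print(markdown)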
@@ -1,7 +1,33 @@
 #!/bin/bash
 
+# Exit on error but allow the trap to execute
 set -e
+
+# Global variable to track server PID
+SERVER_PID=""
+
+# Trap function to handle Ctrl+C (SIGINT)
+cleanup() {
+    echo -e "\n[INFO] Received interrupt signal. Cleaning up..."
+
+    # Find and kill any Python processes started by this script
+    echo "[INFO] Stopping any running Python processes"
+    pkill -P $$ python || true
+
+    # Stop sglang server if running
+    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
+        echo "[INFO] Stopping sglang server (PID: $SERVER_PID)"
+        kill -TERM "$SERVER_PID" 2>/dev/null || true
+        wait "$SERVER_PID" 2>/dev/null || true
+    fi
+
+    echo "[INFO] Cleanup complete. Exiting."
+    exit 1
+}
+
+# Set the trap for SIGINT (Ctrl+C)
+trap cleanup SIGINT
+
 # Function to create conda environment if it doesn't exist
 create_conda_env() {
     env_name=$1
@@ -16,10 +42,62 @@ create_conda_env() {
     fi
 }
 
-# # Create and activate olmocr environment
-# create_conda_env "olmocr" "3.11"
-# source $(conda info --base)/etc/profile.d/conda.sh
-# source activate olmocr
+# Function to start sglang server with OpenAI API for a specific model
+start_sglang_server() {
+    model_name=$1
+    echo "Starting sglang server for model: $model_name"
+
+    # Start the server in the background and save the PID
+    python -m sglang.launch_server --model $model_name --chat-template qwen2-vl &
+    SERVER_PID=$!
+
+    # Check if the server process is running
+    if ! kill -0 $SERVER_PID 2>/dev/null; then
+        echo "Failed to start server process. Exiting."
+        exit 1
+    fi
+
+    # Wait for the server to be ready by checking the models endpoint
+    echo "Waiting for server to be ready..."
+    max_attempts=300
+    attempt=0
+
+    while [ $attempt -lt $max_attempts ]; do
+        # Try to reach the models endpoint
+        if curl -s "http://localhost:30000/v1/models" \
+            -o /dev/null -w "%{http_code}" | grep -q "200"; then
+            echo "Server is ready!"
+            return 0
+        fi
+
+        attempt=$((attempt + 1))
+        echo "Waiting for server... attempt $attempt/$max_attempts"
+        sleep 2
+    done
+
+    echo "Server failed to become ready after multiple attempts. Exiting."
+    kill $SERVER_PID
+    SERVER_PID=""
+    exit 1
+}
+
+# Function to stop the sglang server
+stop_sglang_server() {
+    echo "Stopping sglang server with PID: $SERVER_PID"
+    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
+        kill $SERVER_PID
+        wait $SERVER_PID 2>/dev/null || true
+        echo "Server stopped."
+    else
+        echo "No server to stop."
+    fi
+    SERVER_PID=""
+}
+
+# Create and activate olmocr environment
+create_conda_env "olmocr" "3.11"
+source $(conda info --base)/etc/profile.d/conda.sh
+source activate olmocr
 
 # # Run olmocr benchmarks
 # echo "Running olmocr benchmarks..."
@@ -39,9 +117,28 @@ create_conda_env() {
 # echo "Running chatgpt benchmarks..."
 # python -m olmocr.bench.convert chatgpt
 
+# Run raw server benchmarks with sglang server
+# For each model: start server, run benchmark, then stop server
+
+# olmocr_base_temp0_1 and olmocr_base_temp0_8
+start_sglang_server "allenai/olmOCR-7B-0225-preview"
+python -m olmocr.bench.convert server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:response_template=json --repeats 5
+python -m olmocr.bench.convert server:name=olmocr_base_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:response_template=json --repeats 5
+stop_sglang_server
+
+# qwen2_vl_7b
+start_sglang_server "Qwen/Qwen2-VL-7B-Instruct"
+python -m olmocr.bench.convert server:name=qwen2_vl_7b:model=Qwen/Qwen2-VL-7B-Instruct:temperature=0.1:response_template=plain --repeats 5
+stop_sglang_server
+
+# qwen25_vl_7b
+start_sglang_server "Qwen/Qwen2.5-VL-7B-Instruct"
+python -m olmocr.bench.convert server:name=qwen25_vl_7b:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:response_template=plain --repeats 5
+stop_sglang_server
+
 # Create and activate mineru environment
-create_conda_env "mineru" "3.11"
-source activate mineru
+# create_conda_env "mineru" "3.11"
+# source activate mineru
 
 # Install magic-pdf and run benchmarks
 # TODO: Fix this, I was not able to get it all to install successfully
@@ -53,4 +150,9 @@ source activate mineru
 # python download_models_hf.py
 # python -m olmocr.bench.convert mineru
 
+# Final cleanup
+if [ -n "$SERVER_PID" ] && kill -0 $SERVER_PID 2>/dev/null; then
+    stop_sglang_server
+fi
+
 echo "All benchmarks completed successfully."