mirror of https://github.com/allenai/olmocr.git
synced 2025-09-26 17:04:02 +00:00

Convert script work with server backends

This commit is contained in:
parent 87875b3e2f
commit 5cb32c3289
olmocr/bench/convert.py

@@ -74,7 +74,8 @@ async def process_pdfs(config, pdf_directory, data_directory, repeats, force):
             else:
                 # Run synchronous function
                 markdown = method(pdf_path, page_num=1, **kwargs)
-        except:
+        except Exception as ex:
+            print(f"Exception {str(ex)} occurred while processing {base_name}_{i}")
             markdown = None

         if markdown is None:
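
Still in convert.py: the "else" branch above implies a coroutine check just before it. A minimal sketch of that dispatch, assuming process_pdfs distinguishes async runners with asyncio.iscoroutinefunction (the helper name below is illustrative, not code from the commit):

import asyncio

async def call_method(method, pdf_path, base_name, i, **kwargs):
    # Await coroutine runners (run_server, run_olmocr); call sync runners directly.
    try:
        if asyncio.iscoroutinefunction(method):
            markdown = await method(pdf_path, page_num=1, **kwargs)
        else:
            # Run synchronous function
            markdown = method(pdf_path, page_num=1, **kwargs)
    except Exception as ex:
        print(f"Exception {str(ex)} occurred while processing {base_name}_{i}")
        markdown = None
    return markdown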
@@ -106,6 +107,7 @@ if __name__ == "__main__":
         "marker": ("olmocr.bench.runners.run_marker", "run_marker"),
         "mineru": ("olmocr.bench.runners.run_mineru", "run_mineru"),
         "chatgpt": ("olmocr.bench.runners.run_chatgpt", "run_chatgpt"),
+        "server": ("olmocr.bench.runners.run_server", "run_server"),
     }

     # Build config by importing only requested methods.
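
Runner-specific options reach these registry entries from the command line as colon-separated method:key=value specs (see the server invocations in the shell script below). A hedged sketch of that parsing; parse_method_arg is a hypothetical name, and the real code in convert.py may coerce types differently:

def parse_method_arg(arg: str):
    # "server:temperature=0.1:response_template=json" ->
    #   ("server", {"temperature": 0.1, "response_template": "json"})
    method, *pairs = arg.split(":")
    kwargs = {}
    for pair in pairs:
        key, value = pair.split("=", 1)
        try:
            kwargs[key] = float(value)   # numeric options such as temperature
        except ValueError:
            kwargs[key] = value          # everything else stays a string
    return method, kwargs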
olmocr/bench/runners/run_chatgpt.py

@@ -11,13 +11,11 @@ from olmocr.prompts.prompts import (
     openai_response_format_schema,
 )


 def run_chatgpt(pdf_path: str, page_num: int = 1, model: str = "gpt-4o-2024-08-06", temperature: float = 0.1) -> str:
     """
-    Convert page of a PDF file to markdown using GOT-OCR.
+    Convert a page of a PDF file to markdown using the commercial OpenAI APIs.

-    This function renders the first page of the PDF to an image, runs OCR on that image,
-    and returns the OCR result as a markdown-formatted string.
+    See run_server.py for running against an OpenAI-compatible server.

     Args:
         pdf_path (str): The local path to the PDF file.
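
A quick usage sketch for the function documented above; example.pdf is a placeholder, and the OpenAI client is assumed to pick up an OPENAI_API_KEY from the environment:

from olmocr.bench.runners.run_chatgpt import run_chatgpt

# Synchronous call: convert page 1 with the default gpt-4o snapshot.
markdown = run_chatgpt("example.pdf", page_num=1, temperature=0.1)
print(markdown)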
olmocr/bench/runners/run_olmocr.py

@@ -32,7 +32,10 @@ class Args:

 async def run_olmocr(pdf_path: str, page_num: int = 1, temperature: float = 0.8) -> str:
     """
-    Process a single page of a PDF using the olmocr pipeline.
+    Process a single page of a PDF using the official olmocr pipeline, as in pipeline.py.
+
+    The idea is that this is called exactly as it is in the pipeline script, so we can do comparisons.
+    This method is slow, though, because it only makes one request at a time.

     Args:
         pdf_path: Path to the PDF file
olmocr/bench/runners/run_server.py (new file)

import json
from typing import Literal

import httpx

from olmocr.data.renderpdf import render_pdf_to_base64png
from olmocr.prompts.anchor import get_anchor_text
from olmocr.prompts.prompts import (
    PageResponse,
    build_openai_silver_data_prompt,
    build_finetuning_prompt,
)


async def run_server(pdf_path: str, page_num: int = 1, server: str = "localhost:30000", model: str = "allenai/olmOCR-7B-0225-preview",
                     temperature: float = 0.1, target_longest_image_dim: int = 1024,
                     prompt_template: Literal["full", "finetune"] = "finetune",
                     response_template: Literal["plain", "json"] = "json") -> str:
    """
    Convert a page of a PDF file to markdown by making a request
    against an OpenAI-compatible server.

    You can use this for running against vllm or sglang servers,
    as well as for mixing and matching different models.

    It makes only one direct request, with no retries or error checking.

    Returns:
        str: The OCR result in markdown format.
    """
    # Convert the requested page of the PDF to a base64-encoded PNG image.
    image_base64 = render_pdf_to_base64png(pdf_path, page_num=page_num, target_longest_image_dim=target_longest_image_dim)
    anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport")

    if prompt_template == "full":
        prompt = build_openai_silver_data_prompt(anchor_text)
    else:
        prompt = build_finetuning_prompt(anchor_text)

    request = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
                ],
            }
        ],
        "temperature": temperature,
        "max_tokens": 3000,
    }

    # Make the request and read the response using httpx
    url = f"http://{server}/v1/chat/completions"

    async with httpx.AsyncClient(timeout=300) as client:
        response = await client.post(url, json=request)
        data = response.json()

        choice = data["choices"][0]
        assert choice["finish_reason"] == "stop", "Response from server did not finish with finish_reason stop as expected, this is probably going to lead to bad data"

        if response_template == "json":
            # The model returns a JSON document; parse it into a PageResponse.
            page_data = json.loads(choice["message"]["content"])
            page_response = PageResponse(**page_data)
            return page_response.natural_text
        elif response_template == "plain":
            return choice["message"]["content"]
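
A hedged usage sketch for the new runner. It assumes an OpenAI-compatible server, such as the sglang instance launched by the script below, is already listening on localhost:30000; example.pdf is a placeholder:

import asyncio

from olmocr.bench.runners.run_server import run_server

async def main():
    # One page, one request, no retries; server and model must already be up.
    markdown = await run_server(
        "example.pdf",
        page_num=1,
        server="localhost:30000",
        model="allenai/olmOCR-7B-0225-preview",
        response_template="json",
    )
    print(markdown)

asyncio.run(main())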
Benchmark shell script:

@@ -1,7 +1,33 @@
 #!/bin/bash

+# Exit on error but allow the trap to execute
 set -e

+# Global variable to track server PID
+SERVER_PID=""
+
+# Trap function to handle Ctrl+C (SIGINT)
+cleanup() {
+    echo -e "\n[INFO] Received interrupt signal. Cleaning up..."
+
+    # Find and kill any Python processes started by this script
+    echo "[INFO] Stopping any running Python processes"
+    pkill -P $$ python || true
+
+    # Stop sglang server if running
+    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
+        echo "[INFO] Stopping sglang server (PID: $SERVER_PID)"
+        kill -TERM "$SERVER_PID" 2>/dev/null || true
+        wait "$SERVER_PID" 2>/dev/null || true
+    fi
+
+    echo "[INFO] Cleanup complete. Exiting."
+    exit 1
+}
+
+# Set the trap for SIGINT (Ctrl+C)
+trap cleanup SIGINT
+
 # Function to create conda environment if it doesn't exist
 create_conda_env() {
     env_name=$1
@@ -16,10 +42,62 @@ create_conda_env() {
     fi
 }

-# # Create and activate olmocr environment
-# create_conda_env "olmocr" "3.11"
-# source $(conda info --base)/etc/profile.d/conda.sh
-# source activate olmocr
+# Function to start an sglang server with an OpenAI-compatible API for a specific model
+start_sglang_server() {
+    model_name=$1
+    echo "Starting sglang server for model: $model_name"
+
+    # Start the server in the background and save the PID
+    python -m sglang.launch_server --model $model_name --chat-template qwen2-vl &
+    SERVER_PID=$!
+
+    # Check if the server process is running
+    if ! kill -0 $SERVER_PID 2>/dev/null; then
+        echo "Failed to start server process. Exiting."
+        exit 1
+    fi
+
+    # Wait for the server to be ready by checking the models endpoint
+    echo "Waiting for server to be ready..."
+    max_attempts=300
+    attempt=0
+
+    while [ $attempt -lt $max_attempts ]; do
+        # Try to reach the models endpoint
+        if curl -s "http://localhost:30000/v1/models" \
+            -o /dev/null -w "%{http_code}" | grep -q "200"; then
+            echo "Server is ready!"
+            return 0
+        fi
+
+        attempt=$((attempt + 1))
+        echo "Waiting for server... attempt $attempt/$max_attempts"
+        sleep 2
+    done
+
+    echo "Server failed to become ready after multiple attempts. Exiting."
+    kill $SERVER_PID
+    SERVER_PID=""
+    exit 1
+}
+
+# Function to stop the sglang server
+stop_sglang_server() {
+    echo "Stopping sglang server with PID: $SERVER_PID"
+    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
+        kill $SERVER_PID
+        wait $SERVER_PID 2>/dev/null || true
+        echo "Server stopped."
+    else
+        echo "No server to stop."
+    fi
+    SERVER_PID=""
+}
+
+# Create and activate olmocr environment
+create_conda_env "olmocr" "3.11"
+source $(conda info --base)/etc/profile.d/conda.sh
+source activate olmocr

 # # Run olmocr benchmarks
 # echo "Running olmocr benchmarks..."
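
For comparison, the same readiness poll can be written against the httpx dependency that run_server.py already uses. This is a sketch, not part of the commit:

import time

import httpx

def wait_for_server(base_url: str = "http://localhost:30000", max_attempts: int = 300) -> bool:
    # Poll the OpenAI-compatible /v1/models endpoint until it answers 200.
    for attempt in range(1, max_attempts + 1):
        try:
            if httpx.get(f"{base_url}/v1/models", timeout=2).status_code == 200:
                return True
        except httpx.HTTPError:
            pass  # server not accepting connections yet
        print(f"Waiting for server... attempt {attempt}/{max_attempts}")
        time.sleep(2)
    return False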
@@ -39,9 +117,28 @@ create_conda_env() {
 # echo "Running chatgpt benchmarks..."
 # python -m olmocr.bench.convert chatgpt

+# Run raw server benchmarks with an sglang server.
+# For each model: start the server, run the benchmark, then stop the server.
+
+# olmocr_base_temp0_1
+start_sglang_server "allenai/olmOCR-7B-0225-preview"
+python -m olmocr.bench.convert server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:response_template=json --repeats 5
+python -m olmocr.bench.convert server:name=olmocr_base_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:response_template=json --repeats 5
+stop_sglang_server
+
+# qwen2_vl_7b
+start_sglang_server "Qwen/Qwen2-VL-7B-Instruct"
+python -m olmocr.bench.convert server:name=qwen2_vl_7b:model=Qwen/Qwen2-VL-7B-Instruct:temperature=0.1:response_template=plain --repeats 5
+stop_sglang_server
+
+# qwen25_vl_7b
+start_sglang_server "Qwen/Qwen2.5-VL-7B-Instruct"
+python -m olmocr.bench.convert server:name=qwen25_vl_7b:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:response_template=plain --repeats 5
+stop_sglang_server
+
 # Create and activate mineru environment
-create_conda_env "mineru" "3.11"
-source activate mineru
+# create_conda_env "mineru" "3.11"
+# source activate mineru

 # Install magic-pdf and run benchmarks
 # TODO: Fix this, I was not able to get it all to install successfully
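
To make the server:... specs above concrete, this is roughly how the qwen2_vl_7b invocation decomposes with plain string splitting; convert.py itself may coerce temperature to a float and consume name for output naming:

spec = "server:name=qwen2_vl_7b:model=Qwen/Qwen2-VL-7B-Instruct:temperature=0.1:response_template=plain"
method, *pairs = spec.split(":")
kwargs = dict(pair.split("=", 1) for pair in pairs)
print(method)  # server
print(kwargs)  # {'name': 'qwen2_vl_7b', 'model': 'Qwen/Qwen2-VL-7B-Instruct',
               #  'temperature': '0.1', 'response_template': 'plain'}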
@@ -53,4 +150,9 @@ source activate mineru
 # python download_models_hf.py
 # python -m olmocr.bench.convert mineru

+# Final cleanup
+if [ -n "$SERVER_PID" ] && kill -0 $SERVER_PID 2>/dev/null; then
+    stop_sglang_server
+fi
+
 echo "All benchmarks completed successfully."