#!/bin/bash
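#
# olmocr/olmocr/bench/scripts/convert_all.sh
#
# Runs the full olmOCR benchmark suite: the olmocr pipeline itself, then the
# marker, chatgpt, gemini, and mistral converters, and finally raw sglang and
# vllm server benchmarks of allenai/olmOCR-7B-0225-preview across a sweep of
# sampling temperatures.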

# Exit on error but allow the trap to execute
set -e

# Global variable to track the server PID
SERVER_PID=""

# Trap function to handle Ctrl+C (SIGINT)
cleanup() {
    echo -e "\n[INFO] Received interrupt signal. Cleaning up..."

    # Find and kill any Python processes started by this script
    echo "[INFO] Stopping any running Python processes"
    pkill -P $$ python || true

    # Stop the server if it is running
    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "[INFO] Stopping server (PID: $SERVER_PID)"
        kill -TERM "$SERVER_PID" 2>/dev/null || true
        wait "$SERVER_PID" 2>/dev/null || true
    fi

    # Also kill any stray vllm processes
    pkill vllm || true

    echo "[INFO] Cleanup complete. Exiting."
    exit 1
}

# Set the trap for SIGINT (Ctrl+C)
trap cleanup SIGINT

# Function to check if port 30000 is in use
check_port() {
    port=30000
    echo "[INFO] Checking if port $port is available..."
    if command -v lsof >/dev/null 2>&1; then
        # Linux/macOS
        if lsof -i :$port >/dev/null 2>&1; then
            echo "[ERROR] Port $port is already in use. Process details:"
            lsof -i :$port
            echo "[ERROR] Please stop the process using this port and try again."
            echo "        You can use: kill -9 <PID>"
            return 1
        fi
    elif command -v netstat >/dev/null 2>&1; then
        # Windows/other systems with netstat
        if netstat -an | grep -q ":$port "; then
            echo "[ERROR] Port $port is already in use. Process details:"
            if command -v findstr >/dev/null 2>&1; then
                # Windows
                netstat -ano | findstr ":$port"
                echo "[ERROR] Please stop the process using this port and try again."
                echo "        You can use: taskkill /F /PID <PID>"
            else
                netstat -an | grep ":$port "
                echo "[ERROR] Please stop the process using this port and try again."
            fi
            return 1
        fi
    else
        # Fallback method using nc if available
        if command -v nc >/dev/null 2>&1; then
            if nc -z localhost $port >/dev/null 2>&1; then
                echo "[ERROR] Port $port is already in use, but cannot determine which process."
                echo "[ERROR] Please ensure port $port is available before continuing."
                return 1
            fi
        else
            echo "[WARNING] Cannot check if port $port is in use (neither lsof, netstat, nor nc available)."
            echo "[WARNING] Continuing anyway, but this might fail if the port is already in use."
            return 0
        fi
    fi
    echo "[INFO] Port $port is available."
    return 0
}

# Function to create a conda environment if it doesn't exist
create_conda_env() {
    env_name=$1
    python_version=$2

    # Check if the environment already exists
    if conda info --envs | grep -q "^$env_name "; then
        echo "Environment $env_name already exists, using it."
    else
        echo "Creating conda environment: $env_name"
        conda create -y -n "$env_name" python="$python_version"
    fi
}
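
# Usage: create_conda_env <env_name> <python_version>
# e.g. create_conda_env "olmocr" "3.11"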

# Generic function to start a server (either vllm or sglang)
start_server() {
    server_type=$1
    model_name=$2
    shift 2 # Remove server type and model name from the argument list

    echo "Starting $server_type server for model: $model_name"
    echo "Additional arguments: $*"

    if [ "$server_type" = "sglang" ]; then
        python -m sglang.launch_server --port 30000 --model "$model_name" "$@" &
    elif [ "$server_type" = "vllm" ]; then
        vllm serve "$model_name" --port 30000 "$@" &
    else
        echo "Unsupported server type: $server_type"
        exit 1
    fi
    SERVER_PID=$!

    # Check that the server process actually started
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "Failed to start server process. Exiting."
        exit 1
    fi

    # Wait for the server to be ready by polling the models endpoint
    echo "Waiting for server to be ready..."
    max_attempts=300
    attempt=0
    while [ $attempt -lt $max_attempts ]; do
        if curl -s "http://localhost:30000/v1/models" -o /dev/null -w "%{http_code}" | grep -q "200"; then
            echo "Server is ready!"
            return 0
        fi
        attempt=$((attempt + 1))
        echo "Waiting for server... attempt $attempt/$max_attempts"
        sleep 2
    done

    echo "Server failed to become ready after $max_attempts attempts. Exiting."
    kill "$SERVER_PID"
    SERVER_PID=""
    exit 1
}

# Function to stop the server
stop_server() {
    echo "Stopping server with PID: $SERVER_PID"
    if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
        kill "$SERVER_PID"
        wait "$SERVER_PID" 2>/dev/null || true
        echo "Server stopped."
    else
        echo "No server to stop."
    fi
    SERVER_PID=""
}
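
# Typical pattern for the raw server benchmarks below:
#   start_server <sglang|vllm> <model> [extra server args...]
#   python -m olmocr.bench.convert server:...   # one or more runs
#   stop_server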

# Create and activate the olmocr environment
create_conda_env "olmocr" "3.11"
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate olmocr

# Run olmocr benchmarks, exactly as pipeline.py does it
echo "Running olmocr benchmarks..."
python -m olmocr.bench.convert olmocr_pipeline --repeats 5

# Install marker-pdf and run benchmarks
echo "Installing marker-pdf and running benchmarks..."
pip install marker-pdf==1.6.1
python -m olmocr.bench.convert marker

# Install verovio and run benchmarks
# echo "Installing verovio and running benchmarks..."
# pip install verovio
# python -m olmocr.bench.convert gotocr

# Run chatgpt benchmarks
echo "Running chatgpt benchmarks..."
python -m olmocr.bench.convert chatgpt
# python -m olmocr.bench.convert chatgpt:name=chatgpt45:model=gpt-4.5-preview-2025-02-27

# Run gemini benchmarks
echo "Running gemini benchmarks..."
python -m olmocr.bench.convert gemini:name=gemini_flash2:model=gemini-2.0-flash --parallel 4

# Run mistral benchmarks
echo "Running mistral benchmarks..."
pip install mistralai
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data mistral

# Run raw server benchmarks with the generic server function:
# for each model, start the server, run the benchmarks, then stop the server.

# Check port availability before starting any server
check_port || exit 1

# olmocr baseline temperature sweep using the sglang server
start_server sglang "allenai/olmOCR-7B-0225-preview" --chat-template qwen2-vl --mem-fraction-static 0.7
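
# Each server:... spec sets the output name, model, sampling temperature, and
# prompt/response templates; the name encodes the temperature
# (e.g. olmocr_base_temp0_1 runs with temperature=0.1).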
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_0:model=allenai/olmOCR-7B-0225-preview:temperature=0.0:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_2:model=allenai/olmOCR-7B-0225-preview:temperature=0.2:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_3:model=allenai/olmOCR-7B-0225-preview:temperature=0.3:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_4:model=allenai/olmOCR-7B-0225-preview:temperature=0.4:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_5:model=allenai/olmOCR-7B-0225-preview:temperature=0.5:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_6:model=allenai/olmOCR-7B-0225-preview:temperature=0.6:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert server:name=olmocr_base_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
stop_server

# Same model and sweep, served through vllm
start_server vllm "allenai/olmOCR-7B-0225-preview"
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
stop_server

# Feel free to enable if you want:
# qwen2_vl_7b using the sglang server
# start_server sglang "Qwen/Qwen2-VL-7B-Instruct" --chat-template qwen2-vl --mem-fraction-static 0.7
# python -m olmocr.bench.convert server:name=qwen2_vl_7b:model=Qwen/Qwen2-VL-7B-Instruct:temperature=0.1:prompt_template=full:response_template=plain --repeats 5 --parallel 50
# stop_server

# TODO: qwen2.5 not working right now in sglang
# qwen25_vl_7b
# create_conda_env "qwen25" "3.11"
# conda activate qwen25
# pip install olmocr
# pip install "sglang[all]>=0.4.3.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python transformers==4.48.3
# start_server sglang "Qwen/Qwen2.5-VL-7B-Instruct" --chat-template qwen2-vl --mem-fraction-static 0.7
# python -m olmocr.bench.convert server:name=qwen25_vl_7b:model=Qwen/Qwen2.5-VL-7B-Instruct:temperature=0.1:prompt_template=full:response_template=plain --repeats 5 --parallel 50
# stop_server

# TODO: Fix this, I was not able to get it all to install successfully
# Create and activate the mineru environment
# create_conda_env "mineru" "3.11"
# conda activate mineru

# Install magic-pdf and run benchmarks
# echo "Installing magic-pdf and running mineru benchmarks..."
# pip install -U "magic-pdf[full]==1.2.2" --extra-index-url https://wheels.myhloli.com
# python -m pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
# pip install huggingface_hub Pillow paddleocr ultralytics doclayout-yolo pycocotools
# wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models_hf.py
# python download_models_hf.py
# python -m olmocr.bench.convert mineru

# Final cleanup in case a server is still running
if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
    stop_server
fi

echo "All benchmarks completed successfully."