From 95b03a1df0e16b7bf39ce1020acfb7d7a960c584 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Mon, 7 Apr 2025 10:20:58 -0700
Subject: [PATCH 1/2] Fixing gemini convert script to use new API

---
 olmocr/bench/runners/run_gemini.py  | 54 +++++++----------------
 olmocr/bench/scripts/convert_all.sh | 46 ++++++++++++------------
 2 files changed, 35 insertions(+), 65 deletions(-)

diff --git a/olmocr/bench/runners/run_gemini.py b/olmocr/bench/runners/run_gemini.py
index 03e022a..43d88d8 100644
--- a/olmocr/bench/runners/run_gemini.py
+++ b/olmocr/bench/runners/run_gemini.py
@@ -1,8 +1,8 @@
 import base64
 import os
 
-from google.ai import generativelanguage as glm
-from google.api_core import client_options
+from google import genai
+from google.genai import types
 
 from olmocr.data.renderpdf import render_pdf_to_base64png
 from olmocr.prompts.anchor import get_anchor_text
@@ -30,56 +30,26 @@ def run_gemini(pdf_path: str, page_num: int = 1, model: str = "gemini-2.0-flash"
     image_base64 = render_pdf_to_base64png(pdf_path, page_num=page_num, target_longest_image_dim=2048)
     anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport")
     api_key = os.getenv("GEMINI_API_KEY")
-    client = glm.GenerativeServiceClient(
-        client_options=client_options.ClientOptions(
-            api_key=api_key,
-        ),
-    )
+    client = genai.Client(api_key=api_key)
+    image_part = types.Part(inline_data=types.Blob(mime_type="image/png", data=base64.b64decode(image_base64)))
 
-    image_part = glm.Part(inline_data=glm.Blob(mime_type="image/png", data=base64.b64decode(image_base64)))
-
-    text_part = glm.Part(text=f"""{build_openai_silver_data_prompt(anchor_text)}""")
-    generation_config = glm.GenerationConfig(
+    text_part = types.Part(text=f"""{build_openai_silver_data_prompt(anchor_text)}""")
+    generation_config = types.GenerateContentConfig(
         temperature=temperature,
         top_p=1.0,
         top_k=32,
         max_output_tokens=4096,
 
+    )
-    # response_schema = gemini_response_format_schema()
-    request = glm.GenerateContentRequest(
+
+    response = client.models.generate_content(
         model=f"models/{model}",
-        contents=[glm.Content(parts=[image_part, text_part])],
-        generation_config=generation_config,
+        contents=[types.Content(parts=[image_part, text_part])],
+        config=generation_config,
     )
-    # request = glm.GenerateContentRequest(
-    #     model=f"models/{model}",
-    #     contents=[glm.Content(parts=[image_part, text_part])],
-    #     generation_config=generation_config,
-    #     tools=[
-    #         glm.Tool(
-    #             function_declarations=[
-    #                 glm.FunctionDeclaration(
-    #                     name="page_response",
-    #                     parameters=response_schema
-    #                 )
-    #             ]
-    #         )
-    #     ],
-    #     tool_config=glm.ToolConfig(
-    #         function_calling_config=glm.FunctionCallingConfig(
-    #             mode="any",
-    #             allowed_function_names=["page_response"]
-    #         )
-    #     )
-    # )
-
-    response = client.generate_content(request)
-
     assert len(response.candidates) > 0, "No candidates found"
-    assert (
-        response.candidates[0].finish_reason == glm.Candidate.FinishReason.STOP
-    ), "Finish reason was not STOP, likely a processing error or repetition failure"
+    assert response.candidates[0].finish_reason == types.FinishReason.STOP, "Finish reason was not STOP, likely a processing error or repetition failure"
 
     result = response.candidates[0].content.parts[0].text
 
     return result
diff --git a/olmocr/bench/scripts/convert_all.sh b/olmocr/bench/scripts/convert_all.sh
index 586a451..616732e 100755
--- a/olmocr/bench/scripts/convert_all.sh
+++ b/olmocr/bench/scripts/convert_all.sh
@@ -158,12 +158,12 @@ source activate olmocr
 
 # Run olmocr benchmarks, exactly as the pipeline.py does it
 echo "Running olmocr benchmarks..."
-python -m olmocr.bench.convert olmocr_pipeline --parralel 50
+python -m olmocr.bench.convert olmocr_pipeline --parallel 50 --dir olmOCR-bench/bench_data/
 
 # Install marker-pdf and run benchmarks
 echo "Installing marker-pdf and running benchmarks..."
 pip install marker-pdf==1.6.1
-python -m olmocr.bench.convert marker
+python -m olmocr.bench.convert marker --dir olmOCR-bench/bench_data/
 
 # Install verovio and run benchmarks
 # echo "Installing verovio and running benchmarks..."
@@ -172,16 +172,16 @@
 
 # Run chatgpt benchmarks
 echo "Running chatgpt benchmarks..."
-python -m olmocr.bench.convert chatgpt
+python -m olmocr.bench.convert chatgpt --dir olmOCR-bench/bench_data/
 #python -m olmocr.bench.convert chatgpt:name=chatgpt45:model=gpt-4.5-preview-2025-02-27
 
 # Run gemini benchmarks
 echo "Running gemini benchmarks..."
-python -m olmocr.bench.convert gemini:name=gemini_flash2:model=gemini-2.0-flash --parallel 4
+python -m olmocr.bench.convert gemini:name=gemini_flash2:model=gemini-2.0-flash --parallel 4 --dir olmOCR-bench/bench_data/
 
 echo "Running mistral..."
 pip install mistralai
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data  mistral
+python -m olmocr.bench.convert --dir olmOCR-bench/bench_data mistral
 
 # Run raw server benchmarks with generic server function
 # For each model, start server, run benchmark, then stop server
@@ -190,27 +190,27 @@
 check_port || exit 1
 
 # olmocr_base_temp0_1 using sglang server
-start_server sglang "allenai/olmOCR-7B-0225-preview" --chat-template qwen2-vl --mem-fraction-static 0.7
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_0:model=allenai/olmOCR-7B-0225-preview:temperature=0.0:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_2:model=allenai/olmOCR-7B-0225-preview:temperature=0.2:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_3:model=allenai/olmOCR-7B-0225-preview:temperature=0.3:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_4:model=allenai/olmOCR-7B-0225-preview:temperature=0.4:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_5:model=allenai/olmOCR-7B-0225-preview:temperature=0.5:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_6:model=allenai/olmOCR-7B-0225-preview:temperature=0.6:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# start_server sglang "allenai/olmOCR-7B-0225-preview" --chat-template qwen2-vl --mem-fraction-static 0.7
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_0:model=allenai/olmOCR-7B-0225-preview:temperature=0.0:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_2:model=allenai/olmOCR-7B-0225-preview:temperature=0.2:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_3:model=allenai/olmOCR-7B-0225-preview:temperature=0.3:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_4:model=allenai/olmOCR-7B-0225-preview:temperature=0.4:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_5:model=allenai/olmOCR-7B-0225-preview:temperature=0.5:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_6:model=allenai/olmOCR-7B-0225-preview:temperature=0.6:prompt_template=fine_tune:response_template=json --repeats 1 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
 
-python -m olmocr.bench.convert server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-python -m olmocr.bench.convert server:name=olmocr_base_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-stop_server
+# python -m olmocr.bench.convert server:name=olmocr_base_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# python -m olmocr.bench.convert server:name=olmocr_base_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# stop_server
 
-start_server vllm "allenai/olmOCR-7B-0225-preview"
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# start_server vllm "allenai/olmOCR-7B-0225-preview"
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# python -m olmocr.bench.convert --dir olmOCR-bench/bench_data server:name=olmocr_base_temp_vllm0_7:model=allenai/olmOCR-7B-0225-preview:temperature=0.7:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
 
-python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
-stop_server
+# python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_1:model=allenai/olmOCR-7B-0225-preview:temperature=0.1:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# python -m olmocr.bench.convert server:name=olmocr_base_vllm_temp0_8:model=allenai/olmOCR-7B-0225-preview:temperature=0.8:prompt_template=fine_tune:response_template=json --repeats 5 --parallel 50
+# stop_server
 
 # Feel free to enable if you want
 # qwen2_vl_7b using sglang server

From b607aecbbcb47344b2c37c9ca0fbdaf4969f9ab7 Mon Sep 17 00:00:00 2001
From: Jake Poznanski
Date: Mon, 7 Apr 2025 10:21:35 -0700
Subject: [PATCH 2/2] Lints

---
 olmocr/bench/runners/run_gemini.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/olmocr/bench/runners/run_gemini.py b/olmocr/bench/runners/run_gemini.py
index 43d88d8..1254339 100644
--- a/olmocr/bench/runners/run_gemini.py
+++ b/olmocr/bench/runners/run_gemini.py
@@ -39,7 +39,6 @@ def run_gemini(pdf_path: str, page_num: int = 1, model: str = "gemini-2.0-flash"
         top_p=1.0,
         top_k=32,
         max_output_tokens=4096,
-
     )
 
     response = client.models.generate_content(
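
Note for reviewers unfamiliar with the new google-genai package: the migrated call pattern in run_gemini.py boils down to roughly the sketch below. It only mirrors the calls visible in the diff above; the local "page.png" path and the literal prompt string are illustrative placeholders (the real runner renders a PDF page via render_pdf_to_base64png and builds its prompt from anchor text), and GEMINI_API_KEY is assumed to be set in the environment.

import os
import pathlib

from google import genai
from google.genai import types

# Minimal sketch of the new-style Gemini call adopted by this patch.
# Placeholders: "page.png" stands in for the rendered PDF page, and the
# prompt string stands in for build_openai_silver_data_prompt(anchor_text).
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

png_bytes = pathlib.Path("page.png").read_bytes()
image_part = types.Part(inline_data=types.Blob(mime_type="image/png", data=png_bytes))
text_part = types.Part(text="Transcribe this page to markdown.")

config = types.GenerateContentConfig(temperature=0.1, top_p=1.0, top_k=32, max_output_tokens=4096)

response = client.models.generate_content(
    model="models/gemini-2.0-flash",
    contents=[types.Content(parts=[image_part, text_part])],
    config=config,
)

# As in the patched runner, check the generation stopped cleanly before using the text.
assert response.candidates[0].finish_reason == types.FinishReason.STOP
print(response.candidates[0].content.parts[0].text)

The main structural difference from the old google.ai.generativelanguage client is that requests are no longer built as explicit GenerateContentRequest objects; the model, contents, and config are passed directly to client.models.generate_content.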