diff --git a/.gitignore b/.gitignore
index 2a1c30a..cf93ea3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ olmOCR-bench/*
table_data*/
/synth*/
dolma_samples/*
+old_train/
/*.html
scoreelo.csv
debug.log
diff --git a/README.md b/README.md
index 6e95c54..ee0040b 100644
--- a/README.md
+++ b/README.md
@@ -61,18 +61,6 @@ We also ship a comprehensive benchmark suite covering over 7,000 test cases acro
-  <tr>
-    <td>Marker v1.6.2</td>
-    <td>24.3</td>
-    <td>22.1</td>
-    <td>69.8</td>
-    <td>24.3</td>
-    <td>87.1</td>
-    <td>71.0</td>
-    <td>76.9</td>
-    <td>99.5</td>
-    <td>59.4 ± 1.1</td>
-  </tr>
  <tr>
    <td>MinerU v1.3.10</td>
    <td>75.4</td>
@@ -87,7 +75,7 @@ We also ship a comprehensive benchmark suite covering over 7,000 test cases acro
    <td>Mistral OCR API</td>
-    <td>77.2</td>
+    <td>77.2</td>
    <td>67.5</td>
    <td>60.6</td>
    <td>29.3</td>
@@ -97,6 +85,18 @@ We also ship a comprehensive benchmark suite covering over 7,000 test cases acro
    <td>99.4</td>
    <td>72.0 ± 1.1</td>
  </tr>
+  <tr>
+    <td>Marker v1.7.4 (hybrid)</td>
+    <td>77.7</td>
+    <td>71.2</td>
+    <td>78.1</td>
+    <td>32.3</td>
+    <td>83.4</td>
+    <td>73.8</td>
+    <td>79.0</td>
+    <td>99.2</td>
+    <td>74.3 ± 1.1</td>
+  </tr>
  <tr>
    <td>olmOCR v0.1.68 (pipeline.py)</td>
    <td>75.6</td>
diff --git a/olmocr/bench/README.md b/olmocr/bench/README.md
index 8cd0f72..65c29bb 100644
--- a/olmocr/bench/README.md
+++ b/olmocr/bench/README.md
@@ -37,7 +37,7 @@ to run it against your own OCR tools. Your tool just needs to support Markdown o
    <td>GOT OCR</td>
    <td>52.7</td>
    <td>52.0</td>
-    <td>0.2</td>
+    <td>0.20</td>
    <td>22.1</td>
    <td>93.6</td>
    <td>42.0</td>
@@ -46,16 +46,16 @@ to run it against your own OCR tools. Your tool just needs to support Markdown o
    <td>48.3 ± 1.1</td>
-    <td>Marker v1.6.2</td>
-    <td>24.3</td>
-    <td>22.1</td>
-    <td>69.8</td>
-    <td>24.3</td>
-    <td>87.1</td>
-    <td>71.0</td>
-    <td>76.9</td>
-    <td>99.5</td>
-    <td>59.4 ± 1.1</td>
+    <td>Marker v1.7.5 (base)</td>
+    <td>76.0</td>
+    <td>57.9</td>
+    <td>57.6</td>
+    <td>27.8</td>
+    <td>84.9</td>
+    <td>72.9</td>
+    <td>84.6</td>
+    <td>99.1</td>
+    <td>70.1 ± 1.1</td>
    <td>MinerU v1.3.10</td>
@@ -78,7 +78,7 @@ to run it against your own OCR tools. Your tool just needs to support Markdown o
    <td>93.6</td>
    <td>71.3</td>
    <td>77.1</td>
-    <td>99.4</td>
+    <td>99.4</td>
    <td>72.0 ± 1.1</td>
@@ -121,7 +121,7 @@ to run it against your own OCR tools. Your tool just needs to support Markdown o
    <td>Gemini Flash 2 (Anchored)</td>
    <td>54.5</td>
    <td>56.1</td>
-    <td>72.1</td>
+    <td>72.1</td>
    <td>34.2</td>
    <td>64.7</td>
    <td>61.5</td>
@@ -157,7 +157,7 @@ to run it against your own OCR tools. Your tool just needs to support Markdown o
    <td>olmOCR v0.1.68 (No Anchor)</td>
    <td>72.1</td>
    <td>74.7</td>
-    <td>71.5</td>
+    <td>71.5</td>
    <td>43.7</td>
    <td>91.6</td>
    <td>78.5</td>
@@ -288,6 +288,3 @@ We have an internal data annotation tool that can be used to review the question
```bash
python -m olmocr.bench.review_app --port 5000 --debug ./olmOCR-bench/bench_data/multi_column.jsonl --force
```
-
-
-
diff --git a/olmocr/bench/runners/run_marker.py b/olmocr/bench/runners/run_marker.py
index 58733cd..d444408 100644
--- a/olmocr/bench/runners/run_marker.py
+++ b/olmocr/bench/runners/run_marker.py
@@ -4,6 +4,7 @@ import tempfile
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
+from marker.config.parser import ConfigParser
from pypdf import PdfReader, PdfWriter
_marker_converter = None
@@ -15,10 +16,22 @@ def run_marker(pdf_path: str, page_num: int = 1) -> str:
    if _marker_converter is None:
        # Create a configuration dictionary with the necessary settings
        config = {
-            "texify_inline_spans": True,  # This enables conversion of inline math to LaTeX
+            "force_ocr": True,  # This enables conversion of inline math to LaTeX
+            "use_llm": False,  # We would prefer to run just plain marker for reporting bench results, not hybrid mode
+            "disable_tqdm": True,  # Disable tqdm for cleaner output
+ "recognition_batch_size": 256,
+ "layout_batch_size": 48,
+ "detection_batch_size": 48,
+ "equation_batch_size": 64,
+ "table_rec_batch_size": 48,
+ "ocr_error_batch_size": 64,
}
+ config_parser = ConfigParser(config)
- _marker_converter = PdfConverter(artifact_dict=create_model_dict(), config=config)
+ _marker_converter = PdfConverter(
+ artifact_dict=create_model_dict(),
+ config=config_parser.generate_config_dict(),
+ )
# Extract the specific page from the PDF
pdf_to_process = pdf_path
diff --git a/olmocr/bench/tests.py b/olmocr/bench/tests.py
index ec87313..320d31a 100644
--- a/olmocr/bench/tests.py
+++ b/olmocr/bench/tests.py
@@ -123,6 +123,8 @@ def normalize_text(md_content: str) -> str:
    # Remove markdown bold formatting (** or __ for bold)
    md_content = re.sub(r"\*\*(.*?)\*\*", r"\1", md_content)
    md_content = re.sub(r"__(.*?)__", r"\1", md_content)
+    md_content = re.sub(r"</?b>", "", md_content)  # Remove <b> tags if they exist
+    md_content = re.sub(r"</?i>", "", md_content)  # Remove <i> tags if they exist

    # Remove markdown italics formatting (* or _ for italics)
    md_content = re.sub(r"\*(.*?)\*", r"\1", md_content)
diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py
index 0899d77..410389f 100644
--- a/olmocr/pipeline.py
+++ b/olmocr/pipeline.py
@@ -329,7 +329,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
async def process_pdf(args, worker_id: int, pdf_orig_path: str):
-    with tempfile.NamedTemporaryFile("wb+", suffix=".pdf") as tf:
+    with tempfile.NamedTemporaryFile("wb+", suffix=".pdf", delete=False) as tf:
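+        # delete=False keeps the temp file on disk after the with-block exits; it is removed in the finally clause added below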
        try:
            data = await asyncio.to_thread(lambda: get_s3_bytes_with_backoff(pdf_s3, pdf_orig_path))
            tf.write(data)
@@ -347,6 +347,7 @@ async def process_pdf(args, worker_id: int, pdf_orig_path: str):
            tf.write(convert_image_to_pdf_bytes(tf.name))
            tf.flush()

+    try:
        try:
            reader = PdfReader(tf.name)
            num_pages = reader.get_num_pages()
@@ -398,6 +399,9 @@ async def process_pdf(args, worker_id: int, pdf_orig_path: str):
            # You can't build a dolma doc with even 1 failed page, so just get out of here
            # However, you don't want to propagate an exception higher up and cancel the entire work_group
            return None
+    finally:
+        if os.path.exists(tf.name):
+            os.unlink(tf.name)
def build_dolma_document(pdf_orig_path, page_results):
@@ -698,19 +702,31 @@ async def vllm_server_ready():
raise Exception("vllm server did not become ready after waiting.")
-async def download_model(model_name_or_path: str):
- if model_name_or_path.startswith("s3://") or model_name_or_path.startswith("gs://") or model_name_or_path.startswith("weka://"):
- logger.info(f"Downloading model directory from '{model_name_or_path}'")
- model_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "olmocr", "model")
- download_directory([model_name_or_path], model_cache_dir)
- return model_cache_dir
- elif os.path.isabs(model_name_or_path) and os.path.isdir(model_name_or_path):
- logger.info(f"Using local model path at '{model_name_or_path}'")
- return model_name_or_path
- else:
- logger.info(f"Downloading model with hugging face '{model_name_or_path}'")
- snapshot_download(repo_id=model_name_or_path)
- return model_name_or_path
+async def download_model(model_name_or_path: str, max_retries: int = 5):
+ for retry in range(max_retries):
+ try:
+ if model_name_or_path.startswith("s3://") or model_name_or_path.startswith("gs://") or model_name_or_path.startswith("weka://"):
+ logger.info(f"Downloading model directory from '{model_name_or_path}'")
+ model_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "olmocr", "model")
+ # Delete existing model cache directory if it exists
+ if os.path.exists(model_cache_dir):
+ shutil.rmtree(model_cache_dir)
+ download_directory([model_name_or_path], model_cache_dir)
+ return model_cache_dir
+ elif os.path.isabs(model_name_or_path) and os.path.isdir(model_name_or_path):
+ logger.info(f"Using local model path at '{model_name_or_path}'")
+ return model_name_or_path
+ else:
+ logger.info(f"Downloading model with hugging face '{model_name_or_path}'")
+ snapshot_download(repo_id=model_name_or_path)
+ return model_name_or_path
+ except Exception:
+ if retry == max_retries - 1:
+ raise # Raise on final attempt and fail the job
+
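+            # Exponential backoff with jitter: a random 2-20 second base delay, doubled on each retry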
+            sleep_time = random.randrange(2, 20) * 2**retry
+            logger.exception(f"Could not download model, sleeping for {sleep_time} seconds to retry ({retry + 1}/{max_retries})")
+            await asyncio.sleep(sleep_time)
async def metrics_reporter(work_queue):
@@ -899,6 +915,7 @@ def print_stats(args, root_work_queue):
logger.warning(f"Error processing {s3_path}: {e}")
return 0, 0, 0, 0, 0, set(), 0, 0
+ print(f"\nCompleted work items {completed_items:,} out of {total_items:,}: {completed_items/total_items*100:.2f}%")
print("\nProcessing output files...")
docs_total = 0
input_tokens_total = 0
@@ -1026,8 +1043,8 @@ async def main():
    # Wait a little bit so that all the beaker jobs in a task don't start downloading the model at the same time
    replica_count = int(os.environ.get("BEAKER_REPLICA_COUNT", "1"))
-    interval = 10 if (replica_count - 1) * 10 <= 240 else 240 / max(1, replica_count - 1)
-    sleep_time = int(int(os.environ.get("BEAKER_REPLICA_RANK", "0")) * interval)
+    interval = 10 if (replica_count - 1) * 10 <= 30 else 30 / max(1, replica_count - 1)
+    sleep_time = int(os.environ.get("BEAKER_REPLICA_RANK", "0")) * interval
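+    # Stagger by 10s per replica rank, compressed so the last replica waits at most ~30s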
logger.info(f"Beaker job sleeping for {sleep_time} seconds to stagger model downloads")
await asyncio.sleep(sleep_time)
diff --git a/scripts/pareto_plot.py b/scripts/pareto_plot.py
index d3806df..7c04a70 100644
--- a/scripts/pareto_plot.py
+++ b/scripts/pareto_plot.py
@@ -64,7 +64,7 @@ data = {
        "MinerU",
        "Gemini Flash 2",
        "Gemini Flash 2 (Batch)",
-        "Marker v1.6.2",
+        "Marker v1.7.5",
        "Ours",
        "Qwen 2 VL",
        "Qwen 2.5 VL",
@@ -77,7 +77,7 @@ data = {
        61.5,  # MinerU
        63.8,  # Gemini Flash 2 (Anchored)
        63.8,  # Same performance for batch
-        59.4,  # marker v1.6.2
+        70.1,  # marker v1.7.5 base
        77.4,  # Ours (performance is the same across hardware)
        31.5,  # Qwen2VL
        65.5,  # Qwen2.5VL
@@ -94,7 +94,7 @@ model_categories = {
    "MinerU": "Open Source Tool",
    "Gemini Flash 2": "Commercial VLM",
    "Gemini Flash 2 (Batch)": "Commercial VLM",
-    "Marker v1.6.2": "Open Source Tool",
+    "Marker v1.7.5": "Open Source Tool",
    "Ours": "Ours",
    "Qwen 2 VL": "Open VLM",
    "Qwen 2.5 VL": "Open VLM",
@@ -132,7 +132,7 @@ model_label_offsets = {
    "MinerU": [-15, -20],
    "Gemini Flash 2": [-10, 10],
    "Gemini Flash 2 (Batch)": [-50, -15],
-    "Marker v1.6.2": [-35, -20],
+    "Marker v1.7.5": [-20, 15],
    "Ours": [-20, 10],
    "Qwen 2 VL": [-35, 10],
    "Qwen 2.5 VL": [-35, 10],
diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh
index b6b0526..4d00b70 100755
--- a/scripts/run_benchmark.sh
+++ b/scripts/run_benchmark.sh
@@ -104,7 +104,7 @@ except:
    has_aws_creds = False
    print(f"AWS credentials secret not found: {aws_creds_secret}")
-# Build commands list
+# First experiment: Original benchmark job
commands = []
if has_aws_creds:
    commands.extend([
@@ -142,21 +142,71 @@ if has_aws_creds:
EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
]
-# Create experiment spec
+# Create first experiment spec
experiment_spec = ExperimentSpec(
    description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
    budget="ai2/oe-data",
    tasks=[TaskSpec(**task_spec_args)],
)
-# Create the experiment
+# Create the first experiment
experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr")
-print(f"Created experiment: {experiment.id}")
+print(f"Created benchmark experiment: {experiment.id}")
print(f"View at: https://beaker.org/ex/{experiment.id}")
+print("-------")
+print("")
+
+# Second experiment: Performance test job
+perf_pipeline_cmd = "python -m olmocr.pipeline ./localworkspace --markdown --pdfs s3://ai2-oe-data/jakep/olmocr/olmOCR-mix-0225/benchmark_set/*.pdf"
+if model:
+    perf_pipeline_cmd += f" --model {model}"
+
+perf_commands = []
+if has_aws_creds:
+    perf_commands.extend([
+        "mkdir -p ~/.aws",
+        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
+    ])
+perf_commands.append(perf_pipeline_cmd)
+
+# Build performance task spec
+perf_task_spec_args = {
+    "name": "olmocr-performance",
+    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
+    "command": [
+        "bash", "-c",
+        " && ".join(perf_commands)
+    ],
+    "context": TaskContext(
+        priority=Priority.normal,
+        preemptible=True,
+    ),
+    "resources": TaskResources(gpu_count=1),
+    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
+    "result": ResultSpec(path="/noop-results"),
+}
+
+# Add env vars if AWS credentials exist
+if has_aws_creds:
+    perf_task_spec_args["env_vars"] = [
+        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
+    ]
+
+# Create performance experiment spec
+perf_experiment_spec = ExperimentSpec(
+    description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
+    budget="ai2/oe-data",
+    tasks=[TaskSpec(**perf_task_spec_args)],
+)
+
+# Create the performance experiment
+perf_experiment = b.experiment.create(spec=perf_experiment_spec, workspace="ai2/olmocr")
+print(f"Created performance experiment: {perf_experiment.id}")
+print(f"View at: https://beaker.org/ex/{perf_experiment.id}")
EOF
-# Run the Python script to create the experiment
-echo "Creating Beaker experiment..."
+# Run the Python script to create the experiments
+echo "Creating Beaker experiments..."
if [ -n "$MODEL" ]; then
echo "Using model: $MODEL"
$PYTHON /tmp/run_benchmark_experiment.py $IMAGE_TAG $BEAKER_USER $GIT_BRANCH $GIT_HASH "$MODEL"
@@ -167,4 +217,4 @@ fi
# Clean up temporary file
rm /tmp/run_benchmark_experiment.py
-echo "Benchmark experiment submitted successfully!"
\ No newline at end of file
+echo "Benchmark experiments submitted successfully!"
\ No newline at end of file
diff --git a/scripts/run_marker_benchmark.sh b/scripts/run_marker_benchmark.sh
new file mode 100755
index 0000000..332a3f0
--- /dev/null
+++ b/scripts/run_marker_benchmark.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+
+# Runs the marker benchmark, measuring both olmOCR-bench performance and per-document processing speed
+# ./scripts/run_marker_benchmark.sh
+# ./scripts/run_marker_benchmark.sh 1.7.5
+
+set -e
+
+# Parse command line arguments
+MARKER_VERSION="${1:-1.7.5}"
+echo "Using marker version: $MARKER_VERSION"
+
+# Check for uncommitted changes
+if ! git diff-index --quiet HEAD --; then
+    echo "Error: There are uncommitted changes in the repository."
+    echo "Please commit or stash your changes before running the benchmark."
+    echo ""
+    echo "Uncommitted changes:"
+    git status --short
+    exit 1
+fi
+
+# Use conda environment Python if available, otherwise use system Python
+if [ -n "$CONDA_PREFIX" ]; then
+ PYTHON="$CONDA_PREFIX/bin/python"
+ echo "Using conda Python from: $CONDA_PREFIX"
+else
+ PYTHON="python"
+ echo "Warning: No conda environment detected, using system Python"
+fi
+
+# Get version from version.py
+VERSION=$($PYTHON -c 'import olmocr.version; print(olmocr.version.VERSION)')
+echo "OlmOCR version: $VERSION"
+
+# Get first 10 characters of git hash
+GIT_HASH=$(git rev-parse HEAD | cut -c1-10)
+echo "Git hash: $GIT_HASH"
+
+# Get current git branch name
+GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+echo "Git branch: $GIT_BRANCH"
+
+# Create full image tag
+IMAGE_TAG="olmocr-benchmark-${VERSION}-${GIT_HASH}"
+echo "Building Docker image with tag: $IMAGE_TAG"
+
+# Build the Docker image
+echo "Building Docker image..."
+docker build --platform linux/amd64 -f ./Dockerfile -t $IMAGE_TAG .
+
+# Get Beaker username
+BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name')
+echo "Beaker user: $BEAKER_USER"
+
+# Push image to beaker
+echo "Trying to push image to Beaker..."
+if ! beaker image create --workspace ai2/oe-data-pdf --name $IMAGE_TAG $IMAGE_TAG 2>/dev/null; then
+    echo "Warning: Beaker image with tag $IMAGE_TAG already exists. Using existing image."
+fi
+
+# Create Python script to run beaker experiment
+cat << 'EOF' > /tmp/run_benchmark_experiment.py
+import sys
+from beaker import Beaker, ExperimentSpec, TaskSpec, TaskContext, ResultSpec, TaskResources, ImageSource, Priority, Constraints, EnvVar
+
+# Get image tag, beaker user, git branch, git hash, and marker version from command line
+image_tag = sys.argv[1]
+beaker_user = sys.argv[2]
+git_branch = sys.argv[3]
+git_hash = sys.argv[4]
+marker_version = sys.argv[5]
+
+# Initialize Beaker client
+b = Beaker.from_env(default_workspace="ai2/olmocr")
+
+
+# Check if AWS credentials secret exists
+aws_creds_secret = f"{beaker_user}-AWS_CREDENTIALS_FILE"
+try:
+    # Try to get the secret to see if it exists
+    b.secret.get(aws_creds_secret, workspace="ai2/olmocr")
+    has_aws_creds = True
+    print(f"Found AWS credentials secret: {aws_creds_secret}")
+except:
+    has_aws_creds = False
+    print(f"AWS credentials secret not found: {aws_creds_secret}")
+
+# First experiment: Original benchmark job
+commands = []
+if has_aws_creds:
+    commands.extend([
+        "mkdir -p ~/.aws",
+        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
+    ])
+commands.extend([
+    "git clone https://huggingface.co/datasets/allenai/olmOCR-bench",
+    "cd olmOCR-bench && git lfs pull && cd ..",
+    f"pip install marker-pdf=={marker_version}",
+    "pip install --upgrade torchvision",
+ "python -m olmocr.bench.convert marker --dir ./olmOCR-bench/bench_data",
+ "python -m olmocr.bench.benchmark --dir ./olmOCR-bench/bench_data"
+])
+
+# Build task spec with optional env vars
+task_spec_args = {
+    "name": "marker-benchmark",
+    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
+    "command": [
+        "bash", "-c",
+        " && ".join(commands)
+    ],
+    "context": TaskContext(
+        priority=Priority.normal,
+        preemptible=True,
+    ),
+    "resources": TaskResources(gpu_count=1),
+    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
+    "result": ResultSpec(path="/noop-results"),
+}
+
+# Add env vars if AWS credentials exist
+if has_aws_creds:
+    task_spec_args["env_vars"] = [
+        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
+    ]
+
+# Create first experiment spec
+experiment_spec = ExperimentSpec(
+    description=f"Marker {marker_version} Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
+    budget="ai2/oe-data",
+    tasks=[TaskSpec(**task_spec_args)],
+)
+
+# Create the first experiment
+experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr")
+print(f"Created benchmark experiment: {experiment.id}")
+print(f"View at: https://beaker.org/ex/{experiment.id}")
+print("-------")
+print("")
+
+
+perf_commands = []
+if has_aws_creds:
+    perf_commands.extend([
+        "mkdir -p ~/.aws",
+        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
+    ])
+perf_commands.extend([
+    f"pip install marker-pdf=={marker_version}",
+    "pip install --upgrade torchvision",
+    "pip install awscli",
+    "aws s3 cp --recursive s3://ai2-oe-data/jakep/olmocr/olmOCR-mix-0225/benchmark_set/ /root/olmOCR-mix-0225_benchmark_set/",
+    # Tried with --workers 8, but it took far longer overall
+    # "time marker --force_ocr /root/olmOCR-mix-0225_benchmark_set/ --output_dir /root/olmOCR-mix-0225_benchmark_set_marker --workers 8"
+    "time marker --force_ocr /root/olmOCR-mix-0225_benchmark_set/ --output_dir /root/olmOCR-mix-0225_benchmark_set_marker"
+])
+
+# Build performance task spec
+perf_task_spec_args = {
+    "name": "marker-performance",
+    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
+    "command": [
+        "bash", "-c",
+        " && ".join(perf_commands)
+    ],
+    "context": TaskContext(
+        priority=Priority.normal,
+        preemptible=True,
+    ),
+    "resources": TaskResources(gpu_count=1),
+    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
+    "result": ResultSpec(path="/noop-results"),
+}
+
+# Add env vars if AWS credentials exist
+if has_aws_creds:
+    perf_task_spec_args["env_vars"] = [
+        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
+    ]
+
+# Create performance experiment spec
+perf_experiment_spec = ExperimentSpec(
+    description=f"Marker {marker_version} Performance Test - Branch: {git_branch}, Commit: {git_hash}",
+    budget="ai2/oe-data",
+    tasks=[TaskSpec(**perf_task_spec_args)],
+)
+
+# Create the performance experiment
+perf_experiment = b.experiment.create(spec=perf_experiment_spec, workspace="ai2/olmocr")
+print(f"Created performance experiment: {perf_experiment.id}")
+print(f"View at: https://beaker.org/ex/{perf_experiment.id}")
+EOF
+
+# Run the Python script to create the experiments
+echo "Creating Beaker experiments..."
+$PYTHON /tmp/run_benchmark_experiment.py $IMAGE_TAG $BEAKER_USER $GIT_BRANCH $GIT_HASH $MARKER_VERSION
+
+# Clean up temporary file
+rm /tmp/run_benchmark_experiment.py
+
+echo "Benchmark experiments submitted successfully!"
\ No newline at end of file