Actually install the right thing

This commit is contained in:
Jake Poznanski 2025-06-12 21:18:58 +00:00
parent 548187902b
commit 4bfcfce767

View File

@@ -96,7 +96,7 @@ if has_aws_creds:
commands.extend([
"git clone https://huggingface.co/datasets/allenai/olmOCR-bench",
"cd olmOCR-bench && git lfs pull && cd ..",
-f"pip install marker=={marker_version}",
+f"pip install marker-pdf=={marker_version}",
"python -m olmocr.bench.convert marker --dir ./olmOCR-bench/bench_data",
"python -m olmocr.bench.benchmark --dir ./olmOCR-bench/bench_data"
])
@@ -146,7 +146,7 @@ if has_aws_creds:
'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
])
perf_commands.extend([
-f"pip install marker=={marker_version}",
+f"pip install marker-pdf=={marker_version}",
"s5cmd cp s3://ai2-oe-data/jakep/olmocr/olmOCR-mix-0225/benchmark_set/* /root/olmOCR-mix-0225_benchmark_set/",
"marker --force_ocr /root/olmOCR-mix-0225_benchmark_set/ --output_dir /root/olmOCR-mix-0225_benchmark_set_marker --workers 8"
])