# olmocr/Dockerfile

# Base image: vllm/vllm-openai, pinned to the v0.11.0 tag.
FROM vllm/vllm-openai:v0.11.0
# Python minor version; only used to build CUSTOM_PY on the next line.
ENV PYTHON_VERSION=3.12
# Path of the interpreter that the APT/fonts RUN step falls back to when no
# python3 alternative is registered. This has to be its own ENV instruction,
# placed after PYTHON_VERSION, so that ${PYTHON_VERSION} is already defined
# when this value is expanded.
ENV CUSTOM_PY="/usr/bin/python${PYTHON_VERSION}"
# Install the fonts and document tools needed for good rendering of documents,
# plus git/git-lfs/curl/wget/unzip.
#
# Workaround: the APT maintainer scripts must run under the distro-built
# python3, so the python3/python alternatives are temporarily pointed at the
# lowest-versioned /usr/bin/python3.X (DIST_PY) while APT runs and are put
# back afterwards.
#
# Best-effort commands are wrapped as `{ cmd || true; }`. A bare
# `cmd || true && next` would also swallow the failure of every command
# earlier in the chain (`&&` and `||` have equal precedence in sh), letting a
# failed `apt-get update` / `apt-get install` go unnoticed.
RUN DIST_PY=$(ls /usr/bin/python3.[0-9]* | sort -V | head -n1) && \
    # Remember the currently selected python3 alternative (if any) so it can
    # be restored later; otherwise fall back to CUSTOM_PY.
    if update-alternatives --query python3 >/dev/null 2>&1; then \
        ORIGINAL_PY=$(update-alternatives --query python3 | awk -F": " '/Value:/ {print $2}'); \
    else \
        ORIGINAL_PY=$CUSTOM_PY; \
    fi && \
    # ---- APT operations that require the distro python3 -------------------
    echo "Temporarily switching python3 alternative to ${DIST_PY} so that APT scripts use the distrobuilt Python runtime." && \
    update-alternatives --install /usr/bin/python3 python3 "${DIST_PY}" 1 && \
    update-alternatives --set python3 "${DIST_PY}" && \
    update-alternatives --install /usr/bin/python python "${DIST_PY}" 1 && \
    update-alternatives --set python "${DIST_PY}" && \
    apt-get update -y && \
    # Removing python3-blinker is optional: it may not be installed at all.
    { apt-get remove -y python3-blinker || true; } && \
    # Preseed the Microsoft Core Fonts EULA so the build is non-interactive.
    echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        fonts-crosextra-caladea \
        fonts-crosextra-carlito \
        git \
        git-lfs \
        gsfonts \
        lcdf-typetools \
        poppler-utils \
        python3-apt \
        ttf-mscorefonts-installer \
        unzip \
        update-notifier-common \
        wget && \
    # Drop the package index in the same layer that downloaded it.
    rm -rf /var/lib/apt/lists/* && \
    # ---- Restore the original / custom Python alternative -----------------
    echo "Restoring python3 alternative to ${ORIGINAL_PY}" && \
    update-alternatives --install /usr/bin/python3 python3 "${ORIGINAL_PY}" 1 && \
    update-alternatives --set python3 "${ORIGINAL_PY}" && \
    # The plain `python` alternative is restored on a best-effort basis.
    { update-alternatives --install /usr/bin/python python "${ORIGINAL_PY}" 1 || true; } && \
    { update-alternatives --set python "${ORIGINAL_PY}" || true; } && \
    # Ensure pip is available for the restored Python. The script is saved to
    # a file first (with -f) so a failed download aborts the build instead of
    # piping an empty script into the interpreter.
    curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    "${ORIGINAL_PY}" /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
# Work from a dedicated /build directory and copy the whole build context
# there, keeping it separate from the rest of the image's filesystem.
WORKDIR /build
COPY . /build

# Needed to resolve setuptools dependencies
ENV UV_INDEX_STRATEGY="unsafe-best-match"

# Install the project from /build together with its "bench" extra into the
# system interpreter (--system), without keeping uv's download cache.
RUN uv pip install --system --no-cache ".[bench]"
# Install the OS packages Playwright's browsers depend on. The APT package
# index fetched by this step is removed in the same layer so it does not end
# up in the image.
RUN playwright install-deps && \
    rm -rf /var/lib/apt/lists/*

# Download the Chromium build that Playwright drives.
RUN playwright install chromium

# Smoke test: the build fails here if the olmocr.pipeline module cannot be
# started with the image's python3.
RUN python3 -m olmocr.pipeline --help