Hopefully a much better dockerfile

This commit is contained in:
Jake Poznanski 2025-06-02 18:34:47 +00:00
parent 04dd71c6bf
commit 97da87a3b2

View File

@@ -2,36 +2,46 @@ ARG CUDA_VERSION=12.8.1
FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
RUN apt-get update -y && apt-get install -y software-properties-common \ # Needs to be repeated below the FROM, or else it's not picked up
&& add-apt-repository ppa:deadsnakes/ppa \ ARG PYTHON_VERSION=3.12
&& apt-get -y update ARG CUDA_VERSION=12.8.1
RUN apt-get update && apt-get -y install python3-apt # Set environment variable to prevent interactive prompts
RUN echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -y && apt-get install -y poppler-utils ttf-mscorefonts-installer msttcorefonts fonts-crosextra-caladea fonts-crosextra-carlito gsfonts lcdf-typetools
RUN apt-get update -y && apt-get install -y --no-install-recommends \ # From original VLLM dockerfile https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
git \ # Install Python and other dependencies
git-lfs \ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
python3.11 \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
python3.11-dev \ && apt-get update -y \
python3.11-distutils \ && apt-get install -y ccache software-properties-common git curl sudo python3-apt \
ca-certificates \ && for i in 1 2 3; do \
build-essential \ add-apt-repository -y ppa:deadsnakes/ppa && break || \
curl \ { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
wget \ done \
unzip && apt-get update -y \
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
&& update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
&& python3 --version && python3 -m pip --version
RUN rm -rf /var/lib/apt/lists/* \ # Install uv for faster pip installs
&& unlink /usr/bin/python3 \ RUN --mount=type=cache,target=/root/.cache/uv \
&& ln -s /usr/bin/python3.11 /usr/bin/python3 \ python3 -m pip install uv
&& ln -s /usr/bin/python3 /usr/bin/python \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python \ # olmOCR Specific Installs
&& pip3 install -U pip # Install fonts with workaround for update-notifier issue
RUN echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections && \
apt-get update -y && \
apt-get install -y --no-install-recommends poppler-utils fonts-crosextra-caladea fonts-crosextra-carlito gsfonts lcdf-typetools && \
# Temporarily fix the python symlink for the installer
ln -sf /usr/bin/python3.8 /usr/bin/python3 && \
apt-get install -y --no-install-recommends ttf-mscorefonts-installer && \
# Restore our Python 3.12 symlink
update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION}
RUN apt-get update && apt-get -y install python3.11-venv
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
RUN /install.sh && rm /install.sh
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
@@ -39,9 +49,9 @@ WORKDIR /root
COPY pyproject.toml pyproject.toml COPY pyproject.toml pyproject.toml
COPY olmocr/version.py olmocr/version.py COPY olmocr/version.py olmocr/version.py
RUN /root/.local/bin/uv pip install --system --no-cache -e . RUN uv pip install --system --no-cache -e .
RUN /root/.local/bin/uv pip install --system --no-cache ".[gpu]" --extra-index-url https://download.pytorch.org/whl/cu128 RUN uv pip install --system --no-cache ".[gpu]" --extra-index-url https://download.pytorch.org/whl/cu128
RUN /root/.local/bin/uv pip install --system --no-cache ".[bench]" RUN uv pip install --system --no-cache ".[bench]"
RUN playwright install-deps RUN playwright install-deps
RUN playwright install chromium RUN playwright install chromium
COPY olmocr olmocr COPY olmocr olmocr