olmocr/scripts/beaker/Dockerfile-inference

52 lines
1.8 KiB
Plaintext
Raw Normal View History

2024-11-13 14:24:23 -08:00
# Base image: NVIDIA CUDA 11.8.0 runtime with cuDNN 8 on Ubuntu 20.04, forced to linux/amd64
# regardless of the build host's architecture.
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
2024-11-13 09:35:34 -08:00
# Register the deadsnakes PPA (presumably the source of the python3.11 packages installed
# later) and refresh the package index afterwards.
# software-properties-common provides the add-apt-repository command.
# DEBIAN_FRONTEND is set inline so package configuration can never block the build on a
# prompt, and so the setting does not leak into the runtime environment.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y
2024-11-13 12:59:52 -08:00
# Install requirements specific to PDFs: poppler-utils plus the font packages listed below.
# Everything runs in one layer so the package index is fetched once and the EULA preseed
# sits right next to the install that depends on it.
# The Microsoft core fonts EULA is accepted through debconf before the installer package is
# configured; the noninteractive frontend makes apt use that preseeded answer instead of
# trying to prompt.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y python3-apt \
    && echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        fonts-crosextra-caladea \
        fonts-crosextra-carlito \
        gsfonts \
        lcdf-typetools \
        msttcorefonts \
        poppler-utils \
        ttf-mscorefonts-installer
2024-11-13 09:35:34 -08:00
# Install Python 3.11 (interpreter, headers, distutils) together with the build and download
# tools used by later steps (git, curl, build-essential, ca-certificates, unzip).
# The package list must be one uninterrupted backslash-continued instruction; any stray line
# between the packages terminates the RUN early and breaks the build.
# DEBIAN_FRONTEND is set inline so no package can stall the build on a configuration prompt.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3.11 \
        python3.11-dev \
        python3.11-distutils \
        unzip
2024-11-13 09:46:08 -08:00
# Make Python 3.11 the interpreter behind both `python3` and `python`, then bootstrap pip.
# - `ln -sf` replaces an existing link atomically and also works when no link exists yet.
# - get-pip.py is downloaded to a file with `curl -f` so an HTTP or network failure aborts
#   the build at the download step instead of being hidden behind a pipe into the interpreter.
# - `--no-cache-dir` keeps pip's download cache out of the image layer.
# NOTE: removing the apt lists here only affects this layer; it does not shrink the earlier
# layers in which the lists were created.
RUN rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python /tmp/get-pip.py --no-cache-dir \
    && rm -f /tmp/get-pip.py \
    && python -m pip install --no-cache-dir -U pip
2024-11-13 12:59:52 -08:00
# Add venv support for Python 3.11.
# The package index has to be fetched again because the preceding step deleted
# /var/lib/apt/lists; it is removed again inside this same layer so the refreshed lists do
# not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11-venv \
    && rm -rf /var/lib/apt/lists/*
2024-11-13 09:35:34 -08:00
# Fetch the uv installer script at build time, mark it executable (--chmod=755), run it,
# and delete the script afterwards. Later steps invoke the result as /root/.local/bin/uv.
# NOTE(review): the URL is unversioned and no --checksum is given, so the uv version that
# gets installed depends on when the image is built — consider pinning; confirm with owners.
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
RUN /install.sh && rm /install.sh
2024-11-13 12:59:52 -08:00
# Install flashinfer early, before any project files are copied in, so this layer is not
# rebuilt when the project's own files change.
# Wheels are taken from the flashinfer index for CUDA 12.1 / torch 2.4.
# `--no-cache` matches the other uv installs in this file and keeps uv's download cache out
# of the layer.
# NOTE(review): the wheel index targets cu121 while the base image tag is CUDA 11.8 —
# presumably fine because of how torch is installed, but confirm.
RUN /root/.local/bin/uv pip install --system --no-cache flashinfer --index-url https://flashinfer.ai/whl/cu121/torch2.4/
2024-11-13 09:35:34 -08:00
# Unbuffered Python output for the container.
ENV PYTHONUNBUFFERED=1

# Work from /root and bring in only the files needed to resolve the project's dependencies:
# the project manifest and the package's version module.
WORKDIR /root
COPY pyproject.toml ./
COPY olmocr/version.py olmocr/
2024-11-13 09:35:34 -08:00
# Install the project in editable mode with its `inference` extra. Only pyproject.toml and
# olmocr/version.py are present at this point, so this layer is rebuilt only when one of
# those two files changes.
# The requirement is quoted so the shell cannot treat `[inference]` as a glob pattern.
# The package source itself is copied by the later `COPY olmocr olmocr` step; copying it
# here as well would only invalidate the cached layers that follow on every source edit.
RUN /root/.local/bin/uv pip install --system --no-cache -e ".[inference]"
2024-11-14 08:49:12 -08:00
2024-11-18 15:04:50 -08:00
# Install pypdf from a pinned upstream commit instead of a released version.
# TODO: remove this step once a pypdf release newer than "5.10" is available
# (NOTE(review): "5.10" is the version as originally written — possibly 5.1.0 was meant; confirm).
RUN /root/.local/bin/uv pip install --system --no-cache git+https://github.com/py-pdf/pypdf.git@c6e43374ab002d76811ec85333fdc2c82c268251
2024-11-14 08:49:12 -08:00
# Bring the package source into /root/olmocr.
WORKDIR /root
COPY olmocr/ olmocr/
2024-11-13 09:35:34 -08:00
2024-11-14 08:49:12 -08:00
# Build-time smoke checks: each module is invoked with --help, and a non-zero exit from
# either command fails the image build.
RUN python3 -m sglang.launch_server --help \
    && python3 -m olmocr.beakerpipeline --help