basic docker image and test

This commit is contained in:
Chris Wilhelm 2025-03-13 13:05:09 -07:00
parent 9f38a8a602
commit 29b9054749
3 changed files with 57 additions and 0 deletions

View File

@ -0,0 +1,36 @@
# Base: NVIDIA CUDA 11.8 / cuDNN 8 runtime on Ubuntu 20.04, forced to amd64.
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04

# Build-time only (ARG, not ENV): keep debconf from prompting during apt
# installs without leaking the setting into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Make a RUN fail when any stage of a pipeline fails (e.g. `curl ... | python`),
# instead of only looking at the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Register the deadsnakes PPA, which the python3.11 packages below come from.
# The apt lists are removed in the same layer that created them; every later
# apt step runs its own `apt-get update`.
RUN apt-get update -y \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && rm -rf /var/lib/apt/lists/*

# Install requirements specific to pdfs
RUN apt-get update \
    && apt-get -y install python3-apt \
    && rm -rf /var/lib/apt/lists/*

# Pre-accept the Microsoft core fonts EULA so the installer does not block,
# then install the PDF tooling and fonts.
RUN echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        poppler-utils \
        ttf-mscorefonts-installer \
        msttcorefonts \
        fonts-crosextra-caladea \
        fonts-crosextra-carlito \
        gsfonts \
        lcdf-typetools \
    && rm -rf /var/lib/apt/lists/*

# Python 3.11 toolchain plus the tools needed to fetch and build the project.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        git \
        python3.11 \
        python3.11-dev \
        python3.11-distutils \
        ca-certificates \
        build-essential \
        curl \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Point `python3` and `python` at 3.11, then bootstrap pip for that interpreter.
# `curl -f` turns HTTP errors into a non-zero exit, which pipefail propagates.
RUN unlink /usr/bin/python3 \
    && ln -s /usr/bin/python3.11 /usr/bin/python3 \
    && ln -s /usr/bin/python3 /usr/bin/python \
    && curl -fsS https://bootstrap.pypa.io/get-pip.py | python \
    && pip3 install --no-cache-dir -U pip

RUN apt-get update \
    && apt-get -y install python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Install uv via its upstream installer script.
# NOTE(review): the script is fetched unpinned and unverified; consider pinning
# a uv release and adding `--checksum=sha256:...` to this ADD.
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
RUN /install.sh && rm /install.sh

WORKDIR /root
ENV PYTHONUNBUFFERED=1

View File

@ -0,0 +1,14 @@
# Clone olmocr at the requested revision, install it together with its
# inference dependencies, run the pipeline over the "gnarly" sample PDFs and
# evaluate the output.
#
# Environment:
#   GIT_REVISION  commit, branch or tag to check out; when empty or unset the
#                 freshly cloned default branch is used as-is.
#
# -e: abort on the first failing command; -x: echo each command as it runs.
# Each step is its own command (rather than one long `&&` list) so that -e
# applies to every step and a dropped line continuation cannot silently cut
# the sequence short.
set -ex

# Cloning over SSH requires credentials for github.com to be available.
git clone git@github.com:allenai/olmocr.git olmocr
cd olmocr

if [ -n "${GIT_REVISION:-}" ]; then
    git checkout "$GIT_REVISION"
fi

# Install the project itself, then the pinned inference stack.
/root/.local/bin/uv pip install --system --no-cache .
/root/.local/bin/uv pip install --system --no-cache sgl-kernel==0.0.3.post1 --force-reinstall --no-deps
/root/.local/bin/uv pip install --system --no-cache "sglang[all]==0.4.2" --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/
/root/.local/bin/uv pip install --system --no-cache pytest

# Convert the sample PDFs (glob intentionally unquoted so the shell expands it),
# then evaluate the results.
python -m olmocr.pipeline ./localworkspace --pdfs tests/gnarly_pdfs/*.pdf
python tests/gnarly_pdfs/evaluate_gnarly_pdfs.py

View File

@ -0,0 +1,7 @@
import unittest
class TestGnarlyPdfs(unittest.TestCase):
    """Placeholder test case for the gnarly-PDF evaluation."""

    def test_nothing_in_particular(self) -> None:
        """Noop pending jake's impl

        Always passes; it exists only so the evaluation script has a test
        to run until a real implementation replaces it.
        """
        self.assertTrue(True)