OCRmyPDF/.docker/Dockerfile.alpine
2025-04-21 11:00:22 -07:00

81 lines
2.1 KiB
Docker

# SPDX-FileCopyrightText: 2023 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
# Why alpine:3.21 -- Alpine 3.20.0 through 3.20.3 package tesseract built with
# --enable-opencl, a configuration nobody supports, and OCRmyPDF is not
# compatible with those releases. Alpine 3.21 contains the fix.
# Background:
#   https://gitlab.alpinelinux.org/alpine/aports/-/issues/16143
#   https://github.com/ocrmypdf/OCRmyPDF/issues/1395
FROM alpine:3.21 AS base

# Locale and timezone inherited by every stage derived from this one.
ENV LANG=C.UTF-8 \
    TZ=UTC

# Packages shared by the builder stage and the final stage.
RUN apk add --no-cache python3 zlib
# Build stage: assembles the project and its virtual environment under /app.
# Only the /app tree is meant to be copied out of this stage.
FROM base AS builder

# Alpine naming: the interpreter headers are "python3-dev", while Python
# libraries are packaged as "py3-<name>".
RUN apk add --no-cache \
      ca-certificates \
      curl \
      git \
      py3-pyarrow \
      python3-dev

WORKDIR /app

# Take the pinned uv/uvx binaries from the published uv image.
COPY --from=ghcr.io/astral-sh/uv:0.6.14 /uv /uvx /bin/

# Compile .pyc files at install time, and copy packages out of the uv cache
# instead of linking to it.
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# --system-site-packages lets the venv see apk-installed Python packages
# (py3-pyarrow above); pyarrow is excluded from the uv installs below.
RUN uv venv --system-site-packages .venv

# Install the project's dependencies using the lockfile and settings.
# The extras and the pyarrow exclusion match the project install further down,
# so this layer already holds every dependency and stays cached when only the
# source tree changes.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project --no-dev \
      --extra test --extra webservice --extra watcher \
      --no-install-package pyarrow

# Then, add the rest of the project source code and install it.
# Installing separately from its dependencies allows optimal layer caching.
COPY . /app

# Drop the VCS metadata once the project is installed so it is not carried
# along when /app is copied into a later stage.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen \
      --extra test --extra webservice --extra watcher --no-dev \
      --no-install-package pyarrow \
    && rm -rf /app/.git
# Final stage: runtime tools plus the /app tree produced by the builder stage.
FROM base

# OCR engine, language data, and the external programs installed alongside it.
# --no-cache already keeps the apk index out of the layer, so no manual cache
# cleanup is needed afterwards.
RUN apk add --no-cache \
      ghostscript \
      jbig2dec \
      jbig2enc \
      pngquant \
      tesseract-ocr \
      tesseract-ocr-data-chi_sim \
      tesseract-ocr-data-deu \
      tesseract-ocr-data-eng \
      tesseract-ocr-data-fra \
      tesseract-ocr-data-osd \
      tesseract-ocr-data-por \
      tesseract-ocr-data-spa \
      ttf-droid \
      unpaper

# Create the account that COPY --chown refers to below; no earlier instruction
# in this file defines it.
# NOTE(review): the image still runs as root, exactly as before. Switching to
# "USER app" may affect access to bind-mounted volumes -- confirm before
# enabling it.
RUN addgroup -S app && adduser -S -G app app

WORKDIR /app

COPY --from=builder --chown=app:app /app /app

# Remove any VCS metadata that came along with the copy and expose the helper
# scripts at the top of /app.
RUN rm -rf /app/.git && \
    ln -s /app/misc/webservice.py /app/webservice.py && \
    ln -s /app/misc/watcher.py /app/watcher.py

# Put the virtual environment's executables first on PATH.
ENV PATH="/app/.venv/bin:${PATH}"

ENTRYPOINT ["/app/.venv/bin/ocrmypdf"]