Update Dockerfile to use multistage build and cache layers (#785)

* Update Dockerfile to use multistage build and cache layers

* Fix Dockerfile
This commit is contained in:
Roman Isecke 2023-06-21 13:12:45 -04:00 committed by GitHub
parent e08936b6fb
commit 61ea00a06f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,5 +1,5 @@
# syntax=docker/dockerfile:experimental
FROM quay.io/unstructured-io/base-images:rocky8.7-2
FROM quay.io/unstructured-io/base-images:rocky8.7-2 as base
ARG PIP_VERSION
@ -11,6 +11,7 @@ RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
ENV PATH="/home/usr/.local/bin:${PATH}"
FROM base as deps
# Copy and install Unstructured
COPY requirements requirements
@ -32,11 +33,14 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
dnf -y groupremove "Development Tools" && \
dnf clean all
RUN python3.8 -c "import nltk; nltk.download('punkt')" && \
python3.8 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
FROM deps as code
COPY example-docs example-docs
COPY unstructured unstructured
RUN python3.8 -c "import nltk; nltk.download('punkt')" && \
python3.8 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
python3.8 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
RUN python3.8 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
CMD ["/bin/bash"]