mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-27 23:24:27 +00:00
This PR fixes a `test_dockerfile` job [failure](https://github.com/Unstructured-IO/unstructured/actions/runs/9613636416/job/26517074221?pr=3234) that appeared in CI after the `base-images` repo update.
28 lines
1.1 KiB
Plaintext
28 lines
1.1 KiB
Plaintext
# Base image is pinned by digest so rebuilds are reproducible even if the
# `wolfi-base` tag is moved. Keyword casing (`AS`) matches `FROM`.
FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:7c3af225a39f730f4feee705df6cd8d1570739dc130456cf589ac53347da0f1d AS base
|
|
|
|
# Run the following steps as root: /app is re-owned to notebook-user further
# down, which needs root privileges.
# NOTE(review): presumably the base image defaults to a non-root user — confirm.
USER root
|
|
|
|
# All relative paths in the instructions below resolve against /app.
WORKDIR /app
|
|
|
|
# Copy the dependency manifests, the library, its test suite and the sample
# documents into /app. Ownership is assigned at copy time with --chown rather
# than by a recursive `chown -R` in a later RUN, which would store every
# copied file a second time in a new layer.
COPY --chown=notebook-user:notebook-user ./requirements requirements/
COPY --chown=notebook-user:notebook-user unstructured unstructured
COPY --chown=notebook-user:notebook-user test_unstructured test_unstructured
COPY --chown=notebook-user:notebook-user example-docs example-docs

# /app itself was created by WORKDIR, not by COPY, so it still needs to be
# handed to notebook-user explicitly (non-recursive: contents are already owned).
RUN chown notebook-user:notebook-user /app
|
|
|
|
# Drop privileges: every instruction from here on, and the container's default
# process, runs as notebook-user.
USER notebook-user
|
|
|
|
# Install every *.txt requirements file found under requirements/ (recursively),
# one pip invocation per file, into the current user's site-packages (--user).
# find's `-exec ... ;` form does not propagate the command's exit status, so a
# failing `pip install` would not fail the build; xargs exits non-zero when any
# invocation fails. `-r` skips running pip at all when no file matches.
RUN find requirements/ -type f -name "*.txt" -print0 \
    | xargs -0 -r -n 1 pip3.11 install --no-cache-dir --user -r
|
|
# Install the unstructured.paddlepaddle package. --no-cache-dir keeps pip's
# download cache out of the image layer, matching the requirements install.
# NOTE(review): the package is unpinned; consider pinning a version for
# reproducible builds.
RUN pip3.11 install --no-cache-dir unstructured.paddlepaddle
|
|
|
|
# Fetch runtime assets at build time so they are baked into the image:
#   1. the NLTK 'punkt' and 'averaged_perceptron_tagger' packages,
#   2. whatever unstructured.partition.model_init.initialize() sets up,
#   3. the 'microsoft/table-transformer-structure-recognition' table model.
# nltk.download() returns False on failure instead of raising, so
# raise_on_error=True is passed to make a failed download fail the build.
RUN python3.11 -c "import nltk; nltk.download('punkt', raise_on_error=True)" \
    && python3.11 -c "import nltk; nltk.download('averaged_perceptron_tagger', raise_on_error=True)" \
    && python3.11 -c "from unstructured.partition.model_init import initialize; initialize()" \
    && python3.11 -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
|
|
|
|
# Runtime environment:
#   PATH            - appends notebook-user's ~/.local/bin, where `pip --user`
#                     places console scripts.
#   TESSDATA_PREFIX - directory handed to Tesseract for its language data.
#                     NOTE(review): assumes the base image ships tessdata at
#                     this path — confirm.
ENV PATH="${PATH}:/home/notebook-user/.local/bin" \
    TESSDATA_PREFIX=/usr/local/share/tessdata
|
|
|
|
# Default to an interactive bash shell; exec form makes bash the container's
# main process. Override at `docker run` to execute something else.
CMD ["/bin/bash"]
|