mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-16 12:49:12 +00:00
build: pull from wolfi base image (#3213)
### Summary

Updates the `wolfi` image to pull from the upstream `wolfi-base` base image to avoid maintaining the base layers in both locations. Closes #3105 by pulling in the fix from upstream.

### Testing

`test_dockerfile` should continue to pass with the changes.
This commit is contained in:
parent
9552fbbfbf
commit
08383a27de
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
|||||||
|
## 0.14.7-dev0
|
||||||
|
|
||||||
|
### Enhancements
|
||||||
|
|
||||||
|
* **Pull from `wolfi-base` image.** The amd64 image now pulls from the `unstructured` `wolfi-base` image to avoid duplication of dependency setup steps.
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
## 0.14.6
|
## 0.14.6
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
@ -1,34 +1,17 @@
|
|||||||
FROM cgr.dev/chainguard/wolfi-base:latest
|
FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:6c00a236c648ffdaf196ccbc446f5c6cc9eb4e3ab9e437178abcfac710b2b373
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
COPY ./docker-packages/*.apk packages/
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./requirements requirements/
|
COPY ./requirements requirements/
|
||||||
COPY unstructured unstructured
|
COPY unstructured unstructured
|
||||||
COPY test_unstructured test_unstructured
|
COPY test_unstructured test_unstructured
|
||||||
COPY example-docs example-docs
|
COPY example-docs example-docs
|
||||||
|
|
||||||
RUN apk update && apk add py3.11-pip mesa-gl glib cmake && \
|
RUN chown -R notebook-user:notebook-user /app
|
||||||
apk add --allow-untrusted packages/pandoc-3.1.8-r0.apk && \
|
|
||||||
apk add --allow-untrusted packages/poppler-23.09.0-r0.apk && \
|
|
||||||
apk add --allow-untrusted packages/leptonica-1.83.0-r0.apk && \
|
|
||||||
apk add --allow-untrusted packages/tesseract-5.3.2-r0.apk && \
|
|
||||||
apk add --allow-untrusted packages/libreoffice-7.6.5-r0.apk && \
|
|
||||||
apk add bash && \
|
|
||||||
apk add libmagic && \
|
|
||||||
mv /share/tessdata/configs /usr/local/share/tessdata/ && \
|
|
||||||
mv /share/tessdata/tessconfigs /usr/local/share/tessdata/ && \
|
|
||||||
ln -s /usr/local/lib/libreoffice/program/soffice.bin /usr/local/bin/libreoffice && \
|
|
||||||
ln -s /usr/local/lib/libreoffice/program/soffice.bin /usr/local/bin/soffice && \
|
|
||||||
chmod +x /usr/local/lib/libreoffice/program/soffice.bin && \
|
|
||||||
chmod +x /usr/local/bin/libreoffice && \
|
|
||||||
chmod +x /usr/local/bin/soffice
|
|
||||||
|
|
||||||
RUN chown -R nonroot:nonroot /app
|
USER notebook-user
|
||||||
|
|
||||||
USER nonroot
|
|
||||||
|
|
||||||
RUN find requirements/ -type f -name "*.txt" -exec pip3.11 install --no-cache-dir --user -r '{}' ';'
|
RUN find requirements/ -type f -name "*.txt" -exec pip3.11 install --no-cache-dir --user -r '{}' ';'
|
||||||
RUN pip3.11 install unstructured.paddlepaddle
|
RUN pip3.11 install unstructured.paddlepaddle
|
||||||
@ -38,7 +21,7 @@ RUN python3.11 -c "import nltk; nltk.download('punkt')" && \
|
|||||||
python3.11 -c "from unstructured.partition.model_init import initialize; initialize()" && \
|
python3.11 -c "from unstructured.partition.model_init import initialize; initialize()" && \
|
||||||
python3.11 -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
|
python3.11 -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
|
||||||
|
|
||||||
ENV PATH="${PATH}:/home/nonroot/.local/bin"
|
ENV PATH="${PATH}:/home/notebook-user/.local/bin"
|
||||||
ENV TESSDATA_PREFIX=/usr/local/share/tessdata
|
ENV TESSDATA_PREFIX=/usr/local/share/tessdata
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -32,13 +31,8 @@ from unstructured.documents.elements import (
|
|||||||
from unstructured.partition.doc import partition_doc
|
from unstructured.partition.doc import partition_doc
|
||||||
from unstructured.partition.docx import partition_docx
|
from unstructured.partition.docx import partition_docx
|
||||||
|
|
||||||
is_in_docker = os.path.exists("/.dockerenv")
|
|
||||||
|
|
||||||
|
|
||||||
def test_partition_doc_matches_partition_docx(request: FixtureRequest):
|
def test_partition_doc_matches_partition_docx(request: FixtureRequest):
|
||||||
# NOTE(robinson) - was having issues with the tempfile not being found in the docker tests
|
|
||||||
if is_in_docker:
|
|
||||||
request.applymarker(pytest.mark.xfail)
|
|
||||||
doc_file_path = example_doc_path("simple.doc")
|
doc_file_path = example_doc_path("simple.doc")
|
||||||
docx_file_path = example_doc_path("simple.docx")
|
docx_file_path = example_doc_path("simple.docx")
|
||||||
|
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.14.6" # pragma: no cover
|
__version__ = "0.14.7-dev0" # pragma: no cover
|
||||||
|
Loading…
x
Reference in New Issue
Block a user