Restored unified-diff layout for the 0.14.7-dev0 change set.

Summary of what the hunks below do:
  * CHANGELOG.md: opens a 0.14.7-dev0 section announcing that the amd64
    image now pulls from the `unstructured` `wolfi-base` image.
  * Dockerfile-amd64: replaces the cgr.dev wolfi-base:latest base with the
    digest-pinned quay.io/unstructured-io/base-images:wolfi-base image,
    removes the local .apk COPY/install step, and renames the runtime user
    from nonroot to notebook-user (chown, USER and PATH).
  * test_unstructured/partition/test_doc.py: removes the is_in_docker
    xfail guard and the `os` import.
  * unstructured/__version__.py: bumps the version to 0.14.7-dev0.

NOTE(review): leading indentation (shell continuation lines, Python bodies,
the gap before `# pragma`) and the placement of blank context lines were
rebuilt from the hunk line counts, not copied from the target tree.
Run `git apply --check` against the 0.14.6 tree to confirm before applying.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 60e1a9b0d..cf914c65f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 0.14.7-dev0
+
+### Enhancements
+
+* **Pull from `wolfi-base` image.** The amd64 image now pulls from the `unstructured` `wolfi-base` image to avoid duplication of dependency setup steps.
+
+### Features
+
+### Fixes
+
 ## 0.14.6
 
 ### Enhancements
diff --git a/Dockerfile-amd64 b/Dockerfile-amd64
index f2fc3c675..2caf5cf90 100644
--- a/Dockerfile-amd64
+++ b/Dockerfile-amd64
@@ -1,34 +1,17 @@
-FROM cgr.dev/chainguard/wolfi-base:latest
-
-WORKDIR /app
+FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:6c00a236c648ffdaf196ccbc446f5c6cc9eb4e3ab9e437178abcfac710b2b373
 
 USER root
 
-COPY ./docker-packages/*.apk packages/
+WORKDIR /app
+
 COPY ./requirements requirements/
 COPY unstructured unstructured
 COPY test_unstructured test_unstructured
 COPY example-docs example-docs
 
-RUN apk update && apk add py3.11-pip mesa-gl glib cmake && \
-  apk add --allow-untrusted packages/pandoc-3.1.8-r0.apk && \
-  apk add --allow-untrusted packages/poppler-23.09.0-r0.apk && \
-  apk add --allow-untrusted packages/leptonica-1.83.0-r0.apk && \
-  apk add --allow-untrusted packages/tesseract-5.3.2-r0.apk && \
-  apk add --allow-untrusted packages/libreoffice-7.6.5-r0.apk && \
-  apk add bash && \
-  apk add libmagic && \
-  mv /share/tessdata/configs /usr/local/share/tessdata/ && \
-  mv /share/tessdata/tessconfigs /usr/local/share/tessdata/ && \
-  ln -s /usr/local/lib/libreoffice/program/soffice.bin /usr/local/bin/libreoffice && \
-  ln -s /usr/local/lib/libreoffice/program/soffice.bin /usr/local/bin/soffice && \
-  chmod +x /usr/local/lib/libreoffice/program/soffice.bin && \
-  chmod +x /usr/local/bin/libreoffice && \
-  chmod +x /usr/local/bin/soffice
+RUN chown -R notebook-user:notebook-user /app
 
-RUN chown -R nonroot:nonroot /app
-
-USER nonroot
+USER notebook-user
 
 RUN find requirements/ -type f -name "*.txt" -exec pip3.11 install --no-cache-dir --user -r '{}' ';'
 RUN pip3.11 install unstructured.paddlepaddle
@@ -38,7 +21,7 @@ RUN python3.11 -c "import nltk; nltk.download('punkt')" && \
   python3.11 -c "from unstructured.partition.model_init import initialize; initialize()" && \
   python3.11 -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
 
-ENV PATH="${PATH}:/home/nonroot/.local/bin"
+ENV PATH="${PATH}:/home/notebook-user/.local/bin"
 ENV TESSDATA_PREFIX=/usr/local/share/tessdata
 
 CMD ["/bin/bash"]
diff --git a/test_unstructured/partition/test_doc.py b/test_unstructured/partition/test_doc.py
index 9531de8f5..165fdcadc 100644
--- a/test_unstructured/partition/test_doc.py
+++ b/test_unstructured/partition/test_doc.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import os
 import pathlib
 import tempfile
 from typing import Any
@@ -32,13 +31,8 @@ from unstructured.documents.elements import (
 from unstructured.partition.doc import partition_doc
 from unstructured.partition.docx import partition_docx
 
-is_in_docker = os.path.exists("/.dockerenv")
-
 
 def test_partition_doc_matches_partition_docx(request: FixtureRequest):
-    # NOTE(robinson) - was having issues with the tempfile not being found in the docker tests
-    if is_in_docker:
-        request.applymarker(pytest.mark.xfail)
     doc_file_path = example_doc_path("simple.doc")
     docx_file_path = example_doc_path("simple.docx")
 
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 283ba1c53..95b68c482 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.14.6"  # pragma: no cover
+__version__ = "0.14.7-dev0"  # pragma: no cover