mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
build: remove test and dev deps from docker image (#3969)
Removed the dependencies contained in `test.txt`, `dev.txt`, and `constraints.txt` from the things that get installed in the docker image. In order to keep testing the image (running the tests), I added a step to the `docker-test` make target to install `test.txt` and `dev.txt`. Thus we presumably get a smaller image (probably not much smaller), reduce the dependency chain of our images, and have less exposure to vulnerabilities while still testing as robustly as before. Incidentally, I removed the `Dockerfile` for our ubuntu image, since it made reference to non-existent make targets, which tells me it's stale and wasn't being used. ### Review: - Reviewer should ensure the dev and test dependencies are not being installed in the docker image. One way to check is to check the logs in CI, and note, e.g. that [this](https://github.com/Unstructured-IO/unstructured/actions/runs/14112971425/job/39536304012#step:3:1700) is the first reference to `pytest` in the docker build and test logs, after the image build is completed. - Reviewer should ensure the docker image is still being tested in CI and is passing.
This commit is contained in:
parent
3f07840b80
commit
9a239fa18b
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
||||
## 0.17.5
|
||||
|
||||
### Enhancements
|
||||
- **Remove test and dev dependencies from docker image.** This reduces the docker image size slightly and reduces potential security vulnerabilities.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
- **Removed out-of-date ubuntu Dockerfile.** The Dockerfile was out of date and non-functional.
|
||||
|
||||
## 0.17.4
|
||||
|
||||
### Enhancements
|
||||
|
@ -25,7 +25,7 @@ ENV TESSDATA_PREFIX=/usr/local/share/tessdata
|
||||
ENV NLTK_DATA=/home/notebook-user/nltk_data
|
||||
|
||||
# Install Python dependencies and download required NLTK packages
|
||||
RUN find requirements/ -type f -name "*.txt" -exec $PIP install --no-cache-dir --user -r '{}' ';' && \
|
||||
RUN find requirements/ -type f -name "*.txt" ! -name "test.txt" ! -name "dev.txt" ! -name "constraints.txt" -exec $PIP install --no-cache-dir --user -r '{}' ';' && \
|
||||
mkdir -p ${NLTK_DATA} && \
|
||||
$PYTHON -m nltk.downloader -d ${NLTK_DATA} punkt_tab averaged_perceptron_tagger_eng && \
|
||||
$PYTHON -c "from unstructured.partition.model_init import initialize; initialize()" && \
|
||||
|
3
Makefile
3
Makefile
@ -310,7 +310,8 @@ docker-test:
|
||||
-v ${CURRENT_DIR}/test_unstructured_ingest:/home/notebook-user/test_unstructured_ingest \
|
||||
$(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \
|
||||
$(DOCKER_IMAGE) \
|
||||
bash -c "CI=$(CI) \
|
||||
bash -c "pip install -r requirements/test.txt -r requirements/dev.txt && \
|
||||
CI=$(CI) \
|
||||
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) \
|
||||
python3 -m pytest $(if $(TEST_FILE),$(TEST_FILE),test_unstructured)"
|
||||
|
||||
|
@ -22,7 +22,7 @@ COPY requirements requirements
|
||||
|
||||
RUN python3.10 -m pip install pip==${PIP_VERSION} && \
|
||||
dnf -y groupinstall "Development Tools" && \
|
||||
find requirements/ -type f -name "*.txt" -exec python3 -m pip install --no-cache -r '{}' ';' && \
|
||||
find requirements/ -type f -name "*.txt" ! -name "test.txt" ! -name "dev.txt" ! -name "constraints.txt" -exec python3 -m pip install --no-cache -r '{}' ';' && \
|
||||
dnf -y groupremove "Development Tools" && \
|
||||
dnf clean all
|
||||
|
||||
|
@ -1,26 +0,0 @@
|
||||
# Dockerfile that approximates the CI image
|
||||
#
|
||||
# Mainly useful for updating test-ingest fixtures
|
||||
|
||||
FROM ubuntu:22.04
|
||||
|
||||
COPY scripts/setup_ubuntu.sh scripts/setup_ubuntu.sh
|
||||
|
||||
RUN bash scripts/setup_ubuntu.sh root
|
||||
|
||||
COPY requirements/ requirements/
|
||||
COPY Makefile Makefile
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
RUN source ~/.bashrc && pyenv virtualenv 3.10 unstructured && \
|
||||
source ~/.pyenv/versions/unstructured/bin/activate && \
|
||||
make install-ci && \
|
||||
make install-ingest-s3 && \
|
||||
make install-ingest-azure && \
|
||||
make install-ingest-github && \
|
||||
make install-ingest-gitlab && \
|
||||
make install-ingest-wikipedia && \
|
||||
make install-ingest-discord && \
|
||||
make install install-ingest-slack && \
|
||||
make install-ingest-confluence
|
@ -1 +1 @@
|
||||
__version__ = "0.17.4" # pragma: no cover
|
||||
__version__ = "0.17.5" # pragma: no cover
|
||||
|
Loading…
x
Reference in New Issue
Block a user