From e4535d29cac18cecf79c85987ad1c012dee62327 Mon Sep 17 00:00:00 2001 From: Trevor Bossert <37596773+tabossert@users.noreply.github.com> Date: Tue, 29 Aug 2023 18:01:44 -0700 Subject: [PATCH] Set user for container to same as api image. (#1239) This is security best practice, a user can override this with their own Dockerfile if required. The smoke test copies test_unstructured_ingest into the new home directory (/home/notebook-user) and runs it from there. --- CHANGELOG.md | 2 +- Dockerfile | 17 +++++++++++++---- Makefile | 4 ++-- scripts/docker-smoke-test.sh | 4 ++-- unstructured/__version__.py | 2 +- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c7fc459a..649e1c78f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.10.9-dev3 +## 0.10.9 ### Enhancements diff --git a/Dockerfile b/Dockerfile index 356bdeaab..3210d59ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,23 @@ # syntax=docker/dockerfile:experimental FROM quay.io/unstructured-io/base-images:rocky8.7-5 as base +# NOTE(crag): NB_USER ARG for mybinder.org compat: +# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html +ARG NB_USER=notebook-user +ARG NB_UID=1000 ARG PIP_VERSION # Set up environment -ENV HOME /home/ -WORKDIR ${HOME} -RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \ - && ssh-keyscan -t rsa github.com >> /home/.ssh/known_hosts +ENV HOME=/home/${NB_USER} ENV PYTHONPATH="${PYTHONPATH}:${HOME}" ENV PATH="/home/usr/.local/bin:${PATH}" +RUN groupadd --gid ${NB_UID} ${NB_USER} +RUN useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER} +WORKDIR ${HOME} +RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \ + && ssh-keyscan -t rsa github.com >> ${HOME}/.ssh/known_hosts + FROM base as deps # Copy and install Unstructured COPY requirements requirements @@ -55,6 +62,8 @@ RUN python3.10 -c "import nltk; nltk.download('punkt')" && \ FROM deps as code +USER ${NB_USER} + COPY example-docs example-docs COPY unstructured unstructured diff --git a/Makefile b/Makefile index 0acf54706..e020f07e3 100644 --- a/Makefile +++ b/Makefile @@ -404,8
+404,8 @@ docker-start-bash: .PHONY: docker-test docker-test: docker run --rm \ - -v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \ - -v ${CURRENT_DIR}/test_unstructured_ingest:/home/test_unstructured_ingest \ + -v ${CURRENT_DIR}/test_unstructured:/home/notebook-user/test_unstructured \ + -v ${CURRENT_DIR}/test_unstructured_ingest:/home/notebook-user/test_unstructured_ingest \ $(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \ $(DOCKER_IMAGE) \ bash -c "CI=$(CI) pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured" diff --git a/scripts/docker-smoke-test.sh b/scripts/docker-smoke-test.sh index 53256407e..220073a51 100755 --- a/scripts/docker-smoke-test.sh +++ b/scripts/docker-smoke-test.sh @@ -39,7 +39,7 @@ await_container # Run the tests -docker cp test_unstructured_ingest $CONTAINER_NAME:/home -docker exec "$CONTAINER_NAME" /bin/bash -c "/home/test_unstructured_ingest/test-ingest-wikipedia.sh" +docker cp test_unstructured_ingest $CONTAINER_NAME:/home/notebook-user +docker exec "$CONTAINER_NAME" /bin/bash -c "/home/notebook-user/test_unstructured_ingest/test-ingest-wikipedia.sh" result=$? exit $result diff --git a/unstructured/__version__.py b/unstructured/__version__.py index e65925396..781f70fed 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.9-dev3" # pragma: no cover +__version__ = "0.10.9" # pragma: no cover