From 00c823cdff067c329d687875a32f368db74dde6a Mon Sep 17 00:00:00 2001 From: oryx1729 <78848855+oryx1729@users.noreply.github.com> Date: Tue, 4 Jan 2022 14:33:13 +0100 Subject: [PATCH] Add GitHub Action for Docker Build for GPU (#1916) --- .github/workflows/docker_build.yml | 55 +++++++++++++++++++++++++ Dockerfile-GPU | 65 ++++++++++++++++++------------ 2 files changed, 95 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/docker_build.yml diff --git a/.github/workflows/docker_build.yml b/.github/workflows/docker_build.yml new file mode 100644 index 000000000..8f337e7d1 --- /dev/null +++ b/.github/workflows/docker_build.yml @@ -0,0 +1,55 @@ +name: docker-build + +on: + push: + branches: + - master + - docker-build + +jobs: + build: + runs-on: ubuntu-20.04 + + strategy: + fail-fast: false + matrix: + include: + - dockerfile: Dockerfile + repository: deepset/haystack-cpu + + - dockerfile: Dockerfile-GPU + repository: deepset/haystack-gpu + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v3 + with: + images: | + ${{ matrix.repository }} + flavor: | + latest=true + tags: | + type=ref,event=tag + type=sha + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Build and push docker image + uses: docker/build-push-action@v2 + with: + file: ${{ matrix.dockerfile }} + tags: ${{ steps.meta.outputs.tags }} + push: true + cache-from: type=registry,ref=${{ matrix.repository }}:latest + cache-to: type=inline diff --git a/Dockerfile-GPU b/Dockerfile-GPU index fa3c78323..6f4bafbc4 100644 --- a/Dockerfile-GPU +++ b/Dockerfile-GPU @@ -2,51 +2,66 @@ FROM nvidia/cuda:11.0-runtime-ubuntu20.04 WORKDIR /home/user -RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa -RUN apt-get update && apt-get install -y python3.7 python3.7-dev python3.7-distutils python3-pip curl wget git pkg-config cmake swig - ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 -# Set default Python version -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 -RUN update-alternatives --set python3 /usr/bin/python3.7 +# create folder for /file-upload API endpoint with write permissions, this might be adjusted depending on FILE_UPLOAD_PATH +RUN mkdir -p /home/user/file-upload && chmod 777 /home/user/file-upload # Install PDF converter -RUN wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz && \ -tar -xvf xpdf-tools-linux-4.03.tar.gz && cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin +RUN curl -s https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz | tar -xvzf - -C /usr/local/bin --strip-components=2 xpdf-tools-linux-4.03/bin64/pdftotext -RUN apt-get install libpoppler-cpp-dev pkg-config -y --fix-missing -RUN apt-get install tesseract-ocr libtesseract-dev poppler-utils -y +# Install software dependencies +RUN apt-get update && apt-get install -y software-properties-common && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && apt-get install -y \ + cmake \ + curl \ + git \ + libpoppler-cpp-dev \ + libtesseract-dev \ + pkg-config \ + poppler-utils \ + python3-pip \ + python3.7 \ + python3.7-dev \ + python3.7-distutils \ + swig \ + tesseract-ocr \ + wget -# copy code -COPY haystack /home/user/haystack +# Set default Python version +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 && \ + update-alternatives --set python3 /usr/bin/python3.7 -# install as a package +# Copy package setup files COPY setup.py requirements.txt README.md /home/user/ -RUN pip3 install numpy scipy Cython -# Install PyTorch for CUDA 11 -RUN pip3 install torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html -# Install faiss separately as building latest versions can cause trouble with swig -RUN pip3 install faiss-cpu==1.6.3 - -RUN pip3 install -r requirements.txt -RUN pip3 install -e . +RUN echo "Install required packages" && \ + # Install PyTorch for CUDA 11 + pip3 install torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html && \ + # Install faiss separately as building latest versions can cause trouble with swig + pip3 install faiss-cpu==1.6.3 && \ + # Install extra packages + pip3 install Cython && \ + # Install from requirements.txt + pip3 install -r requirements.txt # download punkt tokenizer to be included in image RUN python3 -c "import nltk;nltk.download('punkt', download_dir='/usr/nltk_data')" -# create folder for /file-upload API endpoint with write permissions, this might be adjusted depending on FILE_UPLOAD_PATH -RUN mkdir -p /home/user/file-upload -RUN chmod 777 /home/user/file-upload - # copy saved models COPY README.md models* /home/user/models/ # Copy REST API code COPY rest_api /home/user/rest_api +# copy code +COPY haystack /home/user/haystack + +# Install package +RUN pip3 install -e . + # optional : copy sqlite db if needed for testing #COPY qa.db /home/user/