#
# DEPRECATION NOTICE
#
# This Dockerfile and the related image deepset/haystack-gpu:minimal
# have been deprecated in 1.9.0 in favor of:
# https://github.com/deepset-ai/haystack/tree/main/docker
#
FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04

WORKDIR /home/user

# Locale settings plus a marker variable identifying this image flavour.
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    HAYSTACK_DOCKER_CONTAINER="HAYSTACK_MINIMAL_GPU_CONTAINER"

# Run all following RUN steps under bash with pipefail, so that a failure on
# the left-hand side of a pipe (e.g. the curl download below) fails the build
# instead of being masked by the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install software dependencies. The apt package lists are removed in the same
# layer that created them so they do not end up in the image.
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        poppler-utils \
        python3-pip \
        python3.8 \
        python3.8-distutils \
        swig \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Install PDF converter: extract only the 64-bit pdftotext binary from the
# xpdf tools archive into /usr/local/bin. `-f` makes curl exit non-zero on an
# HTTP error, `-S` keeps the error message visible despite `-s`.
RUN curl -fsS https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz \
    | tar -xvzf - -C /usr/local/bin --strip-components=2 xpdf-tools-linux-4.04/bin64/pdftotext

# Copy Haystack source code
COPY pyproject.toml VERSION.txt LICENSE README.md /home/user/
COPY haystack /home/user/haystack/

# Install all the dependencies, including ocr component.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir --upgrade pip requests six idna certifi setuptools
# The extras spec is quoted so the shell can never treat `.[ocr]` as a glob.
RUN pip3 install --no-cache-dir '.[ocr]'
RUN pip3 install --no-cache-dir torch==1.10.2+cu113 -f https://download.pytorch.org/whl/torch_stable.html

# Remove the copied sources from the final filesystem once installation is
# complete. Because this runs in a later layer than the COPY steps, it hides
# the files but does not reduce the image size.
RUN rm -rf /home/user/*